6728865: Provide a better heuristics for Collections.disjoint method
Reviewed-by: alanb, dholmes, chegar, forax
--- a/jdk/src/share/classes/java/util/Collections.java Thu Jan 13 16:33:07 2011 +0000
+++ b/jdk/src/share/classes/java/util/Collections.java Thu Jan 13 20:32:12 2011 -0800
@@ -3713,45 +3713,91 @@
}
/**
- * Returns <tt>true</tt> if the two specified collections have no
+ * Returns {@code true} if the two specified collections have no
* elements in common.
*
* <p>Care must be exercised if this method is used on collections that
- * do not comply with the general contract for <tt>Collection</tt>.
+ * do not comply with the general contract for {@code Collection}.
* Implementations may elect to iterate over either collection and test
* for containment in the other collection (or to perform any equivalent
* computation). If either collection uses a nonstandard equality test
- * (as does a {@link SortedSet} whose ordering is not <i>compatible with
- * equals</i>, or the key set of an {@link IdentityHashMap}), both
+ * (as does a {@link SortedSet} whose ordering is not <em>compatible with
+ * equals</em>, or the key set of an {@link IdentityHashMap}), both
* collections must use the same nonstandard equality test, or the
* result of this method is undefined.
*
+ * <p>Care must also be exercised when using collections that have
+ * restrictions on the elements that they may contain. Collection
+ * implementations are allowed to throw exceptions for any operation
+ * involving elements they deem ineligible. For absolute safety the
+ * specified collections should contain only elements which are
+ * eligible elements for both collections.
+ *
* <p>Note that it is permissible to pass the same collection in both
- * parameters, in which case the method will return true if and only if
- * the collection is empty.
+ * parameters, in which case the method will return {@code true} if and
+ * only if the collection is empty.
*
* @param c1 a collection
* @param c2 a collection
- * @throws NullPointerException if either collection is null
+ * @return {@code true} if the two specified collections have no
+ * elements in common.
+ * @throws NullPointerException if either collection is {@code null}.
+ * @throws NullPointerException if one collection contains a {@code null}
+ * element and {@code null} is not an eligible element for the other collection.
+ * (optional)
+ * @throws ClassCastException if one collection contains an element that is
+ * of a type which is ineligible for the other collection. (optional)
* @since 1.5
*/
public static boolean disjoint(Collection<?> c1, Collection<?> c2) {
- /*
- * We're going to iterate through c1 and test for inclusion in c2.
- * If c1 is a Set and c2 isn't, swap the collections. Otherwise,
- * place the shorter collection in c1. Hopefully this heuristic
- * will minimize the cost of the operation.
- */
- if ((c1 instanceof Set) && !(c2 instanceof Set) ||
- (c1.size() > c2.size())) {
- Collection<?> tmp = c1;
- c1 = c2;
- c2 = tmp;
+ // The collection to be used for contains(). Preference is given to
+ // the collection who's contains() has lower O() complexity.
+ Collection<?> contains = c2;
+ // The collection to be iterated. If the collections' contains() impl
+ // are of different O() complexity, the collection with slower
+ // contains() will be used for iteration. For collections who's
+ // contains() are of the same complexity then best performance is
+ // achieved by iterating the smaller collection.
+ Collection<?> iterate = c1;
+
+ // Performance optimization cases. The heuristics:
+ // 1. Generally iterate over c1.
+ // 2. If c1 is a Set then iterate over c2.
+ // 3. If either collection is empty then result is always true.
+ // 4. Iterate over the smaller Collection.
+ if (c1 instanceof Set) {
+ // Use c1 for contains as a Set's contains() is expected to perform
+ // better than O(N/2)
+ iterate = c2;
+ contains = c1;
+ } else if (!(c2 instanceof Set)) {
+ // Both are mere Collections. Iterate over smaller collection.
+ // Example: If c1 contains 3 elements and c2 contains 50 elements and
+ // assuming contains() requires ceiling(N/2) comparisons then
+ // checking for all c1 elements in c2 would require 75 comparisons
+ // (3 * ceiling(50/2)) vs. checking all c2 elements in c1 requiring
+ // 100 comparisons (50 * ceiling(3/2)).
+ int c1size = c1.size();
+ int c2size = c2.size();
+ if (c1size == 0 || c2size == 0) {
+ // At least one collection is empty. Nothing will match.
+ return true;
+ }
+
+ if (c1size > c2size) {
+ iterate = c2;
+ contains = c1;
+ }
}
- for (Object e : c1)
- if (c2.contains(e))
+ for (Object e : iterate) {
+ if (contains.contains(e)) {
+ // Found a common element. Collections are not disjoint.
return false;
+ }
+ }
+
+ // No common elements were found.
return true;
}