# HG changeset patch
# User psandoz
# Date 1378280065 -7200
# Node ID dda89341ee2d3c52c38b12e92cd4198c76238e43
# Parent 39ccb0972a2fb712612b7fe5adc7775603dfaf99
8023463: Improvements to HashMap/LinkedHashMap use of bins/buckets and trees (red/black)
8012913: LinkedHashMap key/value/entry spliterators should report ORDERED
Reviewed-by: mduigou, forax, bchristi, alanb
Contributed-by: Doug Lea, Paul Sandoz
diff -r 39ccb0972a2f -r dda89341ee2d jdk/src/share/classes/java/util/HashMap.java
--- a/jdk/src/share/classes/java/util/HashMap.java Mon Aug 12 12:22:10 2013 +0200
+++ b/jdk/src/share/classes/java/util/HashMap.java Wed Sep 04 09:34:25 2013 +0200
@@ -25,13 +25,14 @@
package java.util;
-import java.io.*;
+import java.io.IOException;
+import java.io.InvalidObjectException;
+import java.io.Serializable;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
-import java.util.concurrent.ThreadLocalRandom;
import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
import java.util.function.Consumer;
-import java.util.function.BiFunction;
import java.util.function.Function;
/**
@@ -63,20 +64,25 @@
* structures are rebuilt) so that the hash table has approximately twice the
* number of buckets.
*
- * <p>As a general rule, the default load factor (.75) offers a good tradeoff
- * between time and space costs. Higher values decrease the space overhead
- * but increase the lookup cost (reflected in most of the operations of the
- * <tt>HashMap</tt> class, including <tt>get</tt> and <tt>put</tt>). The
- * expected number of entries in the map and its load factor should be taken
- * into account when setting its initial capacity, so as to minimize the
- * number of rehash operations. If the initial capacity is greater
- * than the maximum number of entries divided by the load factor, no
- * rehash operations will ever occur.
+ * <p>As a general rule, the default load factor (.75) offers a good
+ * tradeoff between time and space costs. Higher values decrease the
+ * space overhead but increase the lookup cost (reflected in most of
+ * the operations of the <tt>HashMap</tt> class, including
+ * <tt>get</tt> and <tt>put</tt>). The expected number of entries in
+ * the map and its load factor should be taken into account when
+ * setting its initial capacity, so as to minimize the number of
+ * rehash operations. If the initial capacity is greater than the
+ * maximum number of entries divided by the load factor, no rehash
+ * operations will ever occur.
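A quick worked example of the sizing guidance above (an illustrative sketch, not part of the changeset; the class name is made up): for 100 expected mappings at the default load factor of 0.75, any initial capacity of at least ceil(100 / 0.75) = 134 means the table never needs to rehash while those mappings are inserted.

    import java.util.HashMap;
    import java.util.Map;

    public class CapacitySizingExample {
        public static void main(String[] args) {
            int expectedEntries = 100;
            float loadFactor = 0.75f;

            // Request at least expectedEntries / loadFactor buckets so the
            // resize threshold is never crossed while inserting the expected entries.
            int initialCapacity = (int) Math.ceil(expectedEntries / loadFactor); // 134

            Map<String, Integer> map = new HashMap<>(initialCapacity, loadFactor);
            for (int i = 0; i < expectedEntries; i++) {
                map.put("key-" + i, i);
            }
            System.out.println("size = " + map.size()); // 100, with no intermediate resize
        }
    }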
*
- * <p>If many mappings are to be stored in a <tt>HashMap</tt> instance,
- * creating it with a sufficiently large capacity will allow the mappings to
- * be stored more efficiently than letting it perform automatic rehashing as
- * needed to grow the table.
+ * <p>If many mappings are to be stored in a <tt>HashMap</tt>
+ * instance, creating it with a sufficiently large capacity will allow
+ * the mappings to be stored more efficiently than letting it perform
+ * automatic rehashing as needed to grow the table. Note that using
+ * many keys with the same {@code hashCode()} is a sure way to slow
+ * down performance of any hash table. To ameliorate impact, when keys
+ * are {@link Comparable}, this class may use comparison order among
+ * keys to help break ties.
*
* <p><strong>Note that this implementation is not synchronized.</strong>
* If multiple threads access a hash map concurrently, and at least one of
@@ -128,11 +134,100 @@
* @see Hashtable
* @since 1.2
*/
-public class HashMap<K,V>
- extends AbstractMap<K,V>
- implements Map<K,V>, Cloneable, Serializable
-{
+public class HashMap<K,V> extends AbstractMap<K,V>
+ implements Map<K,V>, Cloneable, Serializable {
+
+ private static final long serialVersionUID = 362498820763181265L;
+
+ /*
+ * Implementation notes.
+ *
+ * This map usually acts as a binned (bucketed) hash table, but
+ * when bins get too large, they are transformed into bins of
+ * TreeNodes, each structured similarly to those in
+ * java.util.TreeMap. Most methods try to use normal bins, but
+ * relay to TreeNode methods when applicable (simply by checking
+ * instanceof a node). Bins of TreeNodes may be traversed and
+ * used like any others, but additionally support faster lookup
+ * when overpopulated. However, since the vast majority of bins in
+ * normal use are not overpopulated, checking for existence of
+ * tree bins may be delayed in the course of table methods.
+ *
+ * Tree bins (i.e., bins whose elements are all TreeNodes) are
+ * ordered primarily by hashCode, but in the case of ties, if two
+ * elements are of the same "class C implements Comparable<C>"
+ * type, then their compareTo method is used for ordering. (We
+ * conservatively check generic types via reflection to validate
+ * this -- see method comparableClassFor). The added complexity
+ * of tree bins is worthwhile in providing worst-case O(log n)
+ * operations when keys either have distinct hashes or are
+ * orderable. Thus, performance degrades gracefully under
+ * accidental or malicious usages in which hashCode() methods
+ * return values that are poorly distributed, as well as those in
+ * which many keys share a hashCode, so long as they are also
+ * Comparable. (If neither of these apply, we may waste about a
+ * factor of two in time and space compared to taking no
+ * precautions. But the only known cases stem from poor user
+ * programming practices that are already so slow that this makes
+ * little difference.)
+ *
+ * Because TreeNodes are about twice the size of regular nodes, we
+ * use them only when bins contain enough nodes to warrant use
+ * (see TREEIFY_THRESHOLD). And when they become too small (due to
+ * removal or resizing) they are converted back to plain bins. In
+ * usages with well-distributed user hashCodes, tree bins are
+ * rarely used. Ideally, under random hashCodes, the frequency of
+ * nodes in bins follows a Poisson distribution
+ * (http://en.wikipedia.org/wiki/Poisson_distribution) with a
+ * parameter of about 0.5 on average for the default resizing
+ * threshold of 0.75, although with a large variance because of
+ * resizing granularity. Ignoring variance, the expected
+ * occurrences of list size k are (exp(-0.5) * pow(0.5, k) /
+ * factorial(k)). The first values are:
+ *
+ * 0: 0.60653066
+ * 1: 0.30326533
+ * 2: 0.07581633
+ * 3: 0.01263606
+ * 4: 0.00157952
+ * 5: 0.00015795
+ * 6: 0.00001316
+ * 7: 0.00000094
+ * 8: 0.00000006
+ * more: less than 1 in ten million
+ *
+ * The root of a tree bin is normally its first node. However,
+ * sometimes (currently only upon Iterator.remove), the root might
+ * be elsewhere, but can be recovered following parent links
+ * (method TreeNode.root()).
+ *
+ * All applicable internal methods accept a hash code as an
+ * argument (as normally supplied from a public method), allowing
+ * them to call each other without recomputing user hashCodes.
+ * Most internal methods also accept a "tab" argument, that is
+ * normally the current table, but may be a new or old one when
+ * resizing or converting.
+ *
+ * When bin lists are treeified, split, or untreeified, we keep
+ * them in the same relative access/traversal order (i.e., field
+ * Node.next) to better preserve locality, and to slightly
+ * simplify handling of splits and traversals that invoke
+ * iterator.remove. When using comparators on insertion, to keep a
+ * total ordering (or as close as is required here) across
+ * rebalancings, we compare classes and identityHashCodes as
+ * tie-breakers.
+ *
+ * The use and transitions among plain vs tree modes is
+ * complicated by the existence of subclass LinkedHashMap. See
+ * below for hook methods defined to be invoked upon insertion,
+ * removal and access that allow LinkedHashMap internals to
+ * otherwise remain independent of these mechanics. (This also
+ * requires that a map instance be passed to some utility methods
+ * that may create new nodes.)
+ *
+ * The concurrent-programming-like SSA-based coding style helps
+ * avoid aliasing errors amid all of the twisty pointer operations.
+ */
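As a sanity check on the bin-size figures quoted in the notes above, a minimal standalone sketch (not part of the patch) that recomputes exp(-0.5) * pow(0.5, k) / factorial(k) for k = 0..8:

    public class PoissonBinSizes {
        public static void main(String[] args) {
            double lambda = 0.5;     // expected nodes per bin at the default 0.75 threshold
            double factorial = 1.0;  // running k!
            for (int k = 0; k <= 8; k++) {
                if (k > 0) {
                    factorial *= k;
                }
                double p = Math.exp(-lambda) * Math.pow(lambda, k) / factorial;
                System.out.printf("%d: %.8f%n", k, p);  // matches the table in the comment above
            }
        }
    }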
/**
* The default initial capacity - MUST be a power of two.
@@ -152,14 +247,158 @@
static final float DEFAULT_LOAD_FACTOR = 0.75f;
/**
- * An empty table instance to share when the table is not inflated.
+ * The bin count threshold for using a tree rather than list for a
+ * bin. Bins are converted to trees when adding an element to a
+ * bin with at least this many nodes. The value must be greater
+ * than 2 and should be at least 8 to mesh with assumptions in
+ * tree removal about conversion back to plain bins upon
+ * shrinkage.
+ */
+ static final int TREEIFY_THRESHOLD = 8;
+
+ /**
+ * The bin count threshold for untreeifying a (split) bin during a
+ * resize operation. Should be less than TREEIFY_THRESHOLD, and at
+ * most 6 to mesh with shrinkage detection under removal.
+ */
+ static final int UNTREEIFY_THRESHOLD = 6;
+
+ /**
+ * The smallest table capacity for which bins may be treeified.
+ * (Otherwise the table is resized if too many nodes in a bin.)
+ * Should be at least 4 * TREEIFY_THRESHOLD to avoid conflicts
+ * between resizing and treeification thresholds.
+ */
+ static final int MIN_TREEIFY_CAPACITY = 64;
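A minimal sketch of how the three thresholds above interact (the constants mirror the patch, but the decision methods below are paraphrased for illustration, not copied from the real insertion/resize code):

    public class TreeifyPolicySketch {
        static final int TREEIFY_THRESHOLD = 8;
        static final int UNTREEIFY_THRESHOLD = 6;
        static final int MIN_TREEIFY_CAPACITY = 64;

        // On insertion into a long bin: small tables resize instead of treeifying.
        static String onLongBin(int binCount, int tableCapacity) {
            if (binCount >= TREEIFY_THRESHOLD) {
                return (tableCapacity < MIN_TREEIFY_CAPACITY) ? "resize table" : "treeify bin";
            }
            return "keep linked list";
        }

        // When a tree bin shrinks (removal or resize split), it reverts to a list.
        static String onShrunkTreeBin(int binCount) {
            return (binCount <= UNTREEIFY_THRESHOLD) ? "untreeify bin" : "keep tree";
        }

        public static void main(String[] args) {
            System.out.println(onLongBin(8, 16));   // resize table
            System.out.println(onLongBin(8, 64));   // treeify bin
            System.out.println(onShrunkTreeBin(6)); // untreeify bin
        }
    }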
+
+ /**
+ * Basic hash bin node, used for most entries. (See below for
+ * TreeNode subclass, and in LinkedHashMap for its Entry subclass.)
*/
- static final Object[] EMPTY_TABLE = {};
+ static class Node<K,V> implements Map.Entry<K,V> {
+ final int hash;
+ final K key;
+ V value;
+ Node<K,V> next;
+
+ Node(int hash, K key, V value, Node<K,V> next) {
+ this.hash = hash;
+ this.key = key;
+ this.value = value;
+ this.next = next;
+ }
+
+ public final K getKey() { return key; }
+ public final V getValue() { return value; }
+ public final String toString() { return key + "=" + value; }
+
+ public final int hashCode() {
+ return Objects.hashCode(key) ^ Objects.hashCode(value);
+ }
+
+ public final V setValue(V newValue) {
+ V oldValue = value;
+ value = newValue;
+ return oldValue;
+ }
+
+ public final boolean equals(Object o) {
+ if (o == this)
+ return true;
+ if (o instanceof Map.Entry) {
+ Map.Entry<?,?> e = (Map.Entry<?,?>)o;
+ if (Objects.equals(key, e.getKey()) &&
+ Objects.equals(value, e.getValue()))
+ return true;
+ }
+ return false;
+ }
+ }
+
+ /* ---------------- Static utilities -------------- */
/**
- * The table, resized as necessary. Length MUST Always be a power of two.
+ * Computes key.hashCode() and spreads (XORs) higher bits of hash
+ * to lower. Because the table uses power-of-two masking, sets of
+ * hashes that vary only in bits above the current mask will
+ * always collide. (Among known examples are sets of Float keys
+ * holding consecutive whole numbers in small tables.) So we
+ * apply a transform that spreads the impact of higher bits
+ * downward. There is a tradeoff between speed, utility, and
+ * quality of bit-spreading. Because many common sets of hashes
+ * are already reasonably distributed (so don't benefit from
+ * spreading), and because we use trees to handle large sets of
+ * collisions in bins, we just XOR some shifted bits in the
+ * cheapest possible way to reduce systematic lossage, as well as
+ * to incorporate impact of the highest bits that would otherwise
+ * never be used in index calculations because of table bounds.
+ */
+ static final int hash(Object key) {
+ int h;
+ return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
+ }
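A small self-contained sketch (not from the patch) of why the shift-and-XOR helps: with a 16-slot table, two hash codes that differ only above bit 15 fall into the same bucket when masked directly, but land in different buckets once the high bits are spread downward.

    public class HashSpreadExample {
        // Same transform as the hash() method above, minus the null-key check.
        static int spread(int h) {
            return h ^ (h >>> 16);
        }

        public static void main(String[] args) {
            int capacity = 16;        // table length, always a power of two
            int mask = capacity - 1;  // index = hash & (capacity - 1)

            int h1 = 0x10000;         // these differ only in bits above the mask
            int h2 = 0x20000;

            System.out.println("masked directly: " + (h1 & mask) + ", " + (h2 & mask));                 // 0, 0
            System.out.println("after spreading: " + (spread(h1) & mask) + ", " + (spread(h2) & mask)); // 1, 2
        }
    }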
+
+ /**
+ * Returns x's Class if it is of the form "class C implements
+ * Comparable", else null.
*/
- transient Object[] table = EMPTY_TABLE;
+ static Class<?> comparableClassFor(Object x) {
+ if (x instanceof Comparable) {
+ Class<?> c; Type[] ts, as; Type t; ParameterizedType p;
+ if ((c = x.getClass()) == String.class) // bypass checks
+ return c;
+ if ((ts = c.getGenericInterfaces()) != null) {
+ for (int i = 0; i < ts.length; ++i) {
+ if (((t = ts[i]) instanceof ParameterizedType) &&
+ ((p = (ParameterizedType)t).getRawType() ==
+ Comparable.class) &&
+ (as = p.getActualTypeArguments()) != null &&
+ as.length == 1 && as[0] == c) // type arg is c
+ return c;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Returns k.compareTo(x) if x matches kc (k's screened comparable
+ * class), else 0.
+ */
+ @SuppressWarnings({"rawtypes","unchecked"}) // for cast to Comparable
+ static int compareComparables(Class<?> kc, Object k, Object x) {
+ return (x == null || x.getClass() != kc ? 0 :
+ ((Comparable)k).compareTo(x));
+ }
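To illustrate what the two helpers above accept, here is a standalone sketch (the key classes are hypothetical, and the reflective check is repeated locally since HashMap's copy is package-private): a class only qualifies when it implements Comparable of exactly its own type, so compareComparables falls back to 0 for anything else.

    import java.lang.reflect.ParameterizedType;
    import java.lang.reflect.Type;

    public class ComparableClassForExample {
        // Same reflective check as the helper above, repeated so the sketch compiles on its own.
        static Class<?> comparableClassFor(Object x) {
            if (x instanceof Comparable) {
                Class<?> c; Type[] ts, as; Type t; ParameterizedType p;
                if ((c = x.getClass()) == String.class)
                    return c;
                if ((ts = c.getGenericInterfaces()) != null) {
                    for (int i = 0; i < ts.length; ++i) {
                        if (((t = ts[i]) instanceof ParameterizedType) &&
                            ((p = (ParameterizedType) t).getRawType() == Comparable.class) &&
                            (as = p.getActualTypeArguments()) != null &&
                            as.length == 1 && as[0] == c)
                            return c;
                    }
                }
            }
            return null;
        }

        // Hypothetical key types, used only for this illustration.
        static class SelfComparable implements Comparable<SelfComparable> {
            public int compareTo(SelfComparable o) { return 0; }
        }
        static class OddComparable implements Comparable<String> {
            public int compareTo(String o) { return 0; }
        }

        public static void main(String[] args) {
            System.out.println(comparableClassFor("abc"));                 // class java.lang.String
            System.out.println(comparableClassFor(new SelfComparable()));  // the SelfComparable class
            System.out.println(comparableClassFor(new OddComparable()));   // null: type argument is not the class itself
        }
    }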
+
+ /**
+ * Returns a power of two size for the given target capacity.
+ */
+ static final int tableSizeFor(int cap) {
+ int n = cap - 1;
+ n |= n >>> 1;
+ n |= n >>> 2;
+ n |= n >>> 4;
+ n |= n >>> 8;
+ n |= n >>> 16;
+ return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
+ }
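A usage sketch for tableSizeFor (the method is repeated locally because the real one is package-private): it rounds the requested capacity up to the next power of two, clamped at MAXIMUM_CAPACITY.

    public class TableSizeForExample {
        static final int MAXIMUM_CAPACITY = 1 << 30;

        // Same bit-smearing logic as the helper above.
        static int tableSizeFor(int cap) {
            int n = cap - 1;
            n |= n >>> 1;
            n |= n >>> 2;
            n |= n >>> 4;
            n |= n >>> 8;
            n |= n >>> 16;
            return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
        }

        public static void main(String[] args) {
            int[] samples = {1, 2, 3, 13, 16, 17, 1000};
            for (int cap : samples) {
                System.out.println(cap + " -> " + tableSizeFor(cap)); // 1, 2, 4, 16, 16, 32, 1024
            }
        }
    }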
+
+ /* ---------------- Fields -------------- */
+
+ /**
+ * The table, initialized on first use, and resized as
+ * necessary. When allocated, length is always a power of two.
+ * (We also tolerate length zero in some operations to allow
+ * bootstrapping mechanics that are currently not needed.)
+ */
+ transient Node<K,V>[] table;
+
+ /**
+ * Holds cached entrySet(). Note that AbstractMap fields are used
+ * for keySet() and values().
+ */
+ transient Set<Map.Entry<K,V>> entrySet;
/**
* The number of key-value mappings contained in this map.
@@ -167,21 +406,6 @@
transient int size;
/**
- * The next size value at which to resize (capacity * load factor).
- * @serial
- */
- // If table == EMPTY_TABLE then this is the initial capacity at which the
- // table will be created when inflated.
- int threshold;
-
- /**
- * The load factor for the hash table.
- *
- * @serial
- */
- final float loadFactor;
-
- /**
* The number of times this HashMap has been structurally modified.
* Structural modifications are those that change the number of mappings in
* the HashMap or otherwise modify its internal structure (e.g.,
@@ -191,627 +415,24 @@
transient int modCount;
/**
- * Holds values which can't be initialized until after VM is booted.
+ * The next size value at which to resize (capacity * load factor).
+ *
+ * @serial
*/
- private static class Holder {
- static final sun.misc.Unsafe UNSAFE;
-
- /**
- * Offset of "final" hashSeed field we must set in
- * readObject() method.
- */
- static final long HASHSEED_OFFSET;
-
- static final boolean USE_HASHSEED;
-
- static {
- String hashSeedProp = java.security.AccessController.doPrivileged(
- new sun.security.action.GetPropertyAction(
- "jdk.map.useRandomSeed"));
- boolean localBool = (null != hashSeedProp)
- ? Boolean.parseBoolean(hashSeedProp) : false;
- USE_HASHSEED = localBool;
-
- if (USE_HASHSEED) {
- try {
- UNSAFE = sun.misc.Unsafe.getUnsafe();
- HASHSEED_OFFSET = UNSAFE.objectFieldOffset(
- HashMap.class.getDeclaredField("hashSeed"));
- } catch (NoSuchFieldException | SecurityException e) {
- throw new InternalError("Failed to record hashSeed offset", e);
- }
- } else {
- UNSAFE = null;
- HASHSEED_OFFSET = 0;
- }
- }
- }
-
- /*
- * A randomizing value associated with this instance that is applied to
- * hash code of keys to make hash collisions harder to find.
- *
- * Non-final so it can be set lazily, but be sure not to set more than once.
- */
- transient final int hashSeed;
-
- /*
- * TreeBin/TreeNode code from CHM doesn't handle the null key. Store the
- * null key entry here.
- */
- transient Entry<K,V> nullKeyEntry = null;
-
- /*
- * In order to improve performance under high hash-collision conditions,
- * HashMap will switch to storing a bin's entries in a balanced tree
- * (TreeBin) instead of a linked-list once the number of entries in the bin
- * passes a certain threshold (TreeBin.TREE_THRESHOLD), if at least one of
- * the keys in the bin implements Comparable. This technique is borrowed
- * from ConcurrentHashMap.
- */
-
- /*
- * Code based on CHMv8
- *
- * Node type for TreeBin
- */
- final static class TreeNode<K,V> {
- TreeNode<K,V> parent; // red-black tree links
- TreeNode<K,V> left;
- TreeNode<K,V> right;
- TreeNode<K,V> prev; // needed to unlink next upon deletion
- boolean red;
- final HashMap.Entry<K,V> entry;
-
- TreeNode(HashMap.Entry<K,V> entry, Object next, TreeNode<K,V> parent) {
- this.entry = entry;
- this.entry.next = next;
- this.parent = parent;
- }
- }
+ // (The javadoc description is true upon serialization.
+ // Additionally, if the table array has not been allocated, this
+ // field holds the initial array capacity, or zero signifying
+ // DEFAULT_INITIAL_CAPACITY.)
+ int threshold;
/**
- * Returns a Class for the given object of the form "class C
- * implements Comparable", if one exists, else null. See the TreeBin
- * docs, below, for explanation.
- */
- static Class<?> comparableClassFor(Object x) {
- Class<?> c, s, cmpc; Type[] ts, as; Type t; ParameterizedType p;
- if ((c = x.getClass()) == String.class) // bypass checks
- return c;
- if ((cmpc = Comparable.class).isAssignableFrom(c)) {
- while (cmpc.isAssignableFrom(s = c.getSuperclass()))
- c = s; // find topmost comparable class
- if ((ts = c.getGenericInterfaces()) != null) {
- for (int i = 0; i < ts.length; ++i) {
- if (((t = ts[i]) instanceof ParameterizedType) &&
- ((p = (ParameterizedType)t).getRawType() == cmpc) &&
- (as = p.getActualTypeArguments()) != null &&
- as.length == 1 && as[0] == c) // type arg is c
- return c;
- }
- }
- }
- return null;
- }
-
- /*
- * Code based on CHMv8
+ * The load factor for the hash table.
*
- * A specialized form of red-black tree for use in bins
- * whose size exceeds a threshold.
- *
- * TreeBins use a special form of comparison for search and
- * related operations (which is the main reason we cannot use
- * existing collections such as TreeMaps). TreeBins contain
- * Comparable elements, but may contain others, as well as
- * elements that are Comparable but not necessarily Comparable<T>
- * for the same T, so we cannot invoke compareTo among them. To
- * handle this, the tree is ordered primarily by hash value, then
- * by Comparable.compareTo order if applicable. On lookup at a
- * node, if elements are not comparable or compare as 0 then both
- * left and right children may need to be searched in the case of
- * tied hash values. (This corresponds to the full list search
- * that would be necessary if all elements were non-Comparable and
- * had tied hashes.) The red-black balancing code is updated from
- * pre-jdk-collections
- * (http://gee.cs.oswego.edu/dl/classes/collections/RBCell.java)
- * based in turn on Cormen, Leiserson, and Rivest "Introduction to
- * Algorithms" (CLR).
+ * @serial
*/
- final class TreeBin {
- /*
- * The bin count threshold for using a tree rather than list for a bin. The
- * value reflects the approximate break-even point for using tree-based
- * operations.
- */
- static final int TREE_THRESHOLD = 16;
-
- TreeNode<K,V> root; // root of tree
- TreeNode<K,V> first; // head of next-pointer list
-
- /*
- * Split a TreeBin into lo and hi parts and install in given table.
- *
- * Existing Entrys are re-used, which maintains the before/after links for
- * LinkedHashMap.Entry.
- *
- * No check for Comparable, though this is the same as CHM.
- */
- final void splitTreeBin(Object[] newTable, int i, TreeBin loTree, TreeBin hiTree) {
- TreeBin oldTree = this;
- int bit = newTable.length >>> 1;
- int loCount = 0, hiCount = 0;
- TreeNode<K,V> e = oldTree.first;
- TreeNode<K,V> next;
-
- // This method is called when the table has just increased capacity,
- // so indexFor() is now taking one additional bit of hash into
- // account ("bit"). Entries in this TreeBin now belong in one of
- // two bins, "i" or "i+bit", depending on if the new top bit of the
- // hash is set. The trees for the two bins are loTree and hiTree.
- // If either tree ends up containing fewer than TREE_THRESHOLD
- // entries, it is converted back to a linked list.
- while (e != null) {
- // Save entry.next - it will get overwritten in putTreeNode()
- next = (TreeNode<K,V>)e.entry.next;
-
- int h = e.entry.hash;
- K k = (K) e.entry.key;
- V v = e.entry.value;
- if ((h & bit) == 0) {
- ++loCount;
- // Re-using e.entry
- loTree.putTreeNode(h, k, v, e.entry);
- } else {
- ++hiCount;
- hiTree.putTreeNode(h, k, v, e.entry);
- }
- // Iterate using the saved 'next'
- e = next;
- }
- if (loCount < TREE_THRESHOLD) { // too small, convert back to list
- HashMap.Entry<K,V> loEntry = null;
- TreeNode<K,V> p = loTree.first;
- while (p != null) {
- @SuppressWarnings("unchecked")
- TreeNode<K,V> savedNext = (TreeNode<K,V>) p.entry.next;
- p.entry.next = loEntry;
- loEntry = p.entry;
- p = savedNext;
- }
- // assert newTable[i] == null;
- newTable[i] = loEntry;
- } else {
- // assert newTable[i] == null;
- newTable[i] = loTree;
- }
- if (hiCount < TREE_THRESHOLD) { // too small, convert back to list
- HashMap.Entry<K,V> hiEntry = null;
- TreeNode<K,V> p = hiTree.first;
- while (p != null) {
- @SuppressWarnings("unchecked")
- TreeNode<K,V> savedNext = (TreeNode<K,V>) p.entry.next;
- p.entry.next = hiEntry;
- hiEntry = p.entry;
- p = savedNext;
- }
- // assert newTable[i + bit] == null;
- newTable[i + bit] = hiEntry;
- } else {
- // assert newTable[i + bit] == null;
- newTable[i + bit] = hiTree;
- }
- }
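A tiny standalone sketch (not from the patch) of the index arithmetic the comment above describes: after the table doubles, an entry either stays at its old index i or moves to i + oldCapacity, depending on the single extra hash bit the larger mask now exposes; the sample hash values below are arbitrary.

    public class ResizeSplitExample {
        public static void main(String[] args) {
            int oldCap = 16;
            int newCap = oldCap << 1;   // the table has just doubled
            int bit = oldCap;           // the one new bit that index masking now sees

            int[] hashes = {0x05, 0x15, 0x25, 0x35};
            for (int h : hashes) {
                int oldIndex = h & (oldCap - 1);
                int newIndex = h & (newCap - 1);
                String half = ((h & bit) == 0) ? "lo (stays at i)" : "hi (moves to i + oldCap)";
                System.out.printf("hash=0x%02x old=%d new=%d -> %s%n", h, oldIndex, newIndex, half);
            }
        }
    }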
-
- /*
- * Populate the TreeBin with entries from the linked list e
- *
- * Assumes 'this' is a new/empty TreeBin
- *
- * Note: no check for Comparable
- * Note: I believe this changes iteration order
- */
- @SuppressWarnings("unchecked")
- void populate(HashMap.Entry<K,V> e) {
- // assert root == null;
- // assert first == null;
- HashMap.Entry<K,V> next;
- while (e != null) {
- // Save entry.next - it will get overwritten in putTreeNode()
- next = (HashMap.Entry<K,V>)e.next;
- // Re-using Entry e will maintain before/after in LinkedHM
- putTreeNode(e.hash, (K)e.key, (V)e.value, e);
- // Iterate using the saved 'next'
- e = next;
- }
- }
-
- /**
- * Copied from CHMv8
- * From CLR
- */
- private void rotateLeft(TreeNode<K,V> p) {
- if (p != null) {
- TreeNode<K,V> r = p.right, pp, rl;
- if ((rl = p.right = r.left) != null) {
- rl.parent = p;
- }
- if ((pp = r.parent = p.parent) == null) {
- root = r;
- } else if (pp.left == p) {
- pp.left = r;
- } else {
- pp.right = r;
- }
- r.left = p;
- p.parent = r;
- }
- }
-
- /**
- * Copied from CHMv8
- * From CLR
- */
- private void rotateRight(TreeNode<K,V> p) {
- if (p != null) {
- TreeNode<K,V> l = p.left, pp, lr;
- if ((lr = p.left = l.right) != null) {
- lr.parent = p;
- }
- if ((pp = l.parent = p.parent) == null) {
- root = l;
- } else if (pp.right == p) {
- pp.right = l;
- } else {
- pp.left = l;
- }
- l.right = p;
- p.parent = l;
- }
- }
-
- /**
- * Returns the TreeNode (or null if not found) for the given
- * key. A front-end for recursive version.
- */
- final TreeNode<K,V> getTreeNode(int h, K k) {
- return getTreeNode(h, k, root, comparableClassFor(k));
- }
-
- /**
- * Returns the TreeNode (or null if not found) for the given key
- * starting at given root.
- */
- @SuppressWarnings("unchecked")
- final TreeNode<K,V> getTreeNode (int h, K k, TreeNode<K,V> p, Class<?> cc) {
- // assert k != null;
- while (p != null) {
- int dir, ph; Object pk;
- if ((ph = p.entry.hash) != h)
- dir = (h < ph) ? -1 : 1;
- else if ((pk = p.entry.key) == k || k.equals(pk))
- return p;
- else if (cc == null || comparableClassFor(pk) != cc ||
- (dir = ((Comparable