# HG changeset patch
# User psandoz
# Date 1378280065 -7200
# Node ID dda89341ee2d3c52c38b12e92cd4198c76238e43
# Parent 39ccb0972a2fb712612b7fe5adc7775603dfaf99
8023463: Improvements to HashMap/LinkedHashMap use of bins/buckets and trees (red/black)
8012913: LinkedHashMap key/value/entry spliterators should report ORDERED
Reviewed-by: mduigou, forax, bchristi, alanb
Contributed-by: Doug Lea, Paul Sandoz
diff -r 39ccb0972a2f -r dda89341ee2d jdk/src/share/classes/java/util/HashMap.java
--- a/jdk/src/share/classes/java/util/HashMap.java Mon Aug 12 12:22:10 2013 +0200
+++ b/jdk/src/share/classes/java/util/HashMap.java Wed Sep 04 09:34:25 2013 +0200
@@ -25,13 +25,14 @@
package java.util;
-import java.io.*;
+import java.io.IOException;
+import java.io.InvalidObjectException;
+import java.io.Serializable;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
-import java.util.concurrent.ThreadLocalRandom;
import java.util.function.BiConsumer;
+import java.util.function.BiFunction;
import java.util.function.Consumer;
-import java.util.function.BiFunction;
import java.util.function.Function;
/**
@@ -63,20 +64,25 @@
* structures are rebuilt) so that the hash table has approximately twice the
* number of buckets.
*
- * <p>As a general rule, the default load factor (.75) offers a good tradeoff
- * between time and space costs. Higher values decrease the space overhead
- * but increase the lookup cost (reflected in most of the operations of the
- * <tt>HashMap</tt> class, including <tt>get</tt> and <tt>put</tt>). The
- * expected number of entries in the map and its load factor should be taken
- * into account when setting its initial capacity, so as to minimize the
- * number of rehash operations. If the initial capacity is greater
- * than the maximum number of entries divided by the load factor, no
- * rehash operations will ever occur.
+ * <p>As a general rule, the default load factor (.75) offers a good
+ * tradeoff between time and space costs. Higher values decrease the
+ * space overhead but increase the lookup cost (reflected in most of
+ * the operations of the <tt>HashMap</tt> class, including
+ * <tt>get</tt> and <tt>put</tt>). The expected number of entries in
+ * the map and its load factor should be taken into account when
+ * setting its initial capacity, so as to minimize the number of
+ * rehash operations. If the initial capacity is greater than the
+ * maximum number of entries divided by the load factor, no rehash
+ * operations will ever occur.
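A quick worked example of the sizing guidance above (an illustrative sketch, not part of the changeset; the class name is made up): for 100 expected mappings at the default load factor of 0.75, any initial capacity of at least ceil(100 / 0.75) = 134 means the table never needs to rehash while those mappings are inserted.

    import java.util.HashMap;
    import java.util.Map;

    public class CapacitySizingExample {
        public static void main(String[] args) {
            int expectedEntries = 100;
            float loadFactor = 0.75f;

            // Request at least expectedEntries / loadFactor buckets so the
            // resize threshold is never crossed while inserting the expected entries.
            int initialCapacity = (int) Math.ceil(expectedEntries / loadFactor); // 134

            Map<String, Integer> map = new HashMap<>(initialCapacity, loadFactor);
            for (int i = 0; i < expectedEntries; i++) {
                map.put("key-" + i, i);
            }
            System.out.println("size = " + map.size()); // 100, with no intermediate resize
        }
    }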
*
- * <p>If many mappings are to be stored in a <tt>HashMap</tt> instance,
- * creating it with a sufficiently large capacity will allow the mappings to
- * be stored more efficiently than letting it perform automatic rehashing as
- * needed to grow the table.
+ * <p>If many mappings are to be stored in a <tt>HashMap</tt>
+ * instance, creating it with a sufficiently large capacity will allow
+ * the mappings to be stored more efficiently than letting it perform
+ * automatic rehashing as needed to grow the table. Note that using
+ * many keys with the same {@code hashCode()} is a sure way to slow
+ * down performance of any hash table. To ameliorate impact, when keys
+ * are {@link Comparable}, this class may use comparison order among
+ * keys to help break ties.
*
* <p><strong>Note that this implementation is not synchronized.</strong>
* If multiple threads access a hash map concurrently, and at least one of
@@ -128,11 +134,100 @@
* @see Hashtable
* @since 1.2
*/
-public class HashMap<K,V>
- extends AbstractMap<K,V>
- implements Map<K,V>, Cloneable, Serializable
-{
+public class HashMap<K,V> extends AbstractMap<K,V>
+ implements Map<K,V>, Cloneable, Serializable {
+
+ private static final long serialVersionUID = 362498820763181265L;
+
+ /*
+ * Implementation notes.
+ *
+ * This map usually acts as a binned (bucketed) hash table, but
+ * when bins get too large, they are transformed into bins of
+ * TreeNodes, each structured similarly to those in
+ * java.util.TreeMap. Most methods try to use normal bins, but
+ * relay to TreeNode methods when applicable (simply by checking
+ * instanceof a node). Bins of TreeNodes may be traversed and
+ * used like any others, but additionally support faster lookup
+ * when overpopulated. However, since the vast majority of bins in
+ * normal use are not overpopulated, checking for existence of
+ * tree bins may be delayed in the course of table methods.
+ *
+ * Tree bins (i.e., bins whose elements are all TreeNodes) are
+ * ordered primarily by hashCode, but in the case of ties, if two
+ * elements are of the same "class C implements Comparable<C>"
+ * type, then their compareTo method is used for ordering. (We
+ * conservatively check generic types via reflection to validate
+ * this -- see method comparableClassFor). The added complexity
+ * of tree bins is worthwhile in providing worst-case O(log n)
+ * operations when keys either have distinct hashes or are
+ * orderable. Thus, performance degrades gracefully under
+ * accidental or malicious usages in which hashCode() methods
+ * return values that are poorly distributed, as well as those in
+ * which many keys share a hashCode, so long as they are also
+ * Comparable. (If neither of these apply, we may waste about a
+ * factor of two in time and space compared to taking no
+ * precautions. But the only known cases stem from poor user
+ * programming practices that are already so slow that this makes
+ * little difference.)
+ *
+ * Because TreeNodes are about twice the size of regular nodes, we
+ * use them only when bins contain enough nodes to warrant use
+ * (see TREEIFY_THRESHOLD). And when they become too small (due to
+ * removal or resizing) they are converted back to plain bins. In
+ * usages with well-distributed user hashCodes, tree bins are
+ * rarely used. Ideally, under random hashCodes, the frequency of
+ * nodes in bins follows a Poisson distribution
+ * (http://en.wikipedia.org/wiki/Poisson_distribution) with a
+ * parameter of about 0.5 on average for the default resizing
+ * threshold of 0.75, although with a large variance because of
+ * resizing granularity. Ignoring variance, the expected
+ * occurrences of list size k are (exp(-0.5) * pow(0.5, k) /
+ * factorial(k)). The first values are:
+ *
+ * 0: 0.60653066
+ * 1: 0.30326533
+ * 2: 0.07581633
+ * 3: 0.01263606
+ * 4: 0.00157952
+ * 5: 0.00015795
+ * 6: 0.00001316
+ * 7: 0.00000094
+ * 8: 0.00000006
+ * more: less than 1 in ten million
+ *
+ * The root of a tree bin is normally its first node. However,
+ * sometimes (currently only upon Iterator.remove), the root might
+ * be elsewhere, but can be recovered following parent links
+ * (method TreeNode.root()).
+ *
+ * All applicable internal methods accept a hash code as an
+ * argument (as normally supplied from a public method), allowing
+ * them to call each other without recomputing user hashCodes.
+ * Most internal methods also accept a "tab" argument, that is
+ * normally the current table, but may be a new or old one when
+ * resizing or converting.
+ *
+ * When bin lists are treeified, split, or untreeified, we keep
+ * them in the same relative access/traversal order (i.e., field
+ * Node.next) to better preserve locality, and to slightly
+ * simplify handling of splits and traversals that invoke
+ * iterator.remove. When using comparators on insertion, to keep a
+ * total ordering (or as close as is required here) across
+ * rebalancings, we compare classes and identityHashCodes as
+ * tie-breakers.
+ *
+ * The use and transitions among plain vs tree modes is
+ * complicated by the existence of subclass LinkedHashMap. See
+ * below for hook methods defined to be invoked upon insertion,
+ * removal and access that allow LinkedHashMap internals to
+ * otherwise remain independent of these mechanics. (This also
+ * requires that a map instance be passed to some utility methods
+ * that may create new nodes.)
+ *
+ * The concurrent-programming-like SSA-based coding style helps
+ * avoid aliasing errors amid all of the twisty pointer operations.
+ */
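As a sanity check on the bin-size figures quoted in the notes above, a minimal standalone sketch (not part of the patch) that recomputes exp(-0.5) * pow(0.5, k) / factorial(k) for k = 0..8:

    public class PoissonBinSizes {
        public static void main(String[] args) {
            double lambda = 0.5;     // expected nodes per bin at the default 0.75 threshold
            double factorial = 1.0;  // running k!
            for (int k = 0; k <= 8; k++) {
                if (k > 0) {
                    factorial *= k;
                }
                double p = Math.exp(-lambda) * Math.pow(lambda, k) / factorial;
                System.out.printf("%d: %.8f%n", k, p);  // matches the table in the comment above
            }
        }
    }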
/**
* The default initial capacity - MUST be a power of two.
@@ -152,14 +247,158 @@
static final float DEFAULT_LOAD_FACTOR = 0.75f;
/**
- * An empty table instance to share when the table is not inflated.
+ * The bin count threshold for using a tree rather than list for a
+ * bin. Bins are converted to trees when adding an element to a
+ * bin with at least this many nodes. The value must be greater
+ * than 2 and should be at least 8 to mesh with assumptions in
+ * tree removal about conversion back to plain bins upon
+ * shrinkage.
+ */
+ static final int TREEIFY_THRESHOLD = 8;
+
+ /**
+ * The bin count threshold for untreeifying a (split) bin during a
+ * resize operation. Should be less than TREEIFY_THRESHOLD, and at
+ * most 6 to mesh with shrinkage detection under removal.
+ */
+ static final int UNTREEIFY_THRESHOLD = 6;
+
+ /**
+ * The smallest table capacity for which bins may be treeified.
+ * (Otherwise the table is resized if too many nodes in a bin.)
+ * Should be at least 4 * TREEIFY_THRESHOLD to avoid conflicts
+ * between resizing and treeification thresholds.
+ */
+ static final int MIN_TREEIFY_CAPACITY = 64;
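A minimal sketch of how the three thresholds above interact (the constants mirror the patch, but the decision methods below are paraphrased for illustration, not copied from the real insertion/resize code):

    public class TreeifyPolicySketch {
        static final int TREEIFY_THRESHOLD = 8;
        static final int UNTREEIFY_THRESHOLD = 6;
        static final int MIN_TREEIFY_CAPACITY = 64;

        // On insertion into a long bin: small tables resize instead of treeifying.
        static String onLongBin(int binCount, int tableCapacity) {
            if (binCount >= TREEIFY_THRESHOLD) {
                return (tableCapacity < MIN_TREEIFY_CAPACITY) ? "resize table" : "treeify bin";
            }
            return "keep linked list";
        }

        // When a tree bin shrinks (removal or resize split), it reverts to a list.
        static String onShrunkTreeBin(int binCount) {
            return (binCount <= UNTREEIFY_THRESHOLD) ? "untreeify bin" : "keep tree";
        }

        public static void main(String[] args) {
            System.out.println(onLongBin(8, 16));   // resize table
            System.out.println(onLongBin(8, 64));   // treeify bin
            System.out.println(onShrunkTreeBin(6)); // untreeify bin
        }
    }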
+
+ /**
+ * Basic hash bin node, used for most entries. (See below for
+ * TreeNode subclass, and in LinkedHashMap for its Entry subclass.)
*/
- static final Object[] EMPTY_TABLE = {};
+ static class Node<K,V> implements Map.Entry<K,V> {
+ final int hash;
+ final K key;
+ V value;
+ Node<K,V> next;
+
+ Node(int hash, K key, V value, Node<K,V> next) {
+ this.hash = hash;
+ this.key = key;
+ this.value = value;
+ this.next = next;
+ }
+
+ public final K getKey() { return key; }
+ public final V getValue() { return value; }
+ public final String toString() { return key + "=" + value; }
+
+ public final int hashCode() {
+ return Objects.hashCode(key) ^ Objects.hashCode(value);
+ }
+
+ public final V setValue(V newValue) {
+ V oldValue = value;
+ value = newValue;
+ return oldValue;
+ }
+
+ public final boolean equals(Object o) {
+ if (o == this)
+ return true;
+ if (o instanceof Map.Entry) {
+ Map.Entry<?,?> e = (Map.Entry<?,?>)o;
+ if (Objects.equals(key, e.getKey()) &&
+ Objects.equals(value, e.getValue()))
+ return true;
+ }
+ return false;
+ }
+ }
+
+ /* ---------------- Static utilities -------------- */
/**
- * The table, resized as necessary. Length MUST Always be a power of two.
+ * Computes key.hashCode() and spreads (XORs) higher bits of hash
+ * to lower. Because the table uses power-of-two masking, sets of
+ * hashes that vary only in bits above the current mask will
+ * always collide. (Among known examples are sets of Float keys
+ * holding consecutive whole numbers in small tables.) So we
+ * apply a transform that spreads the impact of higher bits
+ * downward. There is a tradeoff between speed, utility, and
+ * quality of bit-spreading. Because many common sets of hashes
+ * are already reasonably distributed (so don't benefit from
+ * spreading), and because we use trees to handle large sets of
+ * collisions in bins, we just XOR some shifted bits in the
+ * cheapest possible way to reduce systematic lossage, as well as
+ * to incorporate impact of the highest bits that would otherwise
+ * never be used in index calculations because of table bounds.
+ */
+ static final int hash(Object key) {
+ int h;
+ return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
+ }
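A small self-contained sketch (not from the patch) of why the shift-and-XOR helps: with a 16-slot table, two hash codes that differ only above bit 15 fall into the same bucket when masked directly, but land in different buckets once the high bits are spread downward.

    public class HashSpreadExample {
        // Same transform as the hash() method above, minus the null-key check.
        static int spread(int h) {
            return h ^ (h >>> 16);
        }

        public static void main(String[] args) {
            int capacity = 16;        // table length, always a power of two
            int mask = capacity - 1;  // index = hash & (capacity - 1)

            int h1 = 0x10000;         // these differ only in bits above the mask
            int h2 = 0x20000;

            System.out.println("masked directly: " + (h1 & mask) + ", " + (h2 & mask));                 // 0, 0
            System.out.println("after spreading: " + (spread(h1) & mask) + ", " + (spread(h2) & mask)); // 1, 2
        }
    }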
+
+ /**
+ * Returns x's Class if it is of the form "class C implements
+ * Comparable", else null.
*/
- transient Object[] table = EMPTY_TABLE;
+ static Class<?> comparableClassFor(Object x) {
+ if (x instanceof Comparable) {
+ Class<?> c; Type[] ts, as; Type t; ParameterizedType p;
+ if ((c = x.getClass()) == String.class) // bypass checks
+ return c;
+ if ((ts = c.getGenericInterfaces()) != null) {
+ for (int i = 0; i < ts.length; ++i) {
+ if (((t = ts[i]) instanceof ParameterizedType) &&
+ ((p = (ParameterizedType)t).getRawType() ==
+ Comparable.class) &&
+ (as = p.getActualTypeArguments()) != null &&
+ as.length == 1 && as[0] == c) // type arg is c
+ return c;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Returns k.compareTo(x) if x matches kc (k's screened comparable
+ * class), else 0.
+ */
+ @SuppressWarnings({"rawtypes","unchecked"}) // for cast to Comparable
+ static int compareComparables(Class<?> kc, Object k, Object x) {
+ return (x == null || x.getClass() != kc ? 0 :
+ ((Comparable)k).compareTo(x));
+ }
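To illustrate what the two helpers above accept, here is a standalone sketch (the key classes are hypothetical, and the reflective check is repeated locally since HashMap's copy is package-private): a class only qualifies when it implements Comparable of exactly its own type, so compareComparables falls back to 0 for anything else.

    import java.lang.reflect.ParameterizedType;
    import java.lang.reflect.Type;

    public class ComparableClassForExample {
        // Same reflective check as the helper above, repeated so the sketch compiles on its own.
        static Class<?> comparableClassFor(Object x) {
            if (x instanceof Comparable) {
                Class<?> c; Type[] ts, as; Type t; ParameterizedType p;
                if ((c = x.getClass()) == String.class)
                    return c;
                if ((ts = c.getGenericInterfaces()) != null) {
                    for (int i = 0; i < ts.length; ++i) {
                        if (((t = ts[i]) instanceof ParameterizedType) &&
                            ((p = (ParameterizedType) t).getRawType() == Comparable.class) &&
                            (as = p.getActualTypeArguments()) != null &&
                            as.length == 1 && as[0] == c)
                            return c;
                    }
                }
            }
            return null;
        }

        // Hypothetical key types, used only for this illustration.
        static class SelfComparable implements Comparable<SelfComparable> {
            public int compareTo(SelfComparable o) { return 0; }
        }
        static class OddComparable implements Comparable<String> {
            public int compareTo(String o) { return 0; }
        }

        public static void main(String[] args) {
            System.out.println(comparableClassFor("abc"));                 // class java.lang.String
            System.out.println(comparableClassFor(new SelfComparable()));  // the SelfComparable class
            System.out.println(comparableClassFor(new OddComparable()));   // null: type argument is not the class itself
        }
    }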
+
+ /**
+ * Returns a power of two size for the given target capacity.
+ */
+ static final int tableSizeFor(int cap) {
+ int n = cap - 1;
+ n |= n >>> 1;
+ n |= n >>> 2;
+ n |= n >>> 4;
+ n |= n >>> 8;
+ n |= n >>> 16;
+ return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
+ }
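A usage sketch for tableSizeFor (the method is repeated locally because the real one is package-private): it rounds the requested capacity up to the next power of two, clamped at MAXIMUM_CAPACITY.

    public class TableSizeForExample {
        static final int MAXIMUM_CAPACITY = 1 << 30;

        // Same bit-smearing logic as the helper above.
        static int tableSizeFor(int cap) {
            int n = cap - 1;
            n |= n >>> 1;
            n |= n >>> 2;
            n |= n >>> 4;
            n |= n >>> 8;
            n |= n >>> 16;
            return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
        }

        public static void main(String[] args) {
            int[] samples = {1, 2, 3, 13, 16, 17, 1000};
            for (int cap : samples) {
                System.out.println(cap + " -> " + tableSizeFor(cap)); // 1, 2, 4, 16, 16, 32, 1024
            }
        }
    }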
+
+ /* ---------------- Fields -------------- */
+
+ /**
+ * The table, initialized on first use, and resized as
+ * necessary. When allocated, length is always a power of two.
+ * (We also tolerate length zero in some operations to allow
+ * bootstrapping mechanics that are currently not needed.)
+ */
+ transient Node<K,V>[] table;
+
+ /**
+ * Holds cached entrySet(). Note that AbstractMap fields are used
+ * for keySet() and values().
+ */
+ transient Set<Map.Entry<K,V>> entrySet;
/**
* The number of key-value mappings contained in this map.
@@ -167,21 +406,6 @@
transient int size;
/**
- * The next size value at which to resize (capacity * load factor).
- * @serial
- */
- // If table == EMPTY_TABLE then this is the initial capacity at which the
- // table will be created when inflated.
- int threshold;
-
- /**
- * The load factor for the hash table.
- *
- * @serial
- */
- final float loadFactor;
-
- /**
* The number of times this HashMap has been structurally modified.
* Structural modifications are those that change the number of mappings in
* the HashMap or otherwise modify its internal structure (e.g.,
@@ -191,627 +415,24 @@
transient int modCount;
/**
- * Holds values which can't be initialized until after VM is booted.
+ * The next size value at which to resize (capacity * load factor).
+ *
+ * @serial
*/
- private static class Holder {
- static final sun.misc.Unsafe UNSAFE;
-
- /**
- * Offset of "final" hashSeed field we must set in
- * readObject() method.
- */
- static final long HASHSEED_OFFSET;
-
- static final boolean USE_HASHSEED;
-
- static {
- String hashSeedProp = java.security.AccessController.doPrivileged(
- new sun.security.action.GetPropertyAction(
- "jdk.map.useRandomSeed"));
- boolean localBool = (null != hashSeedProp)
- ? Boolean.parseBoolean(hashSeedProp) : false;
- USE_HASHSEED = localBool;
-
- if (USE_HASHSEED) {
- try {
- UNSAFE = sun.misc.Unsafe.getUnsafe();
- HASHSEED_OFFSET = UNSAFE.objectFieldOffset(
- HashMap.class.getDeclaredField("hashSeed"));
- } catch (NoSuchFieldException | SecurityException e) {
- throw new InternalError("Failed to record hashSeed offset", e);
- }
- } else {
- UNSAFE = null;
- HASHSEED_OFFSET = 0;
- }
- }
- }
-
- /*
- * A randomizing value associated with this instance that is applied to
- * hash code of keys to make hash collisions harder to find.
- *
- * Non-final so it can be set lazily, but be sure not to set more than once.
- */
- transient final int hashSeed;
-
- /*
- * TreeBin/TreeNode code from CHM doesn't handle the null key. Store the
- * null key entry here.
- */
- transient Entry<K,V> nullKeyEntry = null;
-
- /*
- * In order to improve performance under high hash-collision conditions,
- * HashMap will switch to storing a bin's entries in a balanced tree
- * (TreeBin) instead of a linked-list once the number of entries in the bin
- * passes a certain threshold (TreeBin.TREE_THRESHOLD), if at least one of
- * the keys in the bin implements Comparable. This technique is borrowed
- * from ConcurrentHashMap.
- */
-
- /*
- * Code based on CHMv8
- *
- * Node type for TreeBin
- */
- final static class TreeNode<K,V> {
- TreeNode<K,V> parent; // red-black tree links
- TreeNode<K,V> left;
- TreeNode<K,V> right;
- TreeNode<K,V> prev; // needed to unlink next upon deletion
- boolean red;
- final HashMap.Entry<K,V> entry;
-
- TreeNode(HashMap.Entry<K,V> entry, Object next, TreeNode<K,V> parent) {
- this.entry = entry;
- this.entry.next = next;
- this.parent = parent;
- }
- }
+ // (The javadoc description is true upon serialization.
+ // Additionally, if the table array has not been allocated, this
+ // field holds the initial array capacity, or zero signifying
+ // DEFAULT_INITIAL_CAPACITY.)
+ int threshold;
/**
- * Returns a Class for the given object of the form "class C
- * implements Comparable", if one exists, else null. See the TreeBin
- * docs, below, for explanation.
- */
- static Class<?> comparableClassFor(Object x) {
- Class<?> c, s, cmpc; Type[] ts, as; Type t; ParameterizedType p;
- if ((c = x.getClass()) == String.class) // bypass checks
- return c;
- if ((cmpc = Comparable.class).isAssignableFrom(c)) {
- while (cmpc.isAssignableFrom(s = c.getSuperclass()))
- c = s; // find topmost comparable class
- if ((ts = c.getGenericInterfaces()) != null) {
- for (int i = 0; i < ts.length; ++i) {
- if (((t = ts[i]) instanceof ParameterizedType) &&
- ((p = (ParameterizedType)t).getRawType() == cmpc) &&
- (as = p.getActualTypeArguments()) != null &&
- as.length == 1 && as[0] == c) // type arg is c
- return c;
- }
- }
- }
- return null;
- }
-
- /*
- * Code based on CHMv8
+ * The load factor for the hash table.
*
- * A specialized form of red-black tree for use in bins
- * whose size exceeds a threshold.
- *
- * TreeBins use a special form of comparison for search and
- * related operations (which is the main reason we cannot use
- * existing collections such as TreeMaps). TreeBins contain
- * Comparable elements, but may contain others, as well as
- * elements that are Comparable but not necessarily Comparable<T>
- * for the same T, so we cannot invoke compareTo among them. To
- * handle this, the tree is ordered primarily by hash value, then
- * by Comparable.compareTo order if applicable. On lookup at a
- * node, if elements are not comparable or compare as 0 then both
- * left and right children may need to be searched in the case of
- * tied hash values. (This corresponds to the full list search
- * that would be necessary if all elements were non-Comparable and
- * had tied hashes.) The red-black balancing code is updated from
- * pre-jdk-collections
- * (http://gee.cs.oswego.edu/dl/classes/collections/RBCell.java)
- * based in turn on Cormen, Leiserson, and Rivest "Introduction to
- * Algorithms" (CLR).
+ * @serial
*/
- final class TreeBin {
- /*
- * The bin count threshold for using a tree rather than list for a bin. The
- * value reflects the approximate break-even point for using tree-based
- * operations.
- */
- static final int TREE_THRESHOLD = 16;
-
- TreeNode<K,V> root; // root of tree
- TreeNode<K,V> first; // head of next-pointer list
-
- /*
- * Split a TreeBin into lo and hi parts and install in given table.
- *
- * Existing Entrys are re-used, which maintains the before/after links for
- * LinkedHashMap.Entry.
- *
- * No check for Comparable, though this is the same as CHM.
- */
- final void splitTreeBin(Object[] newTable, int i, TreeBin loTree, TreeBin hiTree) {
- TreeBin oldTree = this;
- int bit = newTable.length >>> 1;
- int loCount = 0, hiCount = 0;
- TreeNode<K,V> e = oldTree.first;
- TreeNode<K,V> next;
-
- // This method is called when the table has just increased capacity,
- // so indexFor() is now taking one additional bit of hash into
- // account ("bit"). Entries in this TreeBin now belong in one of
- // two bins, "i" or "i+bit", depending on if the new top bit of the
- // hash is set. The trees for the two bins are loTree and hiTree.
- // If either tree ends up containing fewer than TREE_THRESHOLD
- // entries, it is converted back to a linked list.
- while (e != null) {
- // Save entry.next - it will get overwritten in putTreeNode()
- next = (TreeNode<K,V>)e.entry.next;
-
- int h = e.entry.hash;
- K k = (K) e.entry.key;
- V v = e.entry.value;
- if ((h & bit) == 0) {
- ++loCount;
- // Re-using e.entry
- loTree.putTreeNode(h, k, v, e.entry);
- } else {
- ++hiCount;
- hiTree.putTreeNode(h, k, v, e.entry);
- }
- // Iterate using the saved 'next'
- e = next;
- }
- if (loCount < TREE_THRESHOLD) { // too small, convert back to list
- HashMap.Entry<K,V> loEntry = null;
- TreeNode<K,V> p = loTree.first;
- while (p != null) {
- @SuppressWarnings("unchecked")
- TreeNode<K,V> savedNext = (TreeNode<K,V>) p.entry.next;
- p.entry.next = loEntry;
- loEntry = p.entry;
- p = savedNext;
- }
- // assert newTable[i] == null;
- newTable[i] = loEntry;
- } else {
- // assert newTable[i] == null;
- newTable[i] = loTree;
- }
- if (hiCount < TREE_THRESHOLD) { // too small, convert back to list
- HashMap.Entry<K,V> hiEntry = null;
- TreeNode<K,V> p = hiTree.first;
- while (p != null) {
- @SuppressWarnings("unchecked")
- TreeNode<K,V> savedNext = (TreeNode<K,V>) p.entry.next;
- p.entry.next = hiEntry;
- hiEntry = p.entry;
- p = savedNext;
- }
- // assert newTable[i + bit] == null;
- newTable[i + bit] = hiEntry;
- } else {
- // assert newTable[i + bit] == null;
- newTable[i + bit] = hiTree;
- }
- }
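A tiny standalone sketch (not from the patch) of the index arithmetic the comment above describes: after the table doubles, an entry either stays at its old index i or moves to i + oldCapacity, depending on the single extra hash bit the larger mask now exposes; the sample hash values below are arbitrary.

    public class ResizeSplitExample {
        public static void main(String[] args) {
            int oldCap = 16;
            int newCap = oldCap << 1;   // the table has just doubled
            int bit = oldCap;           // the one new bit that index masking now sees

            int[] hashes = {0x05, 0x15, 0x25, 0x35};
            for (int h : hashes) {
                int oldIndex = h & (oldCap - 1);
                int newIndex = h & (newCap - 1);
                String half = ((h & bit) == 0) ? "lo (stays at i)" : "hi (moves to i + oldCap)";
                System.out.printf("hash=0x%02x old=%d new=%d -> %s%n", h, oldIndex, newIndex, half);
            }
        }
    }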
-
- /*
- * Populate the TreeBin with entries from the linked list e
- *
- * Assumes 'this' is a new/empty TreeBin
- *
- * Note: no check for Comparable
- * Note: I believe this changes iteration order
- */
- @SuppressWarnings("unchecked")
- void populate(HashMap.Entry<K,V> e) {
- // assert root == null;
- // assert first == null;
- HashMap.Entry<K,V> next;
- while (e != null) {
- // Save entry.next - it will get overwritten in putTreeNode()
- next = (HashMap.Entry<K,V>)e.next;
- // Re-using Entry e will maintain before/after in LinkedHM
- putTreeNode(e.hash, (K)e.key, (V)e.value, e);
- // Iterate using the saved 'next'
- e = next;
- }
- }
-
- /**
- * Copied from CHMv8
- * From CLR
- */
- private void rotateLeft(TreeNode<K,V> p) {
- if (p != null) {
- TreeNode<K,V> r = p.right, pp, rl;
- if ((rl = p.right = r.left) != null) {
- rl.parent = p;
- }
- if ((pp = r.parent = p.parent) == null) {
- root = r;
- } else if (pp.left == p) {
- pp.left = r;
- } else {
- pp.right = r;
- }
- r.left = p;
- p.parent = r;
- }
- }
-
- /**
- * Copied from CHMv8
- * From CLR
- */
- private void rotateRight(TreeNode<K,V> p) {
- if (p != null) {
- TreeNode<K,V> l = p.left, pp, lr;
- if ((lr = p.left = l.right) != null) {
- lr.parent = p;
- }
- if ((pp = l.parent = p.parent) == null) {
- root = l;
- } else if (pp.right == p) {
- pp.right = l;
- } else {
- pp.left = l;
- }
- l.right = p;
- p.parent = l;
- }
- }
-
- /**
- * Returns the TreeNode (or null if not found) for the given
- * key. A front-end for recursive version.
- */
- final TreeNode<K,V> getTreeNode(int h, K k) {
- return getTreeNode(h, k, root, comparableClassFor(k));
- }
-
- /**
- * Returns the TreeNode (or null if not found) for the given key
- * starting at given root.
- */
- @SuppressWarnings("unchecked")
- final TreeNode<K,V> getTreeNode (int h, K k, TreeNode<K,V> p, Class<?> cc) {
- // assert k != null;
- while (p != null) {
- int dir, ph; Object pk;
- if ((ph = p.entry.hash) != h)
- dir = (h < ph) ? -1 : 1;
- else if ((pk = p.entry.key) == k || k.equals(pk))
- return p;
- else if (cc == null || comparableClassFor(pk) != cc ||
- (dir = ((Comparable