# HG changeset patch # User redestad # Date 1554890750 -7200 # Node ID 7fd299216e970e75b8245e8f9f3124d8f73fd74a # Parent ddc19ea5059c229f98bfad27ab7bc1c61c427e93 8221836: Avoid recalculating String.hash when zero Reviewed-by: jrose, adinn Contributed-by: peter.levart@gmail.com, claes.redestad@oracle.com diff -r ddc19ea5059c -r 7fd299216e97 src/hotspot/share/classfile/javaClasses.cpp --- a/src/hotspot/share/classfile/javaClasses.cpp Wed Apr 10 08:51:38 2019 +0200 +++ b/src/hotspot/share/classfile/javaClasses.cpp Wed Apr 10 12:05:50 2019 +0200 @@ -160,6 +160,7 @@ int java_lang_String::value_offset = 0; int java_lang_String::hash_offset = 0; +int java_lang_String::hashIsZero_offset = 0; int java_lang_String::coder_offset = 0; bool java_lang_String::initialized = false; @@ -179,7 +180,8 @@ #define STRING_FIELDS_DO(macro) \ macro(value_offset, k, vmSymbols::value_name(), byte_array_signature, false); \ macro(hash_offset, k, "hash", int_signature, false); \ - macro(coder_offset, k, "coder", byte_signature, false) + macro(hashIsZero_offset, k, "hashIsZero", bool_signature, false); \ + macro(coder_offset, k, "coder", byte_signature, false); void java_lang_String::compute_offsets() { if (initialized) { @@ -507,18 +509,38 @@ } unsigned int java_lang_String::hash_code(oop java_string) { - typeArrayOop value = java_lang_String::value(java_string); - int length = java_lang_String::length(java_string, value); - // Zero length string will hash to zero with String.hashCode() function. - if (length == 0) return 0; - - bool is_latin1 = java_lang_String::is_latin1(java_string); - - if (is_latin1) { - return java_lang_String::hash_code(value->byte_at_addr(0), length); + // The hash and hashIsZero fields are subject to a benign data race, + // making it crucial to ensure that any observable result of the + // calculation in this method stays correct under any possible read of + // these fields. Necessary restrictions to allow this to be correct + // without explicit memory fences or similar concurrency primitives is + // that we can ever only write to one of these two fields for a given + // String instance, and that the computation is idempotent and derived + // from immutable state + assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized"); + if (java_lang_String::hash_is_set(java_string)) { + return java_string->int_field(hash_offset); + } + + typeArrayOop value = java_lang_String::value(java_string); + int length = java_lang_String::length(java_string, value); + bool is_latin1 = java_lang_String::is_latin1(java_string); + + unsigned int hash = 0; + if (length > 0) { + if (is_latin1) { + hash = java_lang_String::hash_code(value->byte_at_addr(0), length); + } else { + hash = java_lang_String::hash_code(value->char_at_addr(0), length); + } + } + + if (hash != 0) { + java_string->int_field_put(hash_offset, hash); } else { - return java_lang_String::hash_code(value->char_at_addr(0), length); + java_string->bool_field_put(hashIsZero_offset, true); } + return hash; } char* java_lang_String::as_quoted_ascii(oop java_string) { diff -r ddc19ea5059c -r 7fd299216e97 src/hotspot/share/classfile/javaClasses.hpp --- a/src/hotspot/share/classfile/javaClasses.hpp Wed Apr 10 08:51:38 2019 +0200 +++ b/src/hotspot/share/classfile/javaClasses.hpp Wed Apr 10 12:05:50 2019 +0200 @@ -94,6 +94,7 @@ private: static int value_offset; static int hash_offset; + static int hashIsZero_offset; static int coder_offset; static bool initialized; @@ -132,6 +133,10 @@ assert(initialized && (hash_offset > 0), "Must be initialized"); return hash_offset; } + static int hashIsZero_offset_in_bytes() { + assert(initialized && (hashIsZero_offset > 0), "Must be initialized"); + return hashIsZero_offset; + } static int coder_offset_in_bytes() { assert(initialized && (coder_offset > 0), "Must be initialized"); return coder_offset; @@ -139,12 +144,11 @@ static inline void set_value_raw(oop string, typeArrayOop buffer); static inline void set_value(oop string, typeArrayOop buffer); - static inline void set_hash(oop string, unsigned int hash); // Accessors static inline typeArrayOop value(oop java_string); static inline typeArrayOop value_no_keepalive(oop java_string); - static inline unsigned int hash(oop java_string); + static inline bool hash_is_set(oop string); static inline bool is_latin1(oop java_string); static inline int length(oop java_string); static inline int length(oop java_string, typeArrayOop string_value); diff -r ddc19ea5059c -r 7fd299216e97 src/hotspot/share/classfile/javaClasses.inline.hpp --- a/src/hotspot/share/classfile/javaClasses.inline.hpp Wed Apr 10 08:51:38 2019 +0200 +++ b/src/hotspot/share/classfile/javaClasses.inline.hpp Wed Apr 10 12:05:50 2019 +0200 @@ -45,9 +45,9 @@ string->obj_field_put(value_offset, (oop)buffer); } -void java_lang_String::set_hash(oop string, unsigned int hash) { - assert(initialized && (hash_offset > 0), "Must be initialized"); - string->int_field_put(hash_offset, hash); +bool java_lang_String::hash_is_set(oop java_string) { + assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized"); + return java_string->int_field(hash_offset) != 0 || java_string->bool_field(hashIsZero_offset) != 0; } // Accessors @@ -71,12 +71,6 @@ return (typeArrayOop) java_string->obj_field_access(value_offset); } -unsigned int java_lang_String::hash(oop java_string) { - assert(initialized && (hash_offset > 0), "Must be initialized"); - assert(is_instance(java_string), "must be java_string"); - return java_string->int_field(hash_offset); -} - bool java_lang_String::is_latin1(oop java_string) { assert(initialized && (coder_offset > 0), "Must be initialized"); assert(is_instance(java_string), "must be java_string"); diff -r ddc19ea5059c -r 7fd299216e97 src/hotspot/share/classfile/stringTable.cpp --- a/src/hotspot/share/classfile/stringTable.cpp Wed Apr 10 08:51:38 2019 +0200 +++ b/src/hotspot/share/classfile/stringTable.cpp Wed Apr 10 12:05:50 2019 +0200 @@ -761,8 +761,6 @@ return true; } unsigned int hash = java_lang_String::hash_code(s); - - java_lang_String::set_hash(s, hash); oop new_s = StringTable::create_archived_string(s, Thread::current()); if (new_s == NULL) { return true; diff -r ddc19ea5059c -r 7fd299216e97 src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp --- a/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp Wed Apr 10 08:51:38 2019 +0200 +++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp Wed Apr 10 12:05:50 2019 +0200 @@ -351,19 +351,14 @@ unsigned int hash = 0; if (use_java_hash()) { - // Get hash code from cache - hash = java_lang_String::hash(java_string); - } - - if (hash == 0) { + if (!java_lang_String::hash_is_set(java_string)) { + stat->inc_hashed(); + } + hash = java_lang_String::hash_code(java_string); + } else { // Compute hash hash = hash_code(value, latin1); stat->inc_hashed(); - - if (use_java_hash() && hash != 0) { - // Store hash code in cache - java_lang_String::set_hash(java_string, hash); - } } typeArrayOop existing_value = lookup_or_add(value, latin1, hash); diff -r ddc19ea5059c -r 7fd299216e97 src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java Wed Apr 10 08:51:38 2019 +0200 +++ b/src/java.base/share/classes/java/lang/String.java Wed Apr 10 12:05:50 2019 +0200 @@ -164,6 +164,12 @@ /** Cache the hash code for the string */ private int hash; // Default to 0 + /** + * Cache if the hash has been calculated as actually being zero, enabling + * us to avoid recalculating this. + */ + private boolean hashIsZero; // Default to false; + /** use serialVersionUID from JDK 1.0.2 for interoperability */ private static final long serialVersionUID = -6849794470754667710L; @@ -1508,14 +1514,21 @@ * @return a hash code value for this object. */ public int hashCode() { + // The hash or hashIsZero fields are subject to a benign data race, + // making it crucial to ensure that any observable result of the + // calculation in this method stays correct under any possible read of + // these fields. Necessary restrictions to allow this to be correct + // without explicit memory fences or similar concurrency primitives is + // that we can ever only write to one of these two fields for a given + // String instance, and that the computation is idempotent and derived + // from immutable state int h = hash; - if (h == 0 && value.length > 0) { + if (h == 0 && !hashIsZero) { h = isLatin1() ? StringLatin1.hashCode(value) : StringUTF16.hashCode(value); - // Avoid issuing a store if the calculated value is also zero: - // in addition to a minor performance benefit, this allows storing - // Strings with zero hash code in read-only memory. - if (h != 0) { + if (h == 0) { + hashIsZero = true; + } else { hash = h; } }