8221836: Avoid recalculating String.hash when zero
Reviewed-by: jrose, adinn
Contributed-by: peter.levart@gmail.com, claes.redestad@oracle.com
--- a/src/hotspot/share/classfile/javaClasses.cpp Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.cpp Wed Apr 10 12:05:50 2019 +0200
@@ -160,6 +160,7 @@
int java_lang_String::value_offset = 0;
int java_lang_String::hash_offset = 0;
+int java_lang_String::hashIsZero_offset = 0;
int java_lang_String::coder_offset = 0;
bool java_lang_String::initialized = false;
@@ -179,7 +180,8 @@
#define STRING_FIELDS_DO(macro) \
macro(value_offset, k, vmSymbols::value_name(), byte_array_signature, false); \
macro(hash_offset, k, "hash", int_signature, false); \
- macro(coder_offset, k, "coder", byte_signature, false)
+ macro(hashIsZero_offset, k, "hashIsZero", bool_signature, false); \
+ macro(coder_offset, k, "coder", byte_signature, false);
void java_lang_String::compute_offsets() {
if (initialized) {
@@ -507,18 +509,38 @@
}
unsigned int java_lang_String::hash_code(oop java_string) {
- typeArrayOop value = java_lang_String::value(java_string);
- int length = java_lang_String::length(java_string, value);
- // Zero length string will hash to zero with String.hashCode() function.
- if (length == 0) return 0;
-
- bool is_latin1 = java_lang_String::is_latin1(java_string);
-
- if (is_latin1) {
- return java_lang_String::hash_code(value->byte_at_addr(0), length);
+ // The hash and hashIsZero fields are subject to a benign data race,
+ // making it crucial to ensure that any observable result of the
+ // calculation in this method stays correct under any possible read of
+ // these fields. Necessary restrictions to allow this to be correct
+ // without explicit memory fences or similar concurrency primitives is
+ // that we can ever only write to one of these two fields for a given
+ // String instance, and that the computation is idempotent and derived
+ // from immutable state
+ assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized");
+ if (java_lang_String::hash_is_set(java_string)) {
+ return java_string->int_field(hash_offset);
+ }
+
+ typeArrayOop value = java_lang_String::value(java_string);
+ int length = java_lang_String::length(java_string, value);
+ bool is_latin1 = java_lang_String::is_latin1(java_string);
+
+ unsigned int hash = 0;
+ if (length > 0) {
+ if (is_latin1) {
+ hash = java_lang_String::hash_code(value->byte_at_addr(0), length);
+ } else {
+ hash = java_lang_String::hash_code(value->char_at_addr(0), length);
+ }
+ }
+
+ if (hash != 0) {
+ java_string->int_field_put(hash_offset, hash);
} else {
- return java_lang_String::hash_code(value->char_at_addr(0), length);
+ java_string->bool_field_put(hashIsZero_offset, true);
}
+ return hash;
}
char* java_lang_String::as_quoted_ascii(oop java_string) {
--- a/src/hotspot/share/classfile/javaClasses.hpp Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.hpp Wed Apr 10 12:05:50 2019 +0200
@@ -94,6 +94,7 @@
private:
static int value_offset;
static int hash_offset;
+ static int hashIsZero_offset;
static int coder_offset;
static bool initialized;
@@ -132,6 +133,10 @@
assert(initialized && (hash_offset > 0), "Must be initialized");
return hash_offset;
}
+ static int hashIsZero_offset_in_bytes() {
+ assert(initialized && (hashIsZero_offset > 0), "Must be initialized");
+ return hashIsZero_offset;
+ }
static int coder_offset_in_bytes() {
assert(initialized && (coder_offset > 0), "Must be initialized");
return coder_offset;
@@ -139,12 +144,11 @@
static inline void set_value_raw(oop string, typeArrayOop buffer);
static inline void set_value(oop string, typeArrayOop buffer);
- static inline void set_hash(oop string, unsigned int hash);
// Accessors
static inline typeArrayOop value(oop java_string);
static inline typeArrayOop value_no_keepalive(oop java_string);
- static inline unsigned int hash(oop java_string);
+ static inline bool hash_is_set(oop string);
static inline bool is_latin1(oop java_string);
static inline int length(oop java_string);
static inline int length(oop java_string, typeArrayOop string_value);
--- a/src/hotspot/share/classfile/javaClasses.inline.hpp Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.inline.hpp Wed Apr 10 12:05:50 2019 +0200
@@ -45,9 +45,9 @@
string->obj_field_put(value_offset, (oop)buffer);
}
-void java_lang_String::set_hash(oop string, unsigned int hash) {
- assert(initialized && (hash_offset > 0), "Must be initialized");
- string->int_field_put(hash_offset, hash);
+bool java_lang_String::hash_is_set(oop java_string) {
+ assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized");
+ return java_string->int_field(hash_offset) != 0 || java_string->bool_field(hashIsZero_offset) != 0;
}
// Accessors
@@ -71,12 +71,6 @@
return (typeArrayOop) java_string->obj_field_access<AS_NO_KEEPALIVE>(value_offset);
}
-unsigned int java_lang_String::hash(oop java_string) {
- assert(initialized && (hash_offset > 0), "Must be initialized");
- assert(is_instance(java_string), "must be java_string");
- return java_string->int_field(hash_offset);
-}
-
bool java_lang_String::is_latin1(oop java_string) {
assert(initialized && (coder_offset > 0), "Must be initialized");
assert(is_instance(java_string), "must be java_string");
--- a/src/hotspot/share/classfile/stringTable.cpp Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/stringTable.cpp Wed Apr 10 12:05:50 2019 +0200
@@ -761,8 +761,6 @@
return true;
}
unsigned int hash = java_lang_String::hash_code(s);
-
- java_lang_String::set_hash(s, hash);
oop new_s = StringTable::create_archived_string(s, Thread::current());
if (new_s == NULL) {
return true;
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp Wed Apr 10 12:05:50 2019 +0200
@@ -351,19 +351,14 @@
unsigned int hash = 0;
if (use_java_hash()) {
- // Get hash code from cache
- hash = java_lang_String::hash(java_string);
- }
-
- if (hash == 0) {
+ if (!java_lang_String::hash_is_set(java_string)) {
+ stat->inc_hashed();
+ }
+ hash = java_lang_String::hash_code(java_string);
+ } else {
// Compute hash
hash = hash_code(value, latin1);
stat->inc_hashed();
-
- if (use_java_hash() && hash != 0) {
- // Store hash code in cache
- java_lang_String::set_hash(java_string, hash);
- }
}
typeArrayOop existing_value = lookup_or_add(value, latin1, hash);
--- a/src/java.base/share/classes/java/lang/String.java Wed Apr 10 08:51:38 2019 +0200
+++ b/src/java.base/share/classes/java/lang/String.java Wed Apr 10 12:05:50 2019 +0200
@@ -164,6 +164,12 @@
/** Cache the hash code for the string */
private int hash; // Default to 0
+ /**
+ * Cache if the hash has been calculated as actually being zero, enabling
+ * us to avoid recalculating this.
+ */
+ private boolean hashIsZero; // Default to false;
+
/** use serialVersionUID from JDK 1.0.2 for interoperability */
private static final long serialVersionUID = -6849794470754667710L;
@@ -1508,14 +1514,21 @@
* @return a hash code value for this object.
*/
public int hashCode() {
+ // The hash or hashIsZero fields are subject to a benign data race,
+ // making it crucial to ensure that any observable result of the
+ // calculation in this method stays correct under any possible read of
+ // these fields. Necessary restrictions to allow this to be correct
+ // without explicit memory fences or similar concurrency primitives is
+ // that we can ever only write to one of these two fields for a given
+ // String instance, and that the computation is idempotent and derived
+ // from immutable state
int h = hash;
- if (h == 0 && value.length > 0) {
+ if (h == 0 && !hashIsZero) {
h = isLatin1() ? StringLatin1.hashCode(value)
: StringUTF16.hashCode(value);
- // Avoid issuing a store if the calculated value is also zero:
- // in addition to a minor performance benefit, this allows storing
- // Strings with zero hash code in read-only memory.
- if (h != 0) {
+ if (h == 0) {
+ hashIsZero = true;
+ } else {
hash = h;
}
}