8221836: Avoid recalculating String.hash when zero
authorredestad
Wed, 10 Apr 2019 12:05:50 +0200
changeset 54486 7fd299216e97
parent 54485 ddc19ea5059c
child 54487 4fa1fd8bc21e
8221836: Avoid recalculating String.hash when zero Reviewed-by: jrose, adinn Contributed-by: peter.levart@gmail.com, claes.redestad@oracle.com
src/hotspot/share/classfile/javaClasses.cpp
src/hotspot/share/classfile/javaClasses.hpp
src/hotspot/share/classfile/javaClasses.inline.hpp
src/hotspot/share/classfile/stringTable.cpp
src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp
src/java.base/share/classes/java/lang/String.java
--- a/src/hotspot/share/classfile/javaClasses.cpp	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Wed Apr 10 12:05:50 2019 +0200
@@ -160,6 +160,7 @@
 
 int java_lang_String::value_offset  = 0;
 int java_lang_String::hash_offset   = 0;
+int java_lang_String::hashIsZero_offset = 0;
 int java_lang_String::coder_offset  = 0;
 
 bool java_lang_String::initialized  = false;
@@ -179,7 +180,8 @@
 #define STRING_FIELDS_DO(macro) \
   macro(value_offset, k, vmSymbols::value_name(), byte_array_signature, false); \
   macro(hash_offset,  k, "hash",                  int_signature,        false); \
-  macro(coder_offset, k, "coder",                 byte_signature,       false)
+  macro(hashIsZero_offset, k, "hashIsZero",       bool_signature,       false); \
+  macro(coder_offset, k, "coder",                 byte_signature,       false);
 
 void java_lang_String::compute_offsets() {
   if (initialized) {
@@ -507,18 +509,38 @@
 }
 
 unsigned int java_lang_String::hash_code(oop java_string) {
-  typeArrayOop value  = java_lang_String::value(java_string);
-  int          length = java_lang_String::length(java_string, value);
-  // Zero length string will hash to zero with String.hashCode() function.
-  if (length == 0) return 0;
-
-  bool      is_latin1 = java_lang_String::is_latin1(java_string);
-
-  if (is_latin1) {
-    return java_lang_String::hash_code(value->byte_at_addr(0), length);
+  // The hash and hashIsZero fields are subject to a benign data race,
+  // making it crucial to ensure that any observable result of the
+  // calculation in this method stays correct under any possible read of
+  // these fields. Necessary restrictions to allow this to be correct
+  // without explicit memory fences or similar concurrency primitives is
+  // that we can ever only write to one of these two fields for a given
+  // String instance, and that the computation is idempotent and derived
+  // from immutable state
+  assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized");
+  if (java_lang_String::hash_is_set(java_string)) {
+    return java_string->int_field(hash_offset);
+  }
+
+  typeArrayOop value = java_lang_String::value(java_string);
+  int         length = java_lang_String::length(java_string, value);
+  bool     is_latin1 = java_lang_String::is_latin1(java_string);
+
+  unsigned int hash = 0;
+  if (length > 0) {
+    if (is_latin1) {
+      hash = java_lang_String::hash_code(value->byte_at_addr(0), length);
+    } else {
+      hash = java_lang_String::hash_code(value->char_at_addr(0), length);
+    }
+  }
+
+  if (hash != 0) {
+    java_string->int_field_put(hash_offset, hash);
   } else {
-    return java_lang_String::hash_code(value->char_at_addr(0), length);
+    java_string->bool_field_put(hashIsZero_offset, true);
   }
+  return hash;
 }
 
 char* java_lang_String::as_quoted_ascii(oop java_string) {
--- a/src/hotspot/share/classfile/javaClasses.hpp	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Wed Apr 10 12:05:50 2019 +0200
@@ -94,6 +94,7 @@
  private:
   static int value_offset;
   static int hash_offset;
+  static int hashIsZero_offset;
   static int coder_offset;
 
   static bool initialized;
@@ -132,6 +133,10 @@
     assert(initialized && (hash_offset > 0), "Must be initialized");
     return hash_offset;
   }
+  static int hashIsZero_offset_in_bytes()   {
+    assert(initialized && (hashIsZero_offset > 0), "Must be initialized");
+    return hashIsZero_offset;
+  }
   static int coder_offset_in_bytes()   {
     assert(initialized && (coder_offset > 0), "Must be initialized");
     return coder_offset;
@@ -139,12 +144,11 @@
 
   static inline void set_value_raw(oop string, typeArrayOop buffer);
   static inline void set_value(oop string, typeArrayOop buffer);
-  static inline void set_hash(oop string, unsigned int hash);
 
   // Accessors
   static inline typeArrayOop value(oop java_string);
   static inline typeArrayOop value_no_keepalive(oop java_string);
-  static inline unsigned int hash(oop java_string);
+  static inline bool hash_is_set(oop string);
   static inline bool is_latin1(oop java_string);
   static inline int length(oop java_string);
   static inline int length(oop java_string, typeArrayOop string_value);
--- a/src/hotspot/share/classfile/javaClasses.inline.hpp	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/javaClasses.inline.hpp	Wed Apr 10 12:05:50 2019 +0200
@@ -45,9 +45,9 @@
   string->obj_field_put(value_offset, (oop)buffer);
 }
 
-void java_lang_String::set_hash(oop string, unsigned int hash) {
-  assert(initialized && (hash_offset > 0), "Must be initialized");
-  string->int_field_put(hash_offset, hash);
+bool java_lang_String::hash_is_set(oop java_string) {
+  assert(initialized && (hash_offset > 0) && (hashIsZero_offset > 0), "Must be initialized");
+  return java_string->int_field(hash_offset) != 0 || java_string->bool_field(hashIsZero_offset) != 0;
 }
 
 // Accessors
@@ -71,12 +71,6 @@
   return (typeArrayOop) java_string->obj_field_access<AS_NO_KEEPALIVE>(value_offset);
 }
 
-unsigned int java_lang_String::hash(oop java_string) {
-  assert(initialized && (hash_offset > 0), "Must be initialized");
-  assert(is_instance(java_string), "must be java_string");
-  return java_string->int_field(hash_offset);
-}
-
 bool java_lang_String::is_latin1(oop java_string) {
   assert(initialized && (coder_offset > 0), "Must be initialized");
   assert(is_instance(java_string), "must be java_string");
--- a/src/hotspot/share/classfile/stringTable.cpp	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/classfile/stringTable.cpp	Wed Apr 10 12:05:50 2019 +0200
@@ -761,8 +761,6 @@
       return true;
     }
     unsigned int hash = java_lang_String::hash_code(s);
-
-    java_lang_String::set_hash(s, hash);
     oop new_s = StringTable::create_archived_string(s, Thread::current());
     if (new_s == NULL) {
       return true;
--- a/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/hotspot/share/gc/shared/stringdedup/stringDedupTable.cpp	Wed Apr 10 12:05:50 2019 +0200
@@ -351,19 +351,14 @@
   unsigned int hash = 0;
 
   if (use_java_hash()) {
-    // Get hash code from cache
-    hash = java_lang_String::hash(java_string);
-  }
-
-  if (hash == 0) {
+    if (!java_lang_String::hash_is_set(java_string)) {
+      stat->inc_hashed();
+    }
+    hash = java_lang_String::hash_code(java_string);
+  } else {
     // Compute hash
     hash = hash_code(value, latin1);
     stat->inc_hashed();
-
-    if (use_java_hash() && hash != 0) {
-      // Store hash code in cache
-      java_lang_String::set_hash(java_string, hash);
-    }
   }
 
   typeArrayOop existing_value = lookup_or_add(value, latin1, hash);
--- a/src/java.base/share/classes/java/lang/String.java	Wed Apr 10 08:51:38 2019 +0200
+++ b/src/java.base/share/classes/java/lang/String.java	Wed Apr 10 12:05:50 2019 +0200
@@ -164,6 +164,12 @@
     /** Cache the hash code for the string */
     private int hash; // Default to 0
 
+    /**
+     * Cache if the hash has been calculated as actually being zero, enabling
+     * us to avoid recalculating this.
+     */
+    private boolean hashIsZero; // Default to false;
+
     /** use serialVersionUID from JDK 1.0.2 for interoperability */
     private static final long serialVersionUID = -6849794470754667710L;
 
@@ -1508,14 +1514,21 @@
      * @return  a hash code value for this object.
      */
     public int hashCode() {
+        // The hash or hashIsZero fields are subject to a benign data race,
+        // making it crucial to ensure that any observable result of the
+        // calculation in this method stays correct under any possible read of
+        // these fields. Necessary restrictions to allow this to be correct
+        // without explicit memory fences or similar concurrency primitives is
+        // that we can ever only write to one of these two fields for a given
+        // String instance, and that the computation is idempotent and derived
+        // from immutable state
         int h = hash;
-        if (h == 0 && value.length > 0) {
+        if (h == 0 && !hashIsZero) {
             h = isLatin1() ? StringLatin1.hashCode(value)
                            : StringUTF16.hashCode(value);
-            // Avoid issuing a store if the calculated value is also zero:
-            // in addition to a minor performance benefit, this allows storing
-            // Strings with zero hash code in read-only memory.
-            if (h != 0) {
+            if (h == 0) {
+                hashIsZero = true;
+            } else {
                 hash = h;
             }
         }