hotspot/src/share/vm/classfile/symbolTable.cpp
changeset 13087 673ea6efaf18
parent 12263 d20640f4f8fe
child 13097 c146b608d91f
--- a/hotspot/src/share/vm/classfile/symbolTable.cpp	Mon Jun 11 13:10:14 2012 -0400
+++ b/hotspot/src/share/vm/classfile/symbolTable.cpp	Wed Jun 13 19:52:59 2012 -0400
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
@@ -34,12 +35,15 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
+#include "utilities/numberSeq.hpp"
 
 // --------------------------------------------------------------------------
 
 SymbolTable* SymbolTable::_the_table = NULL;
 // Static arena for symbols that are not deallocated
 Arena* SymbolTable::_arena = NULL;
+bool SymbolTable::_needs_rehashing = false;
+jint SymbolTable::_seed = 0;
 
 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
   // Don't allow symbols to be created which cannot fit in a Symbol*.
@@ -121,12 +125,41 @@
   }
 }
 
+unsigned int SymbolTable::new_hash(Symbol* sym) {
+  ResourceMark rm;
+  // Use alternate hashing algorithm on this symbol.
+  return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings.   Set flag to use the alternate hash code afterwards.
+void SymbolTable::rehash_table() {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
+  // Create a new symbol table
+  SymbolTable* new_table = new SymbolTable();
+
+  // Initialize the global seed for hashing.
+  _seed = AltHashing::compute_seed();
+  assert(seed() != 0, "shouldn't be zero");
+
+  the_table()->move_to(new_table);
+
+  // Delete the table and buckets (entries are reused in new table).
+  delete _the_table;
+  // Don't check if we need rehashing until the table gets unbalanced again.
+  // Then rehash with a new global seed.
+  _needs_rehashing = false;
+  _the_table = new_table;
+}
 
 // Lookup a symbol in a bucket.
 
 Symbol* SymbolTable::lookup(int index, const char* name,
                               int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
+    count++;  // count all entries in this bucket, not just ones with same hash
     if (e->hash() == hash) {
       Symbol* sym = e->literal();
       if (sym->equals(name, len)) {
@@ -136,9 +169,21 @@
       }
     }
   }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
+// Pick hashing algorithm, but return value already given if not using a new
+// hash algorithm.
+unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) {
+  return use_alternate_hashcode() ?
+           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
+           (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
+}
+
 
 // We take care not to be blocking while holding the
 // SymbolTable_lock. Otherwise, the system might deadlock, since the
@@ -287,13 +332,17 @@
 }
 
 Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
-                               unsigned int hashValue, bool c_heap, TRAPS) {
+                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
          "proposed name of symbol must be stable");
 
   // Grab SymbolTable_lock first.
   MutexLocker ml(SymbolTable_lock, THREAD);
 
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value.
+  unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg);
+
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
   Symbol* test = lookup(index, (char*)name, len, hashValue);
@@ -332,10 +381,13 @@
   MutexLocker ml(SymbolTable_lock, THREAD);
 
   for (int i=0; i<names_count; i++) {
+    // Check if the symbol table has been rehashed, if so, need to recalculate
+    // the hash value.
+    unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]);
     // Since look-up was done lock-free, we need to check if another
     // thread beat us in the race to insert the symbol.
-    int index = hash_to_index(hashValues[i]);
-    Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
+    int index = hash_to_index(hashValue);
+    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
     if (test != NULL) {
       // A race occurred and another thread introduced the symbol, this one
       // will be dropped and collected. Use test instead.
@@ -347,7 +399,7 @@
       bool c_heap = class_loader() != NULL;
       Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
       assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
-      HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
+      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
       add_entry(index, entry);
       cp->symbol_at_put(cp_indices[i], sym);
     }
@@ -370,6 +422,24 @@
   }
 }
 
+void SymbolTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    int count = 0;
+    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
+       e != NULL; e = e->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("SymbolTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
 
 //---------------------------------------------------------------------------
 // Non-product code
@@ -468,7 +538,6 @@
     }
   }
 }
-
 #endif // PRODUCT
 
 // --------------------------------------------------------------------------
@@ -514,21 +583,36 @@
 // --------------------------------------------------------------------------
 StringTable* StringTable::_the_table = NULL;
 
+bool StringTable::_needs_rehashing = false;
+jint StringTable::_seed = 0;
+
+// Pick hashing algorithm
+unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) {
+  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
+            (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
+}
+
 oop StringTable::lookup(int index, jchar* name,
                         int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
+    count++;
     if (l->hash() == hash) {
       if (java_lang_String::equals(l->literal(), name, len)) {
         return l->literal();
       }
     }
   }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
 
 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
-                           int len, unsigned int hashValue, TRAPS) {
+                           int len, unsigned int hashValue_arg, TRAPS) {
   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
          "proposed name of symbol must be stable");
@@ -547,6 +631,10 @@
   assert(java_lang_String::equals(string(), name, len),
          "string must be properly initialized");
 
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value before second lookup.
+  unsigned int hashValue = hash_string(name, len, hashValue_arg);
+
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
 
@@ -566,7 +654,7 @@
   ResourceMark rm;
   int length;
   jchar* chars = symbol->as_unicode(length);
-  unsigned int hashValue = java_lang_String::hash_string(chars, length);
+  unsigned int hashValue = hash_string(chars, length);
   int index = the_table()->hash_to_index(hashValue);
   return the_table()->lookup(index, chars, length, hashValue);
 }
@@ -574,7 +662,7 @@
 
 oop StringTable::intern(Handle string_or_null, jchar* name,
                         int len, TRAPS) {
-  unsigned int hashValue = java_lang_String::hash_string(name, len);
+  unsigned int hashValue = hash_string(name, len);
   int index = the_table()->hash_to_index(hashValue);
   oop string = the_table()->lookup(index, name, len, hashValue);
 
@@ -675,3 +763,52 @@
     }
   }
 }
+
+void StringTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    HashtableEntry<oop>* p = the_table()->bucket(i);
+    int count = 0;
+    for ( ; p != NULL; p = p->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("StringTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
+
+unsigned int StringTable::new_hash(oop string) {
+  ResourceMark rm;
+  int length;
+  jchar* chars = java_lang_String::as_unicode_string(string, length);
+  // Use alternate hashing algorithm on the string
+  return AltHashing::murmur3_32(seed(), chars, length);
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings.   Set flag to use the alternate hash code afterwards.
+void StringTable::rehash_table() {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
+  StringTable* new_table = new StringTable();
+
+  // Initialize new global seed for hashing.
+  _seed = AltHashing::compute_seed();
+  assert(seed() != 0, "shouldn't be zero");
+
+  // Rehash the table
+  the_table()->move_to(new_table);
+
+  // Delete the table and buckets (entries are reused in new table).
+  delete _the_table;
+  // Don't check if we need rehashing until the table gets unbalanced again.
+  // Then rehash with a new global seed.
+  _needs_rehashing = false;
+  _the_table = new_table;
+}