--- a/hotspot/src/share/vm/classfile/symbolTable.cpp Mon Jun 11 13:10:14 2012 -0400
+++ b/hotspot/src/share/vm/classfile/symbolTable.cpp Wed Jun 13 19:52:59 2012 -0400
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
#include "classfile/javaClasses.hpp"
#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
@@ -34,12 +35,15 @@
#include "oops/oop.inline2.hpp"
#include "runtime/mutexLocker.hpp"
#include "utilities/hashtable.inline.hpp"
+#include "utilities/numberSeq.hpp"
// --------------------------------------------------------------------------
SymbolTable* SymbolTable::_the_table = NULL;
// Static arena for symbols that are not deallocated
Arena* SymbolTable::_arena = NULL;
+bool SymbolTable::_needs_rehashing = false;
+jint SymbolTable::_seed = 0;
Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
// Don't allow symbols to be created which cannot fit in a Symbol*.
@@ -121,12 +125,41 @@
}
}
+unsigned int SymbolTable::new_hash(Symbol* sym) {
+ ResourceMark rm;
+ // Use alternate hashing algorithm on this symbol.
+ return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings. Set flag to use the alternate hash code afterwards.
+void SymbolTable::rehash_table() {
+ assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+ assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
+ // Create a new symbol table
+ SymbolTable* new_table = new SymbolTable();
+
+ // Initialize the global seed for hashing.
+ _seed = AltHashing::compute_seed();
+ assert(seed() != 0, "shouldn't be zero");
+
+ the_table()->move_to(new_table);
+
+ // Delete the table and buckets (entries are reused in new table).
+ delete _the_table;
+ // Don't check if we need rehashing until the table gets unbalanced again.
+ // Then rehash with a new global seed.
+ _needs_rehashing = false;
+ _the_table = new_table;
+}
// Lookup a symbol in a bucket.
Symbol* SymbolTable::lookup(int index, const char* name,
int len, unsigned int hash) {
+ int count = 0;
for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
+ count++; // count all entries in this bucket, not just ones with same hash
if (e->hash() == hash) {
Symbol* sym = e->literal();
if (sym->equals(name, len)) {
@@ -136,9 +169,21 @@
}
}
}
+ // If the bucket size is too deep check if this hash code is insufficient.
+ if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+ _needs_rehashing = check_rehash_table(count);
+ }
return NULL;
}
+// Pick hashing algorithm, but return value already given if not using a new
+// hash algorithm.
+unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) {
+ return use_alternate_hashcode() ?
+ AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
+ (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
+}
+
// We take care not to be blocking while holding the
// SymbolTable_lock. Otherwise, the system might deadlock, since the
@@ -287,13 +332,17 @@
}
Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
- unsigned int hashValue, bool c_heap, TRAPS) {
+ unsigned int hashValue_arg, bool c_heap, TRAPS) {
assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
"proposed name of symbol must be stable");
// Grab SymbolTable_lock first.
MutexLocker ml(SymbolTable_lock, THREAD);
+ // Check if the symbol table has been rehashed, if so, need to recalculate
+ // the hash value.
+ unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg);
+
// Since look-up was done lock-free, we need to check if another
// thread beat us in the race to insert the symbol.
Symbol* test = lookup(index, (char*)name, len, hashValue);
@@ -332,10 +381,13 @@
MutexLocker ml(SymbolTable_lock, THREAD);
for (int i=0; i<names_count; i++) {
+ // Check if the symbol table has been rehashed, if so, need to recalculate
+ // the hash value.
+ unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]);
// Since look-up was done lock-free, we need to check if another
// thread beat us in the race to insert the symbol.
- int index = hash_to_index(hashValues[i]);
- Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
+ int index = hash_to_index(hashValue);
+ Symbol* test = lookup(index, names[i], lengths[i], hashValue);
if (test != NULL) {
// A race occurred and another thread introduced the symbol, this one
// will be dropped and collected. Use test instead.
@@ -347,7 +399,7 @@
bool c_heap = class_loader() != NULL;
Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
- HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
+ HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
add_entry(index, entry);
cp->symbol_at_put(cp_indices[i], sym);
}
@@ -370,6 +422,24 @@
}
}
+void SymbolTable::dump(outputStream* st) {
+ NumberSeq summary;
+ for (int i = 0; i < the_table()->table_size(); ++i) {
+ int count = 0;
+ for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
+ e != NULL; e = e->next()) {
+ count++;
+ }
+ summary.add((double)count);
+ }
+ st->print_cr("SymbolTable statistics:");
+ st->print_cr("Number of buckets : %7d", summary.num());
+ st->print_cr("Average bucket size : %7.0f", summary.avg());
+ st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+ st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+ st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
+}
+
//---------------------------------------------------------------------------
// Non-product code
@@ -468,7 +538,6 @@
}
}
}
-
#endif // PRODUCT
// --------------------------------------------------------------------------
@@ -514,21 +583,36 @@
// --------------------------------------------------------------------------
StringTable* StringTable::_the_table = NULL;
+bool StringTable::_needs_rehashing = false;
+jint StringTable::_seed = 0;
+
+// Pick hashing algorithm
+unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) {
+ return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
+ (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
+}
+
oop StringTable::lookup(int index, jchar* name,
int len, unsigned int hash) {
+ int count = 0;
for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
+ count++;
if (l->hash() == hash) {
if (java_lang_String::equals(l->literal(), name, len)) {
return l->literal();
}
}
}
+ // If the bucket size is too deep check if this hash code is insufficient.
+ if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+ _needs_rehashing = check_rehash_table(count);
+ }
return NULL;
}
oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
- int len, unsigned int hashValue, TRAPS) {
+ int len, unsigned int hashValue_arg, TRAPS) {
debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
"proposed name of symbol must be stable");
@@ -547,6 +631,10 @@
assert(java_lang_String::equals(string(), name, len),
"string must be properly initialized");
+ // Check if the symbol table has been rehashed, if so, need to recalculate
+ // the hash value before second lookup.
+ unsigned int hashValue = hash_string(name, len, hashValue_arg);
+
// Since look-up was done lock-free, we need to check if another
// thread beat us in the race to insert the symbol.
@@ -566,7 +654,7 @@
ResourceMark rm;
int length;
jchar* chars = symbol->as_unicode(length);
- unsigned int hashValue = java_lang_String::hash_string(chars, length);
+ unsigned int hashValue = hash_string(chars, length);
int index = the_table()->hash_to_index(hashValue);
return the_table()->lookup(index, chars, length, hashValue);
}
@@ -574,7 +662,7 @@
oop StringTable::intern(Handle string_or_null, jchar* name,
int len, TRAPS) {
- unsigned int hashValue = java_lang_String::hash_string(name, len);
+ unsigned int hashValue = hash_string(name, len);
int index = the_table()->hash_to_index(hashValue);
oop string = the_table()->lookup(index, name, len, hashValue);
@@ -675,3 +763,52 @@
}
}
}
+
+void StringTable::dump(outputStream* st) {
+ NumberSeq summary;
+ for (int i = 0; i < the_table()->table_size(); ++i) {
+ HashtableEntry<oop>* p = the_table()->bucket(i);
+ int count = 0;
+ for ( ; p != NULL; p = p->next()) {
+ count++;
+ }
+ summary.add((double)count);
+ }
+ st->print_cr("StringTable statistics:");
+ st->print_cr("Number of buckets : %7d", summary.num());
+ st->print_cr("Average bucket size : %7.0f", summary.avg());
+ st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+ st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+ st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
+}
+
+
+unsigned int StringTable::new_hash(oop string) {
+ ResourceMark rm;
+ int length;
+ jchar* chars = java_lang_String::as_unicode_string(string, length);
+ // Use alternate hashing algorithm on the string
+ return AltHashing::murmur3_32(seed(), chars, length);
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings. Set flag to use the alternate hash code afterwards.
+void StringTable::rehash_table() {
+ assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+ assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
+ StringTable* new_table = new StringTable();
+
+ // Initialize new global seed for hashing.
+ _seed = AltHashing::compute_seed();
+ assert(seed() != 0, "shouldn't be zero");
+
+ // Rehash the table
+ the_table()->move_to(new_table);
+
+ // Delete the table and buckets (entries are reused in new table).
+ delete _the_table;
+ // Don't check if we need rehashing until the table gets unbalanced again.
+ // Then rehash with a new global seed.
+ _needs_rehashing = false;
+ _the_table = new_table;
+}