src/hotspot/share/classfile/compactHashtable.hpp
changeset 47216 71c04702a3d5
parent 46742 24ec8a039c90
child 49364 601146c66cad
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/classfile/compactHashtable.hpp	Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
+#define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
+
+#include "oops/array.hpp"
+#include "oops/symbol.hpp"
+#include "utilities/hashtable.hpp"
+
+template <class T, class N> class CompactHashtable;
+class NumberSeq;
+class SimpleCompactHashtable;
+class SerializeClosure;
+
+// Stats for symbol tables in the CDS archive
+class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
+public:
+  int hashentry_count;
+  int hashentry_bytes;
+  int bucket_count;
+  int bucket_bytes;
+};
+
+/////////////////////////////////////////////////////////////////////////
+//
+// The compact hash table writer. Used at dump time for writing out
+// the compact table to the shared archive.
+//
+// At dump time, the CompactHashtableWriter obtains all entries from the
+// symbol/string table and adds them to a new temporary hash table. The hash
+// table size (number of buckets) is calculated using
+// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
+// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
+// 4 is chosen because it produces smaller sized bucket on average for
+// faster lookup. It also has relatively small number of empty buckets and
+// good distribution of the entries.
+//
+// We use a simple hash function (hash % num_bucket) for the table.
+// The new table is compacted when written out. Please see comments
+// above the CompactHashtable class for the table layout detail. The bucket
+// offsets are written to the archive as part of the compact table. The
+// bucket offset is encoded in the low 30-bit (0-29) and the bucket type
+// (regular or compact) are encoded in bit[31, 30]. For buckets with more
+// than one entry, both hash and entry offset are written to the
+// table. For buckets with only one entry, only the entry offset is written
+// to the table and the buckets are tagged as compact in their type bits.
+// Buckets without entry are skipped from the table. Their offsets are
+// still written out for faster lookup.
+//
+class CompactHashtableWriter: public StackObj {
+public:
+  class Entry VALUE_OBJ_CLASS_SPEC {
+    unsigned int _hash;
+    u4 _value;
+
+  public:
+    Entry() {}
+    Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
+
+    u4 value() {
+      return _value;
+    }
+    unsigned int hash() {
+      return _hash;
+    }
+
+    bool operator==(const CompactHashtableWriter::Entry& other) {
+      return (_value == other._value && _hash == other._hash);
+    }
+  }; // class CompactHashtableWriter::Entry
+
+private:
+  int _num_entries;
+  int _num_buckets;
+  int _num_empty_buckets;
+  int _num_value_only_buckets;
+  int _num_other_buckets;
+  GrowableArray<Entry>** _buckets;
+  CompactHashtableStats* _stats;
+  Array<u4>* _compact_buckets;
+  Array<u4>* _compact_entries;
+
+public:
+  // This is called at dump-time only
+  CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
+  ~CompactHashtableWriter();
+
+  void add(unsigned int hash, u4 value);
+  void add(u4 value) {
+    add((unsigned int)value, value);
+  }
+
+private:
+  void allocate_table();
+  void dump_table(NumberSeq* summary);
+
+public:
+  void dump(SimpleCompactHashtable *cht, const char* table_name);
+  const char* table_name();
+};
+
+class CompactSymbolTableWriter: public CompactHashtableWriter {
+public:
+  CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) :
+    CompactHashtableWriter(num_buckets, stats) {}
+  void add(unsigned int hash, Symbol *symbol);
+  void dump(CompactHashtable<Symbol*, char> *cht);
+};
+
+class CompactStringTableWriter: public CompactHashtableWriter {
+public:
+  CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) :
+    CompactHashtableWriter(num_entries, stats) {}
+  void add(unsigned int hash, oop string);
+  void dump(CompactHashtable<oop, char> *cht);
+};
+
+#define REGULAR_BUCKET_TYPE       0
+#define VALUE_ONLY_BUCKET_TYPE    1
+#define TABLEEND_BUCKET_TYPE      3
+#define BUCKET_OFFSET_MASK        0x3FFFFFFF
+#define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
+#define BUCKET_TYPE_SHIFT         30
+#define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
+#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// CompactHashtable is used to stored the CDS archive's symbol/string table. Used
+// at runtime only to access the compact table from the archive.
+//
+// Because these tables are read-only (no entries can be added/deleted) at run-time
+// and tend to have large number of entries, we try to minimize the footprint
+// cost per entry.
+//
+// The CompactHashtable is split into two arrays
+//
+//   u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
+//   u4 entries[<variable size>]
+//
+// The size of buckets[] is 'num_buckets + 1'. Each entry of
+// buckets[] is a 32-bit encoding of the bucket type and bucket offset,
+// with the type in the left-most 2-bit and offset in the remaining 30-bit.
+// The last entry is a special type. It contains the end of the last
+// bucket.
+//
+// There are two types of buckets, regular buckets and value_only buckets. The
+// value_only buckets have '01' in their highest 2-bit, and regular buckets have
+// '00' in their highest 2-bit.
+//
+// For normal buckets, each entry is 8 bytes in the entries[]:
+//   u4 hash;    /* symbol/string hash */
+//   union {
+//     u4 offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
+//     narrowOop str; /* String narrowOop encoding */
+//   }
+//
+//
+// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
+//
+// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
+//            is skipped.
+// buckets[0, 4, 5, ....]
+//         |  |  |
+//         |  |  +---+
+//         |  |      |
+//         |  +----+ |
+//         v       v v
+// entries[H,O,H,O,O,H,O,H,O.....]
+//
+// See CompactHashtable::lookup() for how the table is searched at runtime.
+// See CompactHashtableWriter::dump() for how the table is written at CDS
+// dump time.
+//
+class SimpleCompactHashtable VALUE_OBJ_CLASS_SPEC {
+protected:
+  address  _base_address;
+  u4  _bucket_count;
+  u4  _entry_count;
+  u4* _buckets;
+  u4* _entries;
+
+public:
+  SimpleCompactHashtable() {
+    _entry_count = 0;
+    _bucket_count = 0;
+    _buckets = 0;
+    _entries = 0;
+  }
+
+  void reset() {
+    _bucket_count = 0;
+    _entry_count = 0;
+    _buckets = 0;
+    _entries = 0;
+  }
+
+  void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
+    _base_address = base_address;
+    _bucket_count = bucket_count;
+    _entry_count = entry_count;
+    _buckets = buckets;
+    _entries = entries;
+  }
+
+  template <class I> inline void iterate(const I& iterator);
+
+  bool exists(u4 value);
+
+  // For reading from/writing to the CDS archive
+  void serialize(SerializeClosure* soc);
+};
+
+template <class T, class N> class CompactHashtable : public SimpleCompactHashtable {
+  friend class VMStructs;
+
+public:
+  enum CompactHashtableType {
+    _symbol_table = 0,
+    _string_table = 1
+  };
+
+private:
+  u4 _type;
+
+  inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t,
+                              u4 offset, const char* name, int len);
+
+  inline oop decode_entry(CompactHashtable<oop, char>* const t,
+                          u4 offset, const char* name, int len);
+public:
+  CompactHashtable() : SimpleCompactHashtable() {}
+
+  void set_type(CompactHashtableType type) {
+    _type = (u4)type;
+  }
+
+  // Lookup an entry from the compact table
+  inline T lookup(const N* name, unsigned int hash, int len);
+
+  // iterate over symbols
+  void symbols_do(SymbolClosure *cl);
+
+  // iterate over strings
+  void oops_do(OopClosure* f);
+
+  // For reading from/writing to the CDS archive
+  void serialize(SerializeClosure* soc);
+
+  uintx base_address() {
+    return (uintx) _base_address;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// Read/Write the contents of a hashtable textual dump (created by
+// SymbolTable::dump and StringTable::dump).
+// Because the dump file may be big (hundred of MB in extreme cases),
+// we use mmap for fast access when reading it.
+//
+class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
+  int _fd;
+  const char* _base;
+  const char* _p;
+  const char* _end;
+  const char* _filename;
+  size_t      _size;
+  int         _prefix_type;
+  int         _line_no;
+public:
+  HashtableTextDump(const char* filename);
+  ~HashtableTextDump();
+
+  enum {
+    SymbolPrefix = 1 << 0,
+    StringPrefix = 1 << 1,
+    Unknown = 1 << 2
+  };
+
+  void quit(const char* err, const char* msg);
+
+  inline int remain() {
+    return (int)(_end - _p);
+  }
+
+  void corrupted(const char *p, const char *msg);
+
+  inline void corrupted_if(bool cond, const char *msg) {
+    if (cond) {
+      corrupted(_p, msg);
+    }
+  }
+
+  bool skip_newline();
+  int skip(char must_be_char);
+  void skip_past(char c);
+  void check_version(const char* ver);
+
+  inline void get_num(char delim, int *num) {
+    const char* p   = _p;
+    const char* end = _end;
+    u8 n = 0;
+
+    while (p < end) {
+      char c = *p++;
+      if ('0' <= c && c <= '9') {
+        n = n * 10 + (c - '0');
+        if (n > (u8)INT_MAX) {
+          corrupted(_p, "Num overflow");
+        }
+      } else if (c == delim) {
+        _p = p;
+        *num = (int)n;
+        return;
+      } else {
+        // Not [0-9], not 'delim'
+        corrupted(_p, "Unrecognized format");;
+      }
+    }
+
+    corrupted(_end, "Incorrect format");
+    ShouldNotReachHere();
+  }
+
+  void scan_prefix_type();
+  int scan_prefix(int* utf8_length);
+  int scan_string_prefix();
+  int scan_symbol_prefix();
+
+  jchar unescape(const char* from, const char* end, int count);
+  void get_utf8(char* utf8_buffer, int utf8_length);
+  static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
+};
+
+#endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP