--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/classfile/compactHashtable.hpp Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
+#define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
+
+#include "oops/array.hpp"
+#include "oops/symbol.hpp"
+#include "utilities/hashtable.hpp"
+
+template <class T, class N> class CompactHashtable;
+class NumberSeq;
+class SimpleCompactHashtable;
+class SerializeClosure;
+
+// Stats for symbol tables in the CDS archive
+class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
+public:
+ int hashentry_count;
+ int hashentry_bytes;
+ int bucket_count;
+ int bucket_bytes;
+};
+
+/////////////////////////////////////////////////////////////////////////
+//
+// The compact hash table writer. Used at dump time for writing out
+// the compact table to the shared archive.
+//
+// At dump time, the CompactHashtableWriter obtains all entries from the
+// symbol/string table and adds them to a new temporary hash table. The hash
+// table size (number of buckets) is calculated using
+// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
+// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
+// 4 is chosen because it produces smaller sized bucket on average for
+// faster lookup. It also has relatively small number of empty buckets and
+// good distribution of the entries.
+//
+// We use a simple hash function (hash % num_bucket) for the table.
+// The new table is compacted when written out. Please see comments
+// above the CompactHashtable class for the table layout detail. The bucket
+// offsets are written to the archive as part of the compact table. The
+// bucket offset is encoded in the low 30-bit (0-29) and the bucket type
+// (regular or compact) are encoded in bit[31, 30]. For buckets with more
+// than one entry, both hash and entry offset are written to the
+// table. For buckets with only one entry, only the entry offset is written
+// to the table and the buckets are tagged as compact in their type bits.
+// Buckets without entry are skipped from the table. Their offsets are
+// still written out for faster lookup.
+//
+class CompactHashtableWriter: public StackObj {
+public:
+ class Entry VALUE_OBJ_CLASS_SPEC {
+ unsigned int _hash;
+ u4 _value;
+
+ public:
+ Entry() {}
+ Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
+
+ u4 value() {
+ return _value;
+ }
+ unsigned int hash() {
+ return _hash;
+ }
+
+ bool operator==(const CompactHashtableWriter::Entry& other) {
+ return (_value == other._value && _hash == other._hash);
+ }
+ }; // class CompactHashtableWriter::Entry
+
+private:
+ int _num_entries;
+ int _num_buckets;
+ int _num_empty_buckets;
+ int _num_value_only_buckets;
+ int _num_other_buckets;
+ GrowableArray<Entry>** _buckets;
+ CompactHashtableStats* _stats;
+ Array<u4>* _compact_buckets;
+ Array<u4>* _compact_entries;
+
+public:
+ // This is called at dump-time only
+ CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
+ ~CompactHashtableWriter();
+
+ void add(unsigned int hash, u4 value);
+ void add(u4 value) {
+ add((unsigned int)value, value);
+ }
+
+private:
+ void allocate_table();
+ void dump_table(NumberSeq* summary);
+
+public:
+ void dump(SimpleCompactHashtable *cht, const char* table_name);
+ const char* table_name();
+};
+
+class CompactSymbolTableWriter: public CompactHashtableWriter {
+public:
+ CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) :
+ CompactHashtableWriter(num_buckets, stats) {}
+ void add(unsigned int hash, Symbol *symbol);
+ void dump(CompactHashtable<Symbol*, char> *cht);
+};
+
+class CompactStringTableWriter: public CompactHashtableWriter {
+public:
+ CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) :
+ CompactHashtableWriter(num_entries, stats) {}
+ void add(unsigned int hash, oop string);
+ void dump(CompactHashtable<oop, char> *cht);
+};
+
+#define REGULAR_BUCKET_TYPE 0
+#define VALUE_ONLY_BUCKET_TYPE 1
+#define TABLEEND_BUCKET_TYPE 3
+#define BUCKET_OFFSET_MASK 0x3FFFFFFF
+#define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK)
+#define BUCKET_TYPE_SHIFT 30
+#define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
+#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// CompactHashtable is used to stored the CDS archive's symbol/string table. Used
+// at runtime only to access the compact table from the archive.
+//
+// Because these tables are read-only (no entries can be added/deleted) at run-time
+// and tend to have large number of entries, we try to minimize the footprint
+// cost per entry.
+//
+// The CompactHashtable is split into two arrays
+//
+// u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
+// u4 entries[<variable size>]
+//
+// The size of buckets[] is 'num_buckets + 1'. Each entry of
+// buckets[] is a 32-bit encoding of the bucket type and bucket offset,
+// with the type in the left-most 2-bit and offset in the remaining 30-bit.
+// The last entry is a special type. It contains the end of the last
+// bucket.
+//
+// There are two types of buckets, regular buckets and value_only buckets. The
+// value_only buckets have '01' in their highest 2-bit, and regular buckets have
+// '00' in their highest 2-bit.
+//
+// For normal buckets, each entry is 8 bytes in the entries[]:
+// u4 hash; /* symbol/string hash */
+// union {
+// u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */
+// narrowOop str; /* String narrowOop encoding */
+// }
+//
+//
+// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
+//
+// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
+// is skipped.
+// buckets[0, 4, 5, ....]
+// | | |
+// | | +---+
+// | | |
+// | +----+ |
+// v v v
+// entries[H,O,H,O,O,H,O,H,O.....]
+//
+// See CompactHashtable::lookup() for how the table is searched at runtime.
+// See CompactHashtableWriter::dump() for how the table is written at CDS
+// dump time.
+//
+class SimpleCompactHashtable VALUE_OBJ_CLASS_SPEC {
+protected:
+ address _base_address;
+ u4 _bucket_count;
+ u4 _entry_count;
+ u4* _buckets;
+ u4* _entries;
+
+public:
+ SimpleCompactHashtable() {
+ _entry_count = 0;
+ _bucket_count = 0;
+ _buckets = 0;
+ _entries = 0;
+ }
+
+ void reset() {
+ _bucket_count = 0;
+ _entry_count = 0;
+ _buckets = 0;
+ _entries = 0;
+ }
+
+ void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
+ _base_address = base_address;
+ _bucket_count = bucket_count;
+ _entry_count = entry_count;
+ _buckets = buckets;
+ _entries = entries;
+ }
+
+ template <class I> inline void iterate(const I& iterator);
+
+ bool exists(u4 value);
+
+ // For reading from/writing to the CDS archive
+ void serialize(SerializeClosure* soc);
+};
+
+template <class T, class N> class CompactHashtable : public SimpleCompactHashtable {
+ friend class VMStructs;
+
+public:
+ enum CompactHashtableType {
+ _symbol_table = 0,
+ _string_table = 1
+ };
+
+private:
+ u4 _type;
+
+ inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t,
+ u4 offset, const char* name, int len);
+
+ inline oop decode_entry(CompactHashtable<oop, char>* const t,
+ u4 offset, const char* name, int len);
+public:
+ CompactHashtable() : SimpleCompactHashtable() {}
+
+ void set_type(CompactHashtableType type) {
+ _type = (u4)type;
+ }
+
+ // Lookup an entry from the compact table
+ inline T lookup(const N* name, unsigned int hash, int len);
+
+ // iterate over symbols
+ void symbols_do(SymbolClosure *cl);
+
+ // iterate over strings
+ void oops_do(OopClosure* f);
+
+ // For reading from/writing to the CDS archive
+ void serialize(SerializeClosure* soc);
+
+ uintx base_address() {
+ return (uintx) _base_address;
+ }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// Read/Write the contents of a hashtable textual dump (created by
+// SymbolTable::dump and StringTable::dump).
+// Because the dump file may be big (hundred of MB in extreme cases),
+// we use mmap for fast access when reading it.
+//
+class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
+ int _fd;
+ const char* _base;
+ const char* _p;
+ const char* _end;
+ const char* _filename;
+ size_t _size;
+ int _prefix_type;
+ int _line_no;
+public:
+ HashtableTextDump(const char* filename);
+ ~HashtableTextDump();
+
+ enum {
+ SymbolPrefix = 1 << 0,
+ StringPrefix = 1 << 1,
+ Unknown = 1 << 2
+ };
+
+ void quit(const char* err, const char* msg);
+
+ inline int remain() {
+ return (int)(_end - _p);
+ }
+
+ void corrupted(const char *p, const char *msg);
+
+ inline void corrupted_if(bool cond, const char *msg) {
+ if (cond) {
+ corrupted(_p, msg);
+ }
+ }
+
+ bool skip_newline();
+ int skip(char must_be_char);
+ void skip_past(char c);
+ void check_version(const char* ver);
+
+ inline void get_num(char delim, int *num) {
+ const char* p = _p;
+ const char* end = _end;
+ u8 n = 0;
+
+ while (p < end) {
+ char c = *p++;
+ if ('0' <= c && c <= '9') {
+ n = n * 10 + (c - '0');
+ if (n > (u8)INT_MAX) {
+ corrupted(_p, "Num overflow");
+ }
+ } else if (c == delim) {
+ _p = p;
+ *num = (int)n;
+ return;
+ } else {
+ // Not [0-9], not 'delim'
+ corrupted(_p, "Unrecognized format");;
+ }
+ }
+
+ corrupted(_end, "Incorrect format");
+ ShouldNotReachHere();
+ }
+
+ void scan_prefix_type();
+ int scan_prefix(int* utf8_length);
+ int scan_string_prefix();
+ int scan_symbol_prefix();
+
+ jchar unescape(const char* from, const char* end, int count);
+ void get_utf8(char* utf8_buffer, int utf8_length);
+ static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
+};
+
+#endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP