hotspot/src/share/vm/classfile/imageFile.hpp
changeset 31608 b5cb9a07591a
parent 27562 47f369e3c69c
--- a/hotspot/src/share/vm/classfile/imageFile.hpp	Thu Jun 25 13:23:36 2015 +0000
+++ b/hotspot/src/share/vm/classfile/imageFile.hpp	Thu Jun 25 18:25:19 2015 +0200
@@ -28,13 +28,15 @@
 #include "classfile/classLoader.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
+#include "utilities/endian.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"
 
 // Image files are an alternate file format for storing classes and resources. The
-// goal is to supply file access which is faster and smaller that the jar format.
-// It should be noted that unlike jars information stored in an image is in native
-// endian format. This allows the image to be memory mapped into memory without
-// endian translation.  This also means that images are platform dependent.
+// goal is to supply file access which is faster and smaller than the jar format.
+// It should be noted that unlike jars, information stored in an image is in native
+// endian format. This allows the image to be mapped into memory without endian
+// translation.  This also means that images are platform dependent.
 //
 // Image files are structured as three sections;
 //
@@ -42,7 +44,7 @@
 //         |  Header   |
 //         +-----------+
 //         |           |
-//         | Directory |
+//         |   Index   |
 //         |           |
 //         +-----------+
 //         |           |
@@ -60,7 +62,11 @@
 //         +------------+------------+
 //         | Major Vers | Minor Vers |
 //         +------------+------------+
-//         |      Location Count     |
+//         |          Flags          |
+//         +-------------------------+
+//         |      Resource Count     |
+//         +-------------------------+
+//         |       Table Length      |
 //         +-------------------------+
 //         |      Attributes Size    |
 //         +-------------------------+
@@ -71,23 +77,24 @@
 //         special file extension.
 // Major vers, minor vers - differences in version numbers indicate structural
 //                          changes in the image.
-// Location count - number of locations/resources in the file.  This count is also
-//                  the length of lookup tables used in the directory.
+// Flags - various image wide flags (future).
+// Resource count - number of resources in the file.
+// Table length - the length of lookup tables used in the index.
 // Attributes size - number of bytes in the region used to store location attribute
 //                   streams.
 // Strings size - the size of the region used to store strings used by the
-//                directory and meta data.
+//                index and meta data.
 //
-// The directory contains information related to resource lookup. The algorithm
+// The index contains information related to resource lookup. The algorithm
 // used for lookup is "A Practical Minimal Perfect Hashing Method"
 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
-// in the form <package>/<base>.<extension>  return the resource location
+// in the form /<module>/<package>/<base>.<extension>  return the resource location
 // information;
 //
-//     redirectIndex = hash(path, DEFAULT_SEED) % count;
+//     redirectIndex = hash(path, DEFAULT_SEED) % table_length;
 //     redirect = redirectTable[redirectIndex];
 //     if (redirect == 0) return not found;
-//     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count;
+//     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
 //     location = locationTable[locationIndex];
 //     if (!verify(location, path)) return not found;
 //     return location;
@@ -97,7 +104,7 @@
 // other seeds. The verify function guarantees the found resource location is
 // indeed the resource we are looking for.
 //
-// The following is the format of the directory;
+// The following is the format of the index;
 //
 //         +-------------------+
 //         |   Redirect Table  |
@@ -117,54 +124,74 @@
 //                  offsets.  Zero indicates not found.
 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into
 //                     attribute data.  Attribute offsets can be iterated to do a
-//                     full survey of resources in the image.
+//                     full survey of resources in the image.  Offset of zero
+//                     indicates no attributes.
 // Attribute Data - Bytes representing compact attribute data for locations. (See
 //                  comments in ImageLocation.)
-// Strings - Collection of zero terminated UTF-8 strings used by the directory and
+// Strings - Collection of zero terminated UTF-8 strings used by the index and
 //           image meta data.  Each string is accessed by offset.  Each string is
 //           unique.  Offset zero is reserved for the empty string.
 //
-// Note that the memory mapped directory assumes 32 bit alignment of the image
-// header, the redirect table and the attribute offsets.
+// Note that the memory mapped index assumes 32 bit alignment of each component
+// in the index.
+//
+// Endianness of an image.
+// An image booted by hotspot is always in native endian.  However, it is possible
+// to read (by the JDK) in alternate endian format.  Primarily, this is during
+// cross platform scenarios.  Ex, where javac needs to read an embedded image
+// to access classes for crossing compilation.
 //
 
+class ImageFileReader; // forward declaration
 
 // Manage image file string table.
-class ImageStrings {
+class ImageStrings VALUE_OBJ_CLASS_SPEC {
 private:
-  // Data bytes for strings.
-  u1* _data;
-  // Number of bytes in the string table.
-  u4 _size;
-
+  u1* _data; // Data bytes for strings.
+  u4 _size; // Number of bytes in the string table.
 public:
-  // Prime used to generate hash for Perfect Hashing.
-  static const u4 HASH_MULTIPLIER = 0x01000193;
+  enum {
+    // Not found result from find routine.
+    NOT_FOUND = -1,
+    // Prime used to generate hash for Perfect Hashing.
+    HASH_MULTIPLIER = 0x01000193
+  };
 
   ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
 
   // Return the UTF-8 string beginning at offset.
   inline const char* get(u4 offset) const {
-    assert(offset < _size, "offset exceeds string table size");
+    guarantee(offset < _size, "offset exceeds string table size");
     return (const char*)(_data + offset);
   }
 
-  // Compute the Perfect Hashing hash code for the supplied string.
+  // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
   inline static u4 hash_code(const char* string) {
     return hash_code(string, HASH_MULTIPLIER);
   }
 
   // Compute the Perfect Hashing hash code for the supplied string, starting at seed.
-  static u4 hash_code(const char* string, u4 seed);
+  static s4 hash_code(const char* string, s4 seed);
 
-  // Test to see if string begins with start.  If so returns remaining portion
-  // of string.  Otherwise, NULL.  Used to test sections of a path without
-  // copying.
+  // Match up a string in a perfect hash table.  Result still needs validation
+  // for precise match.
+  static s4 find(Endian* endian, const char* name, s4* redirect, u4 length);
+
+  // Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
+  // return non-NULL address of remaining portion of string.  Otherwise, return
+  // NULL.  Used to test sections of a path without copying from image string
+  // table.
   static const char* starts_with(const char* string, const char* start);
 
+  // Test to see if UTF-8 string begins with start char.  If so, return non-NULL
+  // address of remaining portion of string.  Otherwise, return NULL.  Used
+  // to test a character of a path without copying.
+  inline static const char* starts_with(const char* string, const char ch) {
+    return *string == ch ? string + 1 : NULL;
+  }
 };
 
-// Manage image file location attribute streams.  Within an image, a location's
+// Manage image file location attribute data.  Within an image, a location's
 // attributes are compressed into a stream of bytes.  An attribute stream is
 // composed of individual attribute sequences.  Each attribute sequence begins with
 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the
@@ -188,7 +215,7 @@
 //    stream.
 //  - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
 //    storing the resources.  Thus, in an image this represents the number of bytes
-//    after the directory.
+//    after the index.
 //  - Currently, compressed resources are represented by having a non-zero
 //    ATTRIBUTE_COMPRESSED value.  This represents the number of bytes stored in the
 //    image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
@@ -198,17 +225,19 @@
 //    represented differently.
 //  - Package strings include trailing slash and extensions include prefix period.
 //
-class ImageLocation {
+class ImageLocation VALUE_OBJ_CLASS_SPEC {
 public:
-  // Attribute kind enumeration.
-  static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker
-  static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base
-  static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent
-  static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension
-  static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource
-  static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource
-  static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource
-  static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds
+  enum {
+    ATTRIBUTE_END,          // End of attribute stream marker
+    ATTRIBUTE_MODULE,       // String table offset of module name
+    ATTRIBUTE_PARENT,       // String table offset of resource path parent
+    ATTRIBUTE_BASE,         // String table offset of resource path base
+    ATTRIBUTE_EXTENSION,    // String table offset of resource path extension
+    ATTRIBUTE_OFFSET,       // Container byte offset of resource
+    ATTRIBUTE_COMPRESSED,   // In image byte size of the compressed resource
+    ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource
+    ATTRIBUTE_COUNT         // Number of attribute kinds
+  };
 
 private:
   // Values of inflated attributes.
@@ -222,30 +251,43 @@
   // Return the attribute kind.
   inline static u1 attribute_kind(u1 data) {
     u1 kind = data >> 3;
-    assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
+    guarantee(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
     return kind;
   }
 
   // Return the attribute length.
   inline static u8 attribute_value(u1* data, u1 n) {
-    assert(0 < n && n <= 8, "invalid attribute value length");
+    guarantee(0 < n && n <= 8, "invalid attribute value length");
     u8 value = 0;
-
     // Most significant bytes first.
     for (u1 i = 0; i < n; i++) {
       value <<= 8;
       value |= data[i];
     }
-
     return value;
   }
 
 public:
-  ImageLocation(u1* data);
+  ImageLocation() {
+    clear_data();
+  }
+
+  ImageLocation(u1* data) {
+    clear_data();
+    set_data(data);
+  }
+
+  // Inflates the attribute stream into individual values stored in the long
+  // array _attributes. This allows an attribute value to be quickly accessed by
+  // direct indexing. Unspecified values default to zero.
+  void set_data(u1* data);
+
+  // Zero all attribute values.
+  void clear_data();
 
   // Retrieve an attribute value from the inflated array.
   inline u8 get_attribute(u1 kind) const {
-    assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
+    guarantee(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
     return _attributes[kind];
   }
 
@@ -255,89 +297,306 @@
   }
 };
 
-// Manage the image file.
-class ImageFile: public CHeapObj<mtClass> {
-private:
-  // Image file marker.
-  static const u4 IMAGE_MAGIC = 0xCAFEDADA;
-  // Image file major version number.
-  static const u2 MAJOR_VERSION = 0;
-  // Image file minor version number.
-  static const u2 MINOR_VERSION = 1;
+//
+// NOTE: needs revision.
+// Each loader requires set of module meta data to identify which modules and
+// packages are managed by that loader.  Currently, there is one image file per
+// builtin loader, so only one  module meta data resource per file.
+//
+// Each element in the module meta data is a native endian 4 byte integer.  Note
+// that entries with zero offsets for string table entries should be ignored (
+// padding for hash table lookup.)
+//
+// Format:
+//    Count of package to module entries
+//    Count of module to package entries
+//    Perfect Hash redirect table[Count of package to module entries]
+//    Package to module entries[Count of package to module entries]
+//        Offset to package name in string table
+//        Offset to module name in string table
+//    Perfect Hash redirect table[Count of module to package entries]
+//    Module to package entries[Count of module to package entries]
+//        Offset to module name in string table
+//        Count of packages in module
+//        Offset to first package in packages table
+//    Packages[]
+//        Offset to package name in string table
+//
+// Manage the image module meta data.
+class ImageModuleData : public CHeapObj<mtClass> {
+  class Header VALUE_OBJ_CLASS_SPEC {
+  private:
+    u4 _ptm_count;      // Count of package to module entries
+    u4 _mtp_count;      // Count of module to package entries
+  public:
+    inline u4 ptm_count(Endian* endian) const { return endian->get(_ptm_count); }
+    inline u4 mtp_count(Endian* endian) const { return endian->get(_mtp_count); }
+  };
 
-  struct ImageHeader {
-    u4 _magic;          // Image file marker
-    u2 _major_version;  // Image file major version number
-    u2 _minor_version;  // Image file minor version number
-    u4 _location_count; // Number of locations managed in index.
-    u4 _locations_size; // Number of bytes in attribute table.
-    u4 _strings_size;   // Number of bytes in string table.
+  // Hashtable entry
+  class HashData VALUE_OBJ_CLASS_SPEC {
+  private:
+    u4 _name_offset;    // Name offset in string table
+  public:
+    inline s4 name_offset(Endian* endian) const { return endian->get(_name_offset); }
+  };
+
+  // Package to module hashtable entry
+  class PTMData : public HashData {
+  private:
+    u4 _module_name_offset; // Module name offset in string table
+  public:
+    inline s4 module_name_offset(Endian* endian) const { return endian->get(_module_name_offset); }
+  };
+
+  // Module to package hashtable entry
+  class MTPData : public HashData {
+  private:
+    u4 _package_count;     // Number of packages in module
+    u4 _package_offset;    // Offset in package list
+  public:
+    inline u4 package_count(Endian* endian)  const { return endian->get(_package_count); }
+    inline u4 package_offset(Endian* endian) const { return endian->get(_package_offset); }
   };
 
-  char* _name;          // Name of image
-  int _fd;              // File descriptor
-  bool _memory_mapped;  // Is file memory mapped
-  ImageHeader _header;  // Image header
-  u8 _index_size;       // Total size of index
-  u1* _index_data;      // Raw index data
-  s4* _redirect_table;  // Perfect hash redirect table
-  u4* _offsets_table;   // Location offset table
-  u1* _location_bytes;  // Location attributes
-  u1* _string_bytes;    // String table
+  const ImageFileReader* _image_file; // Source image file
+  Endian* _endian;                    // Endian handler
+  ImageStrings _strings;              // Image file strings
+  u1* _data;                          // Module data resource data
+  u8 _data_size;                      // Size of resource data
+  Header* _header;                    // Module data header
+  s4* _ptm_redirect;                  // Package to module hashtable redirect
+  PTMData* _ptm_data;                 // Package to module data
+  s4* _mtp_redirect;                  // Module to packages hashtable redirect
+  MTPData* _mtp_data;                 // Module to packages data
+  s4* _mtp_packages;                  // Package data (name offsets)
+
+  // Return a string from the string table.
+  inline const char* get_string(u4 offset) {
+    return _strings.get(offset);
+  }
+
+  inline u4 mtp_package(u4 index) {
+    return _endian->get(_mtp_packages[index]);
+  }
+
+public:
+  ImageModuleData(const ImageFileReader* image_file, const char* module_data_name);
+  ~ImageModuleData();
+
+  // Return the name of the module data resource.
+  static void module_data_name(char* buffer, const char* image_file_name);
+
+  // Return the module in which a package resides.  Returns NULL if not found.
+  const char* package_to_module(const char* package_name);
+
+  // Returns all the package names in a module.  Returns NULL if module not found.
+  GrowableArray<const char*>* module_to_packages(const char* module_name);
+};
+
+// Image file header, starting at offset 0.
+class ImageHeader VALUE_OBJ_CLASS_SPEC {
+private:
+  u4 _magic;           // Image file marker
+  u4 _version;         // Image file major version number
+  u4 _flags;           // Image file flags
+  u4 _resource_count;  // Number of resources in file
+  u4 _table_length;    // Number of slots in index tables
+  u4 _locations_size;  // Number of bytes in attribute table
+  u4 _strings_size;    // Number of bytes in string table
+
+public:
+  u4 magic() const { return _magic; }
+  u4 magic(Endian* endian) const { return endian->get(_magic); }
+  void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); }
+
+  u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; }
+  u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; }
+  void set_version(Endian* endian, u4 major_version, u4 minor_version) {
+    return endian->set(_version, major_version << 16 | minor_version);
+  }
+
+  u4 flags(Endian* endian) const { return endian->get(_flags); }
+  void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); }
+
+  u4 resource_count(Endian* endian) const { return endian->get(_resource_count); }
+  void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); }
+
+  u4 table_length(Endian* endian) const { return endian->get(_table_length); }
+  void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); }
+
+  u4 locations_size(Endian* endian) const { return endian->get(_locations_size); }
+  void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); }
+
+  u4 strings_size(Endian* endian) const { return endian->get(_strings_size); }
+  void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); }
+};
+
+// Max path length limit independent of platform.  Windows max path is 1024,
+// other platforms use 4096.  The JCK fails several tests when 1024 is used.
+#define IMAGE_MAX_PATH 4096
+
+// Manage the image file.
+// ImageFileReader manages the content of an image file.
+// Initially, the header of the image file is read for validation.  If valid,
+// values in the header are used calculate the size of the image index.  The
+// index is then memory mapped to allow load on demand and sharing.  The
+// -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.)
+// An image can be used by Hotspot and multiple reference points in the JDK, thus
+// it is desirable to share a reader.  To accomodate sharing, a share table is
+// defined (see ImageFileReaderTable in imageFile.cpp)  To track the number of
+// uses, ImageFileReader keeps a use count (_use).  Use is incremented when
+// 'opened' by reference point and decremented when 'closed'.  Use of zero
+// leads the ImageFileReader to be actually closed and discarded.
+class ImageFileReader : public CHeapObj<mtClass> {
+private:
+  // Manage a number of image files such that an image can be shared across
+  // multiple uses (ex. loader.)
+  static GrowableArray<ImageFileReader*>* _reader_table;
+
+  char* _name;         // Name of image
+  s4 _use;             // Use count
+  int _fd;             // File descriptor
+  Endian* _endian;     // Endian handler
+  u8 _file_size;       // File size in bytes
+  ImageHeader _header; // Image header
+  size_t _index_size;  // Total size of index
+  u1* _index_data;     // Raw index data
+  s4* _redirect_table; // Perfect hash redirect table
+  u4* _offsets_table;  // Location offset table
+  u1* _location_bytes; // Location attributes
+  u1* _string_bytes;   // String table
+
+  ImageFileReader(const char* name, bool big_endian);
+  ~ImageFileReader();
 
   // Compute number of bytes in image file index.
   inline u8 index_size() {
     return sizeof(ImageHeader) +
-    _header._location_count * sizeof(u4) * 2 +
-    _header._locations_size +
-    _header._strings_size;
+      table_length() * sizeof(u4) * 2 + locations_size() + strings_size();
   }
 
 public:
-  ImageFile(const char* name);
-  ~ImageFile();
+  enum {
+    // Image file marker.
+    IMAGE_MAGIC = 0xCAFEDADA,
+    // Endian inverted Image file marker.
+    IMAGE_MAGIC_INVERT = 0xDADAFECA,
+    // Image file major version number.
+    MAJOR_VERSION = 1,
+    // Image file minor version number.
+    MINOR_VERSION = 0
+  };
+
+  // Open an image file, reuse structure if file already open.
+  static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian());
 
-  // Open image file for access.
+  // Close an image file if the file is not in use elsewhere.
+  static void close(ImageFileReader *reader);
+
+  // Return an id for the specifed ImageFileReader.
+  static u8 readerToID(ImageFileReader *reader);
+
+  // Validate the image id.
+  static bool idCheck(u8 id);
+
+  // Return an id for the specifed ImageFileReader.
+  static ImageFileReader* idToReader(u8 id);
+
+  // Open image file for read access.
   bool open();
+
   // Close image file.
   void close();
 
+  // Read directly from the file.
+  bool read_at(u1* data, u8 size, u8 offset) const;
+
+  inline Endian* endian() const { return _endian; }
+
   // Retrieve name of image file.
   inline const char* name() const {
     return _name;
   }
 
+  // Retrieve size of image file.
+  inline u8 file_size() const {
+    return _file_size;
+  }
+
+  // Return first address of index data.
+  inline u1* get_index_address() const {
+    return _index_data;
+  }
+
+  // Return first address of resource data.
+  inline u1* get_data_address() const {
+    return _index_data + _index_size;
+  }
+
+  // Get the size of the index data.
+  size_t get_index_size() const {
+    return _index_size;
+  }
+
+  inline u4 table_length() const {
+    return _header.table_length(_endian);
+  }
+
+  inline u4 locations_size() const {
+    return _header.locations_size(_endian);
+  }
+
+  inline u4 strings_size()const  {
+    return _header.strings_size(_endian);
+  }
+
+  inline u4* offsets_table() const {
+    return _offsets_table;
+  }
+
+  // Increment use count.
+  inline void inc_use() {
+    _use++;
+  }
+
+  // Decrement use count.
+  inline bool dec_use() {
+    return --_use == 0;
+  }
+
   // Return a string table accessor.
   inline const ImageStrings get_strings() const {
-    return ImageStrings(_string_bytes, _header._strings_size);
+    return ImageStrings(_string_bytes, _header.strings_size(_endian));
   }
 
-  // Return number of locations in image file index.
-  inline u4 get_location_count() const {
-    return _header._location_count;
+  // Return location attribute stream at offset.
+  inline u1* get_location_offset_data(u4 offset) const {
+    guarantee((u4)offset < _header.locations_size(_endian),
+              "offset exceeds location attributes size");
+    return offset != 0 ? _location_bytes + offset : NULL;
   }
 
   // Return location attribute stream for location i.
-  inline u1* get_location_data(u4 i) const {
-    u4 offset = _offsets_table[i];
+  inline u1* get_location_data(u4 index) const {
+    guarantee((u4)index < _header.table_length(_endian),
+              "index exceeds location count");
+    u4 offset = _endian->get(_offsets_table[index]);
 
-    return offset != 0 ? _location_bytes + offset : NULL;
+    return get_location_offset_data(offset);
   }
 
-  // Return the attribute stream for a named resourced.
-  u1* find_location_data(const char* path) const;
+  // Find the location attributes associated with the path.  Returns true if
+  // the location is found, false otherwise.
+  bool find_location(const char* path, ImageLocation& location) const;
+
+  // Assemble the location path.
+  void location_path(ImageLocation& location, char* path, size_t max) const;
 
   // Verify that a found location matches the supplied path.
   bool verify_location(ImageLocation& location, const char* path) const;
 
-  // Return the resource for the supplied location info.
-  u1* get_resource(ImageLocation& location) const;
-
-  // Return the resource associated with the path else NULL if not found.
-  void get_resource(const char* path, u1*& buffer, u8& size) const;
-
-  // Return an array of packages for a given module
-  GrowableArray<const char*>* packages(const char* name);
+  // Return the resource for the supplied path.
+  void get_resource(ImageLocation& location, u1* uncompressed_data) const;
 };
-
 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP