--- a/hotspot/src/share/vm/classfile/imageFile.hpp Thu Jun 25 13:23:36 2015 +0000
+++ b/hotspot/src/share/vm/classfile/imageFile.hpp Thu Jun 25 18:25:19 2015 +0200
@@ -28,13 +28,15 @@
#include "classfile/classLoader.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
+#include "utilities/endian.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"
// Image files are an alternate file format for storing classes and resources. The
-// goal is to supply file access which is faster and smaller that the jar format.
-// It should be noted that unlike jars information stored in an image is in native
-// endian format. This allows the image to be memory mapped into memory without
-// endian translation. This also means that images are platform dependent.
+// goal is to supply file access which is faster and smaller than the jar format.
+// It should be noted that unlike jars, information stored in an image is in native
+// endian format. This allows the image to be mapped into memory without endian
+// translation. This also means that images are platform dependent.
//
// Image files are structured as three sections;
//
@@ -42,7 +44,7 @@
// | Header |
// +-----------+
// | |
-// | Directory |
+// | Index |
// | |
// +-----------+
// | |
@@ -60,7 +62,11 @@
// +------------+------------+
// | Major Vers | Minor Vers |
// +------------+------------+
-// | Location Count |
+// | Flags |
+// +-------------------------+
+// | Resource Count |
+// +-------------------------+
+// | Table Length |
// +-------------------------+
// | Attributes Size |
// +-------------------------+
@@ -71,23 +77,24 @@
// special file extension.
// Major vers, minor vers - differences in version numbers indicate structural
// changes in the image.
-// Location count - number of locations/resources in the file. This count is also
-// the length of lookup tables used in the directory.
+// Flags - various image wide flags (future).
+// Resource count - number of resources in the file.
+// Table length - the length of lookup tables used in the index.
// Attributes size - number of bytes in the region used to store location attribute
// streams.
// Strings size - the size of the region used to store strings used by the
-// directory and meta data.
+// index and meta data.
//
-// The directory contains information related to resource lookup. The algorithm
+// The index contains information related to resource lookup. The algorithm
// used for lookup is "A Practical Minimal Perfect Hashing Method"
// (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
-// in the form <package>/<base>.<extension> return the resource location
+// in the form /<module>/<package>/<base>.<extension> return the resource location
// information;
//
-// redirectIndex = hash(path, DEFAULT_SEED) % count;
+// redirectIndex = hash(path, DEFAULT_SEED) % table_length;
// redirect = redirectTable[redirectIndex];
// if (redirect == 0) return not found;
-// locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count;
+// locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
// location = locationTable[locationIndex];
// if (!verify(location, path)) return not found;
// return location;
@@ -97,7 +104,7 @@
// other seeds. The verify function guarantees the found resource location is
// indeed the resource we are looking for.
//
-// The following is the format of the directory;
+// The following is the format of the index;
//
// +-------------------+
// | Redirect Table |
@@ -117,54 +124,74 @@
// offsets. Zero indicates not found.
// Attribute Offsets - Array of 32-bit unsigned values representing offsets into
// attribute data. Attribute offsets can be iterated to do a
-// full survey of resources in the image.
+// full survey of resources in the image. Offset of zero
+// indicates no attributes.
// Attribute Data - Bytes representing compact attribute data for locations. (See
// comments in ImageLocation.)
-// Strings - Collection of zero terminated UTF-8 strings used by the directory and
+// Strings - Collection of zero terminated UTF-8 strings used by the index and
// image meta data. Each string is accessed by offset. Each string is
// unique. Offset zero is reserved for the empty string.
//
-// Note that the memory mapped directory assumes 32 bit alignment of the image
-// header, the redirect table and the attribute offsets.
+// Note that the memory mapped index assumes 32 bit alignment of each component
+// in the index.
+//
+// Endianness of an image.
+// An image booted by hotspot is always in native endian. However, it is possible
+// for the JDK to read an image in the alternate endian format. This primarily
+// occurs in cross platform scenarios, e.g., where javac needs to read an
+// embedded image to access classes for cross compilation.
//
+class ImageFileReader; // forward declaration
// Manage image file string table.
-class ImageStrings {
+class ImageStrings VALUE_OBJ_CLASS_SPEC {
private:
- // Data bytes for strings.
- u1* _data;
- // Number of bytes in the string table.
- u4 _size;
-
+ u1* _data; // Data bytes for strings.
+ u4 _size; // Number of bytes in the string table.
public:
- // Prime used to generate hash for Perfect Hashing.
- static const u4 HASH_MULTIPLIER = 0x01000193;
+ enum {
+ // Not found result from find routine.
+ NOT_FOUND = -1,
+ // Prime used to generate hash for Perfect Hashing.
+ HASH_MULTIPLIER = 0x01000193
+ };
ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
// Return the UTF-8 string beginning at offset.
inline const char* get(u4 offset) const {
- assert(offset < _size, "offset exceeds string table size");
+ guarantee(offset < _size, "offset exceeds string table size");
return (const char*)(_data + offset);
}
- // Compute the Perfect Hashing hash code for the supplied string.
+ // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
inline static u4 hash_code(const char* string) {
return hash_code(string, HASH_MULTIPLIER);
}
// Compute the Perfect Hashing hash code for the supplied string, starting at seed.
- static u4 hash_code(const char* string, u4 seed);
+ static s4 hash_code(const char* string, s4 seed);
- // Test to see if string begins with start. If so returns remaining portion
- // of string. Otherwise, NULL. Used to test sections of a path without
- // copying.
+ // Match up a string in a perfect hash table. Result still needs validation
+ // for precise match.
+ static s4 find(Endian* endian, const char* name, s4* redirect, u4 length);
+
+ // Test to see if UTF-8 string begins with the start UTF-8 string. If so,
+ // return non-NULL address of remaining portion of string. Otherwise, return
+ // NULL. Used to test sections of a path without copying from image string
+ // table.
static const char* starts_with(const char* string, const char* start);
+ // Test to see if UTF-8 string begins with start char. If so, return non-NULL
+ // address of remaining portion of string. Otherwise, return NULL. Used
+ // to test a character of a path without copying.
+ inline static const char* starts_with(const char* string, const char ch) {
+ return *string == ch ? string + 1 : NULL;
+ }
};
-// Manage image file location attribute streams. Within an image, a location's
+// Manage image file location attribute data. Within an image, a location's
// attributes are compressed into a stream of bytes. An attribute stream is
// composed of individual attribute sequences. Each attribute sequence begins with
// a header byte containing the attribute 'kind' (upper 5 bits of header) and the
@@ -188,7 +215,7 @@
// stream.
// - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
// storing the resources. Thus, in an image this represents the number of bytes
-// after the directory.
+// after the index.
// - Currently, compressed resources are represented by having a non-zero
// ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the
// image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
@@ -198,17 +225,19 @@
// represented differently.
// - Package strings include trailing slash and extensions include prefix period.
//
-class ImageLocation {
+class ImageLocation VALUE_OBJ_CLASS_SPEC {
public:
- // Attribute kind enumeration.
- static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker
- static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base
- static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent
- static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension
- static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource
- static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource
- static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource
- static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds
+ enum {
+ ATTRIBUTE_END, // End of attribute stream marker
+ ATTRIBUTE_MODULE, // String table offset of module name
+ ATTRIBUTE_PARENT, // String table offset of resource path parent
+ ATTRIBUTE_BASE, // String table offset of resource path base
+ ATTRIBUTE_EXTENSION, // String table offset of resource path extension
+ ATTRIBUTE_OFFSET, // Container byte offset of resource
+ ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource
+ ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource
+ ATTRIBUTE_COUNT // Number of attribute kinds
+ };
private:
// Values of inflated attributes.
@@ -222,30 +251,43 @@
// Return the attribute kind.
inline static u1 attribute_kind(u1 data) {
u1 kind = data >> 3;
- assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
+ guarantee(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
return kind;
}
// Return the attribute length.
inline static u8 attribute_value(u1* data, u1 n) {
- assert(0 < n && n <= 8, "invalid attribute value length");
+ guarantee(0 < n && n <= 8, "invalid attribute value length");
u8 value = 0;
-
// Most significant bytes first.
for (u1 i = 0; i < n; i++) {
value <<= 8;
value |= data[i];
}
-
return value;
}
public:
- ImageLocation(u1* data);
+ ImageLocation() {
+ clear_data();
+ }
+
+ ImageLocation(u1* data) {
+ clear_data();
+ set_data(data);
+ }
+
+ // Inflates the attribute stream into individual values stored in the long
+ // array _attributes. This allows an attribute value to be quickly accessed by
+ // direct indexing. Unspecified values default to zero.
+ void set_data(u1* data);
+
+ // Zero all attribute values.
+ void clear_data();
// Retrieve an attribute value from the inflated array.
inline u8 get_attribute(u1 kind) const {
- assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
+ guarantee(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
return _attributes[kind];
}
@@ -255,89 +297,306 @@
}
};
-// Manage the image file.
-class ImageFile: public CHeapObj<mtClass> {
-private:
- // Image file marker.
- static const u4 IMAGE_MAGIC = 0xCAFEDADA;
- // Image file major version number.
- static const u2 MAJOR_VERSION = 0;
- // Image file minor version number.
- static const u2 MINOR_VERSION = 1;
+//
+// NOTE: needs revision.
+// Each loader requires a set of module meta data to identify which modules and
+// packages are managed by that loader. Currently, there is one image file per
+// builtin loader, so only one module meta data resource per file.
+//
+// Each element in the module meta data is a native endian 4 byte integer. Note
+// that entries with zero offsets for string table entries should be ignored
+// (padding for hash table lookup).
+//
+// Format:
+// Count of package to module entries
+// Count of module to package entries
+// Perfect Hash redirect table[Count of package to module entries]
+// Package to module entries[Count of package to module entries]
+// Offset to package name in string table
+// Offset to module name in string table
+// Perfect Hash redirect table[Count of module to package entries]
+// Module to package entries[Count of module to package entries]
+// Offset to module name in string table
+// Count of packages in module
+// Offset to first package in packages table
+// Packages[]
+// Offset to package name in string table
+//
+// Manage the image module meta data.
+class ImageModuleData : public CHeapObj<mtClass> {
+ class Header VALUE_OBJ_CLASS_SPEC {
+ private:
+ u4 _ptm_count; // Count of package to module entries
+ u4 _mtp_count; // Count of module to package entries
+ public:
+ inline u4 ptm_count(Endian* endian) const { return endian->get(_ptm_count); }
+ inline u4 mtp_count(Endian* endian) const { return endian->get(_mtp_count); }
+ };
- struct ImageHeader {
- u4 _magic; // Image file marker
- u2 _major_version; // Image file major version number
- u2 _minor_version; // Image file minor version number
- u4 _location_count; // Number of locations managed in index.
- u4 _locations_size; // Number of bytes in attribute table.
- u4 _strings_size; // Number of bytes in string table.
+ // Hashtable entry
+ class HashData VALUE_OBJ_CLASS_SPEC {
+ private:
+ u4 _name_offset; // Name offset in string table
+ public:
+ inline s4 name_offset(Endian* endian) const { return endian->get(_name_offset); }
+ };
+
+ // Package to module hashtable entry
+ class PTMData : public HashData {
+ private:
+ u4 _module_name_offset; // Module name offset in string table
+ public:
+ inline s4 module_name_offset(Endian* endian) const { return endian->get(_module_name_offset); }
+ };
+
+ // Module to package hashtable entry
+ class MTPData : public HashData {
+ private:
+ u4 _package_count; // Number of packages in module
+ u4 _package_offset; // Offset in package list
+ public:
+ inline u4 package_count(Endian* endian) const { return endian->get(_package_count); }
+ inline u4 package_offset(Endian* endian) const { return endian->get(_package_offset); }
};
- char* _name; // Name of image
- int _fd; // File descriptor
- bool _memory_mapped; // Is file memory mapped
- ImageHeader _header; // Image header
- u8 _index_size; // Total size of index
- u1* _index_data; // Raw index data
- s4* _redirect_table; // Perfect hash redirect table
- u4* _offsets_table; // Location offset table
- u1* _location_bytes; // Location attributes
- u1* _string_bytes; // String table
+ const ImageFileReader* _image_file; // Source image file
+ Endian* _endian; // Endian handler
+ ImageStrings _strings; // Image file strings
+ u1* _data; // Module data resource data
+ u8 _data_size; // Size of resource data
+ Header* _header; // Module data header
+ s4* _ptm_redirect; // Package to module hashtable redirect
+ PTMData* _ptm_data; // Package to module data
+ s4* _mtp_redirect; // Module to packages hashtable redirect
+ MTPData* _mtp_data; // Module to packages data
+ s4* _mtp_packages; // Package data (name offsets)
+
+ // Return a string from the string table.
+ inline const char* get_string(u4 offset) {
+ return _strings.get(offset);
+ }
+
+ inline u4 mtp_package(u4 index) {
+ return _endian->get(_mtp_packages[index]);
+ }
+
+public:
+ ImageModuleData(const ImageFileReader* image_file, const char* module_data_name);
+ ~ImageModuleData();
+
+ // Return the name of the module data resource.
+ static void module_data_name(char* buffer, const char* image_file_name);
+
+ // Return the module in which a package resides. Returns NULL if not found.
+ const char* package_to_module(const char* package_name);
+
+ // Returns all the package names in a module. Returns NULL if module not found.
+ GrowableArray<const char*>* module_to_packages(const char* module_name);
+};
+
+// Image file header, starting at offset 0.
+class ImageHeader VALUE_OBJ_CLASS_SPEC {
+private:
+ u4 _magic; // Image file marker
+ u4 _version; // Image file version (major << 16 | minor)
+ u4 _flags; // Image file flags
+ u4 _resource_count; // Number of resources in file
+ u4 _table_length; // Number of slots in index tables
+ u4 _locations_size; // Number of bytes in attribute table
+ u4 _strings_size; // Number of bytes in string table
+
+public:
+ u4 magic() const { return _magic; }
+ u4 magic(Endian* endian) const { return endian->get(_magic); }
+ void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); }
+
+ u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; }
+ u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; }
+ void set_version(Endian* endian, u4 major_version, u4 minor_version) {
+ return endian->set(_version, major_version << 16 | minor_version);
+ }
+
+ u4 flags(Endian* endian) const { return endian->get(_flags); }
+ void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); }
+
+ u4 resource_count(Endian* endian) const { return endian->get(_resource_count); }
+ void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); }
+
+ u4 table_length(Endian* endian) const { return endian->get(_table_length); }
+ void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); }
+
+ u4 locations_size(Endian* endian) const { return endian->get(_locations_size); }
+ void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); }
+
+ u4 strings_size(Endian* endian) const { return endian->get(_strings_size); }
+ void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); }
+};
+
+// Max path length limit independent of platform. Windows max path is 1024,
+// other platforms use 4096. The JCK fails several tests when 1024 is used.
+#define IMAGE_MAX_PATH 4096
+
+// Manage the image file.
+// ImageFileReader manages the content of an image file.
+// Initially, the header of the image file is read for validation. If valid,
+// values in the header are used to calculate the size of the image index. The
+// index is then memory mapped to allow load on demand and sharing. The
+// -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.)
+// An image can be used by Hotspot and multiple reference points in the JDK, thus
+// it is desirable to share a reader. To accommodate sharing, a share table is
+// defined (see ImageFileReaderTable in imageFile.cpp). To track the number of
+// uses, ImageFileReader keeps a use count (_use). Use is incremented when
+// 'opened' by a reference point and decremented when 'closed'. A use count of
+// zero leads the ImageFileReader to be actually closed and discarded.
+class ImageFileReader : public CHeapObj<mtClass> {
+private:
+ // Manage a number of image files such that an image can be shared across
+ // multiple uses (ex. loader.)
+ static GrowableArray<ImageFileReader*>* _reader_table;
+
+ char* _name; // Name of image
+ s4 _use; // Use count
+ int _fd; // File descriptor
+ Endian* _endian; // Endian handler
+ u8 _file_size; // File size in bytes
+ ImageHeader _header; // Image header
+ size_t _index_size; // Total size of index
+ u1* _index_data; // Raw index data
+ s4* _redirect_table; // Perfect hash redirect table
+ u4* _offsets_table; // Location offset table
+ u1* _location_bytes; // Location attributes
+ u1* _string_bytes; // String table
+
+ ImageFileReader(const char* name, bool big_endian);
+ ~ImageFileReader();
// Compute number of bytes in image file index.
inline u8 index_size() {
return sizeof(ImageHeader) +
- _header._location_count * sizeof(u4) * 2 +
- _header._locations_size +
- _header._strings_size;
+ table_length() * sizeof(u4) * 2 + locations_size() + strings_size();
}
public:
- ImageFile(const char* name);
- ~ImageFile();
+ enum {
+ // Image file marker.
+ IMAGE_MAGIC = 0xCAFEDADA,
+ // Endian inverted Image file marker.
+ IMAGE_MAGIC_INVERT = 0xDADAFECA,
+ // Image file major version number.
+ MAJOR_VERSION = 1,
+ // Image file minor version number.
+ MINOR_VERSION = 0
+ };
+
+ // Open an image file, reuse structure if file already open.
+ static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian());
- // Open image file for access.
+ // Close an image file if the file is not in use elsewhere.
+ static void close(ImageFileReader *reader);
+
+ // Return an id for the specified ImageFileReader.
+ static u8 readerToID(ImageFileReader *reader);
+
+ // Validate the image id.
+ static bool idCheck(u8 id);
+
+ // Return the ImageFileReader for the specified id.
+ static ImageFileReader* idToReader(u8 id);
+
+ // Open image file for read access.
bool open();
+
// Close image file.
void close();
+ // Read directly from the file.
+ bool read_at(u1* data, u8 size, u8 offset) const;
+
+ inline Endian* endian() const { return _endian; }
+
// Retrieve name of image file.
inline const char* name() const {
return _name;
}
+ // Retrieve size of image file.
+ inline u8 file_size() const {
+ return _file_size;
+ }
+
+ // Return first address of index data.
+ inline u1* get_index_address() const {
+ return _index_data;
+ }
+
+ // Return first address of resource data.
+ inline u1* get_data_address() const {
+ return _index_data + _index_size;
+ }
+
+ // Get the size of the index data.
+ size_t get_index_size() const {
+ return _index_size;
+ }
+
+ inline u4 table_length() const {
+ return _header.table_length(_endian);
+ }
+
+ inline u4 locations_size() const {
+ return _header.locations_size(_endian);
+ }
+
+ inline u4 strings_size()const {
+ return _header.strings_size(_endian);
+ }
+
+ inline u4* offsets_table() const {
+ return _offsets_table;
+ }
+
+ // Increment use count.
+ inline void inc_use() {
+ _use++;
+ }
+
+ // Decrement use count.
+ inline bool dec_use() {
+ return --_use == 0;
+ }
+
// Return a string table accessor.
inline const ImageStrings get_strings() const {
- return ImageStrings(_string_bytes, _header._strings_size);
+ return ImageStrings(_string_bytes, _header.strings_size(_endian));
}
- // Return number of locations in image file index.
- inline u4 get_location_count() const {
- return _header._location_count;
+ // Return location attribute stream at offset.
+ inline u1* get_location_offset_data(u4 offset) const {
+ guarantee((u4)offset < _header.locations_size(_endian),
+ "offset exceeds location attributes size");
+ return offset != 0 ? _location_bytes + offset : NULL;
}
// Return location attribute stream for location i.
- inline u1* get_location_data(u4 i) const {
- u4 offset = _offsets_table[i];
+ inline u1* get_location_data(u4 index) const {
+ guarantee((u4)index < _header.table_length(_endian),
+ "index exceeds location count");
+ u4 offset = _endian->get(_offsets_table[index]);
- return offset != 0 ? _location_bytes + offset : NULL;
+ return get_location_offset_data(offset);
}
- // Return the attribute stream for a named resourced.
- u1* find_location_data(const char* path) const;
+ // Find the location attributes associated with the path. Returns true if
+ // the location is found, false otherwise.
+ bool find_location(const char* path, ImageLocation& location) const;
+
+ // Assemble the location path.
+ void location_path(ImageLocation& location, char* path, size_t max) const;
// Verify that a found location matches the supplied path.
bool verify_location(ImageLocation& location, const char* path) const;
- // Return the resource for the supplied location info.
- u1* get_resource(ImageLocation& location) const;
-
- // Return the resource associated with the path else NULL if not found.
- void get_resource(const char* path, u1*& buffer, u8& size) const;
-
- // Return an array of packages for a given module
- GrowableArray<const char*>* packages(const char* name);
+ // Get the resource for the supplied location.
+ void get_resource(ImageLocation& location, u1* uncompressed_data) const;
};
-
#endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP