hotspot/src/share/vm/classfile/imageFile.hpp
changeset 27562 47f369e3c69c
child 31608 b5cb9a07591a
equal deleted inserted replaced
27561:7ead528de130 27562:47f369e3c69c
       
     1 /*
       
     2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  *
       
    23  */
       
    24 
       
    25 #ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP
       
    26 #define SHARE_VM_CLASSFILE_IMAGEFILE_HPP
       
    27 
       
    28 #include "classfile/classLoader.hpp"
       
    29 #include "memory/allocation.hpp"
       
    30 #include "memory/allocation.inline.hpp"
       
    31 #include "utilities/globalDefinitions.hpp"
       
    32 
       
    33 // Image files are an alternate file format for storing classes and resources. The
       
     34 // goal is to supply file access which is faster and smaller than the jar format.
       
    35 // It should be noted that unlike jars information stored in an image is in native
       
    36 // endian format. This allows the image to be memory mapped into memory without
       
    37 // endian translation.  This also means that images are platform dependent.
       
    38 //
       
    39 // Image files are structured as three sections;
       
    40 //
       
    41 //         +-----------+
       
    42 //         |  Header   |
       
    43 //         +-----------+
       
    44 //         |           |
       
    45 //         | Directory |
       
    46 //         |           |
       
    47 //         +-----------+
       
    48 //         |           |
       
    49 //         |           |
       
    50 //         | Resources |
       
    51 //         |           |
       
    52 //         |           |
       
    53 //         +-----------+
       
    54 //
       
    55 // The header contains information related to identification and description of
       
    56 // contents.
       
    57 //
       
    58 //         +-------------------------+
       
    59 //         |   Magic (0xCAFEDADA)    |
       
    60 //         +------------+------------+
       
    61 //         | Major Vers | Minor Vers |
       
    62 //         +------------+------------+
       
    63 //         |      Location Count     |
       
    64 //         +-------------------------+
       
    65 //         |      Attributes Size    |
       
    66 //         +-------------------------+
       
    67 //         |       Strings Size      |
       
    68 //         +-------------------------+
       
    69 //
       
    70 // Magic - means of identifying validity of the file.  This avoids requiring a
       
    71 //         special file extension.
       
    72 // Major vers, minor vers - differences in version numbers indicate structural
       
    73 //                          changes in the image.
       
    74 // Location count - number of locations/resources in the file.  This count is also
       
    75 //                  the length of lookup tables used in the directory.
       
    76 // Attributes size - number of bytes in the region used to store location attribute
       
    77 //                   streams.
       
    78 // Strings size - the size of the region used to store strings used by the
       
    79 //                directory and meta data.
       
    80 //
       
    81 // The directory contains information related to resource lookup. The algorithm
       
    82 // used for lookup is "A Practical Minimal Perfect Hashing Method"
       
    83 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
       
    84 // in the form <package>/<base>.<extension>  return the resource location
       
    85 // information;
       
    86 //
       
    87 //     redirectIndex = hash(path, DEFAULT_SEED) % count;
       
    88 //     redirect = redirectTable[redirectIndex];
       
    89 //     if (redirect == 0) return not found;
       
    90 //     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count;
       
    91 //     location = locationTable[locationIndex];
       
    92 //     if (!verify(location, path)) return not found;
       
    93 //     return location;
       
    94 //
       
    95 // Note: The hash function takes an initial seed value.  A different seed value
       
    96 // usually returns a different result for strings that would otherwise collide with
       
    97 // other seeds. The verify function guarantees the found resource location is
       
    98 // indeed the resource we are looking for.
       
    99 //
       
   100 // The following is the format of the directory;
       
   101 //
       
   102 //         +-------------------+
       
   103 //         |   Redirect Table  |
       
   104 //         +-------------------+
       
   105 //         | Attribute Offsets |
       
   106 //         +-------------------+
       
   107 //         |   Attribute Data  |
       
   108 //         +-------------------+
       
   109 //         |      Strings      |
       
   110 //         +-------------------+
       
   111 //
       
   112 // Redirect Table - Array of 32-bit signed values representing actions that
       
   113 //                  should take place for hashed strings that map to that
       
   114 //                  value.  Negative values indicate no hash collision and can be
       
   115 //                  quickly converted to indices into attribute offsets.  Positive
       
   116 //                  values represent a new seed for hashing an index into attribute
       
   117 //                  offsets.  Zero indicates not found.
       
   118 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into
       
   119 //                     attribute data.  Attribute offsets can be iterated to do a
       
   120 //                     full survey of resources in the image.
       
   121 // Attribute Data - Bytes representing compact attribute data for locations. (See
       
   122 //                  comments in ImageLocation.)
       
   123 // Strings - Collection of zero terminated UTF-8 strings used by the directory and
       
   124 //           image meta data.  Each string is accessed by offset.  Each string is
       
   125 //           unique.  Offset zero is reserved for the empty string.
       
   126 //
       
   127 // Note that the memory mapped directory assumes 32 bit alignment of the image
       
   128 // header, the redirect table and the attribute offsets.
       
   129 //
       
   130 
       
   131 
       
   132 // Manage image file string table.
       
   133 class ImageStrings {
       
   134 private:
       
   135   // Data bytes for strings.
       
   136   u1* _data;
       
   137   // Number of bytes in the string table.
       
   138   u4 _size;
       
   139 
       
   140 public:
       
   141   // Prime used to generate hash for Perfect Hashing.
       
   142   static const u4 HASH_MULTIPLIER = 0x01000193;
       
   143 
       
   144   ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
       
   145 
       
   146   // Return the UTF-8 string beginning at offset.
       
   147   inline const char* get(u4 offset) const {
       
   148     assert(offset < _size, "offset exceeds string table size");
       
   149     return (const char*)(_data + offset);
       
   150   }
       
   151 
       
   152   // Compute the Perfect Hashing hash code for the supplied string.
       
   153   inline static u4 hash_code(const char* string) {
       
   154     return hash_code(string, HASH_MULTIPLIER);
       
   155   }
       
   156 
       
   157   // Compute the Perfect Hashing hash code for the supplied string, starting at seed.
       
   158   static u4 hash_code(const char* string, u4 seed);
       
   159 
       
   160   // Test to see if string begins with start.  If so returns remaining portion
       
   161   // of string.  Otherwise, NULL.  Used to test sections of a path without
       
   162   // copying.
       
   163   static const char* starts_with(const char* string, const char* start);
       
   164 
       
   165 };
       
   166 
       
   167 // Manage image file location attribute streams.  Within an image, a location's
       
   168 // attributes are compressed into a stream of bytes.  An attribute stream is
       
   169 // composed of individual attribute sequences.  Each attribute sequence begins with
       
   170 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the
       
   171 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the
       
    172 // attribute value.  Attribute values are stored most significant byte first.
       
   173 //
       
   174 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22
       
   175 // (kind = 4, length = 3), 0x03, 0x35, 0x62.
       
   176 //
       
   177 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header
       
   178 // byte of zero.)
       
   179 //
       
   180 // ImageLocation inflates the stream into individual values stored in the long
       
    181 // array _attributes. This allows an attribute value to be quickly accessed by
       
   182 // direct indexing. Unspecified values default to zero.
       
   183 //
       
   184 // Notes:
       
   185 //  - Even though ATTRIBUTE_END is used to mark the end of the attribute stream,
       
   186 //    streams will contain zero byte values to represent lesser significant bits.
       
   187 //    Thus, detecting a zero byte is not sufficient to detect the end of an attribute
       
   188 //    stream.
       
   189 //  - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
       
   190 //    storing the resources.  Thus, in an image this represents the number of bytes
       
   191 //    after the directory.
       
   192 //  - Currently, compressed resources are represented by having a non-zero
       
   193 //    ATTRIBUTE_COMPRESSED value.  This represents the number of bytes stored in the
       
   194 //    image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
       
   195 //    inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value
       
   196 //    of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and
       
   197 //    in memory.  In the future, additional compression techniques will be used and
       
   198 //    represented differently.
       
   199 //  - Package strings include trailing slash and extensions include prefix period.
       
   200 //
       
   201 class ImageLocation {
       
   202 public:
       
   203   // Attribute kind enumeration.
       
   204   static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker
       
   205   static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base
       
   206   static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent
       
   207   static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension
       
   208   static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource
       
   209   static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource
       
   210   static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource
       
   211   static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds
       
   212 
       
   213 private:
       
   214   // Values of inflated attributes.
       
   215   u8 _attributes[ATTRIBUTE_COUNT];
       
   216 
       
   217   // Return the attribute value number of bytes.
       
   218   inline static u1 attribute_length(u1 data) {
       
   219     return (data & 0x7) + 1;
       
   220   }
       
   221 
       
   222   // Return the attribute kind.
       
   223   inline static u1 attribute_kind(u1 data) {
       
   224     u1 kind = data >> 3;
       
   225     assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
       
   226     return kind;
       
   227   }
       
   228 
       
   229   // Return the attribute length.
       
   230   inline static u8 attribute_value(u1* data, u1 n) {
       
   231     assert(0 < n && n <= 8, "invalid attribute value length");
       
   232     u8 value = 0;
       
   233 
       
   234     // Most significant bytes first.
       
   235     for (u1 i = 0; i < n; i++) {
       
   236       value <<= 8;
       
   237       value |= data[i];
       
   238     }
       
   239 
       
   240     return value;
       
   241   }
       
   242 
       
   243 public:
       
   244   ImageLocation(u1* data);
       
   245 
       
   246   // Retrieve an attribute value from the inflated array.
       
   247   inline u8 get_attribute(u1 kind) const {
       
   248     assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
       
   249     return _attributes[kind];
       
   250   }
       
   251 
       
   252   // Retrieve an attribute string value from the inflated array.
       
   253   inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {
       
   254     return strings.get((u4)get_attribute(kind));
       
   255   }
       
   256 };
       
   257 
       
   258 // Manage the image file.
       
   259 class ImageFile: public CHeapObj<mtClass> {
       
   260 private:
       
   261   // Image file marker.
       
   262   static const u4 IMAGE_MAGIC = 0xCAFEDADA;
       
   263   // Image file major version number.
       
   264   static const u2 MAJOR_VERSION = 0;
       
   265   // Image file minor version number.
       
   266   static const u2 MINOR_VERSION = 1;
       
   267 
       
   268   struct ImageHeader {
       
   269     u4 _magic;          // Image file marker
       
   270     u2 _major_version;  // Image file major version number
       
   271     u2 _minor_version;  // Image file minor version number
       
   272     u4 _location_count; // Number of locations managed in index.
       
   273     u4 _locations_size; // Number of bytes in attribute table.
       
   274     u4 _strings_size;   // Number of bytes in string table.
       
   275   };
       
   276 
       
   277   char* _name;          // Name of image
       
   278   int _fd;              // File descriptor
       
   279   bool _memory_mapped;  // Is file memory mapped
       
   280   ImageHeader _header;  // Image header
       
   281   u8 _index_size;       // Total size of index
       
   282   u1* _index_data;      // Raw index data
       
   283   s4* _redirect_table;  // Perfect hash redirect table
       
   284   u4* _offsets_table;   // Location offset table
       
   285   u1* _location_bytes;  // Location attributes
       
   286   u1* _string_bytes;    // String table
       
   287 
       
   288   // Compute number of bytes in image file index.
       
   289   inline u8 index_size() {
       
   290     return sizeof(ImageHeader) +
       
   291     _header._location_count * sizeof(u4) * 2 +
       
   292     _header._locations_size +
       
   293     _header._strings_size;
       
   294   }
       
   295 
       
   296 public:
       
   297   ImageFile(const char* name);
       
   298   ~ImageFile();
       
   299 
       
   300   // Open image file for access.
       
   301   bool open();
       
   302   // Close image file.
       
   303   void close();
       
   304 
       
   305   // Retrieve name of image file.
       
   306   inline const char* name() const {
       
   307     return _name;
       
   308   }
       
   309 
       
   310   // Return a string table accessor.
       
   311   inline const ImageStrings get_strings() const {
       
   312     return ImageStrings(_string_bytes, _header._strings_size);
       
   313   }
       
   314 
       
   315   // Return number of locations in image file index.
       
   316   inline u4 get_location_count() const {
       
   317     return _header._location_count;
       
   318   }
       
   319 
       
   320   // Return location attribute stream for location i.
       
   321   inline u1* get_location_data(u4 i) const {
       
   322     u4 offset = _offsets_table[i];
       
   323 
       
   324     return offset != 0 ? _location_bytes + offset : NULL;
       
   325   }
       
   326 
       
   327   // Return the attribute stream for a named resourced.
       
   328   u1* find_location_data(const char* path) const;
       
   329 
       
   330   // Verify that a found location matches the supplied path.
       
   331   bool verify_location(ImageLocation& location, const char* path) const;
       
   332 
       
   333   // Return the resource for the supplied location info.
       
   334   u1* get_resource(ImageLocation& location) const;
       
   335 
       
   336   // Return the resource associated with the path else NULL if not found.
       
   337   void get_resource(const char* path, u1*& buffer, u8& size) const;
       
   338 
       
   339   // Return an array of packages for a given module
       
   340   GrowableArray<const char*>* packages(const char* name);
       
   341 };
       
   342 
       
   343 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP