|
1 /* |
|
2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 * |
|
23 */ |
|
24 |
|
25 #ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP |
|
26 #define SHARE_VM_CLASSFILE_IMAGEFILE_HPP |
|
27 |
|
28 #include "classfile/classLoader.hpp" |
|
29 #include "memory/allocation.hpp" |
|
30 #include "memory/allocation.inline.hpp" |
|
31 #include "utilities/globalDefinitions.hpp" |
|
32 |
|
33 // Image files are an alternate file format for storing classes and resources. The |
|
34 // goal is to supply file access which is faster and smaller than the jar format. |
|
35 // It should be noted that unlike jars information stored in an image is in native |
|
36 // endian format. This allows the image to be memory mapped without |
|
37 // endian translation. This also means that images are platform dependent. |
|
38 // |
|
39 // Image files are structured as three sections; |
|
40 // |
|
41 // +-----------+ |
|
42 // | Header | |
|
43 // +-----------+ |
|
44 // | | |
|
45 // | Directory | |
|
46 // | | |
|
47 // +-----------+ |
|
48 // | | |
|
49 // | | |
|
50 // | Resources | |
|
51 // | | |
|
52 // | | |
|
53 // +-----------+ |
|
54 // |
|
55 // The header contains information related to identification and description of |
|
56 // contents. |
|
57 // |
|
58 // +-------------------------+ |
|
59 // | Magic (0xCAFEDADA) | |
|
60 // +------------+------------+ |
|
61 // | Major Vers | Minor Vers | |
|
62 // +------------+------------+ |
|
63 // | Location Count | |
|
64 // +-------------------------+ |
|
65 // | Attributes Size | |
|
66 // +-------------------------+ |
|
67 // | Strings Size | |
|
68 // +-------------------------+ |
|
69 // |
|
70 // Magic - means of identifying validity of the file. This avoids requiring a |
|
71 // special file extension. |
|
72 // Major vers, minor vers - differences in version numbers indicate structural |
|
73 // changes in the image. |
|
74 // Location count - number of locations/resources in the file. This count is also |
|
75 // the length of lookup tables used in the directory. |
|
76 // Attributes size - number of bytes in the region used to store location attribute |
|
77 // streams. |
|
78 // Strings size - the size of the region used to store strings used by the |
|
79 // directory and meta data. |
|
80 // |
|
81 // The directory contains information related to resource lookup. The algorithm |
|
82 // used for lookup is "A Practical Minimal Perfect Hashing Method" |
|
83 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string |
|
84 // in the form <package>/<base>.<extension> return the resource location |
|
85 // information; |
|
86 // |
|
87 // redirectIndex = hash(path, DEFAULT_SEED) % count; |
|
88 // redirect = redirectTable[redirectIndex]; |
|
89 // if (redirect == 0) return not found; |
|
90 // locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count; |
|
91 // location = locationTable[locationIndex]; |
|
92 // if (!verify(location, path)) return not found; |
|
93 // return location; |
|
94 // |
|
95 // Note: The hash function takes an initial seed value. A different seed value |
|
96 // usually returns a different result for strings that would otherwise collide with |
|
97 // other seeds. The verify function guarantees the found resource location is |
|
98 // indeed the resource we are looking for. |
|
99 // |
|
100 // The following is the format of the directory; |
|
101 // |
|
102 // +-------------------+ |
|
103 // | Redirect Table | |
|
104 // +-------------------+ |
|
105 // | Attribute Offsets | |
|
106 // +-------------------+ |
|
107 // | Attribute Data | |
|
108 // +-------------------+ |
|
109 // | Strings | |
|
110 // +-------------------+ |
|
111 // |
|
112 // Redirect Table - Array of 32-bit signed values representing actions that |
|
113 // should take place for hashed strings that map to that |
|
114 // value. Negative values indicate no hash collision and can be |
|
115 // quickly converted to indices into attribute offsets. Positive |
|
116 // values represent a new seed for hashing an index into attribute |
|
117 // offsets. Zero indicates not found. |
|
118 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into |
|
119 // attribute data. Attribute offsets can be iterated to do a |
|
120 // full survey of resources in the image. |
|
121 // Attribute Data - Bytes representing compact attribute data for locations. (See |
|
122 // comments in ImageLocation.) |
|
123 // Strings - Collection of zero terminated UTF-8 strings used by the directory and |
|
124 // image meta data. Each string is accessed by offset. Each string is |
|
125 // unique. Offset zero is reserved for the empty string. |
|
126 // |
|
127 // Note that the memory mapped directory assumes 32 bit alignment of the image |
|
128 // header, the redirect table and the attribute offsets. |
|
129 // |
|
130 |
|
131 |
|
132 // Manage image file string table. |
|
133 class ImageStrings { |
|
134 private: |
|
135 // Data bytes for strings. |
|
136 u1* _data; |
|
137 // Number of bytes in the string table. |
|
138 u4 _size; |
|
139 |
|
140 public: |
|
141 // Prime used to generate hash for Perfect Hashing. |
|
142 static const u4 HASH_MULTIPLIER = 0x01000193; |
|
143 |
|
144 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {} |
|
145 |
|
146 // Return the UTF-8 string beginning at offset. |
|
147 inline const char* get(u4 offset) const { |
|
148 assert(offset < _size, "offset exceeds string table size"); |
|
149 return (const char*)(_data + offset); |
|
150 } |
|
151 |
|
152 // Compute the Perfect Hashing hash code for the supplied string. |
|
153 inline static u4 hash_code(const char* string) { |
|
154 return hash_code(string, HASH_MULTIPLIER); |
|
155 } |
|
156 |
|
157 // Compute the Perfect Hashing hash code for the supplied string, starting at seed. |
|
158 static u4 hash_code(const char* string, u4 seed); |
|
159 |
|
160 // Test to see if string begins with start. If so returns remaining portion |
|
161 // of string. Otherwise, NULL. Used to test sections of a path without |
|
162 // copying. |
|
163 static const char* starts_with(const char* string, const char* start); |
|
164 |
|
165 }; |
|
166 |
|
167 // Manage image file location attribute streams. Within an image, a location's |
|
168 // attributes are compressed into a stream of bytes. An attribute stream is |
|
169 // composed of individual attribute sequences. Each attribute sequence begins with |
|
170 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the |
|
171 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the |
|
172 // attribute value. Attribute values present as most significant byte first. |
|
173 // |
|
174 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22 |
|
175 // (kind = 4, length = 3), 0x03, 0x35, 0x62. |
|
176 // |
|
177 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header |
|
178 // byte of zero.) |
|
179 // |
|
180 // ImageLocation inflates the stream into individual values stored in the long |
|
181 // array _attributes. This allows an attribute value to be quickly accessed by |
|
182 // direct indexing. Unspecified values default to zero. |
|
183 // |
|
184 // Notes: |
|
185 // - Even though ATTRIBUTE_END is used to mark the end of the attribute stream, |
|
186 // streams will contain zero byte values to represent lesser significant bits. |
|
187 // Thus, detecting a zero byte is not sufficient to detect the end of an attribute |
|
188 // stream. |
|
189 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region |
|
190 // storing the resources. Thus, in an image this represents the number of bytes |
|
191 // after the directory. |
|
192 // - Currently, compressed resources are represented by having a non-zero |
|
193 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the |
|
194 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the |
|
195 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value |
|
196 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and |
|
197 // in memory. In the future, additional compression techniques will be used and |
|
198 // represented differently. |
|
199 // - Package strings include trailing slash and extensions include prefix period. |
|
200 // |
|
201 class ImageLocation { |
|
202 public: |
|
203 // Attribute kind enumeration. |
|
204 static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker |
|
205 static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base |
|
206 static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent |
|
207 static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension |
|
208 static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource |
|
209 static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource |
|
210 static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource |
|
211 static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds |
|
212 |
|
213 private: |
|
214 // Values of inflated attributes. |
|
215 u8 _attributes[ATTRIBUTE_COUNT]; |
|
216 |
|
217 // Return the attribute value number of bytes. |
|
218 inline static u1 attribute_length(u1 data) { |
|
219 return (data & 0x7) + 1; |
|
220 } |
|
221 |
|
222 // Return the attribute kind. |
|
223 inline static u1 attribute_kind(u1 data) { |
|
224 u1 kind = data >> 3; |
|
225 assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind"); |
|
226 return kind; |
|
227 } |
|
228 |
|
229 // Return the attribute length. |
|
230 inline static u8 attribute_value(u1* data, u1 n) { |
|
231 assert(0 < n && n <= 8, "invalid attribute value length"); |
|
232 u8 value = 0; |
|
233 |
|
234 // Most significant bytes first. |
|
235 for (u1 i = 0; i < n; i++) { |
|
236 value <<= 8; |
|
237 value |= data[i]; |
|
238 } |
|
239 |
|
240 return value; |
|
241 } |
|
242 |
|
243 public: |
|
244 ImageLocation(u1* data); |
|
245 |
|
246 // Retrieve an attribute value from the inflated array. |
|
247 inline u8 get_attribute(u1 kind) const { |
|
248 assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind"); |
|
249 return _attributes[kind]; |
|
250 } |
|
251 |
|
252 // Retrieve an attribute string value from the inflated array. |
|
253 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const { |
|
254 return strings.get((u4)get_attribute(kind)); |
|
255 } |
|
256 }; |
|
257 |
|
258 // Manage the image file. |
|
259 class ImageFile: public CHeapObj<mtClass> { |
|
260 private: |
|
261 // Image file marker. |
|
262 static const u4 IMAGE_MAGIC = 0xCAFEDADA; |
|
263 // Image file major version number. |
|
264 static const u2 MAJOR_VERSION = 0; |
|
265 // Image file minor version number. |
|
266 static const u2 MINOR_VERSION = 1; |
|
267 |
|
268 struct ImageHeader { |
|
269 u4 _magic; // Image file marker |
|
270 u2 _major_version; // Image file major version number |
|
271 u2 _minor_version; // Image file minor version number |
|
272 u4 _location_count; // Number of locations managed in index. |
|
273 u4 _locations_size; // Number of bytes in attribute table. |
|
274 u4 _strings_size; // Number of bytes in string table. |
|
275 }; |
|
276 |
|
277 char* _name; // Name of image |
|
278 int _fd; // File descriptor |
|
279 bool _memory_mapped; // Is file memory mapped |
|
280 ImageHeader _header; // Image header |
|
281 u8 _index_size; // Total size of index |
|
282 u1* _index_data; // Raw index data |
|
283 s4* _redirect_table; // Perfect hash redirect table |
|
284 u4* _offsets_table; // Location offset table |
|
285 u1* _location_bytes; // Location attributes |
|
286 u1* _string_bytes; // String table |
|
287 |
|
288 // Compute number of bytes in image file index. |
|
289 inline u8 index_size() { |
|
290 return sizeof(ImageHeader) + |
|
291 _header._location_count * sizeof(u4) * 2 + |
|
292 _header._locations_size + |
|
293 _header._strings_size; |
|
294 } |
|
295 |
|
296 public: |
|
297 ImageFile(const char* name); |
|
298 ~ImageFile(); |
|
299 |
|
300 // Open image file for access. |
|
301 bool open(); |
|
302 // Close image file. |
|
303 void close(); |
|
304 |
|
305 // Retrieve name of image file. |
|
306 inline const char* name() const { |
|
307 return _name; |
|
308 } |
|
309 |
|
310 // Return a string table accessor. |
|
311 inline const ImageStrings get_strings() const { |
|
312 return ImageStrings(_string_bytes, _header._strings_size); |
|
313 } |
|
314 |
|
315 // Return number of locations in image file index. |
|
316 inline u4 get_location_count() const { |
|
317 return _header._location_count; |
|
318 } |
|
319 |
|
320 // Return location attribute stream for location i. |
|
321 inline u1* get_location_data(u4 i) const { |
|
322 u4 offset = _offsets_table[i]; |
|
323 |
|
324 return offset != 0 ? _location_bytes + offset : NULL; |
|
325 } |
|
326 |
|
327 // Return the attribute stream for a named resourced. |
|
328 u1* find_location_data(const char* path) const; |
|
329 |
|
330 // Verify that a found location matches the supplied path. |
|
331 bool verify_location(ImageLocation& location, const char* path) const; |
|
332 |
|
333 // Return the resource for the supplied location info. |
|
334 u1* get_resource(ImageLocation& location) const; |
|
335 |
|
336 // Return the resource associated with the path else NULL if not found. |
|
337 void get_resource(const char* path, u1*& buffer, u8& size) const; |
|
338 |
|
339 // Return an array of packages for a given module |
|
340 GrowableArray<const char*>* packages(const char* name); |
|
341 }; |
|
342 |
|
343 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP |