27562
|
1 |
/*
|
|
2 |
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation.
|
|
8 |
*
|
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
13 |
* accompanied this code).
|
|
14 |
*
|
|
15 |
* You should have received a copy of the GNU General Public License version
|
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
18 |
*
|
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
20 |
* or visit www.oracle.com if you need additional information or have any
|
|
21 |
* questions.
|
|
22 |
*
|
|
23 |
*/
|
|
24 |
|
|
25 |
#ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP
|
|
26 |
#define SHARE_VM_CLASSFILE_IMAGEFILE_HPP
|
|
27 |
|
|
28 |
#include "classfile/classLoader.hpp"
|
|
29 |
#include "memory/allocation.hpp"
|
|
30 |
#include "memory/allocation.inline.hpp"
|
|
31 |
#include "utilities/globalDefinitions.hpp"
|
|
32 |
|
|
33 |
// Image files are an alternate file format for storing classes and resources. The
|
|
34 |
// goal is to supply file access which is faster and smaller than the jar format.
|
|
35 |
// It should be noted that unlike jars information stored in an image is in native
|
|
36 |
// endian format. This allows the image to be memory mapped into memory without
|
|
37 |
// endian translation. This also means that images are platform dependent.
|
|
38 |
//
|
|
39 |
// Image files are structured as three sections;
|
|
40 |
//
|
|
41 |
// +-----------+
|
|
42 |
// | Header |
|
|
43 |
// +-----------+
|
|
44 |
// | |
|
|
45 |
// | Directory |
|
|
46 |
// | |
|
|
47 |
// +-----------+
|
|
48 |
// | |
|
|
49 |
// | |
|
|
50 |
// | Resources |
|
|
51 |
// | |
|
|
52 |
// | |
|
|
53 |
// +-----------+
|
|
54 |
//
|
|
55 |
// The header contains information related to identification and description of
|
|
56 |
// contents.
|
|
57 |
//
|
|
58 |
// +-------------------------+
|
|
59 |
// | Magic (0xCAFEDADA) |
|
|
60 |
// +------------+------------+
|
|
61 |
// | Major Vers | Minor Vers |
|
|
62 |
// +------------+------------+
|
|
63 |
// | Location Count |
|
|
64 |
// +-------------------------+
|
|
65 |
// | Attributes Size |
|
|
66 |
// +-------------------------+
|
|
67 |
// | Strings Size |
|
|
68 |
// +-------------------------+
|
|
69 |
//
|
|
70 |
// Magic - means of identifying validity of the file. This avoids requiring a
|
|
71 |
// special file extension.
|
|
72 |
// Major vers, minor vers - differences in version numbers indicate structural
|
|
73 |
// changes in the image.
|
|
74 |
// Location count - number of locations/resources in the file. This count is also
|
|
75 |
// the length of lookup tables used in the directory.
|
|
76 |
// Attributes size - number of bytes in the region used to store location attribute
|
|
77 |
// streams.
|
|
78 |
// Strings size - the size of the region used to store strings used by the
|
|
79 |
// directory and meta data.
|
|
80 |
//
|
|
81 |
// The directory contains information related to resource lookup. The algorithm
|
|
82 |
// used for lookup is "A Practical Minimal Perfect Hashing Method"
|
|
83 |
// (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
|
|
84 |
// in the form <package>/<base>.<extension> return the resource location
|
|
85 |
// information;
|
|
86 |
//
|
|
87 |
// redirectIndex = hash(path, DEFAULT_SEED) % count;
|
|
88 |
// redirect = redirectTable[redirectIndex];
|
|
89 |
// if (redirect == 0) return not found;
|
|
90 |
// locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count;
|
|
91 |
// location = locationTable[locationIndex];
|
|
92 |
// if (!verify(location, path)) return not found;
|
|
93 |
// return location;
|
|
94 |
//
|
|
95 |
// Note: The hash function takes an initial seed value. A different seed value
|
|
96 |
// usually returns a different result for strings that would otherwise collide with
|
|
97 |
// other seeds. The verify function guarantees the found resource location is
|
|
98 |
// indeed the resource we are looking for.
|
|
99 |
//
|
|
100 |
// The following is the format of the directory;
|
|
101 |
//
|
|
102 |
// +-------------------+
|
|
103 |
// | Redirect Table |
|
|
104 |
// +-------------------+
|
|
105 |
// | Attribute Offsets |
|
|
106 |
// +-------------------+
|
|
107 |
// | Attribute Data |
|
|
108 |
// +-------------------+
|
|
109 |
// | Strings |
|
|
110 |
// +-------------------+
|
|
111 |
//
|
|
112 |
// Redirect Table - Array of 32-bit signed values representing actions that
|
|
113 |
// should take place for hashed strings that map to that
|
|
114 |
// value. Negative values indicate no hash collision and can be
|
|
115 |
// quickly converted to indices into attribute offsets. Positive
|
|
116 |
// values represent a new seed for hashing an index into attribute
|
|
117 |
// offsets. Zero indicates not found.
|
|
118 |
// Attribute Offsets - Array of 32-bit unsigned values representing offsets into
|
|
119 |
// attribute data. Attribute offsets can be iterated to do a
|
|
120 |
// full survey of resources in the image.
|
|
121 |
// Attribute Data - Bytes representing compact attribute data for locations. (See
|
|
122 |
// comments in ImageLocation.)
|
|
123 |
// Strings - Collection of zero terminated UTF-8 strings used by the directory and
|
|
124 |
// image meta data. Each string is accessed by offset. Each string is
|
|
125 |
// unique. Offset zero is reserved for the empty string.
|
|
126 |
//
|
|
127 |
// Note that the memory mapped directory assumes 32 bit alignment of the image
|
|
128 |
// header, the redirect table and the attribute offsets.
|
|
129 |
//
|
|
130 |
|
|
131 |
|
|
132 |
// Manage image file string table.
|
|
133 |
class ImageStrings {
|
|
134 |
private:
|
|
135 |
// Data bytes for strings.
|
|
136 |
u1* _data;
|
|
137 |
// Number of bytes in the string table.
|
|
138 |
u4 _size;
|
|
139 |
|
|
140 |
public:
|
|
141 |
// Prime used to generate hash for Perfect Hashing.
|
|
142 |
static const u4 HASH_MULTIPLIER = 0x01000193;
|
|
143 |
|
|
144 |
ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
|
|
145 |
|
|
146 |
// Return the UTF-8 string beginning at offset.
|
|
147 |
inline const char* get(u4 offset) const {
|
|
148 |
assert(offset < _size, "offset exceeds string table size");
|
|
149 |
return (const char*)(_data + offset);
|
|
150 |
}
|
|
151 |
|
|
152 |
// Compute the Perfect Hashing hash code for the supplied string.
|
|
153 |
inline static u4 hash_code(const char* string) {
|
|
154 |
return hash_code(string, HASH_MULTIPLIER);
|
|
155 |
}
|
|
156 |
|
|
157 |
// Compute the Perfect Hashing hash code for the supplied string, starting at seed.
|
|
158 |
static u4 hash_code(const char* string, u4 seed);
|
|
159 |
|
|
160 |
// Test to see if string begins with start. If so returns remaining portion
|
|
161 |
// of string. Otherwise, NULL. Used to test sections of a path without
|
|
162 |
// copying.
|
|
163 |
static const char* starts_with(const char* string, const char* start);
|
|
164 |
|
|
165 |
};
|
|
166 |
|
|
167 |
// Manage image file location attribute streams. Within an image, a location's
|
|
168 |
// attributes are compressed into a stream of bytes. An attribute stream is
|
|
169 |
// composed of individual attribute sequences. Each attribute sequence begins with
|
|
170 |
// a header byte containing the attribute 'kind' (upper 5 bits of header) and the
|
|
171 |
// 'length' less 1 (lower 3 bits of header) of bytes that follow containing the
|
|
172 |
// attribute value. Attribute values present as most significant byte first.
|
|
173 |
//
|
|
174 |
// Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22
|
|
175 |
// (kind = 4, length = 3), 0x03, 0x35, 0x62.
|
|
176 |
//
|
|
177 |
// An attribute stream is terminated with a header kind of ATTRIBUTE_END (header
|
|
178 |
// byte of zero.)
|
|
179 |
//
|
|
180 |
// ImageLocation inflates the stream into individual values stored in the long
|
|
181 |
// array _attributes. This allows an attribute value to be quickly accessed by
|
|
182 |
// direct indexing. Unspecified values default to zero.
|
|
183 |
//
|
|
184 |
// Notes:
|
|
185 |
// - Even though ATTRIBUTE_END is used to mark the end of the attribute stream,
|
|
186 |
// streams will contain zero byte values to represent lesser significant bits.
|
|
187 |
// Thus, detecting a zero byte is not sufficient to detect the end of an attribute
|
|
188 |
// stream.
|
|
189 |
// - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
|
|
190 |
// storing the resources. Thus, in an image this represents the number of bytes
|
|
191 |
// after the directory.
|
|
192 |
// - Currently, compressed resources are represented by having a non-zero
|
|
193 |
// ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the
|
|
194 |
// image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
|
|
195 |
// inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value
|
|
196 |
// of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and
|
|
197 |
// in memory. In the future, additional compression techniques will be used and
|
|
198 |
// represented differently.
|
|
199 |
// - Package strings include trailing slash and extensions include prefix period.
|
|
200 |
//
|
|
201 |
class ImageLocation {
|
|
202 |
public:
|
|
203 |
// Attribute kind enumeration.
|
|
204 |
static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker
|
|
205 |
static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base
|
|
206 |
static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent
|
|
207 |
static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension
|
|
208 |
static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource
|
|
209 |
static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource
|
|
210 |
static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource
|
|
211 |
static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds
|
|
212 |
|
|
213 |
private:
|
|
214 |
// Values of inflated attributes.
|
|
215 |
u8 _attributes[ATTRIBUTE_COUNT];
|
|
216 |
|
|
217 |
// Return the attribute value number of bytes.
|
|
218 |
inline static u1 attribute_length(u1 data) {
|
|
219 |
return (data & 0x7) + 1;
|
|
220 |
}
|
|
221 |
|
|
222 |
// Return the attribute kind.
|
|
223 |
inline static u1 attribute_kind(u1 data) {
|
|
224 |
u1 kind = data >> 3;
|
|
225 |
assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
|
|
226 |
return kind;
|
|
227 |
}
|
|
228 |
|
|
229 |
// Return the attribute length.
|
|
230 |
inline static u8 attribute_value(u1* data, u1 n) {
|
|
231 |
assert(0 < n && n <= 8, "invalid attribute value length");
|
|
232 |
u8 value = 0;
|
|
233 |
|
|
234 |
// Most significant bytes first.
|
|
235 |
for (u1 i = 0; i < n; i++) {
|
|
236 |
value <<= 8;
|
|
237 |
value |= data[i];
|
|
238 |
}
|
|
239 |
|
|
240 |
return value;
|
|
241 |
}
|
|
242 |
|
|
243 |
public:
|
|
244 |
ImageLocation(u1* data);
|
|
245 |
|
|
246 |
// Retrieve an attribute value from the inflated array.
|
|
247 |
inline u8 get_attribute(u1 kind) const {
|
|
248 |
assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
|
|
249 |
return _attributes[kind];
|
|
250 |
}
|
|
251 |
|
|
252 |
// Retrieve an attribute string value from the inflated array.
|
|
253 |
inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {
|
|
254 |
return strings.get((u4)get_attribute(kind));
|
|
255 |
}
|
|
256 |
};
|
|
257 |
|
|
258 |
// Manage the image file.
|
|
259 |
class ImageFile: public CHeapObj<mtClass> {
|
|
260 |
private:
|
|
261 |
// Image file marker.
|
|
262 |
static const u4 IMAGE_MAGIC = 0xCAFEDADA;
|
|
263 |
// Image file major version number.
|
|
264 |
static const u2 MAJOR_VERSION = 0;
|
|
265 |
// Image file minor version number.
|
|
266 |
static const u2 MINOR_VERSION = 1;
|
|
267 |
|
|
268 |
struct ImageHeader {
|
|
269 |
u4 _magic; // Image file marker
|
|
270 |
u2 _major_version; // Image file major version number
|
|
271 |
u2 _minor_version; // Image file minor version number
|
|
272 |
u4 _location_count; // Number of locations managed in index.
|
|
273 |
u4 _locations_size; // Number of bytes in attribute table.
|
|
274 |
u4 _strings_size; // Number of bytes in string table.
|
|
275 |
};
|
|
276 |
|
|
277 |
char* _name; // Name of image
|
|
278 |
int _fd; // File descriptor
|
|
279 |
bool _memory_mapped; // Is file memory mapped
|
|
280 |
ImageHeader _header; // Image header
|
|
281 |
u8 _index_size; // Total size of index
|
|
282 |
u1* _index_data; // Raw index data
|
|
283 |
s4* _redirect_table; // Perfect hash redirect table
|
|
284 |
u4* _offsets_table; // Location offset table
|
|
285 |
u1* _location_bytes; // Location attributes
|
|
286 |
u1* _string_bytes; // String table
|
|
287 |
|
|
288 |
// Compute number of bytes in image file index.
|
|
289 |
inline u8 index_size() {
|
|
290 |
return sizeof(ImageHeader) +
|
|
291 |
_header._location_count * sizeof(u4) * 2 +
|
|
292 |
_header._locations_size +
|
|
293 |
_header._strings_size;
|
|
294 |
}
|
|
295 |
|
|
296 |
public:
|
|
297 |
ImageFile(const char* name);
|
|
298 |
~ImageFile();
|
|
299 |
|
|
300 |
// Open image file for access.
|
|
301 |
bool open();
|
|
302 |
// Close image file.
|
|
303 |
void close();
|
|
304 |
|
|
305 |
// Retrieve name of image file.
|
|
306 |
inline const char* name() const {
|
|
307 |
return _name;
|
|
308 |
}
|
|
309 |
|
|
310 |
// Return a string table accessor.
|
|
311 |
inline const ImageStrings get_strings() const {
|
|
312 |
return ImageStrings(_string_bytes, _header._strings_size);
|
|
313 |
}
|
|
314 |
|
|
315 |
// Return number of locations in image file index.
|
|
316 |
inline u4 get_location_count() const {
|
|
317 |
return _header._location_count;
|
|
318 |
}
|
|
319 |
|
|
320 |
// Return location attribute stream for location i.
|
|
321 |
inline u1* get_location_data(u4 i) const {
|
|
322 |
u4 offset = _offsets_table[i];
|
|
323 |
|
|
324 |
return offset != 0 ? _location_bytes + offset : NULL;
|
|
325 |
}
|
|
326 |
|
|
327 |
// Return the attribute stream for a named resourced.
|
|
328 |
u1* find_location_data(const char* path) const;
|
|
329 |
|
|
330 |
// Verify that a found location matches the supplied path.
|
|
331 |
bool verify_location(ImageLocation& location, const char* path) const;
|
|
332 |
|
|
333 |
// Return the resource for the supplied location info.
|
|
334 |
u1* get_resource(ImageLocation& location) const;
|
|
335 |
|
|
336 |
// Return the resource associated with the path else NULL if not found.
|
|
337 |
void get_resource(const char* path, u1*& buffer, u8& size) const;
|
|
338 |
|
|
339 |
// Return an array of packages for a given module
|
|
340 |
GrowableArray<const char*>* packages(const char* name);
|
|
341 |
};
|
|
342 |
|
|
343 |
#endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP
|