author | martin |
Tue, 15 Sep 2015 21:56:04 -0700 | |
changeset 32649 | 2ee9017c7597 |
parent 28059 | e576535359cc |
permissions | -rw-r--r-- |
2 | 1 |
/* |
10115 | 2 |
* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package com.sun.java.util.jar.pack; |
|
27 |
||
7192
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
28 |
import java.io.IOException; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
29 |
import java.io.InputStream; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
30 |
import java.io.OutputStream; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
31 |
import java.util.HashMap; |
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
32 |
import java.util.Map; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
33 |
import static com.sun.java.util.jar.pack.Constants.*; |
2 | 34 |
/** |
35 |
* Define the conversions between sequences of small integers and raw bytes. |
|
36 |
* This is a schema of encodings which incorporates varying lengths, |
|
37 |
* varying degrees of length variability, and varying amounts of signed-ness. |
|
38 |
* @author John Rose |
|
39 |
*/ |
|
10115 | 40 |
class Coding implements Comparable<Coding>, CodingMethod, Histogram.BitMetric { |
2 | 41 |
/* |
42 |
Coding schema for single integers, parameterized by (B,H,S): |
|
43 |
||
44 |
Let B in [1,5], H in [1,256], S in [0,3]. |
|
45 |
(S limit is arbitrary. B follows the 32-bit limit. H is byte size.) |
|
46 |
||
47 |
A given (B,H,S) code varies in length from 1 to B bytes. |
|
48 |
||
49 |
The 256 values a byte may take on are divided into L=(256-H) and H |
|
50 |
values, with all the H values larger than the L values. |
|
51 |
(That is, the L values are [0,L) and the H are [L,256).) |
|
52 |
||
53 |
The last byte is always either the B-th byte, a byte with "L value" |
|
54 |
(<L), or both. There is no other byte that satisfies these conditions. |
|
55 |
All bytes before the last always have "H values" (>=L). |
|
56 |
||
57 |
Therefore, if L==0, the code always has the full length of B bytes. |
|
58 |
The coding then becomes a classic B-byte little-endian unsigned integer. |
|
59 |
(Also, if L==128, the high bit of each byte acts signals the presence |
|
60 |
of a following byte, up to the maximum length.) |
|
61 |
||
62 |
In the unsigned case (S==0), the coding is compact and monotonic |
|
63 |
in the ordering of byte sequences defined by appending zero bytes |
|
64 |
to pad them to a common length B, reversing them, and ordering them |
|
65 |
lexicographically. (This agrees with "little-endian" byte order.) |
|
66 |
||
67 |
Therefore, the unsigned value of a byte sequence may be defined as: |
|
68 |
<pre> |
|
69 |
U(b0) == b0 |
|
70 |
in [0..L) |
|
71 |
or [0..256) if B==1 (**) |
|
72 |
||
73 |
U(b0,b1) == b0 + b1*H |
|
74 |
in [L..L*(1+H)) |
|
75 |
or [L..L*(1+H) + H^2) if B==2 |
|
76 |
||
77 |
U(b0,b1,b2) == b0 + b1*H + b2*H^2 |
|
78 |
in [L*(1+H)..L*(1+H+H^2)) |
|
79 |
or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3 |
|
80 |
||
81 |
U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
82 |
up to L*Sum[i<n]( H^i ) |
|
83 |
or to L*Sum[i<n]( H^i ) + H^n if n==B |
|
84 |
</pre> |
|
85 |
||
86 |
(**) If B==1, the values H,L play no role in the coding. |
|
87 |
As a convention, we require that any (1,H,S) code must always |
|
88 |
encode values less than H. Thus, a simple unsigned byte is coded |
|
89 |
specifically by the code (1,256,0). |
|
90 |
||
91 |
(Properly speaking, the unsigned case should be parameterized as |
|
92 |
S==Infinity. If the schema were regular, the case S==0 would really |
|
93 |
denote a numbering in which all coded values are negative.) |
|
94 |
||
95 |
If S>0, the unsigned value of a byte sequence is regarded as a binary |
|
96 |
integer. If any of the S low-order bits are zero, the corresponding |
|
97 |
signed value will be non-negative. If all of the S low-order bits |
|
28059
e576535359cc
8067377: My hobby: caning, then then canning, the the can-can
martin
parents:
25859
diff
changeset
|
98 |
(S>0) are one, the corresponding signed value will be negative. |
2 | 99 |
|
100 |
The non-negative signed values are compact and monotonically increasing |
|
101 |
(from 0) in the ordering of the corresponding unsigned values. |
|
102 |
||
103 |
The negative signed values are compact and monotonically decreasing |
|
104 |
(from -1) in the ordering of the corresponding unsigned values. |
|
105 |
||
106 |
In essence, the low-order S bits function as a collective sign bit |
|
107 |
for negative signed numbers, and as a low-order base-(2^S-1) digit |
|
108 |
for non-negative signed numbers. |
|
109 |
||
110 |
Therefore, the signed value corresponding to an unsigned value is: |
|
111 |
<pre> |
|
112 |
Sgn(x) == x if S==0 |
|
113 |
Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S), if S>0, (x % 2^S) < 2^S-1 |
|
114 |
Sgn(x) == -(x / 2^S)-1, if S>0, (x % 2^S) == 2^S-1 |
|
115 |
</pre> |
|
116 |
||
117 |
Finally, the value of a byte sequence, given the coding parameters |
|
118 |
(B,H,S), is defined as: |
|
119 |
<pre> |
|
120 |
V(b[i]: i<n) == Sgn(U(b[i]: i<n)) |
|
121 |
</pre> |
|
122 |
||
123 |
The extremal positive and negative signed value for a given range |
|
124 |
of unsigned values may be found by sign-encoding the largest unsigned |
|
125 |
value which is not 2^S-1 mod 2^S, and that which is, respectively. |
|
126 |
||
127 |
Because B,H,S are variable, this is not a single coding but a schema |
|
128 |
of codings. For optimal compression, it is necessary to adaptively |
|
129 |
select specific codings to the data being compressed. |
|
130 |
||
131 |
For example, if a sequence of values happens never to be negative, |
|
132 |
S==0 is the best choice. If the values are equally balanced between |
|
133 |
negative and positive, S==1. If negative values are rare, then S>1 |
|
134 |
is more appropriate. |
|
135 |
||
136 |
A (B,H,S) encoding is called a "subrange" if it does not encode |
|
137 |
the largest 32-bit value, and if the number R of values it does |
|
138 |
encode can be expressed as a positive 32-bit value. (Note that |
|
139 |
B=1 implies R<=256, B=2 implies R<=65536, etc.) |
|
140 |
||
141 |
A delta version of a given (B,H,S) coding encodes an array of integers |
|
142 |
by writing their successive differences in the (B,H,S) coding. |
|
143 |
The original integers themselves may be recovered by making a |
|
144 |
running accumulation of sum of the differences as they are read. |
|
145 |
||
146 |
As a special case, if a (B,H,S) encoding is a subrange, its delta |
|
147 |
version will only encode arrays of numbers in the coding's unsigned |
|
148 |
range, [0..R-1]. The coding of deltas is still in the normal signed |
|
149 |
range, if S!=0. During delta encoding, all subtraction results are |
|
150 |
reduced to the signed range, by adding multiples of R. Likewise, |
|
151 |
. during encoding, all addition results are reduced to the unsigned range. |
|
152 |
This special case for subranges allows the benefits of wraparound |
|
153 |
when encoding correlated sequences of very small positive numbers. |
|
154 |
*/ |
|
155 |
||
156 |
// Code-specific limits: |
|
157 |
private static int saturate32(long x) { |
|
158 |
if (x > Integer.MAX_VALUE) return Integer.MAX_VALUE; |
|
159 |
if (x < Integer.MIN_VALUE) return Integer.MIN_VALUE; |
|
160 |
return (int)x; |
|
161 |
} |
|
162 |
private static long codeRangeLong(int B, int H) { |
|
163 |
return codeRangeLong(B, H, B); |
|
164 |
} |
|
165 |
private static long codeRangeLong(int B, int H, int nMax) { |
|
166 |
// Code range for a all (B,H) codes of length <=nMax (<=B). |
|
167 |
// n < B: L*Sum[i<n]( H^i ) |
|
168 |
// n == B: L*Sum[i<B]( H^i ) + H^B |
|
169 |
assert(nMax >= 0 && nMax <= B); |
|
170 |
assert(B >= 1 && B <= 5); |
|
171 |
assert(H >= 1 && H <= 256); |
|
172 |
if (nMax == 0) return 0; // no codes of zero length |
|
173 |
if (B == 1) return H; // special case; see (**) above |
|
174 |
int L = 256-H; |
|
175 |
long sum = 0; |
|
176 |
long H_i = 1; |
|
177 |
for (int n = 1; n <= nMax; n++) { |
|
178 |
sum += H_i; |
|
179 |
H_i *= H; |
|
180 |
} |
|
181 |
sum *= L; |
|
182 |
if (nMax == B) |
|
183 |
sum += H_i; |
|
184 |
return sum; |
|
185 |
} |
|
186 |
/** Largest int representable by (B,H,S) in up to nMax bytes. */ |
|
187 |
public static int codeMax(int B, int H, int S, int nMax) { |
|
188 |
//assert(S >= 0 && S <= S_MAX); |
|
189 |
long range = codeRangeLong(B, H, nMax); |
|
190 |
if (range == 0) |
|
191 |
return -1; // degenerate max value for empty set of codes |
|
192 |
if (S == 0 || range >= (long)1<<32) |
|
193 |
return saturate32(range-1); |
|
194 |
long maxPos = range-1; |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
195 |
while (isNegativeCode(maxPos, S)) { |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
196 |
--maxPos; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
197 |
} |
2 | 198 |
if (maxPos < 0) return -1; // No positive codings at all. |
199 |
int smax = decodeSign32(maxPos, S); |
|
200 |
// check for 32-bit wraparound: |
|
201 |
if (smax < 0) |
|
202 |
return Integer.MAX_VALUE; |
|
203 |
return smax; |
|
204 |
} |
|
205 |
/** Smallest int representable by (B,H,S) in up to nMax bytes. |
|
206 |
Returns Integer.MIN_VALUE if 32-bit wraparound covers |
|
207 |
the entire negative range. |
|
208 |
*/ |
|
209 |
public static int codeMin(int B, int H, int S, int nMax) { |
|
210 |
//assert(S >= 0 && S <= S_MAX); |
|
211 |
long range = codeRangeLong(B, H, nMax); |
|
212 |
if (range >= (long)1<<32 && nMax == B) { |
|
213 |
// Can code negative values via 32-bit wraparound. |
|
214 |
return Integer.MIN_VALUE; |
|
215 |
} |
|
216 |
if (S == 0) { |
|
217 |
return 0; |
|
218 |
} |
|
219 |
long maxNeg = range-1; |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
220 |
while (!isNegativeCode(maxNeg, S)) |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
221 |
--maxNeg; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
222 |
|
2 | 223 |
if (maxNeg < 0) return 0; // No negative codings at all. |
224 |
return decodeSign32(maxNeg, S); |
|
225 |
} |
|
226 |
||
227 |
// Some of the arithmetic below is on unsigned 32-bit integers. |
|
228 |
// These must be represented in Java as longs in the range [0..2^32-1]. |
|
229 |
// The conversion to a signed int is just the Java cast (int), but |
|
230 |
// the conversion to an unsigned int is the following little method: |
|
231 |
private static long toUnsigned32(int sx) { |
|
232 |
return ((long)sx << 32) >>> 32; |
|
233 |
} |
|
234 |
||
235 |
// Sign encoding: |
|
236 |
private static boolean isNegativeCode(long ux, int S) { |
|
237 |
assert(S > 0); |
|
238 |
assert(ux >= -1); // can be out of 32-bit range; who cares |
|
239 |
int Smask = (1<<S)-1; |
|
240 |
return (((int)ux+1) & Smask) == 0; |
|
241 |
} |
|
242 |
private static boolean hasNegativeCode(int sx, int S) { |
|
243 |
assert(S > 0); |
|
244 |
// If S>=2 very low negatives are coded by 32-bit-wrapped positives. |
|
245 |
// The lowest negative representable by a negative coding is |
|
246 |
// ~(umax32 >> S), and the next lower number is coded by wrapping |
|
247 |
// the highest positive: |
|
248 |
// CodePos(umax32-1) -> (umax32-1)-((umax32-1)>>S) |
|
249 |
// which simplifies to ~(umax32 >> S)-1. |
|
250 |
return (0 > sx) && (sx >= ~(-1>>>S)); |
|
251 |
} |
|
252 |
private static int decodeSign32(long ux, int S) { |
|
253 |
assert(ux == toUnsigned32((int)ux)) // must be unsigned 32-bit number |
|
254 |
: (Long.toHexString(ux)); |
|
255 |
if (S == 0) { |
|
256 |
return (int) ux; // cast to signed int |
|
257 |
} |
|
258 |
int sx; |
|
259 |
if (isNegativeCode(ux, S)) { |
|
260 |
// Sgn(x) == -(x / 2^S)-1 |
|
261 |
sx = ~((int)ux >>> S); |
|
262 |
} else { |
|
263 |
// Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S) |
|
264 |
sx = (int)ux - ((int)ux >>> S); |
|
265 |
} |
|
266 |
// Assert special case of S==1: |
|
267 |
assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1))); |
|
268 |
return sx; |
|
269 |
} |
|
270 |
private static long encodeSign32(int sx, int S) { |
|
271 |
if (S == 0) { |
|
272 |
return toUnsigned32(sx); // unsigned 32-bit int |
|
273 |
} |
|
274 |
int Smask = (1<<S)-1; |
|
275 |
long ux; |
|
276 |
if (!hasNegativeCode(sx, S)) { |
|
277 |
// InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1)) |
|
278 |
ux = sx + (toUnsigned32(sx) / Smask); |
|
279 |
} else { |
|
280 |
// InvSgn(sx) = (-sx-1)*2^S + (2^S-1) |
|
281 |
ux = (-sx << S) - 1; |
|
282 |
} |
|
283 |
ux = toUnsigned32((int)ux); |
|
284 |
assert(sx == decodeSign32(ux, S)) |
|
285 |
: (Long.toHexString(ux)+" -> "+ |
|
286 |
Integer.toHexString(sx)+" != "+ |
|
287 |
Integer.toHexString(decodeSign32(ux, S))); |
|
288 |
return ux; |
|
289 |
} |
|
290 |
||
291 |
// Top-level coding of single integers: |
|
292 |
public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) { |
|
293 |
long ux = encodeSign32(sx, S); |
|
294 |
assert(ux == toUnsigned32((int)ux)); |
|
295 |
assert(ux < codeRangeLong(B, H)) |
|
296 |
: Long.toHexString(ux); |
|
297 |
int L = 256-H; |
|
298 |
long sum = ux; |
|
299 |
int pos = outpos[0]; |
|
300 |
for (int i = 0; i < B-1; i++) { |
|
301 |
if (sum < L) |
|
302 |
break; |
|
303 |
sum -= L; |
|
304 |
int b_i = (int)( L + (sum % H) ); |
|
305 |
sum /= H; |
|
306 |
out[pos++] = (byte)b_i; |
|
307 |
} |
|
308 |
out[pos++] = (byte)sum; |
|
309 |
// Report number of bytes written by updating outpos[0]: |
|
310 |
outpos[0] = pos; |
|
311 |
// Check right away for mis-coding. |
|
312 |
//assert(sx == readInt(out, new int[1], B, H, S)); |
|
313 |
} |
|
314 |
public static int readInt(byte[] in, int[] inpos, int B, int H, int S) { |
|
315 |
// U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
316 |
int L = 256-H; |
|
317 |
long sum = 0; |
|
318 |
long H_i = 1; |
|
319 |
int pos = inpos[0]; |
|
320 |
for (int i = 0; i < B; i++) { |
|
321 |
int b_i = in[pos++] & 0xFF; |
|
322 |
sum += b_i*H_i; |
|
323 |
H_i *= H; |
|
324 |
if (b_i < L) break; |
|
325 |
} |
|
326 |
//assert(sum >= 0 && sum < codeRangeLong(B, H)); |
|
327 |
// Report number of bytes read by updating inpos[0]: |
|
328 |
inpos[0] = pos; |
|
329 |
return decodeSign32(sum, S); |
|
330 |
} |
|
331 |
// The Stream version doesn't fetch a byte unless it is needed for coding. |
|
332 |
public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException { |
|
333 |
// U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
334 |
int L = 256-H; |
|
335 |
long sum = 0; |
|
336 |
long H_i = 1; |
|
337 |
for (int i = 0; i < B; i++) { |
|
338 |
int b_i = in.read(); |
|
339 |
if (b_i < 0) throw new RuntimeException("unexpected EOF"); |
|
340 |
sum += b_i*H_i; |
|
341 |
H_i *= H; |
|
342 |
if (b_i < L) break; |
|
343 |
} |
|
344 |
assert(sum >= 0 && sum < codeRangeLong(B, H)); |
|
345 |
return decodeSign32(sum, S); |
|
346 |
} |
|
347 |
||
348 |
public static final int B_MAX = 5; /* B: [1,5] */ |
|
349 |
public static final int H_MAX = 256; /* H: [1,256] */ |
|
350 |
public static final int S_MAX = 2; /* S: [0,2] */ |
|
351 |
||
352 |
// END OF STATICS. |
|
353 |
||
354 |
private final int B; /*1..5*/ // # bytes (1..5) |
|
355 |
private final int H; /*1..256*/ // # codes requiring a higher byte |
|
356 |
private final int L; /*0..255*/ // # codes requiring a higher byte |
|
357 |
private final int S; /*0..3*/ // # low-order bits representing sign |
|
358 |
private final int del; /*0..2*/ // type of delta encoding (0 == none) |
|
359 |
private final int min; // smallest representable value |
|
360 |
private final int max; // largest representable value |
|
361 |
private final int umin; // smallest representable uns. value |
|
362 |
private final int umax; // largest representable uns. value |
|
363 |
private final int[] byteMin; // smallest repr. value, given # bytes |
|
364 |
private final int[] byteMax; // largest repr. value, given # bytes |
|
365 |
||
366 |
private Coding(int B, int H, int S) { |
|
367 |
this(B, H, S, 0); |
|
368 |
} |
|
369 |
private Coding(int B, int H, int S, int del) { |
|
370 |
this.B = B; |
|
371 |
this.H = H; |
|
372 |
this.L = 256-H; |
|
373 |
this.S = S; |
|
374 |
this.del = del; |
|
375 |
this.min = codeMin(B, H, S, B); |
|
376 |
this.max = codeMax(B, H, S, B); |
|
377 |
this.umin = codeMin(B, H, 0, B); |
|
378 |
this.umax = codeMax(B, H, 0, B); |
|
379 |
this.byteMin = new int[B]; |
|
380 |
this.byteMax = new int[B]; |
|
381 |
||
382 |
for (int nMax = 1; nMax <= B; nMax++) { |
|
383 |
byteMin[nMax-1] = codeMin(B, H, S, nMax); |
|
384 |
byteMax[nMax-1] = codeMax(B, H, S, nMax); |
|
385 |
} |
|
386 |
} |
|
387 |
||
388 |
public boolean equals(Object x) { |
|
389 |
if (!(x instanceof Coding)) return false; |
|
390 |
Coding that = (Coding) x; |
|
391 |
if (this.B != that.B) return false; |
|
392 |
if (this.H != that.H) return false; |
|
393 |
if (this.S != that.S) return false; |
|
394 |
if (this.del != that.del) return false; |
|
395 |
return true; |
|
396 |
} |
|
397 |
||
398 |
public int hashCode() { |
|
399 |
return (del<<14)+(S<<11)+(B<<8)+(H<<0); |
|
400 |
} |
|
401 |
||
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
402 |
private static Map<Coding, Coding> codeMap; |
2 | 403 |
|
404 |
private static synchronized Coding of(int B, int H, int S, int del) { |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
405 |
if (codeMap == null) codeMap = new HashMap<>(); |
2 | 406 |
Coding x0 = new Coding(B, H, S, del); |
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
407 |
Coding x1 = codeMap.get(x0); |
2 | 408 |
if (x1 == null) codeMap.put(x0, x1 = x0); |
409 |
return x1; |
|
410 |
} |
|
411 |
||
412 |
public static Coding of(int B, int H) { |
|
413 |
return of(B, H, 0, 0); |
|
414 |
} |
|
415 |
||
416 |
public static Coding of(int B, int H, int S) { |
|
417 |
return of(B, H, S, 0); |
|
418 |
} |
|
419 |
||
420 |
public boolean canRepresentValue(int x) { |
|
421 |
if (isSubrange()) |
|
422 |
return canRepresentUnsigned(x); |
|
423 |
else |
|
424 |
return canRepresentSigned(x); |
|
425 |
} |
|
426 |
/** Can this coding represent a single value, possibly a delta? |
|
427 |
* This ignores the D property. That is, for delta codings, |
|
428 |
* this tests whether a delta value of 'x' can be coded. |
|
429 |
* For signed delta codings which produce unsigned end values, |
|
430 |
* use canRepresentUnsigned. |
|
431 |
*/ |
|
432 |
public boolean canRepresentSigned(int x) { |
|
433 |
return (x >= min && x <= max); |
|
434 |
} |
|
435 |
/** Can this coding, apart from its S property, |
|
436 |
* represent a single value? (Negative values |
|
437 |
* can only be represented via 32-bit overflow, |
|
438 |
* so this returns true for negative values |
|
439 |
* if isFullRange is true.) |
|
440 |
*/ |
|
441 |
public boolean canRepresentUnsigned(int x) { |
|
442 |
return (x >= umin && x <= umax); |
|
443 |
} |
|
444 |
||
445 |
// object-oriented code/decode |
|
446 |
public int readFrom(byte[] in, int[] inpos) { |
|
447 |
return readInt(in, inpos, B, H, S); |
|
448 |
} |
|
449 |
public void writeTo(byte[] out, int[] outpos, int x) { |
|
450 |
writeInt(out, outpos, x, B, H, S); |
|
451 |
} |
|
452 |
||
453 |
// Stream versions |
|
454 |
public int readFrom(InputStream in) throws IOException { |
|
455 |
return readIntFrom(in, B, H, S); |
|
456 |
} |
|
457 |
public void writeTo(OutputStream out, int x) throws IOException { |
|
458 |
byte[] buf = new byte[B]; |
|
459 |
int[] pos = new int[1]; |
|
460 |
writeInt(buf, pos, x, B, H, S); |
|
461 |
out.write(buf, 0, pos[0]); |
|
462 |
} |
|
463 |
||
464 |
// Stream/array versions |
|
465 |
public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { |
|
466 |
// %%% use byte[] buffer |
|
467 |
for (int i = start; i < end; i++) |
|
468 |
a[i] = readFrom(in); |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
469 |
|
2 | 470 |
for (int dstep = 0; dstep < del; dstep++) { |
471 |
long state = 0; |
|
472 |
for (int i = start; i < end; i++) { |
|
473 |
state += a[i]; |
|
474 |
// Reduce array values to the required range. |
|
475 |
if (isSubrange()) { |
|
476 |
state = reduceToUnsignedRange(state); |
|
477 |
} |
|
478 |
a[i] = (int) state; |
|
479 |
} |
|
480 |
} |
|
481 |
} |
|
482 |
public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { |
|
483 |
if (end <= start) return; |
|
484 |
for (int dstep = 0; dstep < del; dstep++) { |
|
485 |
int[] deltas; |
|
486 |
if (!isSubrange()) |
|
487 |
deltas = makeDeltas(a, start, end, 0, 0); |
|
488 |
else |
|
489 |
deltas = makeDeltas(a, start, end, min, max); |
|
490 |
a = deltas; |
|
491 |
start = 0; |
|
492 |
end = deltas.length; |
|
493 |
} |
|
494 |
// The following code is a buffered version of this loop: |
|
495 |
// for (int i = start; i < end; i++) |
|
496 |
// writeTo(out, a[i]); |
|
497 |
byte[] buf = new byte[1<<8]; |
|
498 |
final int bufmax = buf.length-B; |
|
499 |
int[] pos = { 0 }; |
|
500 |
for (int i = start; i < end; ) { |
|
501 |
while (pos[0] <= bufmax) { |
|
502 |
writeTo(buf, pos, a[i++]); |
|
503 |
if (i >= end) break; |
|
504 |
} |
|
505 |
out.write(buf, 0, pos[0]); |
|
506 |
pos[0] = 0; |
|
507 |
} |
|
508 |
} |
|
509 |
||
510 |
/** Tell if the range of this coding (number of distinct |
|
511 |
* representable values) can be expressed in 32 bits. |
|
512 |
*/ |
|
513 |
boolean isSubrange() { |
|
514 |
return max < Integer.MAX_VALUE |
|
515 |
&& ((long)max - (long)min + 1) <= Integer.MAX_VALUE; |
|
516 |
} |
|
517 |
||
518 |
/** Tell if this coding can represent all 32-bit values. |
|
519 |
* Note: Some codings, such as unsigned ones, can be neither |
|
520 |
* subranges nor full-range codings. |
|
521 |
*/ |
|
522 |
boolean isFullRange() { |
|
523 |
return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE; |
|
524 |
} |
|
525 |
||
526 |
/** Return the number of values this coding (a subrange) can represent. */ |
|
527 |
int getRange() { |
|
528 |
assert(isSubrange()); |
|
529 |
return (max - min) + 1; // range includes both min & max |
|
530 |
} |
|
531 |
||
532 |
Coding setB(int B) { return Coding.of(B, H, S, del); } |
|
533 |
Coding setH(int H) { return Coding.of(B, H, S, del); } |
|
534 |
Coding setS(int S) { return Coding.of(B, H, S, del); } |
|
535 |
Coding setL(int L) { return setH(256-L); } |
|
536 |
Coding setD(int del) { return Coding.of(B, H, S, del); } |
|
537 |
Coding getDeltaCoding() { return setD(del+1); } |
|
538 |
||
539 |
/** Return a coding suitable for representing summed, modulo-reduced values. */ |
|
540 |
Coding getValueCoding() { |
|
541 |
if (isDelta()) |
|
542 |
return Coding.of(B, H, 0, del-1); |
|
543 |
else |
|
544 |
return this; |
|
545 |
} |
|
546 |
||
547 |
/** Reduce the given value to be within this coding's unsigned range, |
|
548 |
* by adding or subtracting a multiple of (max-min+1). |
|
549 |
*/ |
|
550 |
int reduceToUnsignedRange(long value) { |
|
551 |
if (value == (int)value && canRepresentUnsigned((int)value)) |
|
552 |
// already in unsigned range |
|
553 |
return (int)value; |
|
554 |
int range = getRange(); |
|
555 |
assert(range > 0); |
|
556 |
value %= range; |
|
557 |
if (value < 0) value += range; |
|
558 |
assert(canRepresentUnsigned((int)value)); |
|
559 |
return (int)value; |
|
560 |
} |
|
561 |
||
562 |
int reduceToSignedRange(int value) { |
|
563 |
if (canRepresentSigned(value)) |
|
564 |
// already in signed range |
|
565 |
return value; |
|
566 |
return reduceToSignedRange(value, min, max); |
|
567 |
} |
|
568 |
static int reduceToSignedRange(int value, int min, int max) { |
|
569 |
int range = (max-min+1); |
|
570 |
assert(range > 0); |
|
571 |
int value0 = value; |
|
572 |
value -= min; |
|
573 |
if (value < 0 && value0 >= 0) { |
|
574 |
// 32-bit overflow, but the next '%=' op needs to be unsigned |
|
575 |
value -= range; |
|
576 |
assert(value >= 0); |
|
577 |
} |
|
578 |
value %= range; |
|
579 |
if (value < 0) value += range; |
|
580 |
value += min; |
|
581 |
assert(min <= value && value <= max); |
|
582 |
return value; |
|
583 |
} |
|
584 |
||
585 |
/** Does this coding support at least one negative value? |
|
586 |
Includes codings that can do so via 32-bit wraparound. |
|
587 |
*/ |
|
588 |
boolean isSigned() { |
|
589 |
return min < 0; |
|
590 |
} |
|
591 |
/** Does this coding code arrays by making successive differences? */ |
|
592 |
boolean isDelta() { |
|
593 |
return del != 0; |
|
594 |
} |
|
595 |
||
596 |
public int B() { return B; } |
|
597 |
public int H() { return H; } |
|
598 |
public int L() { return L; } |
|
599 |
public int S() { return S; } |
|
600 |
public int del() { return del; } |
|
601 |
public int min() { return min; } |
|
602 |
public int max() { return max; } |
|
603 |
public int umin() { return umin; } |
|
604 |
public int umax() { return umax; } |
|
605 |
public int byteMin(int b) { return byteMin[b-1]; } |
|
606 |
public int byteMax(int b) { return byteMax[b-1]; } |
|
607 |
||
10115 | 608 |
public int compareTo(Coding that) { |
2 | 609 |
int dkey = this.del - that.del; |
610 |
if (dkey == 0) |
|
611 |
dkey = this.B - that.B; |
|
612 |
if (dkey == 0) |
|
613 |
dkey = this.H - that.H; |
|
614 |
if (dkey == 0) |
|
615 |
dkey = this.S - that.S; |
|
616 |
return dkey; |
|
617 |
} |
|
618 |
||
619 |
/** Heuristic measure of the difference between two codings. */ |
|
620 |
public int distanceFrom(Coding that) { |
|
621 |
int diffdel = this.del - that.del; |
|
622 |
if (diffdel < 0) diffdel = -diffdel; |
|
623 |
int diffS = this.S - that.S; |
|
624 |
if (diffS < 0) diffS = -diffS; |
|
625 |
int diffB = this.B - that.B; |
|
626 |
if (diffB < 0) diffB = -diffB; |
|
627 |
int diffHL; |
|
628 |
if (this.H == that.H) { |
|
629 |
diffHL = 0; |
|
630 |
} else { |
|
631 |
// Distance in log space of H (<=128) and L (<128). |
|
632 |
int thisHL = this.getHL(); |
|
633 |
int thatHL = that.getHL(); |
|
634 |
// Double the accuracy of the log: |
|
635 |
thisHL *= thisHL; |
|
636 |
thatHL *= thatHL; |
|
637 |
if (thisHL > thatHL) |
|
638 |
diffHL = ceil_lg2(1+(thisHL-1)/thatHL); |
|
639 |
else |
|
640 |
diffHL = ceil_lg2(1+(thatHL-1)/thisHL); |
|
641 |
} |
|
642 |
int norm = 5*(diffdel + diffS + diffB) + diffHL; |
|
643 |
assert(norm != 0 || this.compareTo(that) == 0); |
|
644 |
return norm; |
|
645 |
} |
|
646 |
private int getHL() { |
|
647 |
// Follow H in log space by the multiplicative inverse of L. |
|
648 |
if (H <= 128) return H; |
|
649 |
if (L >= 1) return 128*128/L; |
|
650 |
return 128*256; |
|
651 |
} |
|
652 |
||
653 |
/** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */ |
|
654 |
static int ceil_lg2(int x) { |
|
655 |
assert(x-1 >= 0); // x in range (int.MIN_VALUE -> 32) |
|
656 |
x -= 1; |
|
657 |
int lg = 0; |
|
658 |
while (x != 0) { |
|
659 |
lg++; |
|
660 |
x >>= 1; |
|
661 |
} |
|
662 |
return lg; |
|
663 |
} |
|
664 |
||
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
28059
diff
changeset
|
665 |
private static final byte[] byteBitWidths = new byte[0x100]; |
2 | 666 |
static { |
667 |
for (int b = 0; b < byteBitWidths.length; b++) { |
|
668 |
byteBitWidths[b] = (byte) ceil_lg2(b + 1); |
|
669 |
} |
|
670 |
for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) { |
|
671 |
assert(bitWidth(i) == ceil_lg2(i + 1)); |
|
672 |
} |
|
673 |
} |
|
674 |
||
675 |
/** Number of significant bits in i, not counting sign bits. |
|
676 |
* For positive i, it is ceil_lg2(i + 1). |
|
677 |
*/ |
|
678 |
static int bitWidth(int i) { |
|
679 |
if (i < 0) i = ~i; // change sign |
|
680 |
int w = 0; |
|
681 |
int lo = i; |
|
682 |
if (lo < byteBitWidths.length) |
|
683 |
return byteBitWidths[lo]; |
|
684 |
int hi; |
|
685 |
hi = (lo >>> 16); |
|
686 |
if (hi != 0) { |
|
687 |
lo = hi; |
|
688 |
w += 16; |
|
689 |
} |
|
690 |
hi = (lo >>> 8); |
|
691 |
if (hi != 0) { |
|
692 |
lo = hi; |
|
693 |
w += 8; |
|
694 |
} |
|
695 |
w += byteBitWidths[lo]; |
|
696 |
//assert(w == ceil_lg2(i + 1)); |
|
697 |
return w; |
|
698 |
} |
|
699 |
||
700 |
/** Create an array of successive differences. |
|
701 |
* If min==max, accept any and all 32-bit overflow. |
|
702 |
* Otherwise, avoid 32-bit overflow, and reduce all differences |
|
703 |
* to a value in the given range, by adding or subtracting |
|
704 |
* multiples of the range cardinality (max-min+1). |
|
705 |
* Also, the values are assumed to be in the range [0..(max-min)]. |
|
706 |
*/ |
|
707 |
static int[] makeDeltas(int[] values, int start, int end, |
|
708 |
int min, int max) { |
|
709 |
assert(max >= min); |
|
710 |
int count = end-start; |
|
711 |
int[] deltas = new int[count]; |
|
712 |
int state = 0; |
|
713 |
if (min == max) { |
|
714 |
for (int i = 0; i < count; i++) { |
|
715 |
int value = values[start+i]; |
|
716 |
deltas[i] = value - state; |
|
717 |
state = value; |
|
718 |
} |
|
719 |
} else { |
|
720 |
for (int i = 0; i < count; i++) { |
|
721 |
int value = values[start+i]; |
|
722 |
assert(value >= 0 && value+min <= max); |
|
723 |
int delta = value - state; |
|
724 |
assert(delta == (long)value - (long)state); // no overflow |
|
725 |
state = value; |
|
726 |
// Reduce delta values to the required range. |
|
727 |
delta = reduceToSignedRange(delta, min, max); |
|
728 |
deltas[i] = delta; |
|
729 |
} |
|
730 |
} |
|
731 |
return deltas; |
|
732 |
} |
|
733 |
||
734 |
boolean canRepresent(int minValue, int maxValue) { |
|
735 |
assert(minValue <= maxValue); |
|
736 |
if (del > 0) { |
|
737 |
if (isSubrange()) { |
|
738 |
// We will force the values to reduce to the right subrange. |
|
739 |
return canRepresentUnsigned(maxValue) |
|
740 |
&& canRepresentUnsigned(minValue); |
|
741 |
} else { |
|
742 |
// Huge range; delta values must assume full 32-bit range. |
|
743 |
return isFullRange(); |
|
744 |
} |
|
745 |
} |
|
746 |
else |
|
747 |
// final values must be representable |
|
748 |
return canRepresentSigned(maxValue) |
|
749 |
&& canRepresentSigned(minValue); |
|
750 |
} |
|
751 |
||
752 |
boolean canRepresent(int[] values, int start, int end) { |
|
753 |
int len = end-start; |
|
754 |
if (len == 0) return true; |
|
755 |
if (isFullRange()) return true; |
|
756 |
// Calculate max, min: |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
757 |
int lmax = values[start]; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
758 |
int lmin = lmax; |
2 | 759 |
for (int i = 1; i < len; i++) { |
760 |
int value = values[start+i]; |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
761 |
if (lmax < value) lmax = value; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
762 |
if (lmin > value) lmin = value; |
2 | 763 |
} |
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
764 |
return canRepresent(lmin, lmax); |
2 | 765 |
} |
766 |
||
767 |
public double getBitLength(int value) { // implements BitMetric |
|
768 |
return (double) getLength(value) * 8; |
|
769 |
} |
|
770 |
||
771 |
/** How many bytes are in the coding of this value? |
|
772 |
* Returns Integer.MAX_VALUE if the value has no coding. |
|
773 |
* The coding must not be a delta coding, since there is no |
|
774 |
* definite size for a single value apart from its context. |
|
775 |
*/ |
|
776 |
public int getLength(int value) { |
|
777 |
if (isDelta() && isSubrange()) { |
|
778 |
if (!canRepresentUnsigned(value)) |
|
779 |
return Integer.MAX_VALUE; |
|
780 |
value = reduceToSignedRange(value); |
|
781 |
} |
|
782 |
if (value >= 0) { |
|
783 |
for (int n = 0; n < B; n++) { |
|
784 |
if (value <= byteMax[n]) return n+1; |
|
785 |
} |
|
786 |
} else { |
|
787 |
for (int n = 0; n < B; n++) { |
|
788 |
if (value >= byteMin[n]) return n+1; |
|
789 |
} |
|
790 |
} |
|
791 |
return Integer.MAX_VALUE; |
|
792 |
} |
|
793 |
||
794 |
public int getLength(int[] values, int start, int end) { |
|
795 |
int len = end-start; |
|
796 |
if (B == 1) return len; |
|
797 |
if (L == 0) return len * B; |
|
798 |
if (isDelta()) { |
|
799 |
int[] deltas; |
|
800 |
if (!isSubrange()) |
|
801 |
deltas = makeDeltas(values, start, end, 0, 0); |
|
802 |
else |
|
803 |
deltas = makeDeltas(values, start, end, min, max); |
|
804 |
//return Coding.of(B, H, S).getLength(deltas, 0, len); |
|
805 |
values = deltas; |
|
806 |
start = 0; |
|
807 |
} |
|
808 |
int sum = len; // at least 1 byte per |
|
809 |
// add extra bytes for extra-long values |
|
810 |
for (int n = 1; n <= B; n++) { |
|
811 |
// what is the coding interval [min..max] for n bytes? |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
812 |
int lmax = byteMax[n-1]; |
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
813 |
int lmin = byteMin[n-1]; |
2 | 814 |
int longer = 0; // count of guys longer than n bytes |
815 |
for (int i = 0; i < len; i++) { |
|
816 |
int value = values[start+i]; |
|
817 |
if (value >= 0) { |
|
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
818 |
if (value > lmax) longer++; |
2 | 819 |
} else { |
7795
98021fc612af
6990106: FindBugs scan - Malicious code vulnerability Warnings in com.sun.java.util.jar.pack.*
ksrini
parents:
7192
diff
changeset
|
820 |
if (value < lmin) longer++; |
2 | 821 |
} |
822 |
} |
|
823 |
if (longer == 0) break; // no more passes needed |
|
824 |
if (n == B) return Integer.MAX_VALUE; // cannot represent! |
|
825 |
sum += longer; |
|
826 |
} |
|
827 |
return sum; |
|
828 |
} |
|
829 |
||
830 |
public byte[] getMetaCoding(Coding dflt) { |
|
831 |
if (dflt == this) return new byte[]{ (byte) _meta_default }; |
|
832 |
int canonicalIndex = BandStructure.indexOf(this); |
|
833 |
if (canonicalIndex > 0) |
|
834 |
return new byte[]{ (byte) canonicalIndex }; |
|
835 |
return new byte[]{ |
|
836 |
(byte)_meta_arb, |
|
837 |
(byte)(del + 2*S + 8*(B-1)), |
|
838 |
(byte)(H-1) |
|
839 |
}; |
|
840 |
} |
|
841 |
public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { |
|
842 |
int op = bytes[pos++] & 0xFF; |
|
843 |
if (_meta_canon_min <= op && op <= _meta_canon_max) { |
|
844 |
Coding c = BandStructure.codingForIndex(op); |
|
845 |
assert(c != null); |
|
846 |
res[0] = c; |
|
847 |
return pos; |
|
848 |
} |
|
849 |
if (op == _meta_arb) { |
|
850 |
int dsb = bytes[pos++] & 0xFF; |
|
851 |
int H_1 = bytes[pos++] & 0xFF; |
|
852 |
int del = dsb % 2; |
|
853 |
int S = (dsb / 2) % 4; |
|
854 |
int B = (dsb / 8)+1; |
|
855 |
int H = H_1+1; |
|
856 |
if (!((1 <= B && B <= B_MAX) && |
|
857 |
(0 <= S && S <= S_MAX) && |
|
858 |
(1 <= H && H <= H_MAX) && |
|
859 |
(0 <= del && del <= 1)) |
|
860 |
|| (B == 1 && H != 256) |
|
861 |
|| (B == 5 && H == 256)) { |
|
862 |
throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del); |
|
863 |
} |
|
864 |
res[0] = Coding.of(B, H, S, del); |
|
865 |
return pos; |
|
866 |
} |
|
867 |
return pos-1; // backup |
|
868 |
} |
|
869 |
||
870 |
||
871 |
public String keyString() { |
|
872 |
return "("+B+","+H+","+S+","+del+")"; |
|
873 |
} |
|
874 |
||
875 |
public String toString() { |
|
876 |
String str = "Coding"+keyString(); |
|
877 |
// If -ea, print out more informative strings! |
|
878 |
//assert((str = stringForDebug()) != null); |
|
879 |
return str; |
|
880 |
} |
|
881 |
||
882 |
static boolean verboseStringForDebug = false; |
|
883 |
String stringForDebug() { |
|
884 |
String minS = (min == Integer.MIN_VALUE ? "min" : ""+min); |
|
885 |
String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max); |
|
886 |
String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]"; |
|
887 |
if (isSubrange()) |
|
888 |
str += " subrange"; |
|
889 |
else if (!isFullRange()) |
|
890 |
str += " MIDRANGE"; |
|
891 |
if (verboseStringForDebug) { |
|
892 |
str += " {"; |
|
893 |
int prev_range = 0; |
|
894 |
for (int n = 1; n <= B; n++) { |
|
895 |
int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1); |
|
896 |
assert(range_n == saturate32(codeRangeLong(B, H, n))); |
|
897 |
range_n -= prev_range; |
|
898 |
prev_range = range_n; |
|
899 |
String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n); |
|
900 |
str += " #"+n+"="+rngS; |
|
901 |
} |
|
902 |
str += " }"; |
|
903 |
} |
|
904 |
return str; |
|
905 |
} |
|
906 |
} |