author | ohair |
Tue, 28 Dec 2010 15:53:50 -0800 | |
changeset 7668 | d4a77089c587 |
parent 7192 | 445c518364c4 |
child 7816 | 55a18147b4bf |
permissions | -rw-r--r-- |
2 | 1 |
/* |
7668 | 2 |
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package com.sun.java.util.jar.pack; |
|
27 |
||
7192
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
28 |
import java.io.IOException; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
29 |
import java.io.InputStream; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
30 |
import java.io.OutputStream; |
445c518364c4
7003227: (pack200) intermittent failures compiling pack200
ksrini
parents:
5506
diff
changeset
|
31 |
import java.util.HashMap; |
2 | 32 |
|
33 |
/** |
|
34 |
* Define the conversions between sequences of small integers and raw bytes. |
|
35 |
* This is a schema of encodings which incorporates varying lengths, |
|
36 |
* varying degrees of length variability, and varying amounts of signed-ness. |
|
37 |
* @author John Rose |
|
38 |
*/ |
|
39 |
class Coding implements Constants, Comparable, CodingMethod, Histogram.BitMetric { |
|
40 |
/* |
|
41 |
Coding schema for single integers, parameterized by (B,H,S): |
|
42 |
||
43 |
Let B in [1,5], H in [1,256], S in [0,3]. |
|
44 |
(S limit is arbitrary. B follows the 32-bit limit. H is byte size.) |
|
45 |
||
46 |
A given (B,H,S) code varies in length from 1 to B bytes. |
|
47 |
||
48 |
The 256 values a byte may take on are divided into L=(256-H) and H |
|
49 |
values, with all the H values larger than the L values. |
|
50 |
(That is, the L values are [0,L) and the H are [L,256).) |
|
51 |
||
52 |
The last byte is always either the B-th byte, a byte with "L value" |
|
53 |
(<L), or both. There is no other byte that satisfies these conditions. |
|
54 |
All bytes before the last always have "H values" (>=L). |
|
55 |
||
56 |
Therefore, if L==0, the code always has the full length of B bytes. |
|
57 |
The coding then becomes a classic B-byte little-endian unsigned integer. |
|
58 |
(Also, if L==128, the high bit of each byte acts signals the presence |
|
59 |
of a following byte, up to the maximum length.) |
|
60 |
||
61 |
In the unsigned case (S==0), the coding is compact and monotonic |
|
62 |
in the ordering of byte sequences defined by appending zero bytes |
|
63 |
to pad them to a common length B, reversing them, and ordering them |
|
64 |
lexicographically. (This agrees with "little-endian" byte order.) |
|
65 |
||
66 |
Therefore, the unsigned value of a byte sequence may be defined as: |
|
67 |
<pre> |
|
68 |
U(b0) == b0 |
|
69 |
in [0..L) |
|
70 |
or [0..256) if B==1 (**) |
|
71 |
||
72 |
U(b0,b1) == b0 + b1*H |
|
73 |
in [L..L*(1+H)) |
|
74 |
or [L..L*(1+H) + H^2) if B==2 |
|
75 |
||
76 |
U(b0,b1,b2) == b0 + b1*H + b2*H^2 |
|
77 |
in [L*(1+H)..L*(1+H+H^2)) |
|
78 |
or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3 |
|
79 |
||
80 |
U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
81 |
up to L*Sum[i<n]( H^i ) |
|
82 |
or to L*Sum[i<n]( H^i ) + H^n if n==B |
|
83 |
</pre> |
|
84 |
||
85 |
(**) If B==1, the values H,L play no role in the coding. |
|
86 |
As a convention, we require that any (1,H,S) code must always |
|
87 |
encode values less than H. Thus, a simple unsigned byte is coded |
|
88 |
specifically by the code (1,256,0). |
|
89 |
||
90 |
(Properly speaking, the unsigned case should be parameterized as |
|
91 |
S==Infinity. If the schema were regular, the case S==0 would really |
|
92 |
denote a numbering in which all coded values are negative.) |
|
93 |
||
94 |
If S>0, the unsigned value of a byte sequence is regarded as a binary |
|
95 |
integer. If any of the S low-order bits are zero, the corresponding |
|
96 |
signed value will be non-negative. If all of the S low-order bits |
|
97 |
(S>0) are one, the the corresponding signed value will be negative. |
|
98 |
||
99 |
The non-negative signed values are compact and monotonically increasing |
|
100 |
(from 0) in the ordering of the corresponding unsigned values. |
|
101 |
||
102 |
The negative signed values are compact and monotonically decreasing |
|
103 |
(from -1) in the ordering of the corresponding unsigned values. |
|
104 |
||
105 |
In essence, the low-order S bits function as a collective sign bit |
|
106 |
for negative signed numbers, and as a low-order base-(2^S-1) digit |
|
107 |
for non-negative signed numbers. |
|
108 |
||
109 |
Therefore, the signed value corresponding to an unsigned value is: |
|
110 |
<pre> |
|
111 |
Sgn(x) == x if S==0 |
|
112 |
Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S), if S>0, (x % 2^S) < 2^S-1 |
|
113 |
Sgn(x) == -(x / 2^S)-1, if S>0, (x % 2^S) == 2^S-1 |
|
114 |
</pre> |
|
115 |
||
116 |
Finally, the value of a byte sequence, given the coding parameters |
|
117 |
(B,H,S), is defined as: |
|
118 |
<pre> |
|
119 |
V(b[i]: i<n) == Sgn(U(b[i]: i<n)) |
|
120 |
</pre> |
|
121 |
||
122 |
The extremal positive and negative signed value for a given range |
|
123 |
of unsigned values may be found by sign-encoding the largest unsigned |
|
124 |
value which is not 2^S-1 mod 2^S, and that which is, respectively. |
|
125 |
||
126 |
Because B,H,S are variable, this is not a single coding but a schema |
|
127 |
of codings. For optimal compression, it is necessary to adaptively |
|
128 |
select specific codings to the data being compressed. |
|
129 |
||
130 |
For example, if a sequence of values happens never to be negative, |
|
131 |
S==0 is the best choice. If the values are equally balanced between |
|
132 |
negative and positive, S==1. If negative values are rare, then S>1 |
|
133 |
is more appropriate. |
|
134 |
||
135 |
A (B,H,S) encoding is called a "subrange" if it does not encode |
|
136 |
the largest 32-bit value, and if the number R of values it does |
|
137 |
encode can be expressed as a positive 32-bit value. (Note that |
|
138 |
B=1 implies R<=256, B=2 implies R<=65536, etc.) |
|
139 |
||
140 |
A delta version of a given (B,H,S) coding encodes an array of integers |
|
141 |
by writing their successive differences in the (B,H,S) coding. |
|
142 |
The original integers themselves may be recovered by making a |
|
143 |
running accumulation of sum of the differences as they are read. |
|
144 |
||
145 |
As a special case, if a (B,H,S) encoding is a subrange, its delta |
|
146 |
version will only encode arrays of numbers in the coding's unsigned |
|
147 |
range, [0..R-1]. The coding of deltas is still in the normal signed |
|
148 |
range, if S!=0. During delta encoding, all subtraction results are |
|
149 |
reduced to the signed range, by adding multiples of R. Likewise, |
|
150 |
. during encoding, all addition results are reduced to the unsigned range. |
|
151 |
This special case for subranges allows the benefits of wraparound |
|
152 |
when encoding correlated sequences of very small positive numbers. |
|
153 |
*/ |
|
154 |
||
155 |
// Code-specific limits: |
|
156 |
private static int saturate32(long x) { |
|
157 |
if (x > Integer.MAX_VALUE) return Integer.MAX_VALUE; |
|
158 |
if (x < Integer.MIN_VALUE) return Integer.MIN_VALUE; |
|
159 |
return (int)x; |
|
160 |
} |
|
161 |
private static long codeRangeLong(int B, int H) { |
|
162 |
return codeRangeLong(B, H, B); |
|
163 |
} |
|
164 |
private static long codeRangeLong(int B, int H, int nMax) { |
|
165 |
// Code range for a all (B,H) codes of length <=nMax (<=B). |
|
166 |
// n < B: L*Sum[i<n]( H^i ) |
|
167 |
// n == B: L*Sum[i<B]( H^i ) + H^B |
|
168 |
assert(nMax >= 0 && nMax <= B); |
|
169 |
assert(B >= 1 && B <= 5); |
|
170 |
assert(H >= 1 && H <= 256); |
|
171 |
if (nMax == 0) return 0; // no codes of zero length |
|
172 |
if (B == 1) return H; // special case; see (**) above |
|
173 |
int L = 256-H; |
|
174 |
long sum = 0; |
|
175 |
long H_i = 1; |
|
176 |
for (int n = 1; n <= nMax; n++) { |
|
177 |
sum += H_i; |
|
178 |
H_i *= H; |
|
179 |
} |
|
180 |
sum *= L; |
|
181 |
if (nMax == B) |
|
182 |
sum += H_i; |
|
183 |
return sum; |
|
184 |
} |
|
185 |
/** Largest int representable by (B,H,S) in up to nMax bytes. */ |
|
186 |
public static int codeMax(int B, int H, int S, int nMax) { |
|
187 |
//assert(S >= 0 && S <= S_MAX); |
|
188 |
long range = codeRangeLong(B, H, nMax); |
|
189 |
if (range == 0) |
|
190 |
return -1; // degenerate max value for empty set of codes |
|
191 |
if (S == 0 || range >= (long)1<<32) |
|
192 |
return saturate32(range-1); |
|
193 |
long maxPos = range-1; |
|
194 |
while (isNegativeCode(maxPos, S)) --maxPos; |
|
195 |
if (maxPos < 0) return -1; // No positive codings at all. |
|
196 |
int smax = decodeSign32(maxPos, S); |
|
197 |
// check for 32-bit wraparound: |
|
198 |
if (smax < 0) |
|
199 |
return Integer.MAX_VALUE; |
|
200 |
return smax; |
|
201 |
} |
|
202 |
/** Smallest int representable by (B,H,S) in up to nMax bytes. |
|
203 |
Returns Integer.MIN_VALUE if 32-bit wraparound covers |
|
204 |
the entire negative range. |
|
205 |
*/ |
|
206 |
public static int codeMin(int B, int H, int S, int nMax) { |
|
207 |
//assert(S >= 0 && S <= S_MAX); |
|
208 |
long range = codeRangeLong(B, H, nMax); |
|
209 |
if (range >= (long)1<<32 && nMax == B) { |
|
210 |
// Can code negative values via 32-bit wraparound. |
|
211 |
return Integer.MIN_VALUE; |
|
212 |
} |
|
213 |
if (S == 0) { |
|
214 |
return 0; |
|
215 |
} |
|
216 |
int Smask = (1<<S)-1; |
|
217 |
long maxNeg = range-1; |
|
218 |
while (!isNegativeCode(maxNeg, S)) --maxNeg; |
|
219 |
if (maxNeg < 0) return 0; // No negative codings at all. |
|
220 |
return decodeSign32(maxNeg, S); |
|
221 |
} |
|
222 |
||
223 |
// Some of the arithmetic below is on unsigned 32-bit integers. |
|
224 |
// These must be represented in Java as longs in the range [0..2^32-1]. |
|
225 |
// The conversion to a signed int is just the Java cast (int), but |
|
226 |
// the conversion to an unsigned int is the following little method: |
|
227 |
private static long toUnsigned32(int sx) { |
|
228 |
return ((long)sx << 32) >>> 32; |
|
229 |
} |
|
230 |
||
231 |
// Sign encoding: |
|
232 |
private static boolean isNegativeCode(long ux, int S) { |
|
233 |
assert(S > 0); |
|
234 |
assert(ux >= -1); // can be out of 32-bit range; who cares |
|
235 |
int Smask = (1<<S)-1; |
|
236 |
return (((int)ux+1) & Smask) == 0; |
|
237 |
} |
|
238 |
private static boolean hasNegativeCode(int sx, int S) { |
|
239 |
assert(S > 0); |
|
240 |
// If S>=2 very low negatives are coded by 32-bit-wrapped positives. |
|
241 |
// The lowest negative representable by a negative coding is |
|
242 |
// ~(umax32 >> S), and the next lower number is coded by wrapping |
|
243 |
// the highest positive: |
|
244 |
// CodePos(umax32-1) -> (umax32-1)-((umax32-1)>>S) |
|
245 |
// which simplifies to ~(umax32 >> S)-1. |
|
246 |
return (0 > sx) && (sx >= ~(-1>>>S)); |
|
247 |
} |
|
248 |
private static int decodeSign32(long ux, int S) { |
|
249 |
assert(ux == toUnsigned32((int)ux)) // must be unsigned 32-bit number |
|
250 |
: (Long.toHexString(ux)); |
|
251 |
if (S == 0) { |
|
252 |
return (int) ux; // cast to signed int |
|
253 |
} |
|
254 |
int sx; |
|
255 |
if (isNegativeCode(ux, S)) { |
|
256 |
// Sgn(x) == -(x / 2^S)-1 |
|
257 |
sx = ~((int)ux >>> S); |
|
258 |
} else { |
|
259 |
// Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S) |
|
260 |
sx = (int)ux - ((int)ux >>> S); |
|
261 |
} |
|
262 |
// Assert special case of S==1: |
|
263 |
assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1))); |
|
264 |
return sx; |
|
265 |
} |
|
266 |
private static long encodeSign32(int sx, int S) { |
|
267 |
if (S == 0) { |
|
268 |
return toUnsigned32(sx); // unsigned 32-bit int |
|
269 |
} |
|
270 |
int Smask = (1<<S)-1; |
|
271 |
long ux; |
|
272 |
if (!hasNegativeCode(sx, S)) { |
|
273 |
// InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1)) |
|
274 |
ux = sx + (toUnsigned32(sx) / Smask); |
|
275 |
} else { |
|
276 |
// InvSgn(sx) = (-sx-1)*2^S + (2^S-1) |
|
277 |
ux = (-sx << S) - 1; |
|
278 |
} |
|
279 |
ux = toUnsigned32((int)ux); |
|
280 |
assert(sx == decodeSign32(ux, S)) |
|
281 |
: (Long.toHexString(ux)+" -> "+ |
|
282 |
Integer.toHexString(sx)+" != "+ |
|
283 |
Integer.toHexString(decodeSign32(ux, S))); |
|
284 |
return ux; |
|
285 |
} |
|
286 |
||
287 |
// Top-level coding of single integers: |
|
288 |
public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) { |
|
289 |
long ux = encodeSign32(sx, S); |
|
290 |
assert(ux == toUnsigned32((int)ux)); |
|
291 |
assert(ux < codeRangeLong(B, H)) |
|
292 |
: Long.toHexString(ux); |
|
293 |
int L = 256-H; |
|
294 |
long sum = ux; |
|
295 |
int pos = outpos[0]; |
|
296 |
for (int i = 0; i < B-1; i++) { |
|
297 |
if (sum < L) |
|
298 |
break; |
|
299 |
sum -= L; |
|
300 |
int b_i = (int)( L + (sum % H) ); |
|
301 |
sum /= H; |
|
302 |
out[pos++] = (byte)b_i; |
|
303 |
} |
|
304 |
out[pos++] = (byte)sum; |
|
305 |
// Report number of bytes written by updating outpos[0]: |
|
306 |
outpos[0] = pos; |
|
307 |
// Check right away for mis-coding. |
|
308 |
//assert(sx == readInt(out, new int[1], B, H, S)); |
|
309 |
} |
|
310 |
public static int readInt(byte[] in, int[] inpos, int B, int H, int S) { |
|
311 |
// U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
312 |
int L = 256-H; |
|
313 |
long sum = 0; |
|
314 |
long H_i = 1; |
|
315 |
int pos = inpos[0]; |
|
316 |
for (int i = 0; i < B; i++) { |
|
317 |
int b_i = in[pos++] & 0xFF; |
|
318 |
sum += b_i*H_i; |
|
319 |
H_i *= H; |
|
320 |
if (b_i < L) break; |
|
321 |
} |
|
322 |
//assert(sum >= 0 && sum < codeRangeLong(B, H)); |
|
323 |
// Report number of bytes read by updating inpos[0]: |
|
324 |
inpos[0] = pos; |
|
325 |
return decodeSign32(sum, S); |
|
326 |
} |
|
327 |
// The Stream version doesn't fetch a byte unless it is needed for coding. |
|
328 |
public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException { |
|
329 |
// U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) |
|
330 |
int L = 256-H; |
|
331 |
long sum = 0; |
|
332 |
long H_i = 1; |
|
333 |
for (int i = 0; i < B; i++) { |
|
334 |
int b_i = in.read(); |
|
335 |
if (b_i < 0) throw new RuntimeException("unexpected EOF"); |
|
336 |
sum += b_i*H_i; |
|
337 |
H_i *= H; |
|
338 |
if (b_i < L) break; |
|
339 |
} |
|
340 |
assert(sum >= 0 && sum < codeRangeLong(B, H)); |
|
341 |
return decodeSign32(sum, S); |
|
342 |
} |
|
343 |
||
344 |
public static final int B_MAX = 5; /* B: [1,5] */ |
|
345 |
public static final int H_MAX = 256; /* H: [1,256] */ |
|
346 |
public static final int S_MAX = 2; /* S: [0,2] */ |
|
347 |
||
348 |
// END OF STATICS. |
|
349 |
||
350 |
private final int B; /*1..5*/ // # bytes (1..5) |
|
351 |
private final int H; /*1..256*/ // # codes requiring a higher byte |
|
352 |
private final int L; /*0..255*/ // # codes requiring a higher byte |
|
353 |
private final int S; /*0..3*/ // # low-order bits representing sign |
|
354 |
private final int del; /*0..2*/ // type of delta encoding (0 == none) |
|
355 |
private final int min; // smallest representable value |
|
356 |
private final int max; // largest representable value |
|
357 |
private final int umin; // smallest representable uns. value |
|
358 |
private final int umax; // largest representable uns. value |
|
359 |
private final int[] byteMin; // smallest repr. value, given # bytes |
|
360 |
private final int[] byteMax; // largest repr. value, given # bytes |
|
361 |
||
362 |
private Coding(int B, int H, int S) { |
|
363 |
this(B, H, S, 0); |
|
364 |
} |
|
365 |
private Coding(int B, int H, int S, int del) { |
|
366 |
this.B = B; |
|
367 |
this.H = H; |
|
368 |
this.L = 256-H; |
|
369 |
this.S = S; |
|
370 |
this.del = del; |
|
371 |
this.min = codeMin(B, H, S, B); |
|
372 |
this.max = codeMax(B, H, S, B); |
|
373 |
this.umin = codeMin(B, H, 0, B); |
|
374 |
this.umax = codeMax(B, H, 0, B); |
|
375 |
this.byteMin = new int[B]; |
|
376 |
this.byteMax = new int[B]; |
|
377 |
||
378 |
for (int nMax = 1; nMax <= B; nMax++) { |
|
379 |
byteMin[nMax-1] = codeMin(B, H, S, nMax); |
|
380 |
byteMax[nMax-1] = codeMax(B, H, S, nMax); |
|
381 |
} |
|
382 |
} |
|
383 |
||
384 |
public boolean equals(Object x) { |
|
385 |
if (!(x instanceof Coding)) return false; |
|
386 |
Coding that = (Coding) x; |
|
387 |
if (this.B != that.B) return false; |
|
388 |
if (this.H != that.H) return false; |
|
389 |
if (this.S != that.S) return false; |
|
390 |
if (this.del != that.del) return false; |
|
391 |
return true; |
|
392 |
} |
|
393 |
||
394 |
public int hashCode() { |
|
395 |
return (del<<14)+(S<<11)+(B<<8)+(H<<0); |
|
396 |
} |
|
397 |
||
398 |
private static HashMap codeMap; |
|
399 |
||
400 |
private static synchronized Coding of(int B, int H, int S, int del) { |
|
401 |
if (codeMap == null) codeMap = new HashMap(); |
|
402 |
Coding x0 = new Coding(B, H, S, del); |
|
403 |
Coding x1 = (Coding) codeMap.get(x0); |
|
404 |
if (x1 == null) codeMap.put(x0, x1 = x0); |
|
405 |
return x1; |
|
406 |
} |
|
407 |
||
408 |
public static Coding of(int B, int H) { |
|
409 |
return of(B, H, 0, 0); |
|
410 |
} |
|
411 |
||
412 |
public static Coding of(int B, int H, int S) { |
|
413 |
return of(B, H, S, 0); |
|
414 |
} |
|
415 |
||
416 |
public boolean canRepresentValue(int x) { |
|
417 |
if (isSubrange()) |
|
418 |
return canRepresentUnsigned(x); |
|
419 |
else |
|
420 |
return canRepresentSigned(x); |
|
421 |
} |
|
422 |
/** Can this coding represent a single value, possibly a delta? |
|
423 |
* This ignores the D property. That is, for delta codings, |
|
424 |
* this tests whether a delta value of 'x' can be coded. |
|
425 |
* For signed delta codings which produce unsigned end values, |
|
426 |
* use canRepresentUnsigned. |
|
427 |
*/ |
|
428 |
public boolean canRepresentSigned(int x) { |
|
429 |
return (x >= min && x <= max); |
|
430 |
} |
|
431 |
/** Can this coding, apart from its S property, |
|
432 |
* represent a single value? (Negative values |
|
433 |
* can only be represented via 32-bit overflow, |
|
434 |
* so this returns true for negative values |
|
435 |
* if isFullRange is true.) |
|
436 |
*/ |
|
437 |
public boolean canRepresentUnsigned(int x) { |
|
438 |
return (x >= umin && x <= umax); |
|
439 |
} |
|
440 |
||
441 |
// object-oriented code/decode |
|
442 |
public int readFrom(byte[] in, int[] inpos) { |
|
443 |
return readInt(in, inpos, B, H, S); |
|
444 |
} |
|
445 |
public void writeTo(byte[] out, int[] outpos, int x) { |
|
446 |
writeInt(out, outpos, x, B, H, S); |
|
447 |
} |
|
448 |
||
449 |
// Stream versions |
|
450 |
public int readFrom(InputStream in) throws IOException { |
|
451 |
return readIntFrom(in, B, H, S); |
|
452 |
} |
|
453 |
public void writeTo(OutputStream out, int x) throws IOException { |
|
454 |
byte[] buf = new byte[B]; |
|
455 |
int[] pos = new int[1]; |
|
456 |
writeInt(buf, pos, x, B, H, S); |
|
457 |
out.write(buf, 0, pos[0]); |
|
458 |
} |
|
459 |
||
460 |
// Stream/array versions |
|
461 |
public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { |
|
462 |
// %%% use byte[] buffer |
|
463 |
for (int i = start; i < end; i++) |
|
464 |
a[i] = readFrom(in); |
|
465 |
for (int dstep = 0; dstep < del; dstep++) { |
|
466 |
long state = 0; |
|
467 |
for (int i = start; i < end; i++) { |
|
468 |
state += a[i]; |
|
469 |
// Reduce array values to the required range. |
|
470 |
if (isSubrange()) { |
|
471 |
state = reduceToUnsignedRange(state); |
|
472 |
} |
|
473 |
a[i] = (int) state; |
|
474 |
} |
|
475 |
} |
|
476 |
} |
|
477 |
public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { |
|
478 |
if (end <= start) return; |
|
479 |
for (int dstep = 0; dstep < del; dstep++) { |
|
480 |
int[] deltas; |
|
481 |
if (!isSubrange()) |
|
482 |
deltas = makeDeltas(a, start, end, 0, 0); |
|
483 |
else |
|
484 |
deltas = makeDeltas(a, start, end, min, max); |
|
485 |
a = deltas; |
|
486 |
start = 0; |
|
487 |
end = deltas.length; |
|
488 |
} |
|
489 |
// The following code is a buffered version of this loop: |
|
490 |
// for (int i = start; i < end; i++) |
|
491 |
// writeTo(out, a[i]); |
|
492 |
byte[] buf = new byte[1<<8]; |
|
493 |
final int bufmax = buf.length-B; |
|
494 |
int[] pos = { 0 }; |
|
495 |
for (int i = start; i < end; ) { |
|
496 |
while (pos[0] <= bufmax) { |
|
497 |
writeTo(buf, pos, a[i++]); |
|
498 |
if (i >= end) break; |
|
499 |
} |
|
500 |
out.write(buf, 0, pos[0]); |
|
501 |
pos[0] = 0; |
|
502 |
} |
|
503 |
} |
|
504 |
||
505 |
/** Tell if the range of this coding (number of distinct |
|
506 |
* representable values) can be expressed in 32 bits. |
|
507 |
*/ |
|
508 |
boolean isSubrange() { |
|
509 |
return max < Integer.MAX_VALUE |
|
510 |
&& ((long)max - (long)min + 1) <= Integer.MAX_VALUE; |
|
511 |
} |
|
512 |
||
513 |
/** Tell if this coding can represent all 32-bit values. |
|
514 |
* Note: Some codings, such as unsigned ones, can be neither |
|
515 |
* subranges nor full-range codings. |
|
516 |
*/ |
|
517 |
boolean isFullRange() { |
|
518 |
return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE; |
|
519 |
} |
|
520 |
||
521 |
/** Return the number of values this coding (a subrange) can represent. */ |
|
522 |
int getRange() { |
|
523 |
assert(isSubrange()); |
|
524 |
return (max - min) + 1; // range includes both min & max |
|
525 |
} |
|
526 |
||
527 |
Coding setB(int B) { return Coding.of(B, H, S, del); } |
|
528 |
Coding setH(int H) { return Coding.of(B, H, S, del); } |
|
529 |
Coding setS(int S) { return Coding.of(B, H, S, del); } |
|
530 |
Coding setL(int L) { return setH(256-L); } |
|
531 |
Coding setD(int del) { return Coding.of(B, H, S, del); } |
|
532 |
Coding getDeltaCoding() { return setD(del+1); } |
|
533 |
||
534 |
/** Return a coding suitable for representing summed, modulo-reduced values. */ |
|
535 |
Coding getValueCoding() { |
|
536 |
if (isDelta()) |
|
537 |
return Coding.of(B, H, 0, del-1); |
|
538 |
else |
|
539 |
return this; |
|
540 |
} |
|
541 |
||
542 |
/** Reduce the given value to be within this coding's unsigned range, |
|
543 |
* by adding or subtracting a multiple of (max-min+1). |
|
544 |
*/ |
|
545 |
int reduceToUnsignedRange(long value) { |
|
546 |
if (value == (int)value && canRepresentUnsigned((int)value)) |
|
547 |
// already in unsigned range |
|
548 |
return (int)value; |
|
549 |
int range = getRange(); |
|
550 |
assert(range > 0); |
|
551 |
value %= range; |
|
552 |
if (value < 0) value += range; |
|
553 |
assert(canRepresentUnsigned((int)value)); |
|
554 |
return (int)value; |
|
555 |
} |
|
556 |
||
557 |
int reduceToSignedRange(int value) { |
|
558 |
if (canRepresentSigned(value)) |
|
559 |
// already in signed range |
|
560 |
return value; |
|
561 |
return reduceToSignedRange(value, min, max); |
|
562 |
} |
|
563 |
static int reduceToSignedRange(int value, int min, int max) { |
|
564 |
int range = (max-min+1); |
|
565 |
assert(range > 0); |
|
566 |
int value0 = value; |
|
567 |
value -= min; |
|
568 |
if (value < 0 && value0 >= 0) { |
|
569 |
// 32-bit overflow, but the next '%=' op needs to be unsigned |
|
570 |
value -= range; |
|
571 |
assert(value >= 0); |
|
572 |
} |
|
573 |
value %= range; |
|
574 |
if (value < 0) value += range; |
|
575 |
value += min; |
|
576 |
assert(min <= value && value <= max); |
|
577 |
return value; |
|
578 |
} |
|
579 |
||
580 |
/** Does this coding support at least one negative value? |
|
581 |
Includes codings that can do so via 32-bit wraparound. |
|
582 |
*/ |
|
583 |
boolean isSigned() { |
|
584 |
return min < 0; |
|
585 |
} |
|
586 |
/** Does this coding code arrays by making successive differences? */ |
|
587 |
boolean isDelta() { |
|
588 |
return del != 0; |
|
589 |
} |
|
590 |
||
591 |
public int B() { return B; } |
|
592 |
public int H() { return H; } |
|
593 |
public int L() { return L; } |
|
594 |
public int S() { return S; } |
|
595 |
public int del() { return del; } |
|
596 |
public int min() { return min; } |
|
597 |
public int max() { return max; } |
|
598 |
public int umin() { return umin; } |
|
599 |
public int umax() { return umax; } |
|
600 |
public int byteMin(int b) { return byteMin[b-1]; } |
|
601 |
public int byteMax(int b) { return byteMax[b-1]; } |
|
602 |
||
603 |
public int compareTo(Object x) { |
|
604 |
Coding that = (Coding) x; |
|
605 |
int dkey = this.del - that.del; |
|
606 |
if (dkey == 0) |
|
607 |
dkey = this.B - that.B; |
|
608 |
if (dkey == 0) |
|
609 |
dkey = this.H - that.H; |
|
610 |
if (dkey == 0) |
|
611 |
dkey = this.S - that.S; |
|
612 |
return dkey; |
|
613 |
} |
|
614 |
||
615 |
/** Heuristic measure of the difference between two codings. */ |
|
616 |
public int distanceFrom(Coding that) { |
|
617 |
int diffdel = this.del - that.del; |
|
618 |
if (diffdel < 0) diffdel = -diffdel; |
|
619 |
int diffS = this.S - that.S; |
|
620 |
if (diffS < 0) diffS = -diffS; |
|
621 |
int diffB = this.B - that.B; |
|
622 |
if (diffB < 0) diffB = -diffB; |
|
623 |
int diffHL; |
|
624 |
if (this.H == that.H) { |
|
625 |
diffHL = 0; |
|
626 |
} else { |
|
627 |
// Distance in log space of H (<=128) and L (<128). |
|
628 |
int thisHL = this.getHL(); |
|
629 |
int thatHL = that.getHL(); |
|
630 |
// Double the accuracy of the log: |
|
631 |
thisHL *= thisHL; |
|
632 |
thatHL *= thatHL; |
|
633 |
if (thisHL > thatHL) |
|
634 |
diffHL = ceil_lg2(1+(thisHL-1)/thatHL); |
|
635 |
else |
|
636 |
diffHL = ceil_lg2(1+(thatHL-1)/thisHL); |
|
637 |
} |
|
638 |
int norm = 5*(diffdel + diffS + diffB) + diffHL; |
|
639 |
assert(norm != 0 || this.compareTo(that) == 0); |
|
640 |
return norm; |
|
641 |
} |
|
642 |
private int getHL() { |
|
643 |
// Follow H in log space by the multiplicative inverse of L. |
|
644 |
if (H <= 128) return H; |
|
645 |
if (L >= 1) return 128*128/L; |
|
646 |
return 128*256; |
|
647 |
} |
|
648 |
||
649 |
/** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */ |
|
650 |
static int ceil_lg2(int x) { |
|
651 |
assert(x-1 >= 0); // x in range (int.MIN_VALUE -> 32) |
|
652 |
x -= 1; |
|
653 |
int lg = 0; |
|
654 |
while (x != 0) { |
|
655 |
lg++; |
|
656 |
x >>= 1; |
|
657 |
} |
|
658 |
return lg; |
|
659 |
} |
|
660 |
||
661 |
static private final byte[] byteBitWidths = new byte[0x100]; |
|
662 |
static { |
|
663 |
for (int b = 0; b < byteBitWidths.length; b++) { |
|
664 |
byteBitWidths[b] = (byte) ceil_lg2(b + 1); |
|
665 |
} |
|
666 |
for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) { |
|
667 |
assert(bitWidth(i) == ceil_lg2(i + 1)); |
|
668 |
} |
|
669 |
} |
|
670 |
||
671 |
/** Number of significant bits in i, not counting sign bits. |
|
672 |
* For positive i, it is ceil_lg2(i + 1). |
|
673 |
*/ |
|
674 |
static int bitWidth(int i) { |
|
675 |
if (i < 0) i = ~i; // change sign |
|
676 |
int w = 0; |
|
677 |
int lo = i; |
|
678 |
if (lo < byteBitWidths.length) |
|
679 |
return byteBitWidths[lo]; |
|
680 |
int hi; |
|
681 |
hi = (lo >>> 16); |
|
682 |
if (hi != 0) { |
|
683 |
lo = hi; |
|
684 |
w += 16; |
|
685 |
} |
|
686 |
hi = (lo >>> 8); |
|
687 |
if (hi != 0) { |
|
688 |
lo = hi; |
|
689 |
w += 8; |
|
690 |
} |
|
691 |
w += byteBitWidths[lo]; |
|
692 |
//assert(w == ceil_lg2(i + 1)); |
|
693 |
return w; |
|
694 |
} |
|
695 |
||
696 |
/** Create an array of successive differences. |
|
697 |
* If min==max, accept any and all 32-bit overflow. |
|
698 |
* Otherwise, avoid 32-bit overflow, and reduce all differences |
|
699 |
* to a value in the given range, by adding or subtracting |
|
700 |
* multiples of the range cardinality (max-min+1). |
|
701 |
* Also, the values are assumed to be in the range [0..(max-min)]. |
|
702 |
*/ |
|
703 |
static int[] makeDeltas(int[] values, int start, int end, |
|
704 |
int min, int max) { |
|
705 |
assert(max >= min); |
|
706 |
int count = end-start; |
|
707 |
int[] deltas = new int[count]; |
|
708 |
int state = 0; |
|
709 |
if (min == max) { |
|
710 |
for (int i = 0; i < count; i++) { |
|
711 |
int value = values[start+i]; |
|
712 |
deltas[i] = value - state; |
|
713 |
state = value; |
|
714 |
} |
|
715 |
} else { |
|
716 |
for (int i = 0; i < count; i++) { |
|
717 |
int value = values[start+i]; |
|
718 |
assert(value >= 0 && value+min <= max); |
|
719 |
int delta = value - state; |
|
720 |
assert(delta == (long)value - (long)state); // no overflow |
|
721 |
state = value; |
|
722 |
// Reduce delta values to the required range. |
|
723 |
delta = reduceToSignedRange(delta, min, max); |
|
724 |
deltas[i] = delta; |
|
725 |
} |
|
726 |
} |
|
727 |
return deltas; |
|
728 |
} |
|
729 |
||
730 |
boolean canRepresent(int minValue, int maxValue) { |
|
731 |
assert(minValue <= maxValue); |
|
732 |
if (del > 0) { |
|
733 |
if (isSubrange()) { |
|
734 |
// We will force the values to reduce to the right subrange. |
|
735 |
return canRepresentUnsigned(maxValue) |
|
736 |
&& canRepresentUnsigned(minValue); |
|
737 |
} else { |
|
738 |
// Huge range; delta values must assume full 32-bit range. |
|
739 |
return isFullRange(); |
|
740 |
} |
|
741 |
} |
|
742 |
else |
|
743 |
// final values must be representable |
|
744 |
return canRepresentSigned(maxValue) |
|
745 |
&& canRepresentSigned(minValue); |
|
746 |
} |
|
747 |
||
748 |
boolean canRepresent(int[] values, int start, int end) { |
|
749 |
int len = end-start; |
|
750 |
if (len == 0) return true; |
|
751 |
if (isFullRange()) return true; |
|
752 |
// Calculate max, min: |
|
753 |
int max = values[start]; |
|
754 |
int min = max; |
|
755 |
for (int i = 1; i < len; i++) { |
|
756 |
int value = values[start+i]; |
|
757 |
if (max < value) max = value; |
|
758 |
if (min > value) min = value; |
|
759 |
} |
|
760 |
return canRepresent(min, max); |
|
761 |
} |
|
762 |
||
763 |
public double getBitLength(int value) { // implements BitMetric |
|
764 |
return (double) getLength(value) * 8; |
|
765 |
} |
|
766 |
||
767 |
/** How many bytes are in the coding of this value? |
|
768 |
* Returns Integer.MAX_VALUE if the value has no coding. |
|
769 |
* The coding must not be a delta coding, since there is no |
|
770 |
* definite size for a single value apart from its context. |
|
771 |
*/ |
|
772 |
public int getLength(int value) { |
|
773 |
if (isDelta() && isSubrange()) { |
|
774 |
if (!canRepresentUnsigned(value)) |
|
775 |
return Integer.MAX_VALUE; |
|
776 |
value = reduceToSignedRange(value); |
|
777 |
} |
|
778 |
if (value >= 0) { |
|
779 |
for (int n = 0; n < B; n++) { |
|
780 |
if (value <= byteMax[n]) return n+1; |
|
781 |
} |
|
782 |
} else { |
|
783 |
for (int n = 0; n < B; n++) { |
|
784 |
if (value >= byteMin[n]) return n+1; |
|
785 |
} |
|
786 |
} |
|
787 |
return Integer.MAX_VALUE; |
|
788 |
} |
|
789 |
||
790 |
public int getLength(int[] values, int start, int end) { |
|
791 |
int len = end-start; |
|
792 |
if (B == 1) return len; |
|
793 |
if (L == 0) return len * B; |
|
794 |
if (isDelta()) { |
|
795 |
int[] deltas; |
|
796 |
if (!isSubrange()) |
|
797 |
deltas = makeDeltas(values, start, end, 0, 0); |
|
798 |
else |
|
799 |
deltas = makeDeltas(values, start, end, min, max); |
|
800 |
//return Coding.of(B, H, S).getLength(deltas, 0, len); |
|
801 |
values = deltas; |
|
802 |
start = 0; |
|
803 |
end = values.length; |
|
804 |
} |
|
805 |
int sum = len; // at least 1 byte per |
|
806 |
// add extra bytes for extra-long values |
|
807 |
for (int n = 1; n <= B; n++) { |
|
808 |
// what is the coding interval [min..max] for n bytes? |
|
809 |
int max = byteMax[n-1]; |
|
810 |
int min = byteMin[n-1]; |
|
811 |
int longer = 0; // count of guys longer than n bytes |
|
812 |
for (int i = 0; i < len; i++) { |
|
813 |
int value = values[start+i]; |
|
814 |
if (value >= 0) { |
|
815 |
if (value > max) longer++; |
|
816 |
} else { |
|
817 |
if (value < min) longer++; |
|
818 |
} |
|
819 |
} |
|
820 |
if (longer == 0) break; // no more passes needed |
|
821 |
if (n == B) return Integer.MAX_VALUE; // cannot represent! |
|
822 |
sum += longer; |
|
823 |
} |
|
824 |
return sum; |
|
825 |
} |
|
826 |
||
827 |
public byte[] getMetaCoding(Coding dflt) { |
|
828 |
if (dflt == this) return new byte[]{ (byte) _meta_default }; |
|
829 |
int canonicalIndex = BandStructure.indexOf(this); |
|
830 |
if (canonicalIndex > 0) |
|
831 |
return new byte[]{ (byte) canonicalIndex }; |
|
832 |
return new byte[]{ |
|
833 |
(byte)_meta_arb, |
|
834 |
(byte)(del + 2*S + 8*(B-1)), |
|
835 |
(byte)(H-1) |
|
836 |
}; |
|
837 |
} |
|
838 |
public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { |
|
839 |
int op = bytes[pos++] & 0xFF; |
|
840 |
if (_meta_canon_min <= op && op <= _meta_canon_max) { |
|
841 |
Coding c = BandStructure.codingForIndex(op); |
|
842 |
assert(c != null); |
|
843 |
res[0] = c; |
|
844 |
return pos; |
|
845 |
} |
|
846 |
if (op == _meta_arb) { |
|
847 |
int dsb = bytes[pos++] & 0xFF; |
|
848 |
int H_1 = bytes[pos++] & 0xFF; |
|
849 |
int del = dsb % 2; |
|
850 |
int S = (dsb / 2) % 4; |
|
851 |
int B = (dsb / 8)+1; |
|
852 |
int H = H_1+1; |
|
853 |
if (!((1 <= B && B <= B_MAX) && |
|
854 |
(0 <= S && S <= S_MAX) && |
|
855 |
(1 <= H && H <= H_MAX) && |
|
856 |
(0 <= del && del <= 1)) |
|
857 |
|| (B == 1 && H != 256) |
|
858 |
|| (B == 5 && H == 256)) { |
|
859 |
throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del); |
|
860 |
} |
|
861 |
res[0] = Coding.of(B, H, S, del); |
|
862 |
return pos; |
|
863 |
} |
|
864 |
return pos-1; // backup |
|
865 |
} |
|
866 |
||
867 |
||
868 |
public String keyString() { |
|
869 |
return "("+B+","+H+","+S+","+del+")"; |
|
870 |
} |
|
871 |
||
872 |
public String toString() { |
|
873 |
String str = "Coding"+keyString(); |
|
874 |
// If -ea, print out more informative strings! |
|
875 |
//assert((str = stringForDebug()) != null); |
|
876 |
return str; |
|
877 |
} |
|
878 |
||
879 |
static boolean verboseStringForDebug = false; |
|
880 |
String stringForDebug() { |
|
881 |
String minS = (min == Integer.MIN_VALUE ? "min" : ""+min); |
|
882 |
String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max); |
|
883 |
String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]"; |
|
884 |
if (isSubrange()) |
|
885 |
str += " subrange"; |
|
886 |
else if (!isFullRange()) |
|
887 |
str += " MIDRANGE"; |
|
888 |
if (verboseStringForDebug) { |
|
889 |
str += " {"; |
|
890 |
int prev_range = 0; |
|
891 |
for (int n = 1; n <= B; n++) { |
|
892 |
int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1); |
|
893 |
assert(range_n == saturate32(codeRangeLong(B, H, n))); |
|
894 |
range_n -= prev_range; |
|
895 |
prev_range = range_n; |
|
896 |
String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n); |
|
897 |
str += " #"+n+"="+rngS; |
|
898 |
} |
|
899 |
str += " }"; |
|
900 |
} |
|
901 |
return str; |
|
902 |
} |
|
903 |
} |