1 /* |
|
2 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 // -*- C++ -*- |
|
27 // Small program for unpacking specially compressed Java packages. |
|
28 // John R. Rose |
|
29 |
|
30 #include <stdio.h> |
|
31 #include <string.h> |
|
32 #include <stdlib.h> |
|
33 #include <stdarg.h> |
|
34 |
|
35 #include "jni_util.h" |
|
36 |
|
37 #include "defines.h" |
|
38 #include "bytes.h" |
|
39 #include "utils.h" |
|
40 #include "coding.h" |
|
41 |
|
42 #include "constants.h" |
|
43 #include "unpack.h" |
|
44 |
|
45 extern coding basic_codings[]; |
|
46 |
|
47 #define CODING_PRIVATE(spec) \ |
|
48 int spec_ = spec; \ |
|
49 int B = CODING_B(spec_); \ |
|
50 int H = CODING_H(spec_); \ |
|
51 int L = 256 - H; \ |
|
52 int S = CODING_S(spec_); \ |
|
53 int D = CODING_D(spec_) |
|
54 |
|
55 #define IS_NEG_CODE(S, codeVal) \ |
|
56 ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 ) |
|
57 |
|
58 #define DECODE_SIGN_S1(ux) \ |
|
59 ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) ) |
|
60 |
|
61 static maybe_inline |
|
62 int decode_sign(int S, uint ux) { // == Coding.decodeSign32 |
|
63 assert(S > 0); |
|
64 uint sigbits = (ux >> S); |
|
65 if (IS_NEG_CODE(S, ux)) |
|
66 return (int)( ~sigbits); |
|
67 else |
|
68 return (int)(ux - sigbits); |
|
69 // Note that (int)(ux-sigbits) can be negative, if ux is large enough. |
|
70 } |
|
71 |
|
72 coding* coding::init() { |
|
73 if (umax > 0) return this; // already done |
|
74 assert(spec != 0); // sanity |
|
75 |
|
76 // fill in derived fields |
|
77 CODING_PRIVATE(spec); |
|
78 |
|
79 // Return null if 'arb(BHSD)' parameter constraints are not met: |
|
80 if (B < 1 || B > B_MAX) return null; |
|
81 if (H < 1 || H > 256) return null; |
|
82 if (S < 0 || S > 2) return null; |
|
83 if (D < 0 || D > 1) return null; |
|
84 if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size |
|
85 if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding |
|
86 |
|
87 // first compute the range of the coding, in 64 bits |
|
88 jlong range = 0; |
|
89 { |
|
90 jlong H_i = 1; |
|
91 for (int i = 0; i < B; i++) { |
|
92 range += H_i; |
|
93 H_i *= H; |
|
94 } |
|
95 range *= L; |
|
96 range += H_i; |
|
97 } |
|
98 assert(range > 0); // no useless codings, please |
|
99 |
|
100 int this_umax; |
|
101 |
|
102 // now, compute min and max |
|
103 if (range >= ((jlong)1 << 32)) { |
|
104 this_umax = INT_MAX_VALUE; |
|
105 this->umin = INT_MIN_VALUE; |
|
106 this->max = INT_MAX_VALUE; |
|
107 this->min = INT_MIN_VALUE; |
|
108 } else { |
|
109 this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; |
|
110 this->max = this_umax; |
|
111 this->min = this->umin = 0; |
|
112 if (S != 0 && range != 0) { |
|
113 int Smask = (1<<S)-1; |
|
114 jlong maxPosCode = range-1; |
|
115 jlong maxNegCode = range-1; |
|
116 while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode; |
|
117 while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode; |
|
118 int maxPos = decode_sign(S, (uint)maxPosCode); |
|
119 if (maxPos < 0) |
|
120 this->max = INT_MAX_VALUE; // 32-bit wraparound |
|
121 else |
|
122 this->max = maxPos; |
|
123 if (maxNegCode < 0) |
|
124 this->min = 0; // No negative codings at all. |
|
125 else |
|
126 this->min = decode_sign(S, (uint)maxNegCode); |
|
127 } |
|
128 } |
|
129 |
|
130 assert(!(isFullRange | isSigned | isSubrange)); // init |
|
131 if (min < 0) |
|
132 this->isSigned = true; |
|
133 if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) |
|
134 this->isSubrange = true; |
|
135 if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) |
|
136 this->isFullRange = true; |
|
137 |
|
138 // do this last, to reduce MT exposure (should have a membar too) |
|
139 this->umax = this_umax; |
|
140 |
|
141 return this; |
|
142 } |
|
143 |
|
144 coding* coding::findBySpec(int spec) { |
|
145 for (coding* scan = &basic_codings[0]; ; scan++) { |
|
146 if (scan->spec == spec) |
|
147 return scan->init(); |
|
148 if (scan->spec == 0) |
|
149 break; |
|
150 } |
|
151 coding* ptr = NEW(coding, 1); |
|
152 CHECK_NULL_RETURN(ptr, 0); |
|
153 coding* c = ptr->initFrom(spec); |
|
154 if (c == null) { |
|
155 mtrace('f', ptr, 0); |
|
156 ::free(ptr); |
|
157 } else |
|
158 // else caller should free it... |
|
159 c->isMalloc = true; |
|
160 return c; |
|
161 } |
|
162 |
|
163 coding* coding::findBySpec(int B, int H, int S, int D) { |
|
164 if (B < 1 || B > B_MAX) return null; |
|
165 if (H < 1 || H > 256) return null; |
|
166 if (S < 0 || S > 2) return null; |
|
167 if (D < 0 || D > 1) return null; |
|
168 return findBySpec(CODING_SPEC(B, H, S, D)); |
|
169 } |
|
170 |
|
171 void coding::free() { |
|
172 if (isMalloc) { |
|
173 mtrace('f', this, 0); |
|
174 ::free(this); |
|
175 } |
|
176 } |
|
177 |
|
178 void coding_method::reset(value_stream* state) { |
|
179 assert(state->rp == state->rplimit); // not in mid-stream, please |
|
180 //assert(this == vs0.cm); |
|
181 state[0] = vs0; |
|
182 if (uValues != null) { |
|
183 uValues->reset(state->helper()); |
|
184 } |
|
185 } |
|
186 |
|
187 maybe_inline |
|
188 uint coding::parse(byte* &rp, int B, int H) { |
|
189 int L = 256-H; |
|
190 byte* ptr = rp; |
|
191 // hand peel the i==0 part of the loop: |
|
192 uint b_i = *ptr++ & 0xFF; |
|
193 if (B == 1 || b_i < (uint)L) |
|
194 { rp = ptr; return b_i; } |
|
195 uint sum = b_i; |
|
196 uint H_i = H; |
|
197 assert(B <= B_MAX); |
|
198 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired |
|
199 b_i = *ptr++ & 0xFF; |
|
200 sum += b_i * H_i; |
|
201 if (i == B || b_i < (uint)L) |
|
202 { rp = ptr; return sum; } |
|
203 H_i *= H; |
|
204 } |
|
205 assert(false); |
|
206 return 0; |
|
207 } |
|
208 |
|
209 maybe_inline |
|
210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { |
|
211 assert(H == (1<<lgH)); |
|
212 int L = 256-(1<<lgH); |
|
213 byte* ptr = rp; |
|
214 // hand peel the i==0 part of the loop: |
|
215 uint b_i = *ptr++ & 0xFF; |
|
216 if (B == 1 || b_i < (uint)L) |
|
217 { rp = ptr; return b_i; } |
|
218 uint sum = b_i; |
|
219 uint lg_H_i = lgH; |
|
220 assert(B <= B_MAX); |
|
221 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired |
|
222 b_i = *ptr++ & 0xFF; |
|
223 sum += b_i << lg_H_i; |
|
224 if (i == B || b_i < (uint)L) |
|
225 { rp = ptr; return sum; } |
|
226 lg_H_i += lgH; |
|
227 } |
|
228 assert(false); |
|
229 return 0; |
|
230 } |
|
231 |
|
232 static const char ERB[] = "EOF reading band"; |
|
233 |
|
234 maybe_inline |
|
235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) { |
|
236 if (N < 0) { |
|
237 abort("bad value count"); |
|
238 return; |
|
239 } |
|
240 byte* ptr = rp; |
|
241 if (B == 1 || H == 256) { |
|
242 size_t len = (size_t)N*B; |
|
243 if (len / B != (size_t)N || ptr+len > limit) { |
|
244 abort(ERB); |
|
245 return; |
|
246 } |
|
247 rp = ptr+len; |
|
248 return; |
|
249 } |
|
250 // Note: We assume rp has enough zero-padding. |
|
251 int L = 256-H; |
|
252 int n = B; |
|
253 while (N > 0) { |
|
254 ptr += 1; |
|
255 if (--n == 0) { |
|
256 // end of encoding at B bytes, regardless of byte value |
|
257 } else { |
|
258 int b = (ptr[-1] & 0xFF); |
|
259 if (b >= L) { |
|
260 // keep going, unless we find a byte < L |
|
261 continue; |
|
262 } |
|
263 } |
|
264 // found the last byte |
|
265 N -= 1; |
|
266 n = B; // reset length counter |
|
267 // do an error check here |
|
268 if (ptr > limit) { |
|
269 abort(ERB); |
|
270 return; |
|
271 } |
|
272 } |
|
273 rp = ptr; |
|
274 return; |
|
275 } |
|
276 |
|
277 bool value_stream::hasHelper() { |
|
278 // If my coding method is a pop-style method, |
|
279 // then I need a second value stream to transmit |
|
280 // unfavored values. |
|
281 // This can be determined by examining fValues. |
|
282 return cm->fValues != null; |
|
283 } |
|
284 |
|
285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { |
|
286 rp = rp_; |
|
287 rplimit = rplimit_; |
|
288 sum = 0; |
|
289 cm = null; // no need in the simple case |
|
290 setCoding(defc); |
|
291 } |
|
292 |
|
293 void value_stream::setCoding(coding* defc) { |
|
294 if (defc == null) { |
|
295 unpack_abort("bad coding"); |
|
296 defc = coding::findByIndex(_meta_canon_min); // random pick for recovery |
|
297 } |
|
298 |
|
299 c = (*defc); |
|
300 |
|
301 // choose cmk |
|
302 cmk = cmk_ERROR; |
|
303 switch (c.spec) { |
|
304 case BYTE1_spec: cmk = cmk_BYTE1; break; |
|
305 case CHAR3_spec: cmk = cmk_CHAR3; break; |
|
306 case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; |
|
307 case DELTA5_spec: cmk = cmk_DELTA5; break; |
|
308 case BCI5_spec: cmk = cmk_BCI5; break; |
|
309 case BRANCH5_spec: cmk = cmk_BRANCH5; break; |
|
310 default: |
|
311 if (c.D() == 0) { |
|
312 switch (c.S()) { |
|
313 case 0: cmk = cmk_BHS0; break; |
|
314 case 1: cmk = cmk_BHS1; break; |
|
315 default: cmk = cmk_BHS; break; |
|
316 } |
|
317 } else { |
|
318 if (c.S() == 1) { |
|
319 if (c.isFullRange) cmk = cmk_BHS1D1full; |
|
320 if (c.isSubrange) cmk = cmk_BHS1D1sub; |
|
321 } |
|
322 if (cmk == cmk_ERROR) cmk = cmk_BHSD1; |
|
323 } |
|
324 } |
|
325 } |
|
326 |
|
327 static maybe_inline |
|
328 int getPopValue(value_stream* self, uint uval) { |
|
329 if (uval > 0) { |
|
330 // note that the initial parse performed a range check |
|
331 assert(uval <= (uint)self->cm->fVlength); |
|
332 return self->cm->fValues[uval-1]; |
|
333 } else { |
|
334 // take an unfavored value |
|
335 return self->helper()->getInt(); |
|
336 } |
|
337 } |
|
338 |
|
339 maybe_inline |
|
340 int coding::sumInUnsignedRange(int x, int y) { |
|
341 assert(isSubrange); |
|
342 int range = (int)(umax+1); |
|
343 assert(range > 0); |
|
344 x += y; |
|
345 if (x != (int)((jlong)(x-y) + (jlong)y)) { |
|
346 // 32-bit overflow interferes with range reduction. |
|
347 // Back off from the overflow by adding a multiple of range: |
|
348 if (x < 0) { |
|
349 x -= range; |
|
350 assert(x >= 0); |
|
351 } else { |
|
352 x += range; |
|
353 assert(x < 0); |
|
354 } |
|
355 } |
|
356 if (x < 0) { |
|
357 x += range; |
|
358 if (x >= 0) return x; |
|
359 } else if (x >= range) { |
|
360 x -= range; |
|
361 if (x < range) return x; |
|
362 } else { |
|
363 // in range |
|
364 return x; |
|
365 } |
|
366 // do it the hard way |
|
367 x %= range; |
|
368 if (x < 0) x += range; |
|
369 return x; |
|
370 } |
|
371 |
|
372 static maybe_inline |
|
373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { |
|
374 assert((uint)(self->c.isSubrange) == (uint)isSubrange); |
|
375 assert(self->c.isSubrange | self->c.isFullRange); |
|
376 if (isSubrange) |
|
377 return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); |
|
378 else |
|
379 return self->sum += (int) uval; |
|
380 } |
|
381 |
|
382 bool value_stream::hasValue() { |
|
383 if (rp < rplimit) return true; |
|
384 if (cm == null) return false; |
|
385 if (cm->next == null) return false; |
|
386 cm->next->reset(this); |
|
387 return hasValue(); |
|
388 } |
|
389 |
|
390 int value_stream::getInt() { |
|
391 if (rp >= rplimit) { |
|
392 // Advance to next coding segment. |
|
393 if (rp > rplimit || cm == null || cm->next == null) { |
|
394 // Must perform this check and throw an exception on bad input. |
|
395 unpack_abort(ERB); |
|
396 return 0; |
|
397 } |
|
398 cm->next->reset(this); |
|
399 return getInt(); |
|
400 } |
|
401 |
|
402 CODING_PRIVATE(c.spec); |
|
403 uint uval; |
|
404 enum { |
|
405 B5 = 5, |
|
406 B3 = 3, |
|
407 H128 = 128, |
|
408 H64 = 64, |
|
409 H4 = 4 |
|
410 }; |
|
411 switch (cmk) { |
|
412 case cmk_BHS: |
|
413 assert(D == 0); |
|
414 uval = coding::parse(rp, B, H); |
|
415 if (S == 0) |
|
416 return (int) uval; |
|
417 return decode_sign(S, uval); |
|
418 |
|
419 case cmk_BHS0: |
|
420 assert(S == 0 && D == 0); |
|
421 uval = coding::parse(rp, B, H); |
|
422 return (int) uval; |
|
423 |
|
424 case cmk_BHS1: |
|
425 assert(S == 1 && D == 0); |
|
426 uval = coding::parse(rp, B, H); |
|
427 return DECODE_SIGN_S1(uval); |
|
428 |
|
429 case cmk_BYTE1: |
|
430 assert(c.spec == BYTE1_spec); |
|
431 assert(B == 1 && H == 256 && S == 0 && D == 0); |
|
432 return *rp++ & 0xFF; |
|
433 |
|
434 case cmk_CHAR3: |
|
435 assert(c.spec == CHAR3_spec); |
|
436 assert(B == B3 && H == H128 && S == 0 && D == 0); |
|
437 return coding::parse_lgH(rp, B3, H128, 7); |
|
438 |
|
439 case cmk_UNSIGNED5: |
|
440 assert(c.spec == UNSIGNED5_spec); |
|
441 assert(B == B5 && H == H64 && S == 0 && D == 0); |
|
442 return coding::parse_lgH(rp, B5, H64, 6); |
|
443 |
|
444 case cmk_BHSD1: |
|
445 assert(D == 1); |
|
446 uval = coding::parse(rp, B, H); |
|
447 if (S != 0) |
|
448 uval = (uint) decode_sign(S, uval); |
|
449 return getDeltaValue(this, uval, (bool)c.isSubrange); |
|
450 |
|
451 case cmk_BHS1D1full: |
|
452 assert(S == 1 && D == 1 && c.isFullRange); |
|
453 uval = coding::parse(rp, B, H); |
|
454 uval = (uint) DECODE_SIGN_S1(uval); |
|
455 return getDeltaValue(this, uval, false); |
|
456 |
|
457 case cmk_BHS1D1sub: |
|
458 assert(S == 1 && D == 1 && c.isSubrange); |
|
459 uval = coding::parse(rp, B, H); |
|
460 uval = (uint) DECODE_SIGN_S1(uval); |
|
461 return getDeltaValue(this, uval, true); |
|
462 |
|
463 case cmk_DELTA5: |
|
464 assert(c.spec == DELTA5_spec); |
|
465 assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); |
|
466 uval = coding::parse_lgH(rp, B5, H64, 6); |
|
467 sum += DECODE_SIGN_S1(uval); |
|
468 return sum; |
|
469 |
|
470 case cmk_BCI5: |
|
471 assert(c.spec == BCI5_spec); |
|
472 assert(B == B5 && H == H4 && S == 0 && D == 0); |
|
473 return coding::parse_lgH(rp, B5, H4, 2); |
|
474 |
|
475 case cmk_BRANCH5: |
|
476 assert(c.spec == BRANCH5_spec); |
|
477 assert(B == B5 && H == H4 && S == 2 && D == 0); |
|
478 uval = coding::parse_lgH(rp, B5, H4, 2); |
|
479 return decode_sign(S, uval); |
|
480 |
|
481 case cmk_pop: |
|
482 uval = coding::parse(rp, B, H); |
|
483 if (S != 0) { |
|
484 uval = (uint) decode_sign(S, uval); |
|
485 } |
|
486 if (D != 0) { |
|
487 assert(c.isSubrange | c.isFullRange); |
|
488 if (c.isSubrange) |
|
489 sum = c.sumInUnsignedRange(sum, (int) uval); |
|
490 else |
|
491 sum += (int) uval; |
|
492 uval = (uint) sum; |
|
493 } |
|
494 return getPopValue(this, uval); |
|
495 |
|
496 case cmk_pop_BHS0: |
|
497 assert(S == 0 && D == 0); |
|
498 uval = coding::parse(rp, B, H); |
|
499 return getPopValue(this, uval); |
|
500 |
|
501 case cmk_pop_BYTE1: |
|
502 assert(c.spec == BYTE1_spec); |
|
503 assert(B == 1 && H == 256 && S == 0 && D == 0); |
|
504 return getPopValue(this, *rp++ & 0xFF); |
|
505 |
|
506 default: |
|
507 break; |
|
508 } |
|
509 assert(false); |
|
510 return 0; |
|
511 } |
|
512 |
|
513 static maybe_inline |
|
514 int moreCentral(int x, int y) { // used to find end of Pop.{F} |
|
515 // Suggested implementation from the Pack200 specification: |
|
516 uint kx = (x >> 31) ^ (x << 1); |
|
517 uint ky = (y >> 31) ^ (y << 1); |
|
518 return (kx < ky? x: y); |
|
519 } |
|
520 //static maybe_inline |
|
521 //int moreCentral2(int x, int y, int min) { |
|
522 // // Strict implementation of buggy 150.7 specification. |
|
523 // // The bug is that the spec. says absolute-value ties are broken |
|
524 // // in favor of positive numbers, but the suggested implementation |
|
525 // // (also mentioned in the spec.) breaks ties in favor of negative numbers. |
|
526 // if ((x + y) != 0) |
|
527 // return min; |
|
528 // else |
|
529 // // return the other value, which breaks a tie in the positive direction |
|
530 // return (x > y)? x: y; |
|
531 //} |
|
532 |
|
533 static const byte* no_meta[] = {null}; |
|
534 #define NO_META (*(byte**)no_meta) |
|
535 enum { POP_FAVORED_N = -2 }; |
|
536 |
|
537 // mode bits |
|
538 #define DISABLE_RUN 1 // used immediately inside ACodee |
|
539 #define DISABLE_POP 2 // used recursively in all pop sub-bands |
|
540 |
|
541 // This function knows all about meta-coding. |
|
542 void coding_method::init(byte* &band_rp, byte* band_limit, |
|
543 byte* &meta_rp, int mode, |
|
544 coding* defc, int N, |
|
545 intlist* valueSink) { |
|
546 assert(N != 0); |
|
547 |
|
548 assert(u != null); // must be pre-initialized |
|
549 //if (u == null) u = unpacker::current(); // expensive |
|
550 |
|
551 int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); |
|
552 coding* foundc = null; |
|
553 coding* to_free = null; |
|
554 |
|
555 if (op == _meta_default) { |
|
556 foundc = defc; |
|
557 // and fall through |
|
558 |
|
559 } else if (op >= _meta_canon_min && op <= _meta_canon_max) { |
|
560 foundc = coding::findByIndex(op); |
|
561 // and fall through |
|
562 |
|
563 } else if (op == _meta_arb) { |
|
564 int args = (*meta_rp++ & 0xFF); |
|
565 // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) |
|
566 int D = ((args >> 0) & 1); |
|
567 int S = ((args >> 1) & 3); |
|
568 int B = ((args >> 3) & -1) + 1; |
|
569 // & (H[1..256]-1) |
|
570 int H = (*meta_rp++ & 0xFF) + 1; |
|
571 foundc = coding::findBySpec(B, H, S, D); |
|
572 to_free = foundc; // findBySpec may dynamically allocate |
|
573 if (foundc == null) { |
|
574 abort("illegal arb. coding"); |
|
575 return; |
|
576 } |
|
577 // and fall through |
|
578 |
|
579 } else if (op >= _meta_run && op < _meta_pop) { |
|
580 int args = (op - _meta_run); |
|
581 // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) |
|
582 int KX = ((args >> 0) & 3); |
|
583 int KBFlag = ((args >> 2) & 1); |
|
584 int ABDef = ((args >> 3) & -1); |
|
585 assert(ABDef <= 2); |
|
586 // & KB: one of [0..255] if KBFlag=1 |
|
587 int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); |
|
588 int K = (KB+1) << (KX * 4); |
|
589 int N2 = (N >= 0) ? N-K : N; |
|
590 if (N == 0 || (N2 <= 0 && N2 != N)) { |
|
591 abort("illegal run encoding"); |
|
592 return; |
|
593 } |
|
594 if ((mode & DISABLE_RUN) != 0) { |
|
595 abort("illegal nested run encoding"); |
|
596 return; |
|
597 } |
|
598 |
|
599 // & Enc{ ACode } if ADef=0 (ABDef != 1) |
|
600 // No direct nesting of 'run' in ACode, but in BCode it's OK. |
|
601 int disRun = mode | DISABLE_RUN; |
|
602 if (ABDef == 1) { |
|
603 this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); |
|
604 } else { |
|
605 this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); |
|
606 } |
|
607 CHECK; |
|
608 |
|
609 // & Enc{ BCode } if BDef=0 (ABDef != 2) |
|
610 coding_method* tail = U_NEW(coding_method, 1); |
|
611 CHECK_NULL(tail); |
|
612 tail->u = u; |
|
613 |
|
614 // The 'run' codings may be nested indirectly via 'pop' codings. |
|
615 // This means that this->next may already be filled in, if |
|
616 // ACode was of type 'pop' with a 'run' token coding. |
|
617 // No problem: Just chain the upcoming BCode onto the end. |
|
618 for (coding_method* self = this; ; self = self->next) { |
|
619 if (self->next == null) { |
|
620 self->next = tail; |
|
621 break; |
|
622 } |
|
623 } |
|
624 |
|
625 if (ABDef == 2) { |
|
626 tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); |
|
627 } else { |
|
628 tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); |
|
629 } |
|
630 // Note: The preceding calls to init should be tail-recursive. |
|
631 |
|
632 return; // done; no falling through |
|
633 |
|
634 } else if (op >= _meta_pop && op < _meta_limit) { |
|
635 int args = (op - _meta_pop); |
|
636 // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) |
|
637 int FDef = ((args >> 0) & 1); |
|
638 int UDef = ((args >> 1) & 1); |
|
639 int TDefL = ((args >> 2) & -1); |
|
640 assert(TDefL <= 11); |
|
641 int TDef = (TDefL > 0); |
|
642 int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); |
|
643 int TH = (256-TL); |
|
644 if (N <= 0) { |
|
645 abort("illegal pop encoding"); |
|
646 return; |
|
647 } |
|
648 if ((mode & DISABLE_POP) != 0) { |
|
649 abort("illegal nested pop encoding"); |
|
650 return; |
|
651 } |
|
652 |
|
653 // No indirect nesting of 'pop', but 'run' is OK. |
|
654 int disPop = DISABLE_POP; |
|
655 |
|
656 // & Enc{ FCode } if FDef=0 |
|
657 int FN = POP_FAVORED_N; |
|
658 assert(valueSink == null); |
|
659 intlist fValueSink; fValueSink.init(); |
|
660 coding_method fval; |
|
661 BYTES_OF(fval).clear(); fval.u = u; |
|
662 if (FDef != 0) { |
|
663 fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); |
|
664 } else { |
|
665 fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); |
|
666 } |
|
667 bytes fvbuf; |
|
668 fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); |
|
669 fVlength = fValueSink.length(); // i.e., the parameter K |
|
670 fValueSink.free(); |
|
671 CHECK; |
|
672 |
|
673 // Skip the first {F} run in all subsequent passes. |
|
674 // The next call to this->init(...) will set vs0.rp to point after the {F}. |
|
675 |
|
676 // & Enc{ TCode } if TDef=0 (TDefL==0) |
|
677 if (TDef != 0) { |
|
678 coding* tcode = coding::findBySpec(1, 256); // BYTE1 |
|
679 // find the most narrowly sufficient code: |
|
680 for (int B = 2; B <= B_MAX; B++) { |
|
681 if (fVlength <= tcode->umax) break; // found it |
|
682 tcode->free(); |
|
683 tcode = coding::findBySpec(B, TH); |
|
684 CHECK_NULL(tcode); |
|
685 } |
|
686 if (!(fVlength <= tcode->umax)) { |
|
687 abort("pop.L value too small"); |
|
688 return; |
|
689 } |
|
690 this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); |
|
691 tcode->free(); |
|
692 } else { |
|
693 this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); |
|
694 } |
|
695 CHECK; |
|
696 |
|
697 // Count the number of zero tokens right now. |
|
698 // Also verify that they are in bounds. |
|
699 int UN = 0; // one {U} for each zero in {T} |
|
700 value_stream vs = vs0; |
|
701 for (int i = 0; i < N; i++) { |
|
702 uint val = vs.getInt(); |
|
703 if (val == 0) UN += 1; |
|
704 if (!(val <= (uint)fVlength)) { |
|
705 abort("pop token out of range"); |
|
706 return; |
|
707 } |
|
708 } |
|
709 vs.done(); |
|
710 |
|
711 // & Enc{ UCode } if UDef=0 |
|
712 if (UN != 0) { |
|
713 uValues = U_NEW(coding_method, 1); |
|
714 CHECK_NULL(uValues); |
|
715 uValues->u = u; |
|
716 if (UDef != 0) { |
|
717 uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); |
|
718 } else { |
|
719 uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); |
|
720 } |
|
721 } else { |
|
722 if (UDef == 0) { |
|
723 int uop = (*meta_rp++ & 0xFF); |
|
724 if (uop > _meta_canon_max) |
|
725 // %%% Spec. requires the more strict (uop != _meta_default). |
|
726 abort("bad meta-coding for empty pop/U"); |
|
727 } |
|
728 } |
|
729 |
|
730 // Bug fix for 6259542 |
|
731 // Last of all, adjust vs0.cmk to the 'pop' flavor |
|
732 for (coding_method* self = this; self != null; self = self->next) { |
|
733 coding_method_kind cmk2 = cmk_pop; |
|
734 switch (self->vs0.cmk) { |
|
735 case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; |
|
736 case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; |
|
737 default: break; |
|
738 } |
|
739 self->vs0.cmk = cmk2; |
|
740 if (self != this) { |
|
741 assert(self->fValues == null); // no double init |
|
742 self->fValues = this->fValues; |
|
743 self->fVlength = this->fVlength; |
|
744 assert(self->uValues == null); // must stay null |
|
745 } |
|
746 } |
|
747 |
|
748 return; // done; no falling through |
|
749 |
|
750 } else { |
|
751 abort("bad meta-coding"); |
|
752 return; |
|
753 } |
|
754 |
|
755 // Common code here skips a series of values with one coding. |
|
756 assert(foundc != null); |
|
757 |
|
758 assert(vs0.cmk == cmk_ERROR); // no garbage, please |
|
759 assert(vs0.rp == null); // no garbage, please |
|
760 assert(vs0.rplimit == null); // no garbage, please |
|
761 assert(vs0.sum == 0); // no garbage, please |
|
762 |
|
763 vs0.init(band_rp, band_limit, foundc); |
|
764 |
|
765 // Done with foundc. Free if necessary. |
|
766 if (to_free != null) { |
|
767 to_free->free(); |
|
768 to_free = null; |
|
769 } |
|
770 foundc = null; |
|
771 |
|
772 coding& c = vs0.c; |
|
773 CODING_PRIVATE(c.spec); |
|
774 // assert sane N |
|
775 assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); |
|
776 |
|
777 // Look at the values, or at least skip over them quickly. |
|
778 if (valueSink == null) { |
|
779 // Skip and ignore values in the first pass. |
|
780 c.parseMultiple(band_rp, N, band_limit, B, H); |
|
781 } else if (N >= 0) { |
|
782 // Pop coding, {F} sequence, initial run of values... |
|
783 assert((mode & DISABLE_POP) != 0); |
|
784 value_stream vs = vs0; |
|
785 for (int n = 0; n < N; n++) { |
|
786 int val = vs.getInt(); |
|
787 valueSink->add(val); |
|
788 } |
|
789 band_rp = vs.rp; |
|
790 } else { |
|
791 // Pop coding, {F} sequence, final run of values... |
|
792 assert((mode & DISABLE_POP) != 0); |
|
793 assert(N == POP_FAVORED_N); |
|
794 int min = INT_MIN_VALUE; // farthest from the center |
|
795 // min2 is based on the buggy specification of centrality in version 150.7 |
|
796 // no known implementations transmit this value, but just in case... |
|
797 //int min2 = INT_MIN_VALUE; |
|
798 int last = 0; |
|
799 // if there were initial runs, find the potential sentinels in them: |
|
800 for (int i = 0; i < valueSink->length(); i++) { |
|
801 last = valueSink->get(i); |
|
802 min = moreCentral(min, last); |
|
803 //min2 = moreCentral2(min2, last, min); |
|
804 } |
|
805 value_stream vs = vs0; |
|
806 for (;;) { |
|
807 int val = vs.getInt(); |
|
808 if (valueSink->length() > 0 && |
|
809 (val == last || val == min)) //|| val == min2 |
|
810 break; |
|
811 valueSink->add(val); |
|
812 CHECK; |
|
813 last = val; |
|
814 min = moreCentral(min, last); |
|
815 //min2 = moreCentral2(min2, last, min); |
|
816 } |
|
817 band_rp = vs.rp; |
|
818 } |
|
819 CHECK; |
|
820 |
|
821 // Get an accurate upper limit now. |
|
822 vs0.rplimit = band_rp; |
|
823 vs0.cm = this; |
|
824 |
|
825 return; // success |
|
826 } |
|
827 |
|
828 coding basic_codings[] = { |
|
829 // This one is not a usable irregular coding, but is used by cp_Utf8_chars. |
|
830 CODING_INIT(3,128,0,0), |
|
831 |
|
832 // Fixed-length codings: |
|
833 CODING_INIT(1,256,0,0), |
|
834 CODING_INIT(1,256,1,0), |
|
835 CODING_INIT(1,256,0,1), |
|
836 CODING_INIT(1,256,1,1), |
|
837 CODING_INIT(2,256,0,0), |
|
838 CODING_INIT(2,256,1,0), |
|
839 CODING_INIT(2,256,0,1), |
|
840 CODING_INIT(2,256,1,1), |
|
841 CODING_INIT(3,256,0,0), |
|
842 CODING_INIT(3,256,1,0), |
|
843 CODING_INIT(3,256,0,1), |
|
844 CODING_INIT(3,256,1,1), |
|
845 CODING_INIT(4,256,0,0), |
|
846 CODING_INIT(4,256,1,0), |
|
847 CODING_INIT(4,256,0,1), |
|
848 CODING_INIT(4,256,1,1), |
|
849 |
|
850 // Full-range variable-length codings: |
|
851 CODING_INIT(5, 4,0,0), |
|
852 CODING_INIT(5, 4,1,0), |
|
853 CODING_INIT(5, 4,2,0), |
|
854 CODING_INIT(5, 16,0,0), |
|
855 CODING_INIT(5, 16,1,0), |
|
856 CODING_INIT(5, 16,2,0), |
|
857 CODING_INIT(5, 32,0,0), |
|
858 CODING_INIT(5, 32,1,0), |
|
859 CODING_INIT(5, 32,2,0), |
|
860 CODING_INIT(5, 64,0,0), |
|
861 CODING_INIT(5, 64,1,0), |
|
862 CODING_INIT(5, 64,2,0), |
|
863 CODING_INIT(5,128,0,0), |
|
864 CODING_INIT(5,128,1,0), |
|
865 CODING_INIT(5,128,2,0), |
|
866 |
|
867 CODING_INIT(5, 4,0,1), |
|
868 CODING_INIT(5, 4,1,1), |
|
869 CODING_INIT(5, 4,2,1), |
|
870 CODING_INIT(5, 16,0,1), |
|
871 CODING_INIT(5, 16,1,1), |
|
872 CODING_INIT(5, 16,2,1), |
|
873 CODING_INIT(5, 32,0,1), |
|
874 CODING_INIT(5, 32,1,1), |
|
875 CODING_INIT(5, 32,2,1), |
|
876 CODING_INIT(5, 64,0,1), |
|
877 CODING_INIT(5, 64,1,1), |
|
878 CODING_INIT(5, 64,2,1), |
|
879 CODING_INIT(5,128,0,1), |
|
880 CODING_INIT(5,128,1,1), |
|
881 CODING_INIT(5,128,2,1), |
|
882 |
|
883 // Variable length subrange codings: |
|
884 CODING_INIT(2,192,0,0), |
|
885 CODING_INIT(2,224,0,0), |
|
886 CODING_INIT(2,240,0,0), |
|
887 CODING_INIT(2,248,0,0), |
|
888 CODING_INIT(2,252,0,0), |
|
889 |
|
890 CODING_INIT(2, 8,0,1), |
|
891 CODING_INIT(2, 8,1,1), |
|
892 CODING_INIT(2, 16,0,1), |
|
893 CODING_INIT(2, 16,1,1), |
|
894 CODING_INIT(2, 32,0,1), |
|
895 CODING_INIT(2, 32,1,1), |
|
896 CODING_INIT(2, 64,0,1), |
|
897 CODING_INIT(2, 64,1,1), |
|
898 CODING_INIT(2,128,0,1), |
|
899 CODING_INIT(2,128,1,1), |
|
900 CODING_INIT(2,192,0,1), |
|
901 CODING_INIT(2,192,1,1), |
|
902 CODING_INIT(2,224,0,1), |
|
903 CODING_INIT(2,224,1,1), |
|
904 CODING_INIT(2,240,0,1), |
|
905 CODING_INIT(2,240,1,1), |
|
906 CODING_INIT(2,248,0,1), |
|
907 CODING_INIT(2,248,1,1), |
|
908 |
|
909 CODING_INIT(3,192,0,0), |
|
910 CODING_INIT(3,224,0,0), |
|
911 CODING_INIT(3,240,0,0), |
|
912 CODING_INIT(3,248,0,0), |
|
913 CODING_INIT(3,252,0,0), |
|
914 |
|
915 CODING_INIT(3, 8,0,1), |
|
916 CODING_INIT(3, 8,1,1), |
|
917 CODING_INIT(3, 16,0,1), |
|
918 CODING_INIT(3, 16,1,1), |
|
919 CODING_INIT(3, 32,0,1), |
|
920 CODING_INIT(3, 32,1,1), |
|
921 CODING_INIT(3, 64,0,1), |
|
922 CODING_INIT(3, 64,1,1), |
|
923 CODING_INIT(3,128,0,1), |
|
924 CODING_INIT(3,128,1,1), |
|
925 CODING_INIT(3,192,0,1), |
|
926 CODING_INIT(3,192,1,1), |
|
927 CODING_INIT(3,224,0,1), |
|
928 CODING_INIT(3,224,1,1), |
|
929 CODING_INIT(3,240,0,1), |
|
930 CODING_INIT(3,240,1,1), |
|
931 CODING_INIT(3,248,0,1), |
|
932 CODING_INIT(3,248,1,1), |
|
933 |
|
934 CODING_INIT(4,192,0,0), |
|
935 CODING_INIT(4,224,0,0), |
|
936 CODING_INIT(4,240,0,0), |
|
937 CODING_INIT(4,248,0,0), |
|
938 CODING_INIT(4,252,0,0), |
|
939 |
|
940 CODING_INIT(4, 8,0,1), |
|
941 CODING_INIT(4, 8,1,1), |
|
942 CODING_INIT(4, 16,0,1), |
|
943 CODING_INIT(4, 16,1,1), |
|
944 CODING_INIT(4, 32,0,1), |
|
945 CODING_INIT(4, 32,1,1), |
|
946 CODING_INIT(4, 64,0,1), |
|
947 CODING_INIT(4, 64,1,1), |
|
948 CODING_INIT(4,128,0,1), |
|
949 CODING_INIT(4,128,1,1), |
|
950 CODING_INIT(4,192,0,1), |
|
951 CODING_INIT(4,192,1,1), |
|
952 CODING_INIT(4,224,0,1), |
|
953 CODING_INIT(4,224,1,1), |
|
954 CODING_INIT(4,240,0,1), |
|
955 CODING_INIT(4,240,1,1), |
|
956 CODING_INIT(4,248,0,1), |
|
957 CODING_INIT(4,248,1,1), |
|
958 CODING_INIT(0,0,0,0) |
|
959 }; |
|
960 #define BASIC_INDEX_LIMIT \ |
|
961 (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1) |
|
962 |
|
963 coding* coding::findByIndex(int idx) { |
|
964 #ifndef PRODUCT |
|
965 /* Tricky assert here, constants and gcc complains about it without local. */ |
|
966 int index_limit = BASIC_INDEX_LIMIT; |
|
967 assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit); |
|
968 #endif |
|
969 if (idx >= _meta_canon_min && idx <= _meta_canon_max) |
|
970 return basic_codings[idx].init(); |
|
971 else |
|
972 return null; |
|
973 } |
|
974 |
|
975 #ifndef PRODUCT |
|
976 const char* coding::string() { |
|
977 CODING_PRIVATE(spec); |
|
978 bytes buf; |
|
979 buf.malloc(100); |
|
980 char maxS[20], minS[20]; |
|
981 sprintf(maxS, "%d", max); |
|
982 sprintf(minS, "%d", min); |
|
983 if (max == INT_MAX_VALUE) strcpy(maxS, "max"); |
|
984 if (min == INT_MIN_VALUE) strcpy(minS, "min"); |
|
985 sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", |
|
986 B,H,S,D,L,minS,maxS); |
|
987 return (const char*) buf.ptr; |
|
988 } |
|
989 #endif |
|