jdk/src/jdk.runtime/share/native/common-unpack/coding.cpp
changeset 29672 3449f2420896
parent 29671 2f05f44dfe01
parent 29670 218d8ee4cf12
child 29673 a4e2dbd7c777
equal deleted inserted replaced
29671:2f05f44dfe01 29672:3449f2420896
     1 /*
       
     2  * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 // -*- C++ -*-
       
    27 // Small program for unpacking specially compressed Java packages.
       
    28 // John R. Rose
       
    29 
       
    30 #include <stdio.h>
       
    31 #include <string.h>
       
    32 #include <stdlib.h>
       
    33 #include <stdarg.h>
       
    34 
       
    35 #include "jni_util.h"
       
    36 
       
    37 #include "defines.h"
       
    38 #include "bytes.h"
       
    39 #include "utils.h"
       
    40 #include "coding.h"
       
    41 
       
    42 #include "constants.h"
       
    43 #include "unpack.h"
       
    44 
       
    45 extern coding basic_codings[];
       
    46 
       
    47 #define CODING_PRIVATE(spec) \
       
    48   int spec_ = spec; \
       
    49   int B = CODING_B(spec_); \
       
    50   int H = CODING_H(spec_); \
       
    51   int L = 256 - H; \
       
    52   int S = CODING_S(spec_); \
       
    53   int D = CODING_D(spec_)
       
    54 
       
    55 #define IS_NEG_CODE(S, codeVal) \
       
    56   ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )
       
    57 
       
    58 #define DECODE_SIGN_S1(ux) \
       
    59   ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
       
    60 
       
    61 static maybe_inline
       
    62 int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
       
    63   assert(S > 0);
       
    64   uint sigbits = (ux >> S);
       
    65   if (IS_NEG_CODE(S, ux))
       
    66     return (int)(    ~sigbits);
       
    67   else
       
    68     return (int)(ux - sigbits);
       
    69   // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
       
    70 }
       
    71 
       
    72 coding* coding::init() {
       
    73   if (umax > 0)  return this;  // already done
       
    74   assert(spec != 0);  // sanity
       
    75 
       
    76   // fill in derived fields
       
    77   CODING_PRIVATE(spec);
       
    78 
       
    79   // Return null if 'arb(BHSD)' parameter constraints are not met:
       
    80   if (B < 1 || B > B_MAX)  return null;
       
    81   if (H < 1 || H > 256)    return null;
       
    82   if (S < 0 || S > 2)      return null;
       
    83   if (D < 0 || D > 1)      return null;
       
    84   if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
       
    85   if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding
       
    86 
       
    87   // first compute the range of the coding, in 64 bits
       
    88   jlong range = 0;
       
    89   {
       
    90     jlong H_i = 1;
       
    91     for (int i = 0; i < B; i++) {
       
    92       range += H_i;
       
    93       H_i *= H;
       
    94     }
       
    95     range *= L;
       
    96     range += H_i;
       
    97   }
       
    98   assert(range > 0);  // no useless codings, please
       
    99 
       
   100   int this_umax;
       
   101 
       
   102   // now, compute min and max
       
   103   if (range >= ((jlong)1 << 32)) {
       
   104     this_umax  = INT_MAX_VALUE;
       
   105     this->umin = INT_MIN_VALUE;
       
   106     this->max  = INT_MAX_VALUE;
       
   107     this->min  = INT_MIN_VALUE;
       
   108   } else {
       
   109     this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
       
   110     this->max = this_umax;
       
   111     this->min = this->umin = 0;
       
   112     if (S != 0 && range != 0) {
       
   113       int Smask = (1<<S)-1;
       
   114       jlong maxPosCode = range-1;
       
   115       jlong maxNegCode = range-1;
       
   116       while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
       
   117       while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
       
   118       int maxPos = decode_sign(S, (uint)maxPosCode);
       
   119       if (maxPos < 0)
       
   120         this->max = INT_MAX_VALUE;  // 32-bit wraparound
       
   121       else
       
   122         this->max = maxPos;
       
   123       if (maxNegCode < 0)
       
   124         this->min = 0;  // No negative codings at all.
       
   125       else
       
   126         this->min = decode_sign(S, (uint)maxNegCode);
       
   127     }
       
   128   }
       
   129 
       
   130   assert(!(isFullRange | isSigned | isSubrange)); // init
       
   131   if (min < 0)
       
   132     this->isSigned = true;
       
   133   if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
       
   134     this->isSubrange = true;
       
   135   if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
       
   136     this->isFullRange = true;
       
   137 
       
   138   // do this last, to reduce MT exposure (should have a membar too)
       
   139   this->umax = this_umax;
       
   140 
       
   141   return this;
       
   142 }
       
   143 
       
   144 coding* coding::findBySpec(int spec) {
       
   145   for (coding* scan = &basic_codings[0]; ; scan++) {
       
   146     if (scan->spec == spec)
       
   147       return scan->init();
       
   148     if (scan->spec == 0)
       
   149       break;
       
   150   }
       
   151   coding* ptr = NEW(coding, 1);
       
   152   CHECK_NULL_RETURN(ptr, 0);
       
   153   coding* c = ptr->initFrom(spec);
       
   154   if (c == null) {
       
   155     mtrace('f', ptr, 0);
       
   156     ::free(ptr);
       
   157   } else
       
   158     // else caller should free it...
       
   159     c->isMalloc = true;
       
   160   return c;
       
   161 }
       
   162 
       
   163 coding* coding::findBySpec(int B, int H, int S, int D) {
       
   164   if (B < 1 || B > B_MAX)  return null;
       
   165   if (H < 1 || H > 256)    return null;
       
   166   if (S < 0 || S > 2)      return null;
       
   167   if (D < 0 || D > 1)      return null;
       
   168   return findBySpec(CODING_SPEC(B, H, S, D));
       
   169 }
       
   170 
       
   171 void coding::free() {
       
   172   if (isMalloc) {
       
   173     mtrace('f', this, 0);
       
   174     ::free(this);
       
   175   }
       
   176 }
       
   177 
       
   178 void coding_method::reset(value_stream* state) {
       
   179   assert(state->rp == state->rplimit);  // not in mid-stream, please
       
   180   //assert(this == vs0.cm);
       
   181   state[0] = vs0;
       
   182   if (uValues != null) {
       
   183     uValues->reset(state->helper());
       
   184   }
       
   185 }
       
   186 
       
   187 maybe_inline
       
   188 uint coding::parse(byte* &rp, int B, int H) {
       
   189   int L = 256-H;
       
   190   byte* ptr = rp;
       
   191   // hand peel the i==0 part of the loop:
       
   192   uint b_i = *ptr++ & 0xFF;
       
   193   if (B == 1 || b_i < (uint)L)
       
   194     { rp = ptr; return b_i; }
       
   195   uint sum = b_i;
       
   196   uint H_i = H;
       
   197   assert(B <= B_MAX);
       
   198   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
       
   199     b_i = *ptr++ & 0xFF;
       
   200     sum += b_i * H_i;
       
   201     if (i == B || b_i < (uint)L)
       
   202       { rp = ptr; return sum; }
       
   203     H_i *= H;
       
   204   }
       
   205   assert(false);
       
   206   return 0;
       
   207 }
       
   208 
       
   209 maybe_inline
       
   210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
       
   211   assert(H == (1<<lgH));
       
   212   int L = 256-(1<<lgH);
       
   213   byte* ptr = rp;
       
   214   // hand peel the i==0 part of the loop:
       
   215   uint b_i = *ptr++ & 0xFF;
       
   216   if (B == 1 || b_i < (uint)L)
       
   217     { rp = ptr; return b_i; }
       
   218   uint sum = b_i;
       
   219   uint lg_H_i = lgH;
       
   220   assert(B <= B_MAX);
       
   221   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
       
   222     b_i = *ptr++ & 0xFF;
       
   223     sum += b_i << lg_H_i;
       
   224     if (i == B || b_i < (uint)L)
       
   225       { rp = ptr; return sum; }
       
   226     lg_H_i += lgH;
       
   227   }
       
   228   assert(false);
       
   229   return 0;
       
   230 }
       
   231 
       
   232 static const char ERB[] = "EOF reading band";
       
   233 
       
   234 maybe_inline
       
   235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
       
   236   if (N < 0) {
       
   237     abort("bad value count");
       
   238     return;
       
   239   }
       
   240   byte* ptr = rp;
       
   241   if (B == 1 || H == 256) {
       
   242     size_t len = (size_t)N*B;
       
   243     if (len / B != (size_t)N || ptr+len > limit) {
       
   244       abort(ERB);
       
   245       return;
       
   246     }
       
   247     rp = ptr+len;
       
   248     return;
       
   249   }
       
   250   // Note:  We assume rp has enough zero-padding.
       
   251   int L = 256-H;
       
   252   int n = B;
       
   253   while (N > 0) {
       
   254     ptr += 1;
       
   255     if (--n == 0) {
       
   256       // end of encoding at B bytes, regardless of byte value
       
   257     } else {
       
   258       int b = (ptr[-1] & 0xFF);
       
   259       if (b >= L) {
       
   260         // keep going, unless we find a byte < L
       
   261         continue;
       
   262       }
       
   263     }
       
   264     // found the last byte
       
   265     N -= 1;
       
   266     n = B;   // reset length counter
       
   267     // do an error check here
       
   268     if (ptr > limit) {
       
   269       abort(ERB);
       
   270       return;
       
   271     }
       
   272   }
       
   273   rp = ptr;
       
   274   return;
       
   275 }
       
   276 
       
   277 bool value_stream::hasHelper() {
       
   278   // If my coding method is a pop-style method,
       
   279   // then I need a second value stream to transmit
       
   280   // unfavored values.
       
   281   // This can be determined by examining fValues.
       
   282   return cm->fValues != null;
       
   283 }
       
   284 
       
   285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
       
   286   rp = rp_;
       
   287   rplimit = rplimit_;
       
   288   sum = 0;
       
   289   cm = null;  // no need in the simple case
       
   290   setCoding(defc);
       
   291 }
       
   292 
       
   293 void value_stream::setCoding(coding* defc) {
       
   294   if (defc == null) {
       
   295     unpack_abort("bad coding");
       
   296     defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
       
   297   }
       
   298 
       
   299   c = (*defc);
       
   300 
       
   301   // choose cmk
       
   302   cmk = cmk_ERROR;
       
   303   switch (c.spec) {
       
   304   case BYTE1_spec:      cmk = cmk_BYTE1;        break;
       
   305   case CHAR3_spec:      cmk = cmk_CHAR3;        break;
       
   306   case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
       
   307   case DELTA5_spec:     cmk = cmk_DELTA5;       break;
       
   308   case BCI5_spec:       cmk = cmk_BCI5;         break;
       
   309   case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
       
   310   default:
       
   311     if (c.D() == 0) {
       
   312       switch (c.S()) {
       
   313       case 0:  cmk = cmk_BHS0;  break;
       
   314       case 1:  cmk = cmk_BHS1;  break;
       
   315       default: cmk = cmk_BHS;   break;
       
   316       }
       
   317     } else {
       
   318       if (c.S() == 1) {
       
   319         if (c.isFullRange)   cmk = cmk_BHS1D1full;
       
   320         if (c.isSubrange)    cmk = cmk_BHS1D1sub;
       
   321       }
       
   322       if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
       
   323     }
       
   324   }
       
   325 }
       
   326 
       
   327 static maybe_inline
       
   328 int getPopValue(value_stream* self, uint uval) {
       
   329   if (uval > 0) {
       
   330     // note that the initial parse performed a range check
       
   331     assert(uval <= (uint)self->cm->fVlength);
       
   332     return self->cm->fValues[uval-1];
       
   333   } else {
       
   334     // take an unfavored value
       
   335     return self->helper()->getInt();
       
   336   }
       
   337 }
       
   338 
       
   339 maybe_inline
       
   340 int coding::sumInUnsignedRange(int x, int y) {
       
   341   assert(isSubrange);
       
   342   int range = (int)(umax+1);
       
   343   assert(range > 0);
       
   344   x += y;
       
   345   if (x != (int)((jlong)(x-y) + (jlong)y)) {
       
   346     // 32-bit overflow interferes with range reduction.
       
   347     // Back off from the overflow by adding a multiple of range:
       
   348     if (x < 0) {
       
   349       x -= range;
       
   350       assert(x >= 0);
       
   351     } else {
       
   352       x += range;
       
   353       assert(x < 0);
       
   354     }
       
   355   }
       
   356   if (x < 0) {
       
   357     x += range;
       
   358     if (x >= 0)  return x;
       
   359   } else if (x >= range) {
       
   360     x -= range;
       
   361     if (x < range)  return x;
       
   362   } else {
       
   363     // in range
       
   364     return x;
       
   365   }
       
   366   // do it the hard way
       
   367   x %= range;
       
   368   if (x < 0)  x += range;
       
   369   return x;
       
   370 }
       
   371 
       
   372 static maybe_inline
       
   373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
       
   374   assert((uint)(self->c.isSubrange) == (uint)isSubrange);
       
   375   assert(self->c.isSubrange | self->c.isFullRange);
       
   376   if (isSubrange)
       
   377     return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
       
   378   else
       
   379     return self->sum += (int) uval;
       
   380 }
       
   381 
       
   382 bool value_stream::hasValue() {
       
   383   if (rp < rplimit)      return true;
       
   384   if (cm == null)        return false;
       
   385   if (cm->next == null)  return false;
       
   386   cm->next->reset(this);
       
   387   return hasValue();
       
   388 }
       
   389 
       
   390 int value_stream::getInt() {
       
   391   if (rp >= rplimit) {
       
   392     // Advance to next coding segment.
       
   393     if (rp > rplimit || cm == null || cm->next == null) {
       
   394       // Must perform this check and throw an exception on bad input.
       
   395       unpack_abort(ERB);
       
   396       return 0;
       
   397     }
       
   398     cm->next->reset(this);
       
   399     return getInt();
       
   400   }
       
   401 
       
   402   CODING_PRIVATE(c.spec);
       
   403   uint uval;
       
   404   enum {
       
   405     B5 = 5,
       
   406     B3 = 3,
       
   407     H128 = 128,
       
   408     H64 = 64,
       
   409     H4 = 4
       
   410   };
       
   411   switch (cmk) {
       
   412   case cmk_BHS:
       
   413     assert(D == 0);
       
   414     uval = coding::parse(rp, B, H);
       
   415     if (S == 0)
       
   416       return (int) uval;
       
   417     return decode_sign(S, uval);
       
   418 
       
   419   case cmk_BHS0:
       
   420     assert(S == 0 && D == 0);
       
   421     uval = coding::parse(rp, B, H);
       
   422     return (int) uval;
       
   423 
       
   424   case cmk_BHS1:
       
   425     assert(S == 1 && D == 0);
       
   426     uval = coding::parse(rp, B, H);
       
   427     return DECODE_SIGN_S1(uval);
       
   428 
       
   429   case cmk_BYTE1:
       
   430     assert(c.spec == BYTE1_spec);
       
   431     assert(B == 1 && H == 256 && S == 0 && D == 0);
       
   432     return *rp++ & 0xFF;
       
   433 
       
   434   case cmk_CHAR3:
       
   435     assert(c.spec == CHAR3_spec);
       
   436     assert(B == B3 && H == H128 && S == 0 && D == 0);
       
   437     return coding::parse_lgH(rp, B3, H128, 7);
       
   438 
       
   439   case cmk_UNSIGNED5:
       
   440     assert(c.spec == UNSIGNED5_spec);
       
   441     assert(B == B5 && H == H64 && S == 0 && D == 0);
       
   442     return coding::parse_lgH(rp, B5, H64, 6);
       
   443 
       
   444   case cmk_BHSD1:
       
   445     assert(D == 1);
       
   446     uval = coding::parse(rp, B, H);
       
   447     if (S != 0)
       
   448       uval = (uint) decode_sign(S, uval);
       
   449     return getDeltaValue(this, uval, (bool)c.isSubrange);
       
   450 
       
   451   case cmk_BHS1D1full:
       
   452     assert(S == 1 && D == 1 && c.isFullRange);
       
   453     uval = coding::parse(rp, B, H);
       
   454     uval = (uint) DECODE_SIGN_S1(uval);
       
   455     return getDeltaValue(this, uval, false);
       
   456 
       
   457   case cmk_BHS1D1sub:
       
   458     assert(S == 1 && D == 1 && c.isSubrange);
       
   459     uval = coding::parse(rp, B, H);
       
   460     uval = (uint) DECODE_SIGN_S1(uval);
       
   461     return getDeltaValue(this, uval, true);
       
   462 
       
   463   case cmk_DELTA5:
       
   464     assert(c.spec == DELTA5_spec);
       
   465     assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
       
   466     uval = coding::parse_lgH(rp, B5, H64, 6);
       
   467     sum += DECODE_SIGN_S1(uval);
       
   468     return sum;
       
   469 
       
   470   case cmk_BCI5:
       
   471     assert(c.spec == BCI5_spec);
       
   472     assert(B == B5 && H == H4 && S == 0 && D == 0);
       
   473     return coding::parse_lgH(rp, B5, H4, 2);
       
   474 
       
   475   case cmk_BRANCH5:
       
   476     assert(c.spec == BRANCH5_spec);
       
   477     assert(B == B5 && H == H4 && S == 2 && D == 0);
       
   478     uval = coding::parse_lgH(rp, B5, H4, 2);
       
   479     return decode_sign(S, uval);
       
   480 
       
   481   case cmk_pop:
       
   482     uval = coding::parse(rp, B, H);
       
   483     if (S != 0) {
       
   484       uval = (uint) decode_sign(S, uval);
       
   485     }
       
   486     if (D != 0) {
       
   487       assert(c.isSubrange | c.isFullRange);
       
   488       if (c.isSubrange)
       
   489         sum = c.sumInUnsignedRange(sum, (int) uval);
       
   490       else
       
   491         sum += (int) uval;
       
   492       uval = (uint) sum;
       
   493     }
       
   494     return getPopValue(this, uval);
       
   495 
       
   496   case cmk_pop_BHS0:
       
   497     assert(S == 0 && D == 0);
       
   498     uval = coding::parse(rp, B, H);
       
   499     return getPopValue(this, uval);
       
   500 
       
   501   case cmk_pop_BYTE1:
       
   502     assert(c.spec == BYTE1_spec);
       
   503     assert(B == 1 && H == 256 && S == 0 && D == 0);
       
   504     return getPopValue(this, *rp++ & 0xFF);
       
   505 
       
   506   default:
       
   507     break;
       
   508   }
       
   509   assert(false);
       
   510   return 0;
       
   511 }
       
   512 
       
   513 static maybe_inline
       
   514 int moreCentral(int x, int y) {  // used to find end of Pop.{F}
       
   515   // Suggested implementation from the Pack200 specification:
       
   516   uint kx = (x >> 31) ^ (x << 1);
       
   517   uint ky = (y >> 31) ^ (y << 1);
       
   518   return (kx < ky? x: y);
       
   519 }
       
   520 //static maybe_inline
       
   521 //int moreCentral2(int x, int y, int min) {
       
   522 //  // Strict implementation of buggy 150.7 specification.
       
   523 //  // The bug is that the spec. says absolute-value ties are broken
       
   524 //  // in favor of positive numbers, but the suggested implementation
       
   525 //  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
       
   526 //  if ((x + y) != 0)
       
   527 //    return min;
       
   528 //  else
       
   529 //    // return the other value, which breaks a tie in the positive direction
       
   530 //    return (x > y)? x: y;
       
   531 //}
       
   532 
       
   533 static const byte* no_meta[] = {null};
       
   534 #define NO_META (*(byte**)no_meta)
       
   535 enum { POP_FAVORED_N = -2 };
       
   536 
       
   537 // mode bits
       
   538 #define DISABLE_RUN  1  // used immediately inside ACodee
       
   539 #define DISABLE_POP  2  // used recursively in all pop sub-bands
       
   540 
       
   541 // This function knows all about meta-coding.
       
   542 void coding_method::init(byte* &band_rp, byte* band_limit,
       
   543                          byte* &meta_rp, int mode,
       
   544                          coding* defc, int N,
       
   545                          intlist* valueSink) {
       
   546   assert(N != 0);
       
   547 
       
   548   assert(u != null);  // must be pre-initialized
       
   549   //if (u == null)  u = unpacker::current();  // expensive
       
   550 
       
   551   int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
       
   552   coding* foundc = null;
       
   553   coding* to_free = null;
       
   554 
       
   555   if (op == _meta_default) {
       
   556     foundc = defc;
       
   557     // and fall through
       
   558 
       
   559   } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
       
   560     foundc = coding::findByIndex(op);
       
   561     // and fall through
       
   562 
       
   563   } else if (op == _meta_arb) {
       
   564     int args = (*meta_rp++ & 0xFF);
       
   565     // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
       
   566     int D = ((args >> 0) & 1);
       
   567     int S = ((args >> 1) & 3);
       
   568     int B = ((args >> 3) & -1) + 1;
       
   569     // & (H[1..256]-1)
       
   570     int H = (*meta_rp++ & 0xFF) + 1;
       
   571     foundc = coding::findBySpec(B, H, S, D);
       
   572     to_free = foundc;  // findBySpec may dynamically allocate
       
   573     if (foundc == null) {
       
   574       abort("illegal arb. coding");
       
   575       return;
       
   576     }
       
   577     // and fall through
       
   578 
       
   579   } else if (op >= _meta_run && op < _meta_pop) {
       
   580     int args = (op - _meta_run);
       
   581     // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
       
   582     int KX     = ((args >> 0) & 3);
       
   583     int KBFlag = ((args >> 2) & 1);
       
   584     int ABDef  = ((args >> 3) & -1);
       
   585     assert(ABDef <= 2);
       
   586     // & KB: one of [0..255] if KBFlag=1
       
   587     int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
       
   588     int K      = (KB+1) << (KX * 4);
       
   589     int N2 = (N >= 0) ? N-K : N;
       
   590     if (N == 0 || (N2 <= 0 && N2 != N)) {
       
   591       abort("illegal run encoding");
       
   592       return;
       
   593     }
       
   594     if ((mode & DISABLE_RUN) != 0) {
       
   595       abort("illegal nested run encoding");
       
   596       return;
       
   597     }
       
   598 
       
   599     // & Enc{ ACode } if ADef=0  (ABDef != 1)
       
   600     // No direct nesting of 'run' in ACode, but in BCode it's OK.
       
   601     int disRun = mode | DISABLE_RUN;
       
   602     if (ABDef == 1) {
       
   603       this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
       
   604     } else {
       
   605       this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
       
   606     }
       
   607     CHECK;
       
   608 
       
   609     // & Enc{ BCode } if BDef=0  (ABDef != 2)
       
   610     coding_method* tail = U_NEW(coding_method, 1);
       
   611     CHECK_NULL(tail);
       
   612     tail->u = u;
       
   613 
       
   614     // The 'run' codings may be nested indirectly via 'pop' codings.
       
   615     // This means that this->next may already be filled in, if
       
   616     // ACode was of type 'pop' with a 'run' token coding.
       
   617     // No problem:  Just chain the upcoming BCode onto the end.
       
   618     for (coding_method* self = this; ; self = self->next) {
       
   619       if (self->next == null) {
       
   620         self->next = tail;
       
   621         break;
       
   622       }
       
   623     }
       
   624 
       
   625     if (ABDef == 2) {
       
   626       tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
       
   627     } else {
       
   628       tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
       
   629     }
       
   630     // Note:  The preceding calls to init should be tail-recursive.
       
   631 
       
   632     return;  // done; no falling through
       
   633 
       
   634   } else if (op >= _meta_pop && op < _meta_limit) {
       
   635     int args = (op - _meta_pop);
       
   636     // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
       
   637     int FDef  = ((args >> 0) & 1);
       
   638     int UDef  = ((args >> 1) & 1);
       
   639     int TDefL = ((args >> 2) & -1);
       
   640     assert(TDefL <= 11);
       
   641     int TDef  = (TDefL > 0);
       
   642     int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
       
   643     int TH    = (256-TL);
       
   644     if (N <= 0) {
       
   645       abort("illegal pop encoding");
       
   646       return;
       
   647     }
       
   648     if ((mode & DISABLE_POP) != 0) {
       
   649       abort("illegal nested pop encoding");
       
   650       return;
       
   651     }
       
   652 
       
   653     // No indirect nesting of 'pop', but 'run' is OK.
       
   654     int disPop = DISABLE_POP;
       
   655 
       
   656     // & Enc{ FCode } if FDef=0
       
   657     int FN = POP_FAVORED_N;
       
   658     assert(valueSink == null);
       
   659     intlist fValueSink; fValueSink.init();
       
   660     coding_method fval;
       
   661     BYTES_OF(fval).clear(); fval.u = u;
       
   662     if (FDef != 0) {
       
   663       fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
       
   664     } else {
       
   665       fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
       
   666     }
       
   667     bytes fvbuf;
       
   668     fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
       
   669     fVlength = fValueSink.length();  // i.e., the parameter K
       
   670     fValueSink.free();
       
   671     CHECK;
       
   672 
       
   673     // Skip the first {F} run in all subsequent passes.
       
   674     // The next call to this->init(...) will set vs0.rp to point after the {F}.
       
   675 
       
   676     // & Enc{ TCode } if TDef=0  (TDefL==0)
       
   677     if (TDef != 0) {
       
   678       coding* tcode = coding::findBySpec(1, 256);  // BYTE1
       
   679       // find the most narrowly sufficient code:
       
   680       for (int B = 2; B <= B_MAX; B++) {
       
   681         if (fVlength <= tcode->umax)  break;  // found it
       
   682         tcode->free();
       
   683         tcode = coding::findBySpec(B, TH);
       
   684         CHECK_NULL(tcode);
       
   685       }
       
   686       if (!(fVlength <= tcode->umax)) {
       
   687         abort("pop.L value too small");
       
   688         return;
       
   689       }
       
   690       this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
       
   691       tcode->free();
       
   692     } else {
       
   693       this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
       
   694     }
       
   695     CHECK;
       
   696 
       
   697     // Count the number of zero tokens right now.
       
   698     // Also verify that they are in bounds.
       
   699     int UN = 0;   // one {U} for each zero in {T}
       
   700     value_stream vs = vs0;
       
   701     for (int i = 0; i < N; i++) {
       
   702       uint val = vs.getInt();
       
   703       if (val == 0)  UN += 1;
       
   704       if (!(val <= (uint)fVlength)) {
       
   705         abort("pop token out of range");
       
   706         return;
       
   707       }
       
   708     }
       
   709     vs.done();
       
   710 
       
   711     // & Enc{ UCode } if UDef=0
       
   712     if (UN != 0) {
       
   713       uValues = U_NEW(coding_method, 1);
       
   714       CHECK_NULL(uValues);
       
   715       uValues->u = u;
       
   716       if (UDef != 0) {
       
   717         uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
       
   718       } else {
       
   719         uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
       
   720       }
       
   721     } else {
       
   722       if (UDef == 0) {
       
   723         int uop = (*meta_rp++ & 0xFF);
       
   724         if (uop > _meta_canon_max)
       
   725           // %%% Spec. requires the more strict (uop != _meta_default).
       
   726           abort("bad meta-coding for empty pop/U");
       
   727       }
       
   728     }
       
   729 
       
   730     // Bug fix for 6259542
       
   731     // Last of all, adjust vs0.cmk to the 'pop' flavor
       
   732     for (coding_method* self = this; self != null; self = self->next) {
       
   733         coding_method_kind cmk2 = cmk_pop;
       
   734         switch (self->vs0.cmk) {
       
   735         case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
       
   736         case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
       
   737         default: break;
       
   738         }
       
   739         self->vs0.cmk = cmk2;
       
   740         if (self != this) {
       
   741           assert(self->fValues == null); // no double init
       
   742           self->fValues  = this->fValues;
       
   743           self->fVlength = this->fVlength;
       
   744           assert(self->uValues == null); // must stay null
       
   745         }
       
   746     }
       
   747 
       
   748     return;  // done; no falling through
       
   749 
       
   750   } else {
       
   751     abort("bad meta-coding");
       
   752     return;
       
   753   }
       
   754 
       
   755   // Common code here skips a series of values with one coding.
       
   756   assert(foundc != null);
       
   757 
       
   758   assert(vs0.cmk == cmk_ERROR);  // no garbage, please
       
   759   assert(vs0.rp == null);  // no garbage, please
       
   760   assert(vs0.rplimit == null);  // no garbage, please
       
   761   assert(vs0.sum == 0);  // no garbage, please
       
   762 
       
   763   vs0.init(band_rp, band_limit, foundc);
       
   764 
       
   765   // Done with foundc.  Free if necessary.
       
   766   if (to_free != null) {
       
   767     to_free->free();
       
   768     to_free = null;
       
   769   }
       
   770   foundc = null;
       
   771 
       
   772   coding& c = vs0.c;
       
   773   CODING_PRIVATE(c.spec);
       
   774   // assert sane N
       
   775   assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
       
   776 
       
   777   // Look at the values, or at least skip over them quickly.
       
   778   if (valueSink == null) {
       
   779     // Skip and ignore values in the first pass.
       
   780     c.parseMultiple(band_rp, N, band_limit, B, H);
       
   781   } else if (N >= 0) {
       
   782     // Pop coding, {F} sequence, initial run of values...
       
   783     assert((mode & DISABLE_POP) != 0);
       
   784     value_stream vs = vs0;
       
   785     for (int n = 0; n < N; n++) {
       
   786       int val = vs.getInt();
       
   787       valueSink->add(val);
       
   788     }
       
   789     band_rp = vs.rp;
       
   790   } else {
       
   791     // Pop coding, {F} sequence, final run of values...
       
   792     assert((mode & DISABLE_POP) != 0);
       
   793     assert(N == POP_FAVORED_N);
       
   794     int min = INT_MIN_VALUE;  // farthest from the center
       
   795     // min2 is based on the buggy specification of centrality in version 150.7
       
   796     // no known implementations transmit this value, but just in case...
       
   797     //int min2 = INT_MIN_VALUE;
       
   798     int last = 0;
       
   799     // if there were initial runs, find the potential sentinels in them:
       
   800     for (int i = 0; i < valueSink->length(); i++) {
       
   801       last = valueSink->get(i);
       
   802       min = moreCentral(min, last);
       
   803       //min2 = moreCentral2(min2, last, min);
       
   804     }
       
   805     value_stream vs = vs0;
       
   806     for (;;) {
       
   807       int val = vs.getInt();
       
   808       if (valueSink->length() > 0 &&
       
   809           (val == last || val == min)) //|| val == min2
       
   810         break;
       
   811       valueSink->add(val);
       
   812       CHECK;
       
   813       last = val;
       
   814       min = moreCentral(min, last);
       
   815       //min2 = moreCentral2(min2, last, min);
       
   816     }
       
   817     band_rp = vs.rp;
       
   818   }
       
   819   CHECK;
       
   820 
       
   821   // Get an accurate upper limit now.
       
   822   vs0.rplimit = band_rp;
       
   823   vs0.cm = this;
       
   824 
       
   825   return; // success
       
   826 }
       
   827 
       
   828 coding basic_codings[] = {
       
   829   // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
       
   830   CODING_INIT(3,128,0,0),
       
   831 
       
   832   // Fixed-length codings:
       
   833   CODING_INIT(1,256,0,0),
       
   834   CODING_INIT(1,256,1,0),
       
   835   CODING_INIT(1,256,0,1),
       
   836   CODING_INIT(1,256,1,1),
       
   837   CODING_INIT(2,256,0,0),
       
   838   CODING_INIT(2,256,1,0),
       
   839   CODING_INIT(2,256,0,1),
       
   840   CODING_INIT(2,256,1,1),
       
   841   CODING_INIT(3,256,0,0),
       
   842   CODING_INIT(3,256,1,0),
       
   843   CODING_INIT(3,256,0,1),
       
   844   CODING_INIT(3,256,1,1),
       
   845   CODING_INIT(4,256,0,0),
       
   846   CODING_INIT(4,256,1,0),
       
   847   CODING_INIT(4,256,0,1),
       
   848   CODING_INIT(4,256,1,1),
       
   849 
       
   850   // Full-range variable-length codings:
       
   851   CODING_INIT(5,  4,0,0),
       
   852   CODING_INIT(5,  4,1,0),
       
   853   CODING_INIT(5,  4,2,0),
       
   854   CODING_INIT(5, 16,0,0),
       
   855   CODING_INIT(5, 16,1,0),
       
   856   CODING_INIT(5, 16,2,0),
       
   857   CODING_INIT(5, 32,0,0),
       
   858   CODING_INIT(5, 32,1,0),
       
   859   CODING_INIT(5, 32,2,0),
       
   860   CODING_INIT(5, 64,0,0),
       
   861   CODING_INIT(5, 64,1,0),
       
   862   CODING_INIT(5, 64,2,0),
       
   863   CODING_INIT(5,128,0,0),
       
   864   CODING_INIT(5,128,1,0),
       
   865   CODING_INIT(5,128,2,0),
       
   866 
       
   867   CODING_INIT(5,  4,0,1),
       
   868   CODING_INIT(5,  4,1,1),
       
   869   CODING_INIT(5,  4,2,1),
       
   870   CODING_INIT(5, 16,0,1),
       
   871   CODING_INIT(5, 16,1,1),
       
   872   CODING_INIT(5, 16,2,1),
       
   873   CODING_INIT(5, 32,0,1),
       
   874   CODING_INIT(5, 32,1,1),
       
   875   CODING_INIT(5, 32,2,1),
       
   876   CODING_INIT(5, 64,0,1),
       
   877   CODING_INIT(5, 64,1,1),
       
   878   CODING_INIT(5, 64,2,1),
       
   879   CODING_INIT(5,128,0,1),
       
   880   CODING_INIT(5,128,1,1),
       
   881   CODING_INIT(5,128,2,1),
       
   882 
       
   883   // Variable length subrange codings:
       
   884   CODING_INIT(2,192,0,0),
       
   885   CODING_INIT(2,224,0,0),
       
   886   CODING_INIT(2,240,0,0),
       
   887   CODING_INIT(2,248,0,0),
       
   888   CODING_INIT(2,252,0,0),
       
   889 
       
   890   CODING_INIT(2,  8,0,1),
       
   891   CODING_INIT(2,  8,1,1),
       
   892   CODING_INIT(2, 16,0,1),
       
   893   CODING_INIT(2, 16,1,1),
       
   894   CODING_INIT(2, 32,0,1),
       
   895   CODING_INIT(2, 32,1,1),
       
   896   CODING_INIT(2, 64,0,1),
       
   897   CODING_INIT(2, 64,1,1),
       
   898   CODING_INIT(2,128,0,1),
       
   899   CODING_INIT(2,128,1,1),
       
   900   CODING_INIT(2,192,0,1),
       
   901   CODING_INIT(2,192,1,1),
       
   902   CODING_INIT(2,224,0,1),
       
   903   CODING_INIT(2,224,1,1),
       
   904   CODING_INIT(2,240,0,1),
       
   905   CODING_INIT(2,240,1,1),
       
   906   CODING_INIT(2,248,0,1),
       
   907   CODING_INIT(2,248,1,1),
       
   908 
       
   909   CODING_INIT(3,192,0,0),
       
   910   CODING_INIT(3,224,0,0),
       
   911   CODING_INIT(3,240,0,0),
       
   912   CODING_INIT(3,248,0,0),
       
   913   CODING_INIT(3,252,0,0),
       
   914 
       
   915   CODING_INIT(3,  8,0,1),
       
   916   CODING_INIT(3,  8,1,1),
       
   917   CODING_INIT(3, 16,0,1),
       
   918   CODING_INIT(3, 16,1,1),
       
   919   CODING_INIT(3, 32,0,1),
       
   920   CODING_INIT(3, 32,1,1),
       
   921   CODING_INIT(3, 64,0,1),
       
   922   CODING_INIT(3, 64,1,1),
       
   923   CODING_INIT(3,128,0,1),
       
   924   CODING_INIT(3,128,1,1),
       
   925   CODING_INIT(3,192,0,1),
       
   926   CODING_INIT(3,192,1,1),
       
   927   CODING_INIT(3,224,0,1),
       
   928   CODING_INIT(3,224,1,1),
       
   929   CODING_INIT(3,240,0,1),
       
   930   CODING_INIT(3,240,1,1),
       
   931   CODING_INIT(3,248,0,1),
       
   932   CODING_INIT(3,248,1,1),
       
   933 
       
   934   CODING_INIT(4,192,0,0),
       
   935   CODING_INIT(4,224,0,0),
       
   936   CODING_INIT(4,240,0,0),
       
   937   CODING_INIT(4,248,0,0),
       
   938   CODING_INIT(4,252,0,0),
       
   939 
       
   940   CODING_INIT(4,  8,0,1),
       
   941   CODING_INIT(4,  8,1,1),
       
   942   CODING_INIT(4, 16,0,1),
       
   943   CODING_INIT(4, 16,1,1),
       
   944   CODING_INIT(4, 32,0,1),
       
   945   CODING_INIT(4, 32,1,1),
       
   946   CODING_INIT(4, 64,0,1),
       
   947   CODING_INIT(4, 64,1,1),
       
   948   CODING_INIT(4,128,0,1),
       
   949   CODING_INIT(4,128,1,1),
       
   950   CODING_INIT(4,192,0,1),
       
   951   CODING_INIT(4,192,1,1),
       
   952   CODING_INIT(4,224,0,1),
       
   953   CODING_INIT(4,224,1,1),
       
   954   CODING_INIT(4,240,0,1),
       
   955   CODING_INIT(4,240,1,1),
       
   956   CODING_INIT(4,248,0,1),
       
   957   CODING_INIT(4,248,1,1),
       
   958   CODING_INIT(0,0,0,0)
       
   959 };
       
   960 #define BASIC_INDEX_LIMIT \
       
   961         (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
       
   962 
       
   963 coding* coding::findByIndex(int idx) {
       
   964 #ifndef PRODUCT
       
   965   /* Tricky assert here, constants and gcc complains about it without local. */
       
   966   int index_limit = BASIC_INDEX_LIMIT;
       
   967   assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
       
   968 #endif
       
   969   if (idx >= _meta_canon_min && idx <= _meta_canon_max)
       
   970     return basic_codings[idx].init();
       
   971   else
       
   972     return null;
       
   973 }
       
   974 
       
   975 #ifndef PRODUCT
       
   976 const char* coding::string() {
       
   977   CODING_PRIVATE(spec);
       
   978   bytes buf;
       
   979   buf.malloc(100);
       
   980   char maxS[20], minS[20];
       
   981   sprintf(maxS, "%d", max);
       
   982   sprintf(minS, "%d", min);
       
   983   if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
       
   984   if (min == INT_MIN_VALUE)  strcpy(minS, "min");
       
   985   sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
       
   986           B,H,S,D,L,minS,maxS);
       
   987   return (const char*) buf.ptr;
       
   988 }
       
   989 #endif