src/java.net.http/share/classes/jdk/internal/net/http/hpack/QuickHuffman.java
changeset 49944 4690a2871b44
child 50681 4254bed3c09d
child 56623 1d020b5d73f1
equal deleted inserted replaced
49943:8e1ed2a15845 49944:4690a2871b44
       
     1 /*
       
     2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package jdk.internal.net.http.hpack;
       
    27 
       
    28 import java.io.IOException;
       
    29 import java.nio.ByteBuffer;
       
    30 import java.util.List;
       
    31 import java.util.Objects;
       
    32 
       
    33 public final class QuickHuffman {
       
    34 
       
    35     /*
       
    36      * Huffman codes for encoding.
       
    37      *
       
    38      * EOS will never be matched, since there is no symbol for it, thus no need
       
    39      * to store it in the array. Thus, the length of the array is 256, not 257.
       
    40      * Code information for each character is encoded as follows:
       
    41      *
       
    42      *     MSB                             LSB
       
    43      *     +----------------+----------------+
       
    44      *     |~code           |         length~|
       
    45      *     +----------------+----------------+
       
    46      *     |<----- 32 ----->|<----- 32 ----->|
       
    47      *     |<------------- 64 -------------->|
       
    48      *
       
    49      * The leftmost 32 bits hold the code value. This value is aligned left
       
    50      * (or MSB). The rightmost 32 bits hold the length of the code value.
       
    51      * This length is aligned right (or LSB).
       
    52      */
       
    53     private static final long[] codes = new long[256];
       
    54 
       
    55     private static long codeValueOf(char c) {
       
    56         return codes[c] & 0xffffffff00000000L;
       
    57     }
       
    58 
       
    59     private static long codeLengthOf(char c) {
       
    60         return codes[c] & 0x00000000ffffffffL;
       
    61     }
       
    62 
       
    63     private static final int EOS_LENGTH = 30;
       
    64     private static final int EOS_LSB = 0x3fffffff;
       
    65     private static final long EOS_MSB = EOS_LSB << (64 - EOS_LENGTH);
       
    66 
       
    67     /*
       
    68      * Huffman codes for decoding.
       
    69      *
       
    70      * Root node contains 257 descendant nodes, including EOS.
       
    71      */
       
    72     private static final Node root;
       
    73 
       
    74     static {
       
    75         TemporaryNode tmpRoot = new TemporaryNode();
       
    76         addChar(tmpRoot,   0, 0x1ff8,     13);
       
    77         addChar(tmpRoot,   1, 0x7fffd8,   23);
       
    78         addChar(tmpRoot,   2, 0xfffffe2,  28);
       
    79         addChar(tmpRoot,   3, 0xfffffe3,  28);
       
    80         addChar(tmpRoot,   4, 0xfffffe4,  28);
       
    81         addChar(tmpRoot,   5, 0xfffffe5,  28);
       
    82         addChar(tmpRoot,   6, 0xfffffe6,  28);
       
    83         addChar(tmpRoot,   7, 0xfffffe7,  28);
       
    84         addChar(tmpRoot,   8, 0xfffffe8,  28);
       
    85         addChar(tmpRoot,   9, 0xffffea,   24);
       
    86         addChar(tmpRoot,  10, 0x3ffffffc, 30);
       
    87         addChar(tmpRoot,  11, 0xfffffe9,  28);
       
    88         addChar(tmpRoot,  12, 0xfffffea,  28);
       
    89         addChar(tmpRoot,  13, 0x3ffffffd, 30);
       
    90         addChar(tmpRoot,  14, 0xfffffeb,  28);
       
    91         addChar(tmpRoot,  15, 0xfffffec,  28);
       
    92         addChar(tmpRoot,  16, 0xfffffed,  28);
       
    93         addChar(tmpRoot,  17, 0xfffffee,  28);
       
    94         addChar(tmpRoot,  18, 0xfffffef,  28);
       
    95         addChar(tmpRoot,  19, 0xffffff0,  28);
       
    96         addChar(tmpRoot,  20, 0xffffff1,  28);
       
    97         addChar(tmpRoot,  21, 0xffffff2,  28);
       
    98         addChar(tmpRoot,  22, 0x3ffffffe, 30);
       
    99         addChar(tmpRoot,  23, 0xffffff3,  28);
       
   100         addChar(tmpRoot,  24, 0xffffff4,  28);
       
   101         addChar(tmpRoot,  25, 0xffffff5,  28);
       
   102         addChar(tmpRoot,  26, 0xffffff6,  28);
       
   103         addChar(tmpRoot,  27, 0xffffff7,  28);
       
   104         addChar(tmpRoot,  28, 0xffffff8,  28);
       
   105         addChar(tmpRoot,  29, 0xffffff9,  28);
       
   106         addChar(tmpRoot,  30, 0xffffffa,  28);
       
   107         addChar(tmpRoot,  31, 0xffffffb,  28);
       
   108         addChar(tmpRoot,  32, 0x14,        6);
       
   109         addChar(tmpRoot,  33, 0x3f8,      10);
       
   110         addChar(tmpRoot,  34, 0x3f9,      10);
       
   111         addChar(tmpRoot,  35, 0xffa,      12);
       
   112         addChar(tmpRoot,  36, 0x1ff9,     13);
       
   113         addChar(tmpRoot,  37, 0x15,        6);
       
   114         addChar(tmpRoot,  38, 0xf8,        8);
       
   115         addChar(tmpRoot,  39, 0x7fa,      11);
       
   116         addChar(tmpRoot,  40, 0x3fa,      10);
       
   117         addChar(tmpRoot,  41, 0x3fb,      10);
       
   118         addChar(tmpRoot,  42, 0xf9,        8);
       
   119         addChar(tmpRoot,  43, 0x7fb,      11);
       
   120         addChar(tmpRoot,  44, 0xfa,        8);
       
   121         addChar(tmpRoot,  45, 0x16,        6);
       
   122         addChar(tmpRoot,  46, 0x17,        6);
       
   123         addChar(tmpRoot,  47, 0x18,        6);
       
   124         addChar(tmpRoot,  48, 0x0,         5);
       
   125         addChar(tmpRoot,  49, 0x1,         5);
       
   126         addChar(tmpRoot,  50, 0x2,         5);
       
   127         addChar(tmpRoot,  51, 0x19,        6);
       
   128         addChar(tmpRoot,  52, 0x1a,        6);
       
   129         addChar(tmpRoot,  53, 0x1b,        6);
       
   130         addChar(tmpRoot,  54, 0x1c,        6);
       
   131         addChar(tmpRoot,  55, 0x1d,        6);
       
   132         addChar(tmpRoot,  56, 0x1e,        6);
       
   133         addChar(tmpRoot,  57, 0x1f,        6);
       
   134         addChar(tmpRoot,  58, 0x5c,        7);
       
   135         addChar(tmpRoot,  59, 0xfb,        8);
       
   136         addChar(tmpRoot,  60, 0x7ffc,     15);
       
   137         addChar(tmpRoot,  61, 0x20,        6);
       
   138         addChar(tmpRoot,  62, 0xffb,      12);
       
   139         addChar(tmpRoot,  63, 0x3fc,      10);
       
   140         addChar(tmpRoot,  64, 0x1ffa,     13);
       
   141         addChar(tmpRoot,  65, 0x21,        6);
       
   142         addChar(tmpRoot,  66, 0x5d,        7);
       
   143         addChar(tmpRoot,  67, 0x5e,        7);
       
   144         addChar(tmpRoot,  68, 0x5f,        7);
       
   145         addChar(tmpRoot,  69, 0x60,        7);
       
   146         addChar(tmpRoot,  70, 0x61,        7);
       
   147         addChar(tmpRoot,  71, 0x62,        7);
       
   148         addChar(tmpRoot,  72, 0x63,        7);
       
   149         addChar(tmpRoot,  73, 0x64,        7);
       
   150         addChar(tmpRoot,  74, 0x65,        7);
       
   151         addChar(tmpRoot,  75, 0x66,        7);
       
   152         addChar(tmpRoot,  76, 0x67,        7);
       
   153         addChar(tmpRoot,  77, 0x68,        7);
       
   154         addChar(tmpRoot,  78, 0x69,        7);
       
   155         addChar(tmpRoot,  79, 0x6a,        7);
       
   156         addChar(tmpRoot,  80, 0x6b,        7);
       
   157         addChar(tmpRoot,  81, 0x6c,        7);
       
   158         addChar(tmpRoot,  82, 0x6d,        7);
       
   159         addChar(tmpRoot,  83, 0x6e,        7);
       
   160         addChar(tmpRoot,  84, 0x6f,        7);
       
   161         addChar(tmpRoot,  85, 0x70,        7);
       
   162         addChar(tmpRoot,  86, 0x71,        7);
       
   163         addChar(tmpRoot,  87, 0x72,        7);
       
   164         addChar(tmpRoot,  88, 0xfc,        8);
       
   165         addChar(tmpRoot,  89, 0x73,        7);
       
   166         addChar(tmpRoot,  90, 0xfd,        8);
       
   167         addChar(tmpRoot,  91, 0x1ffb,     13);
       
   168         addChar(tmpRoot,  92, 0x7fff0,    19);
       
   169         addChar(tmpRoot,  93, 0x1ffc,     13);
       
   170         addChar(tmpRoot,  94, 0x3ffc,     14);
       
   171         addChar(tmpRoot,  95, 0x22,        6);
       
   172         addChar(tmpRoot,  96, 0x7ffd,     15);
       
   173         addChar(tmpRoot,  97, 0x3,         5);
       
   174         addChar(tmpRoot,  98, 0x23,        6);
       
   175         addChar(tmpRoot,  99, 0x4,         5);
       
   176         addChar(tmpRoot, 100, 0x24,        6);
       
   177         addChar(tmpRoot, 101, 0x5,         5);
       
   178         addChar(tmpRoot, 102, 0x25,        6);
       
   179         addChar(tmpRoot, 103, 0x26,        6);
       
   180         addChar(tmpRoot, 104, 0x27,        6);
       
   181         addChar(tmpRoot, 105, 0x6,         5);
       
   182         addChar(tmpRoot, 106, 0x74,        7);
       
   183         addChar(tmpRoot, 107, 0x75,        7);
       
   184         addChar(tmpRoot, 108, 0x28,        6);
       
   185         addChar(tmpRoot, 109, 0x29,        6);
       
   186         addChar(tmpRoot, 110, 0x2a,        6);
       
   187         addChar(tmpRoot, 111, 0x7,         5);
       
   188         addChar(tmpRoot, 112, 0x2b,        6);
       
   189         addChar(tmpRoot, 113, 0x76,        7);
       
   190         addChar(tmpRoot, 114, 0x2c,        6);
       
   191         addChar(tmpRoot, 115, 0x8,         5);
       
   192         addChar(tmpRoot, 116, 0x9,         5);
       
   193         addChar(tmpRoot, 117, 0x2d,        6);
       
   194         addChar(tmpRoot, 118, 0x77,        7);
       
   195         addChar(tmpRoot, 119, 0x78,        7);
       
   196         addChar(tmpRoot, 120, 0x79,        7);
       
   197         addChar(tmpRoot, 121, 0x7a,        7);
       
   198         addChar(tmpRoot, 122, 0x7b,        7);
       
   199         addChar(tmpRoot, 123, 0x7ffe,     15);
       
   200         addChar(tmpRoot, 124, 0x7fc,      11);
       
   201         addChar(tmpRoot, 125, 0x3ffd,     14);
       
   202         addChar(tmpRoot, 126, 0x1ffd,     13);
       
   203         addChar(tmpRoot, 127, 0xffffffc,  28);
       
   204         addChar(tmpRoot, 128, 0xfffe6,    20);
       
   205         addChar(tmpRoot, 129, 0x3fffd2,   22);
       
   206         addChar(tmpRoot, 130, 0xfffe7,    20);
       
   207         addChar(tmpRoot, 131, 0xfffe8,    20);
       
   208         addChar(tmpRoot, 132, 0x3fffd3,   22);
       
   209         addChar(tmpRoot, 133, 0x3fffd4,   22);
       
   210         addChar(tmpRoot, 134, 0x3fffd5,   22);
       
   211         addChar(tmpRoot, 135, 0x7fffd9,   23);
       
   212         addChar(tmpRoot, 136, 0x3fffd6,   22);
       
   213         addChar(tmpRoot, 137, 0x7fffda,   23);
       
   214         addChar(tmpRoot, 138, 0x7fffdb,   23);
       
   215         addChar(tmpRoot, 139, 0x7fffdc,   23);
       
   216         addChar(tmpRoot, 140, 0x7fffdd,   23);
       
   217         addChar(tmpRoot, 141, 0x7fffde,   23);
       
   218         addChar(tmpRoot, 142, 0xffffeb,   24);
       
   219         addChar(tmpRoot, 143, 0x7fffdf,   23);
       
   220         addChar(tmpRoot, 144, 0xffffec,   24);
       
   221         addChar(tmpRoot, 145, 0xffffed,   24);
       
   222         addChar(tmpRoot, 146, 0x3fffd7,   22);
       
   223         addChar(tmpRoot, 147, 0x7fffe0,   23);
       
   224         addChar(tmpRoot, 148, 0xffffee,   24);
       
   225         addChar(tmpRoot, 149, 0x7fffe1,   23);
       
   226         addChar(tmpRoot, 150, 0x7fffe2,   23);
       
   227         addChar(tmpRoot, 151, 0x7fffe3,   23);
       
   228         addChar(tmpRoot, 152, 0x7fffe4,   23);
       
   229         addChar(tmpRoot, 153, 0x1fffdc,   21);
       
   230         addChar(tmpRoot, 154, 0x3fffd8,   22);
       
   231         addChar(tmpRoot, 155, 0x7fffe5,   23);
       
   232         addChar(tmpRoot, 156, 0x3fffd9,   22);
       
   233         addChar(tmpRoot, 157, 0x7fffe6,   23);
       
   234         addChar(tmpRoot, 158, 0x7fffe7,   23);
       
   235         addChar(tmpRoot, 159, 0xffffef,   24);
       
   236         addChar(tmpRoot, 160, 0x3fffda,   22);
       
   237         addChar(tmpRoot, 161, 0x1fffdd,   21);
       
   238         addChar(tmpRoot, 162, 0xfffe9,    20);
       
   239         addChar(tmpRoot, 163, 0x3fffdb,   22);
       
   240         addChar(tmpRoot, 164, 0x3fffdc,   22);
       
   241         addChar(tmpRoot, 165, 0x7fffe8,   23);
       
   242         addChar(tmpRoot, 166, 0x7fffe9,   23);
       
   243         addChar(tmpRoot, 167, 0x1fffde,   21);
       
   244         addChar(tmpRoot, 168, 0x7fffea,   23);
       
   245         addChar(tmpRoot, 169, 0x3fffdd,   22);
       
   246         addChar(tmpRoot, 170, 0x3fffde,   22);
       
   247         addChar(tmpRoot, 171, 0xfffff0,   24);
       
   248         addChar(tmpRoot, 172, 0x1fffdf,   21);
       
   249         addChar(tmpRoot, 173, 0x3fffdf,   22);
       
   250         addChar(tmpRoot, 174, 0x7fffeb,   23);
       
   251         addChar(tmpRoot, 175, 0x7fffec,   23);
       
   252         addChar(tmpRoot, 176, 0x1fffe0,   21);
       
   253         addChar(tmpRoot, 177, 0x1fffe1,   21);
       
   254         addChar(tmpRoot, 178, 0x3fffe0,   22);
       
   255         addChar(tmpRoot, 179, 0x1fffe2,   21);
       
   256         addChar(tmpRoot, 180, 0x7fffed,   23);
       
   257         addChar(tmpRoot, 181, 0x3fffe1,   22);
       
   258         addChar(tmpRoot, 182, 0x7fffee,   23);
       
   259         addChar(tmpRoot, 183, 0x7fffef,   23);
       
   260         addChar(tmpRoot, 184, 0xfffea,    20);
       
   261         addChar(tmpRoot, 185, 0x3fffe2,   22);
       
   262         addChar(tmpRoot, 186, 0x3fffe3,   22);
       
   263         addChar(tmpRoot, 187, 0x3fffe4,   22);
       
   264         addChar(tmpRoot, 188, 0x7ffff0,   23);
       
   265         addChar(tmpRoot, 189, 0x3fffe5,   22);
       
   266         addChar(tmpRoot, 190, 0x3fffe6,   22);
       
   267         addChar(tmpRoot, 191, 0x7ffff1,   23);
       
   268         addChar(tmpRoot, 192, 0x3ffffe0,  26);
       
   269         addChar(tmpRoot, 193, 0x3ffffe1,  26);
       
   270         addChar(tmpRoot, 194, 0xfffeb,    20);
       
   271         addChar(tmpRoot, 195, 0x7fff1,    19);
       
   272         addChar(tmpRoot, 196, 0x3fffe7,   22);
       
   273         addChar(tmpRoot, 197, 0x7ffff2,   23);
       
   274         addChar(tmpRoot, 198, 0x3fffe8,   22);
       
   275         addChar(tmpRoot, 199, 0x1ffffec,  25);
       
   276         addChar(tmpRoot, 200, 0x3ffffe2,  26);
       
   277         addChar(tmpRoot, 201, 0x3ffffe3,  26);
       
   278         addChar(tmpRoot, 202, 0x3ffffe4,  26);
       
   279         addChar(tmpRoot, 203, 0x7ffffde,  27);
       
   280         addChar(tmpRoot, 204, 0x7ffffdf,  27);
       
   281         addChar(tmpRoot, 205, 0x3ffffe5,  26);
       
   282         addChar(tmpRoot, 206, 0xfffff1,   24);
       
   283         addChar(tmpRoot, 207, 0x1ffffed,  25);
       
   284         addChar(tmpRoot, 208, 0x7fff2,    19);
       
   285         addChar(tmpRoot, 209, 0x1fffe3,   21);
       
   286         addChar(tmpRoot, 210, 0x3ffffe6,  26);
       
   287         addChar(tmpRoot, 211, 0x7ffffe0,  27);
       
   288         addChar(tmpRoot, 212, 0x7ffffe1,  27);
       
   289         addChar(tmpRoot, 213, 0x3ffffe7,  26);
       
   290         addChar(tmpRoot, 214, 0x7ffffe2,  27);
       
   291         addChar(tmpRoot, 215, 0xfffff2,   24);
       
   292         addChar(tmpRoot, 216, 0x1fffe4,   21);
       
   293         addChar(tmpRoot, 217, 0x1fffe5,   21);
       
   294         addChar(tmpRoot, 218, 0x3ffffe8,  26);
       
   295         addChar(tmpRoot, 219, 0x3ffffe9,  26);
       
   296         addChar(tmpRoot, 220, 0xffffffd,  28);
       
   297         addChar(tmpRoot, 221, 0x7ffffe3,  27);
       
   298         addChar(tmpRoot, 222, 0x7ffffe4,  27);
       
   299         addChar(tmpRoot, 223, 0x7ffffe5,  27);
       
   300         addChar(tmpRoot, 224, 0xfffec,    20);
       
   301         addChar(tmpRoot, 225, 0xfffff3,   24);
       
   302         addChar(tmpRoot, 226, 0xfffed,    20);
       
   303         addChar(tmpRoot, 227, 0x1fffe6,   21);
       
   304         addChar(tmpRoot, 228, 0x3fffe9,   22);
       
   305         addChar(tmpRoot, 229, 0x1fffe7,   21);
       
   306         addChar(tmpRoot, 230, 0x1fffe8,   21);
       
   307         addChar(tmpRoot, 231, 0x7ffff3,   23);
       
   308         addChar(tmpRoot, 232, 0x3fffea,   22);
       
   309         addChar(tmpRoot, 233, 0x3fffeb,   22);
       
   310         addChar(tmpRoot, 234, 0x1ffffee,  25);
       
   311         addChar(tmpRoot, 235, 0x1ffffef,  25);
       
   312         addChar(tmpRoot, 236, 0xfffff4,   24);
       
   313         addChar(tmpRoot, 237, 0xfffff5,   24);
       
   314         addChar(tmpRoot, 238, 0x3ffffea,  26);
       
   315         addChar(tmpRoot, 239, 0x7ffff4,   23);
       
   316         addChar(tmpRoot, 240, 0x3ffffeb,  26);
       
   317         addChar(tmpRoot, 241, 0x7ffffe6,  27);
       
   318         addChar(tmpRoot, 242, 0x3ffffec,  26);
       
   319         addChar(tmpRoot, 243, 0x3ffffed,  26);
       
   320         addChar(tmpRoot, 244, 0x7ffffe7,  27);
       
   321         addChar(tmpRoot, 245, 0x7ffffe8,  27);
       
   322         addChar(tmpRoot, 246, 0x7ffffe9,  27);
       
   323         addChar(tmpRoot, 247, 0x7ffffea,  27);
       
   324         addChar(tmpRoot, 248, 0x7ffffeb,  27);
       
   325         addChar(tmpRoot, 249, 0xffffffe,  28);
       
   326         addChar(tmpRoot, 250, 0x7ffffec,  27);
       
   327         addChar(tmpRoot, 251, 0x7ffffed,  27);
       
   328         addChar(tmpRoot, 252, 0x7ffffee,  27);
       
   329         addChar(tmpRoot, 253, 0x7ffffef,  27);
       
   330         addChar(tmpRoot, 254, 0x7fffff0,  27);
       
   331         addChar(tmpRoot, 255, 0x3ffffee,  26);
       
   332         addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
       
   333 
       
   334         // The difference in performance can always be checked by not using
       
   335         // the immutable trie:
       
   336         //     root = tmpRoot;
       
   337         root = ImmutableNode.copyOf(tmpRoot);
       
   338     }
       
   339 
       
   340     private QuickHuffman() { }
       
   341 
       
   342     private static void addChar(Node root, int symbol, int code, int bitLength)
       
   343     {
       
   344         addLeaf(root, (char) symbol, code, bitLength, false);
       
   345         long value = ((long) code) << (64 - bitLength); // re-align MSB <- LSB
       
   346         codes[symbol] = value | bitLength;
       
   347     }
       
   348 
       
   349     private static void addEOS(Node root, int symbol, int code, int bitLength)
       
   350     {
       
   351         addLeaf(root, (char) symbol, code, bitLength, true);
       
   352     }
       
   353 
       
   354     private static void addLeaf(Node root,
       
   355                                 char symbol,
       
   356                                 int code,
       
   357                                 int bitLength,
       
   358                                 boolean isEOS)
       
   359     {
       
   360         assert 0 < bitLength && bitLength <= 32 : bitLength;
       
   361         Node curr = root;
       
   362         int nBytes = bytesForBits(bitLength);
       
   363         // The number of bits the code needs to be shifted to the left in order
       
   364         // to align with the byte #nBytes boundary:
       
   365         int align = (nBytes << 3) - bitLength;
       
   366         code <<= align;
       
   367         // descend the trie until the last element
       
   368         int l = 0;
       
   369         for (int i = 0, probe = 0xff << ((nBytes - 1) << 3);
       
   370              i < nBytes - 1;
       
   371              i++, probe >>>= 8)
       
   372         {
       
   373             curr.setEOSPath(curr.isEOSPath() | isEOS);
       
   374             int idx = (code & probe) >>> ((nBytes - 1 - i) << 3);
       
   375             curr = curr.getOrCreateChild(idx);
       
   376             curr.setLength(8);
       
   377             l += 8;
       
   378         }
       
   379         // Assign the same char to all byte variants. For example, if the code
       
   380         // and its length are 00011b and 5 respectively (letter 'a') then, in
       
   381         // order to be able to match any byte starting with 00011b prefix,
       
   382         // the following nodes need to be created:
       
   383         //
       
   384         //     00011000b, 00011001b, 00011010b, 00011011b, 00011100b, 00011101b,
       
   385         //     00011110b and 00011111b
       
   386         int idx = code & 0xff;
       
   387         curr.setEOSPath(curr.isEOSPath() | isEOS);
       
   388         for (int i = 0; i < (1 << align); i++) {
       
   389             Node child = curr.getOrCreateChild(idx | i);
       
   390             child.setSymbol(symbol);
       
   391             child.setEOSPath(child.isEOSPath() | isEOS);
       
   392             child.setLength(bitLength - l);
       
   393         }
       
   394     }
       
   395 
       
   396     /*
       
   397      * A node in the Huffman trie.
       
   398      */
       
   399     interface Node {
       
   400 
       
   401         boolean isEOSPath();
       
   402 
       
   403         void setEOSPath(boolean value);
       
   404 
       
   405         boolean isLeaf();
       
   406 
       
   407         Node getChild(int index);
       
   408 
       
   409         Node getOrCreateChild(int index);
       
   410 
       
   411         Node[] getChildren();
       
   412 
       
   413         char getSymbol();
       
   414 
       
   415         void setSymbol(char symbol);
       
   416 
       
   417         int getLength();
       
   418 
       
   419         void setLength(int value);
       
   420     }
       
   421 
       
   422     /*
       
   423      * Mutable nodes used for initial construction of the Huffman trie.
       
   424      * (These nodes are perfectly ok to be used after that.)
       
   425      */
       
   426     static final class TemporaryNode implements Node {
       
   427 
       
   428         private char symbol;
       
   429         private boolean eosPath;
       
   430         private TemporaryNode[] children;
       
   431         private int length;
       
   432 
       
   433         @Override
       
   434         public TemporaryNode getOrCreateChild(int index) {
       
   435             ensureChildrenExist();
       
   436             if (children[index] == null) {
       
   437                 children[index] = new TemporaryNode();
       
   438             }
       
   439             return children[index];
       
   440         }
       
   441 
       
   442         private void ensureChildrenExist() {
       
   443             if (children == null) {
       
   444                 children = new TemporaryNode[256];
       
   445             }
       
   446         }
       
   447 
       
   448         @Override
       
   449         public boolean isLeaf() {
       
   450             return children == null;
       
   451         }
       
   452 
       
   453         @Override
       
   454         public boolean isEOSPath() {
       
   455             return eosPath;
       
   456         }
       
   457 
       
   458         @Override
       
   459         public void setEOSPath(boolean value) {
       
   460             eosPath = value;
       
   461         }
       
   462 
       
   463         @Override
       
   464         public TemporaryNode getChild(int index) {
       
   465             ensureChildrenExist();
       
   466             return children[index];
       
   467         }
       
   468 
       
   469         @Override
       
   470         public Node[] getChildren() {
       
   471             if (children == null) {
       
   472                 return new Node[0];
       
   473             }
       
   474             return children;
       
   475         }
       
   476 
       
   477         @Override
       
   478         public char getSymbol() {
       
   479             return symbol;
       
   480         }
       
   481 
       
   482         @Override
       
   483         public int getLength() {
       
   484             return length;
       
   485         }
       
   486 
       
   487         @Override
       
   488         public void setSymbol(char value) {
       
   489             this.symbol = value;
       
   490         }
       
   491 
       
   492         @Override
       
   493         public void setLength(int value) {
       
   494             this.length = value;
       
   495         }
       
   496     }
       
   497 
       
   498     /*
       
   499      * Immutable node used to construct traversal-only Huffman trie.
       
   500      *
       
   501      * Once the trie has been built, the support of modifications is no longer
       
   502      * required. An immutable trie should be used. Not only it will help to
       
   503      * catch possible bugs, but hopefully speedup the traversal operations.
       
   504      */
       
   505     static final class ImmutableNode implements Node {
       
   506 
       
   507         private final char symbol;
       
   508         private final boolean eosPath;
       
   509         private final int length;
       
   510         private final List<ImmutableNode> children;
       
   511 
       
   512         public static ImmutableNode copyOf(Node node) {
       
   513             if (node.isLeaf()) {
       
   514                 return new ImmutableNode(node.getSymbol(), node.isEOSPath(),
       
   515                                          node.getLength());
       
   516             }
       
   517             Node[] children = node.getChildren();
       
   518             ImmutableNode[] immutableChildren = new ImmutableNode[children.length];
       
   519             for (int i = 0; i < children.length; i++) {
       
   520                 immutableChildren[i] = copyOf(children[i]);
       
   521             }
       
   522             return new ImmutableNode(node.isEOSPath(), node.getLength(),
       
   523                                      immutableChildren);
       
   524         }
       
   525 
       
   526         /* Creates a leaf node */
       
   527         private ImmutableNode(char symbol,
       
   528                               boolean eosPath,
       
   529                               int length) {
       
   530             this.symbol = symbol;
       
   531             this.eosPath = eosPath;
       
   532             this.length = length;
       
   533             this.children = List.of();
       
   534         }
       
   535 
       
   536         /* Creates a node with children */
       
   537         private ImmutableNode(boolean eosPath,
       
   538                               int length,
       
   539                               ImmutableNode[] children)
       
   540         {
       
   541             this.symbol = 0;
       
   542             this.eosPath = eosPath;
       
   543             this.length = length;
       
   544             if (children.length == 0) {
       
   545                 throw new IllegalArgumentException();
       
   546             }
       
   547             // A list produced by List.of should not be slower than array for
       
   548             // accessing elements by index, and hopefully use additional
       
   549             // optimizations (e.g. jdk.internal.vm.annotation.Stable)
       
   550             this.children = List.of(children);
       
   551         }
       
   552 
       
   553         @Override
       
   554         public boolean isLeaf() {
       
   555             return children.isEmpty();
       
   556         }
       
   557 
       
   558         @Override
       
   559         public boolean isEOSPath() {
       
   560             return eosPath;
       
   561         }
       
   562 
       
   563         @Override
       
   564         public void setEOSPath(boolean value) {
       
   565             throw new UnsupportedOperationException();
       
   566         }
       
   567 
       
   568         @Override
       
   569         public ImmutableNode getChild(int index) {
       
   570             return children.get(index);
       
   571         }
       
   572 
       
   573         @Override
       
   574         public ImmutableNode getOrCreateChild(int index) {
       
   575             throw new UnsupportedOperationException();
       
   576         }
       
   577 
       
   578         @Override
       
   579         public ImmutableNode[] getChildren() {
       
   580             // This method is not expected to be called on an immutable node.
       
   581             // If it is called, it requires some investigation.
       
   582             throw new UnsupportedOperationException();
       
   583         }
       
   584 
       
   585         @Override
       
   586         public char getSymbol() {
       
   587             return symbol;
       
   588         }
       
   589 
       
   590         @Override
       
   591         public void setSymbol(char symbol) {
       
   592             throw new UnsupportedOperationException();
       
   593         }
       
   594 
       
   595         @Override
       
   596         public int getLength() {
       
   597             return length;
       
   598         }
       
   599 
       
   600         @Override
       
   601         public void setLength(int value) {
       
   602             throw new UnsupportedOperationException();
       
   603         }
       
   604     }
       
   605 
       
   606     static final class Reader implements Huffman.Reader {
       
   607 
       
   608         private Node curr = root;  // current position in the trie
       
   609         private long buffer;       // bits left from the previous match (aligned to the left, or MSB)
       
   610         private int bufferLen;     // number of bits in the buffer
       
   611         private int len;           // length (in bits) of path to curr
       
   612         private boolean done;
       
   613 
       
   614         @Override
       
   615         public void read(ByteBuffer source,
       
   616                          Appendable destination,
       
   617                          boolean isLast) throws IOException
       
   618         {
       
   619             read(source, destination, true, isLast);
       
   620         }
       
   621 
       
   622         @Override
       
   623         public void reset() {
       
   624             curr = root;
       
   625             len = 0;
       
   626             buffer = 0;
       
   627             bufferLen = 0;
       
   628             done = false;
       
   629         }
       
   630 
       
   631         @SuppressWarnings("fallthrough")
       
   632         void read(ByteBuffer source,
       
   633                   Appendable destination,
       
   634                   boolean reportEOS, /* reportEOS is exposed for tests */
       
   635                   boolean isLast) throws IOException
       
   636         {
       
   637             while (!done) {
       
   638                 // read as much as possible (up to 8 bytes)
       
   639                 int remaining = source.remaining();
       
   640                 int nBytes = Math.min((64 - bufferLen) >> 3, remaining);
       
   641                 switch (nBytes) {
       
   642                     case 0:
       
   643                         break;
       
   644                     case 3:
       
   645                         readByte(source);
       
   646                     case 2:
       
   647                         readByte(source);
       
   648                     case 1:
       
   649                         readByte(source);
       
   650                         break;
       
   651                     case 7:
       
   652                         readByte(source);
       
   653                     case 6:
       
   654                         readByte(source);
       
   655                     case 5:
       
   656                         readByte(source);
       
   657                     case 4:
       
   658                         readInt(source);
       
   659                         break;
       
   660                     case 8:
       
   661                         readLong(source);
       
   662                         break;
       
   663                     default:
       
   664                         throw new InternalError(String.valueOf(nBytes));
       
   665                 }
       
   666                 // write as much as possible
       
   667                 while (true) {
       
   668                     if (bufferLen < 8) {
       
   669                         if (nBytes < remaining) { // read again
       
   670                             break;
       
   671                         } else if (!isLast) { // exit the method to accept more input
       
   672                             return;
       
   673                         } else if (bufferLen > 0) { // no more data is expected, pad
       
   674                             // (this padding may be done more than once)
       
   675                             buffer |= ((0xff00000000000000L >>> bufferLen)
       
   676                                     & 0xff00000000000000L);
       
   677                             // do not update bufferLen, since all those ones are
       
   678                             // synthetic and are appended merely for the sake of
       
   679                             // lookup
       
   680                         } else {
       
   681                             done = true;
       
   682                             break;
       
   683                         }
       
   684                     }
       
   685                     int idx = (int) (buffer >>> 56);
       
   686                     Node node = curr.getChild(idx);
       
   687                     if (node == null) { // TODO: TEST
       
   688                         throw new IOException("Unexpected byte");
       
   689                     }
       
   690                     if (node.isLeaf()) {
       
   691                         if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
       
   692                             throw new IOException(
       
   693                                     "Not a EOS prefix padding or unexpected end of data");
       
   694                         }
       
   695                         if (reportEOS && node.isEOSPath()) {
       
   696                             throw new IOException("Encountered EOS");
       
   697                         }
       
   698                         destination.append(node.getSymbol());
       
   699                         curr = root;
       
   700                         len = 0;
       
   701                     } else {
       
   702                         curr = node;
       
   703                         // because of the padding, we can't match more bits than
       
   704                         // there are currently in the buffer
       
   705                         len += Math.min(bufferLen, node.getLength());
       
   706                     }
       
   707                     buffer <<= node.getLength();
       
   708                     bufferLen -= node.getLength();
       
   709                 }
       
   710                 if (done && (curr.isEOSPath() && len > 7)) {
       
   711                     throw new IOException(
       
   712                             "Padding is too long (len=" + len + ") "
       
   713                                     + "or unexpected end of data");
       
   714                 }
       
   715             }
       
   716         }
       
   717 
       
   718         private void readLong(ByteBuffer source) {
       
   719             buffer = source.getLong();
       
   720             bufferLen = 64;
       
   721         }
       
   722 
       
   723         private void readInt(ByteBuffer source) {
       
   724             long b;
       
   725             b = source.getInt() & 0x00000000ffffffffL;
       
   726             buffer |= (b << (32 - bufferLen));
       
   727             bufferLen += 32;
       
   728         }
       
   729 
       
   730         private void readByte(ByteBuffer source) {
       
   731             long b = source.get() & 0x00000000000000ffL;
       
   732             buffer |= (b << (56 - bufferLen));
       
   733             bufferLen += 8;
       
   734         }
       
   735     }
       
   736 
       
   737     static final class Writer implements Huffman.Writer {
       
   738 
       
   739         private CharSequence source;
       
   740         private boolean padded;
       
   741         private int pos;
       
   742         private int end;
       
   743         private long buffer;
       
   744         private int bufferLen;
       
   745 
       
   746         @Override
       
   747         public QuickHuffman.Writer from(CharSequence input, int start, int end) {
       
   748             Objects.checkFromToIndex(start, end, input.length());
       
   749             this.pos = start;
       
   750             this.end = end;
       
   751             this.source = input;
       
   752             return this;
       
   753         }
       
   754 
       
   755         @SuppressWarnings("fallthrough")
       
   756         @Override
       
   757         public boolean write(ByteBuffer destination) {
       
   758             while (true) {
       
   759                 while (bufferLen < 32 && pos < end) {
       
   760                     char c = source.charAt(pos++);
       
   761                     buffer |= (codeValueOf(c) >>> bufferLen); // append
       
   762                     bufferLen += codeLengthOf(c);
       
   763                 }
       
   764                 if (bufferLen == 0) {
       
   765                     return true;
       
   766                 }
       
   767                 if (pos >= end && !padded) { // no more chars, pad
       
   768                     padded = true;
       
   769                     buffer |= (EOS_MSB >>> bufferLen);
       
   770                     bufferLen = bytesForBits(bufferLen) << 3;
       
   771                 }
       
   772                 // The number of bytes that can be written at once
       
   773                 // (calculating in bytes, not bits, since
       
   774                 //  destination.remaining() * 8 might overflow)
       
   775 
       
   776                 int nBytes = Math.min(bytesForBits(bufferLen), destination.remaining()); // ceil?
       
   777                 switch (nBytes) {
       
   778                     case 0:
       
   779                         return false;
       
   780                     case 1:
       
   781                     case 2:
       
   782                     case 3:
       
   783                         destination.put((byte) (buffer >>> 56));
       
   784                         buffer <<= 8;
       
   785                         bufferLen -= 8;
       
   786                         break;
       
   787                     default:
       
   788                         destination.putInt((int) (buffer >>> 32));
       
   789                         buffer <<= 32;
       
   790                         bufferLen -= 32;
       
   791                         break;
       
   792                 }
       
   793             }
       
   794         }
       
   795 
       
   796         @Override
       
   797         public QuickHuffman.Writer reset() {
       
   798             source = null;
       
   799             buffer = 0;
       
   800             bufferLen = 0;
       
   801             end = 0;
       
   802             pos = 0;
       
   803             padded = false;
       
   804             return this;
       
   805         }
       
   806 
       
   807         @Override
       
   808         public int lengthOf(CharSequence value, int start, int end) {
       
   809             int len = 0;
       
   810             for (int i = start; i < end; i++) {
       
   811                 char c = value.charAt(i);
       
   812                 len += codeLengthOf(c);
       
   813             }
       
   814             return bytesForBits(len);
       
   815         }
       
   816     }
       
   817 
       
   818     /*
       
   819      * Returns the number of bytes the given number of bits constitute.
       
   820      */
       
   821     private static int bytesForBits(int n) {
       
   822         assert (n / 8 + (n % 8 != 0 ? 1 : 0)) == (n + 7) / 8
       
   823                 && (n + 7) / 8 == ((n + 7) >> 3) : n;
       
   824         return (n + 7) >> 3;
       
   825     }
       
   826 }