--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.net.http/share/classes/jdk/internal/net/http/hpack/QuickHuffman.java Wed May 02 02:36:17 2018 -0700
@@ -0,0 +1,826 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.internal.net.http.hpack;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Objects;
+
+public final class QuickHuffman {
+
+ /*
+ * Huffman codes for encoding.
+ *
+ * EOS will never be matched, since there is no symbol for it, thus no need
+ * to store it in the array. Thus, the length of the array is 256, not 257.
+ * Code information for each character is encoded as follows:
+ *
+ * MSB LSB
+ * +----------------+----------------+
+ * |~code | length~|
+ * +----------------+----------------+
+ * |<----- 32 ----->|<----- 32 ----->|
+ * |<------------- 64 -------------->|
+ *
+ * The leftmost 32 bits hold the code value. This value is aligned left
+ * (or MSB). The rightmost 32 bits hold the length of the code value.
+ * This length is aligned right (or LSB).
+ */
+ private static final long[] codes = new long[256];
+
+ private static long codeValueOf(char c) {
+ return codes[c] & 0xffffffff00000000L;
+ }
+
+ private static long codeLengthOf(char c) {
+ return codes[c] & 0x00000000ffffffffL;
+ }
+
+ private static final int EOS_LENGTH = 30;
+ private static final int EOS_LSB = 0x3fffffff;
+ private static final long EOS_MSB = EOS_LSB << (64 - EOS_LENGTH);
+
+ /*
+ * Huffman codes for decoding.
+ *
+ * Root node contains 257 descendant nodes, including EOS.
+ */
+ private static final Node root;
+
+ static {
+ TemporaryNode tmpRoot = new TemporaryNode();
+ addChar(tmpRoot, 0, 0x1ff8, 13);
+ addChar(tmpRoot, 1, 0x7fffd8, 23);
+ addChar(tmpRoot, 2, 0xfffffe2, 28);
+ addChar(tmpRoot, 3, 0xfffffe3, 28);
+ addChar(tmpRoot, 4, 0xfffffe4, 28);
+ addChar(tmpRoot, 5, 0xfffffe5, 28);
+ addChar(tmpRoot, 6, 0xfffffe6, 28);
+ addChar(tmpRoot, 7, 0xfffffe7, 28);
+ addChar(tmpRoot, 8, 0xfffffe8, 28);
+ addChar(tmpRoot, 9, 0xffffea, 24);
+ addChar(tmpRoot, 10, 0x3ffffffc, 30);
+ addChar(tmpRoot, 11, 0xfffffe9, 28);
+ addChar(tmpRoot, 12, 0xfffffea, 28);
+ addChar(tmpRoot, 13, 0x3ffffffd, 30);
+ addChar(tmpRoot, 14, 0xfffffeb, 28);
+ addChar(tmpRoot, 15, 0xfffffec, 28);
+ addChar(tmpRoot, 16, 0xfffffed, 28);
+ addChar(tmpRoot, 17, 0xfffffee, 28);
+ addChar(tmpRoot, 18, 0xfffffef, 28);
+ addChar(tmpRoot, 19, 0xffffff0, 28);
+ addChar(tmpRoot, 20, 0xffffff1, 28);
+ addChar(tmpRoot, 21, 0xffffff2, 28);
+ addChar(tmpRoot, 22, 0x3ffffffe, 30);
+ addChar(tmpRoot, 23, 0xffffff3, 28);
+ addChar(tmpRoot, 24, 0xffffff4, 28);
+ addChar(tmpRoot, 25, 0xffffff5, 28);
+ addChar(tmpRoot, 26, 0xffffff6, 28);
+ addChar(tmpRoot, 27, 0xffffff7, 28);
+ addChar(tmpRoot, 28, 0xffffff8, 28);
+ addChar(tmpRoot, 29, 0xffffff9, 28);
+ addChar(tmpRoot, 30, 0xffffffa, 28);
+ addChar(tmpRoot, 31, 0xffffffb, 28);
+ addChar(tmpRoot, 32, 0x14, 6);
+ addChar(tmpRoot, 33, 0x3f8, 10);
+ addChar(tmpRoot, 34, 0x3f9, 10);
+ addChar(tmpRoot, 35, 0xffa, 12);
+ addChar(tmpRoot, 36, 0x1ff9, 13);
+ addChar(tmpRoot, 37, 0x15, 6);
+ addChar(tmpRoot, 38, 0xf8, 8);
+ addChar(tmpRoot, 39, 0x7fa, 11);
+ addChar(tmpRoot, 40, 0x3fa, 10);
+ addChar(tmpRoot, 41, 0x3fb, 10);
+ addChar(tmpRoot, 42, 0xf9, 8);
+ addChar(tmpRoot, 43, 0x7fb, 11);
+ addChar(tmpRoot, 44, 0xfa, 8);
+ addChar(tmpRoot, 45, 0x16, 6);
+ addChar(tmpRoot, 46, 0x17, 6);
+ addChar(tmpRoot, 47, 0x18, 6);
+ addChar(tmpRoot, 48, 0x0, 5);
+ addChar(tmpRoot, 49, 0x1, 5);
+ addChar(tmpRoot, 50, 0x2, 5);
+ addChar(tmpRoot, 51, 0x19, 6);
+ addChar(tmpRoot, 52, 0x1a, 6);
+ addChar(tmpRoot, 53, 0x1b, 6);
+ addChar(tmpRoot, 54, 0x1c, 6);
+ addChar(tmpRoot, 55, 0x1d, 6);
+ addChar(tmpRoot, 56, 0x1e, 6);
+ addChar(tmpRoot, 57, 0x1f, 6);
+ addChar(tmpRoot, 58, 0x5c, 7);
+ addChar(tmpRoot, 59, 0xfb, 8);
+ addChar(tmpRoot, 60, 0x7ffc, 15);
+ addChar(tmpRoot, 61, 0x20, 6);
+ addChar(tmpRoot, 62, 0xffb, 12);
+ addChar(tmpRoot, 63, 0x3fc, 10);
+ addChar(tmpRoot, 64, 0x1ffa, 13);
+ addChar(tmpRoot, 65, 0x21, 6);
+ addChar(tmpRoot, 66, 0x5d, 7);
+ addChar(tmpRoot, 67, 0x5e, 7);
+ addChar(tmpRoot, 68, 0x5f, 7);
+ addChar(tmpRoot, 69, 0x60, 7);
+ addChar(tmpRoot, 70, 0x61, 7);
+ addChar(tmpRoot, 71, 0x62, 7);
+ addChar(tmpRoot, 72, 0x63, 7);
+ addChar(tmpRoot, 73, 0x64, 7);
+ addChar(tmpRoot, 74, 0x65, 7);
+ addChar(tmpRoot, 75, 0x66, 7);
+ addChar(tmpRoot, 76, 0x67, 7);
+ addChar(tmpRoot, 77, 0x68, 7);
+ addChar(tmpRoot, 78, 0x69, 7);
+ addChar(tmpRoot, 79, 0x6a, 7);
+ addChar(tmpRoot, 80, 0x6b, 7);
+ addChar(tmpRoot, 81, 0x6c, 7);
+ addChar(tmpRoot, 82, 0x6d, 7);
+ addChar(tmpRoot, 83, 0x6e, 7);
+ addChar(tmpRoot, 84, 0x6f, 7);
+ addChar(tmpRoot, 85, 0x70, 7);
+ addChar(tmpRoot, 86, 0x71, 7);
+ addChar(tmpRoot, 87, 0x72, 7);
+ addChar(tmpRoot, 88, 0xfc, 8);
+ addChar(tmpRoot, 89, 0x73, 7);
+ addChar(tmpRoot, 90, 0xfd, 8);
+ addChar(tmpRoot, 91, 0x1ffb, 13);
+ addChar(tmpRoot, 92, 0x7fff0, 19);
+ addChar(tmpRoot, 93, 0x1ffc, 13);
+ addChar(tmpRoot, 94, 0x3ffc, 14);
+ addChar(tmpRoot, 95, 0x22, 6);
+ addChar(tmpRoot, 96, 0x7ffd, 15);
+ addChar(tmpRoot, 97, 0x3, 5);
+ addChar(tmpRoot, 98, 0x23, 6);
+ addChar(tmpRoot, 99, 0x4, 5);
+ addChar(tmpRoot, 100, 0x24, 6);
+ addChar(tmpRoot, 101, 0x5, 5);
+ addChar(tmpRoot, 102, 0x25, 6);
+ addChar(tmpRoot, 103, 0x26, 6);
+ addChar(tmpRoot, 104, 0x27, 6);
+ addChar(tmpRoot, 105, 0x6, 5);
+ addChar(tmpRoot, 106, 0x74, 7);
+ addChar(tmpRoot, 107, 0x75, 7);
+ addChar(tmpRoot, 108, 0x28, 6);
+ addChar(tmpRoot, 109, 0x29, 6);
+ addChar(tmpRoot, 110, 0x2a, 6);
+ addChar(tmpRoot, 111, 0x7, 5);
+ addChar(tmpRoot, 112, 0x2b, 6);
+ addChar(tmpRoot, 113, 0x76, 7);
+ addChar(tmpRoot, 114, 0x2c, 6);
+ addChar(tmpRoot, 115, 0x8, 5);
+ addChar(tmpRoot, 116, 0x9, 5);
+ addChar(tmpRoot, 117, 0x2d, 6);
+ addChar(tmpRoot, 118, 0x77, 7);
+ addChar(tmpRoot, 119, 0x78, 7);
+ addChar(tmpRoot, 120, 0x79, 7);
+ addChar(tmpRoot, 121, 0x7a, 7);
+ addChar(tmpRoot, 122, 0x7b, 7);
+ addChar(tmpRoot, 123, 0x7ffe, 15);
+ addChar(tmpRoot, 124, 0x7fc, 11);
+ addChar(tmpRoot, 125, 0x3ffd, 14);
+ addChar(tmpRoot, 126, 0x1ffd, 13);
+ addChar(tmpRoot, 127, 0xffffffc, 28);
+ addChar(tmpRoot, 128, 0xfffe6, 20);
+ addChar(tmpRoot, 129, 0x3fffd2, 22);
+ addChar(tmpRoot, 130, 0xfffe7, 20);
+ addChar(tmpRoot, 131, 0xfffe8, 20);
+ addChar(tmpRoot, 132, 0x3fffd3, 22);
+ addChar(tmpRoot, 133, 0x3fffd4, 22);
+ addChar(tmpRoot, 134, 0x3fffd5, 22);
+ addChar(tmpRoot, 135, 0x7fffd9, 23);
+ addChar(tmpRoot, 136, 0x3fffd6, 22);
+ addChar(tmpRoot, 137, 0x7fffda, 23);
+ addChar(tmpRoot, 138, 0x7fffdb, 23);
+ addChar(tmpRoot, 139, 0x7fffdc, 23);
+ addChar(tmpRoot, 140, 0x7fffdd, 23);
+ addChar(tmpRoot, 141, 0x7fffde, 23);
+ addChar(tmpRoot, 142, 0xffffeb, 24);
+ addChar(tmpRoot, 143, 0x7fffdf, 23);
+ addChar(tmpRoot, 144, 0xffffec, 24);
+ addChar(tmpRoot, 145, 0xffffed, 24);
+ addChar(tmpRoot, 146, 0x3fffd7, 22);
+ addChar(tmpRoot, 147, 0x7fffe0, 23);
+ addChar(tmpRoot, 148, 0xffffee, 24);
+ addChar(tmpRoot, 149, 0x7fffe1, 23);
+ addChar(tmpRoot, 150, 0x7fffe2, 23);
+ addChar(tmpRoot, 151, 0x7fffe3, 23);
+ addChar(tmpRoot, 152, 0x7fffe4, 23);
+ addChar(tmpRoot, 153, 0x1fffdc, 21);
+ addChar(tmpRoot, 154, 0x3fffd8, 22);
+ addChar(tmpRoot, 155, 0x7fffe5, 23);
+ addChar(tmpRoot, 156, 0x3fffd9, 22);
+ addChar(tmpRoot, 157, 0x7fffe6, 23);
+ addChar(tmpRoot, 158, 0x7fffe7, 23);
+ addChar(tmpRoot, 159, 0xffffef, 24);
+ addChar(tmpRoot, 160, 0x3fffda, 22);
+ addChar(tmpRoot, 161, 0x1fffdd, 21);
+ addChar(tmpRoot, 162, 0xfffe9, 20);
+ addChar(tmpRoot, 163, 0x3fffdb, 22);
+ addChar(tmpRoot, 164, 0x3fffdc, 22);
+ addChar(tmpRoot, 165, 0x7fffe8, 23);
+ addChar(tmpRoot, 166, 0x7fffe9, 23);
+ addChar(tmpRoot, 167, 0x1fffde, 21);
+ addChar(tmpRoot, 168, 0x7fffea, 23);
+ addChar(tmpRoot, 169, 0x3fffdd, 22);
+ addChar(tmpRoot, 170, 0x3fffde, 22);
+ addChar(tmpRoot, 171, 0xfffff0, 24);
+ addChar(tmpRoot, 172, 0x1fffdf, 21);
+ addChar(tmpRoot, 173, 0x3fffdf, 22);
+ addChar(tmpRoot, 174, 0x7fffeb, 23);
+ addChar(tmpRoot, 175, 0x7fffec, 23);
+ addChar(tmpRoot, 176, 0x1fffe0, 21);
+ addChar(tmpRoot, 177, 0x1fffe1, 21);
+ addChar(tmpRoot, 178, 0x3fffe0, 22);
+ addChar(tmpRoot, 179, 0x1fffe2, 21);
+ addChar(tmpRoot, 180, 0x7fffed, 23);
+ addChar(tmpRoot, 181, 0x3fffe1, 22);
+ addChar(tmpRoot, 182, 0x7fffee, 23);
+ addChar(tmpRoot, 183, 0x7fffef, 23);
+ addChar(tmpRoot, 184, 0xfffea, 20);
+ addChar(tmpRoot, 185, 0x3fffe2, 22);
+ addChar(tmpRoot, 186, 0x3fffe3, 22);
+ addChar(tmpRoot, 187, 0x3fffe4, 22);
+ addChar(tmpRoot, 188, 0x7ffff0, 23);
+ addChar(tmpRoot, 189, 0x3fffe5, 22);
+ addChar(tmpRoot, 190, 0x3fffe6, 22);
+ addChar(tmpRoot, 191, 0x7ffff1, 23);
+ addChar(tmpRoot, 192, 0x3ffffe0, 26);
+ addChar(tmpRoot, 193, 0x3ffffe1, 26);
+ addChar(tmpRoot, 194, 0xfffeb, 20);
+ addChar(tmpRoot, 195, 0x7fff1, 19);
+ addChar(tmpRoot, 196, 0x3fffe7, 22);
+ addChar(tmpRoot, 197, 0x7ffff2, 23);
+ addChar(tmpRoot, 198, 0x3fffe8, 22);
+ addChar(tmpRoot, 199, 0x1ffffec, 25);
+ addChar(tmpRoot, 200, 0x3ffffe2, 26);
+ addChar(tmpRoot, 201, 0x3ffffe3, 26);
+ addChar(tmpRoot, 202, 0x3ffffe4, 26);
+ addChar(tmpRoot, 203, 0x7ffffde, 27);
+ addChar(tmpRoot, 204, 0x7ffffdf, 27);
+ addChar(tmpRoot, 205, 0x3ffffe5, 26);
+ addChar(tmpRoot, 206, 0xfffff1, 24);
+ addChar(tmpRoot, 207, 0x1ffffed, 25);
+ addChar(tmpRoot, 208, 0x7fff2, 19);
+ addChar(tmpRoot, 209, 0x1fffe3, 21);
+ addChar(tmpRoot, 210, 0x3ffffe6, 26);
+ addChar(tmpRoot, 211, 0x7ffffe0, 27);
+ addChar(tmpRoot, 212, 0x7ffffe1, 27);
+ addChar(tmpRoot, 213, 0x3ffffe7, 26);
+ addChar(tmpRoot, 214, 0x7ffffe2, 27);
+ addChar(tmpRoot, 215, 0xfffff2, 24);
+ addChar(tmpRoot, 216, 0x1fffe4, 21);
+ addChar(tmpRoot, 217, 0x1fffe5, 21);
+ addChar(tmpRoot, 218, 0x3ffffe8, 26);
+ addChar(tmpRoot, 219, 0x3ffffe9, 26);
+ addChar(tmpRoot, 220, 0xffffffd, 28);
+ addChar(tmpRoot, 221, 0x7ffffe3, 27);
+ addChar(tmpRoot, 222, 0x7ffffe4, 27);
+ addChar(tmpRoot, 223, 0x7ffffe5, 27);
+ addChar(tmpRoot, 224, 0xfffec, 20);
+ addChar(tmpRoot, 225, 0xfffff3, 24);
+ addChar(tmpRoot, 226, 0xfffed, 20);
+ addChar(tmpRoot, 227, 0x1fffe6, 21);
+ addChar(tmpRoot, 228, 0x3fffe9, 22);
+ addChar(tmpRoot, 229, 0x1fffe7, 21);
+ addChar(tmpRoot, 230, 0x1fffe8, 21);
+ addChar(tmpRoot, 231, 0x7ffff3, 23);
+ addChar(tmpRoot, 232, 0x3fffea, 22);
+ addChar(tmpRoot, 233, 0x3fffeb, 22);
+ addChar(tmpRoot, 234, 0x1ffffee, 25);
+ addChar(tmpRoot, 235, 0x1ffffef, 25);
+ addChar(tmpRoot, 236, 0xfffff4, 24);
+ addChar(tmpRoot, 237, 0xfffff5, 24);
+ addChar(tmpRoot, 238, 0x3ffffea, 26);
+ addChar(tmpRoot, 239, 0x7ffff4, 23);
+ addChar(tmpRoot, 240, 0x3ffffeb, 26);
+ addChar(tmpRoot, 241, 0x7ffffe6, 27);
+ addChar(tmpRoot, 242, 0x3ffffec, 26);
+ addChar(tmpRoot, 243, 0x3ffffed, 26);
+ addChar(tmpRoot, 244, 0x7ffffe7, 27);
+ addChar(tmpRoot, 245, 0x7ffffe8, 27);
+ addChar(tmpRoot, 246, 0x7ffffe9, 27);
+ addChar(tmpRoot, 247, 0x7ffffea, 27);
+ addChar(tmpRoot, 248, 0x7ffffeb, 27);
+ addChar(tmpRoot, 249, 0xffffffe, 28);
+ addChar(tmpRoot, 250, 0x7ffffec, 27);
+ addChar(tmpRoot, 251, 0x7ffffed, 27);
+ addChar(tmpRoot, 252, 0x7ffffee, 27);
+ addChar(tmpRoot, 253, 0x7ffffef, 27);
+ addChar(tmpRoot, 254, 0x7fffff0, 27);
+ addChar(tmpRoot, 255, 0x3ffffee, 26);
+ addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
+
+ // The difference in performance can always be checked by not using
+ // the immutable trie:
+ // root = tmpRoot;
+ root = ImmutableNode.copyOf(tmpRoot);
+ }
+
+ private QuickHuffman() { }
+
+ private static void addChar(Node root, int symbol, int code, int bitLength)
+ {
+ addLeaf(root, (char) symbol, code, bitLength, false);
+ long value = ((long) code) << (64 - bitLength); // re-align MSB <- LSB
+ codes[symbol] = value | bitLength;
+ }
+
+ private static void addEOS(Node root, int symbol, int code, int bitLength)
+ {
+ addLeaf(root, (char) symbol, code, bitLength, true);
+ }
+
+ private static void addLeaf(Node root,
+ char symbol,
+ int code,
+ int bitLength,
+ boolean isEOS)
+ {
+ assert 0 < bitLength && bitLength <= 32 : bitLength;
+ Node curr = root;
+ int nBytes = bytesForBits(bitLength);
+ // The number of bits the code needs to be shifted to the left in order
+ // to align with the byte #nBytes boundary:
+ int align = (nBytes << 3) - bitLength;
+ code <<= align;
+ // descend the trie until the last element
+ int l = 0;
+ for (int i = 0, probe = 0xff << ((nBytes - 1) << 3);
+ i < nBytes - 1;
+ i++, probe >>>= 8)
+ {
+ curr.setEOSPath(curr.isEOSPath() | isEOS);
+ int idx = (code & probe) >>> ((nBytes - 1 - i) << 3);
+ curr = curr.getOrCreateChild(idx);
+ curr.setLength(8);
+ l += 8;
+ }
+ // Assign the same char to all byte variants. For example, if the code
+ // and its length are 00011b and 5 respectively (letter 'a') then, in
+ // order to be able to match any byte starting with 00011b prefix,
+ // the following nodes need to be created:
+ //
+ // 00011000b, 00011001b, 00011010b, 00011011b, 00011100b, 00011101b,
+ // 00011110b and 00011111b
+ int idx = code & 0xff;
+ curr.setEOSPath(curr.isEOSPath() | isEOS);
+ for (int i = 0; i < (1 << align); i++) {
+ Node child = curr.getOrCreateChild(idx | i);
+ child.setSymbol(symbol);
+ child.setEOSPath(child.isEOSPath() | isEOS);
+ child.setLength(bitLength - l);
+ }
+ }
+
+ /*
+ * A node in the Huffman trie.
+ */
+ interface Node {
+
+ boolean isEOSPath();
+
+ void setEOSPath(boolean value);
+
+ boolean isLeaf();
+
+ Node getChild(int index);
+
+ Node getOrCreateChild(int index);
+
+ Node[] getChildren();
+
+ char getSymbol();
+
+ void setSymbol(char symbol);
+
+ int getLength();
+
+ void setLength(int value);
+ }
+
+ /*
+ * Mutable nodes used for initial construction of the Huffman trie.
+ * (These nodes are perfectly ok to be used after that.)
+ */
+ static final class TemporaryNode implements Node {
+
+ private char symbol;
+ private boolean eosPath;
+ private TemporaryNode[] children;
+ private int length;
+
+ @Override
+ public TemporaryNode getOrCreateChild(int index) {
+ ensureChildrenExist();
+ if (children[index] == null) {
+ children[index] = new TemporaryNode();
+ }
+ return children[index];
+ }
+
+ private void ensureChildrenExist() {
+ if (children == null) {
+ children = new TemporaryNode[256];
+ }
+ }
+
+ @Override
+ public boolean isLeaf() {
+ return children == null;
+ }
+
+ @Override
+ public boolean isEOSPath() {
+ return eosPath;
+ }
+
+ @Override
+ public void setEOSPath(boolean value) {
+ eosPath = value;
+ }
+
+ @Override
+ public TemporaryNode getChild(int index) {
+ ensureChildrenExist();
+ return children[index];
+ }
+
+ @Override
+ public Node[] getChildren() {
+ if (children == null) {
+ return new Node[0];
+ }
+ return children;
+ }
+
+ @Override
+ public char getSymbol() {
+ return symbol;
+ }
+
+ @Override
+ public int getLength() {
+ return length;
+ }
+
+ @Override
+ public void setSymbol(char value) {
+ this.symbol = value;
+ }
+
+ @Override
+ public void setLength(int value) {
+ this.length = value;
+ }
+ }
+
+ /*
+ * Immutable node used to construct traversal-only Huffman trie.
+ *
+ * Once the trie has been built, the support of modifications is no longer
+ * required. An immutable trie should be used. Not only it will help to
+ * catch possible bugs, but hopefully speedup the traversal operations.
+ */
+ static final class ImmutableNode implements Node {
+
+ private final char symbol;
+ private final boolean eosPath;
+ private final int length;
+ private final List<ImmutableNode> children;
+
+ public static ImmutableNode copyOf(Node node) {
+ if (node.isLeaf()) {
+ return new ImmutableNode(node.getSymbol(), node.isEOSPath(),
+ node.getLength());
+ }
+ Node[] children = node.getChildren();
+ ImmutableNode[] immutableChildren = new ImmutableNode[children.length];
+ for (int i = 0; i < children.length; i++) {
+ immutableChildren[i] = copyOf(children[i]);
+ }
+ return new ImmutableNode(node.isEOSPath(), node.getLength(),
+ immutableChildren);
+ }
+
+ /* Creates a leaf node */
+ private ImmutableNode(char symbol,
+ boolean eosPath,
+ int length) {
+ this.symbol = symbol;
+ this.eosPath = eosPath;
+ this.length = length;
+ this.children = List.of();
+ }
+
+ /* Creates a node with children */
+ private ImmutableNode(boolean eosPath,
+ int length,
+ ImmutableNode[] children)
+ {
+ this.symbol = 0;
+ this.eosPath = eosPath;
+ this.length = length;
+ if (children.length == 0) {
+ throw new IllegalArgumentException();
+ }
+ // A list produced by List.of should not be slower than array for
+ // accessing elements by index, and hopefully use additional
+ // optimizations (e.g. jdk.internal.vm.annotation.Stable)
+ this.children = List.of(children);
+ }
+
+ @Override
+ public boolean isLeaf() {
+ return children.isEmpty();
+ }
+
+ @Override
+ public boolean isEOSPath() {
+ return eosPath;
+ }
+
+ @Override
+ public void setEOSPath(boolean value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ImmutableNode getChild(int index) {
+ return children.get(index);
+ }
+
+ @Override
+ public ImmutableNode getOrCreateChild(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ImmutableNode[] getChildren() {
+ // This method is not expected to be called on an immutable node.
+ // If it is called, it requires some investigation.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public char getSymbol() {
+ return symbol;
+ }
+
+ @Override
+ public void setSymbol(char symbol) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getLength() {
+ return length;
+ }
+
+ @Override
+ public void setLength(int value) {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ static final class Reader implements Huffman.Reader {
+
+ private Node curr = root; // current position in the trie
+ private long buffer; // bits left from the previous match (aligned to the left, or MSB)
+ private int bufferLen; // number of bits in the buffer
+ private int len; // length (in bits) of path to curr
+ private boolean done;
+
+ @Override
+ public void read(ByteBuffer source,
+ Appendable destination,
+ boolean isLast) throws IOException
+ {
+ read(source, destination, true, isLast);
+ }
+
+ @Override
+ public void reset() {
+ curr = root;
+ len = 0;
+ buffer = 0;
+ bufferLen = 0;
+ done = false;
+ }
+
+ @SuppressWarnings("fallthrough")
+ void read(ByteBuffer source,
+ Appendable destination,
+ boolean reportEOS, /* reportEOS is exposed for tests */
+ boolean isLast) throws IOException
+ {
+ while (!done) {
+ // read as much as possible (up to 8 bytes)
+ int remaining = source.remaining();
+ int nBytes = Math.min((64 - bufferLen) >> 3, remaining);
+ switch (nBytes) {
+ case 0:
+ break;
+ case 3:
+ readByte(source);
+ case 2:
+ readByte(source);
+ case 1:
+ readByte(source);
+ break;
+ case 7:
+ readByte(source);
+ case 6:
+ readByte(source);
+ case 5:
+ readByte(source);
+ case 4:
+ readInt(source);
+ break;
+ case 8:
+ readLong(source);
+ break;
+ default:
+ throw new InternalError(String.valueOf(nBytes));
+ }
+ // write as much as possible
+ while (true) {
+ if (bufferLen < 8) {
+ if (nBytes < remaining) { // read again
+ break;
+ } else if (!isLast) { // exit the method to accept more input
+ return;
+ } else if (bufferLen > 0) { // no more data is expected, pad
+ // (this padding may be done more than once)
+ buffer |= ((0xff00000000000000L >>> bufferLen)
+ & 0xff00000000000000L);
+ // do not update bufferLen, since all those ones are
+ // synthetic and are appended merely for the sake of
+ // lookup
+ } else {
+ done = true;
+ break;
+ }
+ }
+ int idx = (int) (buffer >>> 56);
+ Node node = curr.getChild(idx);
+ if (node == null) { // TODO: TEST
+ throw new IOException("Unexpected byte");
+ }
+ if (node.isLeaf()) {
+ if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
+ throw new IOException(
+ "Not a EOS prefix padding or unexpected end of data");
+ }
+ if (reportEOS && node.isEOSPath()) {
+ throw new IOException("Encountered EOS");
+ }
+ destination.append(node.getSymbol());
+ curr = root;
+ len = 0;
+ } else {
+ curr = node;
+ // because of the padding, we can't match more bits than
+ // there are currently in the buffer
+ len += Math.min(bufferLen, node.getLength());
+ }
+ buffer <<= node.getLength();
+ bufferLen -= node.getLength();
+ }
+ if (done && (curr.isEOSPath() && len > 7)) {
+ throw new IOException(
+ "Padding is too long (len=" + len + ") "
+ + "or unexpected end of data");
+ }
+ }
+ }
+
+ private void readLong(ByteBuffer source) {
+ buffer = source.getLong();
+ bufferLen = 64;
+ }
+
+ private void readInt(ByteBuffer source) {
+ long b;
+ b = source.getInt() & 0x00000000ffffffffL;
+ buffer |= (b << (32 - bufferLen));
+ bufferLen += 32;
+ }
+
+ private void readByte(ByteBuffer source) {
+ long b = source.get() & 0x00000000000000ffL;
+ buffer |= (b << (56 - bufferLen));
+ bufferLen += 8;
+ }
+ }
+
+ static final class Writer implements Huffman.Writer {
+
+ private CharSequence source;
+ private boolean padded;
+ private int pos;
+ private int end;
+ private long buffer;
+ private int bufferLen;
+
+ @Override
+ public QuickHuffman.Writer from(CharSequence input, int start, int end) {
+ Objects.checkFromToIndex(start, end, input.length());
+ this.pos = start;
+ this.end = end;
+ this.source = input;
+ return this;
+ }
+
+ @SuppressWarnings("fallthrough")
+ @Override
+ public boolean write(ByteBuffer destination) {
+ while (true) {
+ while (bufferLen < 32 && pos < end) {
+ char c = source.charAt(pos++);
+ buffer |= (codeValueOf(c) >>> bufferLen); // append
+ bufferLen += codeLengthOf(c);
+ }
+ if (bufferLen == 0) {
+ return true;
+ }
+ if (pos >= end && !padded) { // no more chars, pad
+ padded = true;
+ buffer |= (EOS_MSB >>> bufferLen);
+ bufferLen = bytesForBits(bufferLen) << 3;
+ }
+ // The number of bytes that can be written at once
+ // (calculating in bytes, not bits, since
+ // destination.remaining() * 8 might overflow)
+
+ int nBytes = Math.min(bytesForBits(bufferLen), destination.remaining()); // ceil?
+ switch (nBytes) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ case 3:
+ destination.put((byte) (buffer >>> 56));
+ buffer <<= 8;
+ bufferLen -= 8;
+ break;
+ default:
+ destination.putInt((int) (buffer >>> 32));
+ buffer <<= 32;
+ bufferLen -= 32;
+ break;
+ }
+ }
+ }
+
+ @Override
+ public QuickHuffman.Writer reset() {
+ source = null;
+ buffer = 0;
+ bufferLen = 0;
+ end = 0;
+ pos = 0;
+ padded = false;
+ return this;
+ }
+
+ @Override
+ public int lengthOf(CharSequence value, int start, int end) {
+ int len = 0;
+ for (int i = start; i < end; i++) {
+ char c = value.charAt(i);
+ len += codeLengthOf(c);
+ }
+ return bytesForBits(len);
+ }
+ }
+
+ /*
+ * Returns the number of bytes the given number of bits constitute.
+ */
+ private static int bytesForBits(int n) {
+ assert (n / 8 + (n % 8 != 0 ? 1 : 0)) == (n + 7) / 8
+ && (n + 7) / 8 == ((n + 7) >> 3) : n;
+ return (n + 7) >> 3;
+ }
+}