src/java.net.http/share/classes/jdk/internal/net/http/hpack/Huffman.java
branchhttp-client-branch
changeset 56092 fd85b2bf2b0d
parent 56089 42208b2f224e
child 56451 9585061fdb04
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.net.http/share/classes/jdk/internal/net/http/hpack/Huffman.java	Wed Feb 07 21:45:37 2018 +0000
@@ -0,0 +1,681 @@
+/*
+ * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.internal.net.http.hpack;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static java.lang.String.format;
+
+/**
+ * Huffman coding table.
+ *
+ * <p> Instances of this class are safe for use by multiple threads.
+ *
+ * @since 9
+ */
+public final class Huffman {
+
+    // TODO: check if reset is done in both reader and writer
+
+    static final class Reader {
+
+        private Node curr; // position in the trie
+        private int len;   // length of the path from the root to 'curr'
+        private int p;     // byte probe
+
+        {
+            reset();
+        }
+
+        public void read(ByteBuffer source,
+                         Appendable destination,
+                         boolean isLast) throws IOException {
+            read(source, destination, true, isLast);
+        }
+
+        // Takes 'isLast' rather than returns whether the reading is done or
+        // not, for more informative exceptions.
+        void read(ByteBuffer source,
+                  Appendable destination,
+                  boolean reportEOS, /* reportEOS is exposed for tests */
+                  boolean isLast) throws IOException {
+            Node c = curr;
+            int l = len;
+            /*
+               Since ByteBuffer is itself stateful, its position is
+               remembered here NOT as a part of Reader's state,
+               but to set it back in the case of a failure
+             */
+            int pos = source.position();
+
+            while (source.hasRemaining()) {
+                int d = source.get();
+                for (; p != 0; p >>= 1) {
+                    c = c.getChild(p & d);
+                    l++;
+                    if (c.isLeaf()) {
+                        if (reportEOS && c.isEOSPath) {
+                            throw new IOException("Encountered EOS");
+                        }
+                        char ch;
+                        try {
+                            ch = c.getChar();
+                        } catch (IllegalStateException e) {
+                            source.position(pos); // do we need this?
+                            throw new IOException(e);
+                        }
+                        try {
+                            destination.append(ch);
+                        } catch (IOException e) {
+                            source.position(pos); // do we need this?
+                            throw e;
+                        }
+                        c = INSTANCE.root;
+                        l = 0;
+                    }
+                    curr = c;
+                    len = l;
+                }
+                resetProbe();
+                pos++;
+            }
+            if (!isLast) {
+                return; // it's too early to jump to any conclusions, let's wait
+            }
+            if (c.isLeaf()) {
+                return; // it's perfectly ok, no extra padding bits
+            }
+            if (c.isEOSPath && len <= 7) {
+                return; // it's ok, some extra padding bits
+            }
+            if (c.isEOSPath) {
+                throw new IOException(
+                        "Padding is too long (len=" + len + ") " +
+                                "or unexpected end of data");
+            }
+            throw new IOException(
+                    "Not a EOS prefix padding or unexpected end of data");
+        }
+
+        public void reset() {
+            curr = INSTANCE.root;
+            len = 0;
+            resetProbe();
+        }
+
+        private void resetProbe() {
+            p = 0x80;
+        }
+    }
+
+    static final class Writer {
+
+        private int pos;       // position in 'source'
+        private int avail = 8; // number of least significant bits available in 'curr'
+        private int curr;      // next byte to put to the destination
+        private int rem;       // number of least significant bits in 'code' yet to be processed
+        private int code;      // current code being written
+
+        private CharSequence source;
+        private int end;
+
+        public Writer from(CharSequence input, int start, int end) {
+            if (start < 0 || end < 0 || end > input.length() || start > end) {
+                throw new IndexOutOfBoundsException(
+                        String.format("input.length()=%s, start=%s, end=%s",
+                                input.length(), start, end));
+            }
+            pos = start;
+            this.end = end;
+            this.source = input;
+            return this;
+        }
+
+        public boolean write(ByteBuffer destination) {
+            for (; pos < end; pos++) {
+                if (rem == 0) {
+                    Code desc = INSTANCE.codeOf(source.charAt(pos));
+                    rem = desc.length;
+                    code = desc.code;
+                }
+                while (rem > 0) {
+                    if (rem < avail) {
+                        curr |= (code << (avail - rem));
+                        avail -= rem;
+                        rem = 0;
+                    } else {
+                        int c = (curr | (code >>> (rem - avail)));
+                        if (destination.hasRemaining()) {
+                            destination.put((byte) c);
+                        } else {
+                            return false;
+                        }
+                        curr = c;
+                        code <<= (32 - rem + avail);  // throw written bits off the cliff (is this Sparta?)
+                        code >>>= (32 - rem + avail); // return to the position
+                        rem -= avail;
+                        curr = 0;
+                        avail = 8;
+                    }
+                }
+            }
+
+            if (avail < 8) { // have to pad
+                if (destination.hasRemaining()) {
+                    destination.put((byte) (curr | (INSTANCE.EOS.code >>> (INSTANCE.EOS.length - avail))));
+                    avail = 8;
+                } else {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        public Writer reset() {
+            source = null;
+            end = -1;
+            pos = -1;
+            avail = 8;
+            curr = 0;
+            code = 0;
+            return this;
+        }
+    }
+
+    /**
+     * Shared instance.
+     */
+    public static final Huffman INSTANCE = new Huffman();
+
+    private final Code EOS = new Code(0x3fffffff, 30);
+    private final Code[] codes = new Code[257];
+    private final Node root = new Node() {
+        @Override
+        public String toString() { return "root"; }
+    };
+
+    // TODO: consider builder and immutable trie
+    private Huffman() {
+        // @formatter:off
+        addChar(0,   0x1ff8,     13);
+        addChar(1,   0x7fffd8,   23);
+        addChar(2,   0xfffffe2,  28);
+        addChar(3,   0xfffffe3,  28);
+        addChar(4,   0xfffffe4,  28);
+        addChar(5,   0xfffffe5,  28);
+        addChar(6,   0xfffffe6,  28);
+        addChar(7,   0xfffffe7,  28);
+        addChar(8,   0xfffffe8,  28);
+        addChar(9,   0xffffea,   24);
+        addChar(10,  0x3ffffffc, 30);
+        addChar(11,  0xfffffe9,  28);
+        addChar(12,  0xfffffea,  28);
+        addChar(13,  0x3ffffffd, 30);
+        addChar(14,  0xfffffeb,  28);
+        addChar(15,  0xfffffec,  28);
+        addChar(16,  0xfffffed,  28);
+        addChar(17,  0xfffffee,  28);
+        addChar(18,  0xfffffef,  28);
+        addChar(19,  0xffffff0,  28);
+        addChar(20,  0xffffff1,  28);
+        addChar(21,  0xffffff2,  28);
+        addChar(22,  0x3ffffffe, 30);
+        addChar(23,  0xffffff3,  28);
+        addChar(24,  0xffffff4,  28);
+        addChar(25,  0xffffff5,  28);
+        addChar(26,  0xffffff6,  28);
+        addChar(27,  0xffffff7,  28);
+        addChar(28,  0xffffff8,  28);
+        addChar(29,  0xffffff9,  28);
+        addChar(30,  0xffffffa,  28);
+        addChar(31,  0xffffffb,  28);
+        addChar(32,  0x14,        6);
+        addChar(33,  0x3f8,      10);
+        addChar(34,  0x3f9,      10);
+        addChar(35,  0xffa,      12);
+        addChar(36,  0x1ff9,     13);
+        addChar(37,  0x15,        6);
+        addChar(38,  0xf8,        8);
+        addChar(39,  0x7fa,      11);
+        addChar(40,  0x3fa,      10);
+        addChar(41,  0x3fb,      10);
+        addChar(42,  0xf9,        8);
+        addChar(43,  0x7fb,      11);
+        addChar(44,  0xfa,        8);
+        addChar(45,  0x16,        6);
+        addChar(46,  0x17,        6);
+        addChar(47,  0x18,        6);
+        addChar(48,  0x0,         5);
+        addChar(49,  0x1,         5);
+        addChar(50,  0x2,         5);
+        addChar(51,  0x19,        6);
+        addChar(52,  0x1a,        6);
+        addChar(53,  0x1b,        6);
+        addChar(54,  0x1c,        6);
+        addChar(55,  0x1d,        6);
+        addChar(56,  0x1e,        6);
+        addChar(57,  0x1f,        6);
+        addChar(58,  0x5c,        7);
+        addChar(59,  0xfb,        8);
+        addChar(60,  0x7ffc,     15);
+        addChar(61,  0x20,        6);
+        addChar(62,  0xffb,      12);
+        addChar(63,  0x3fc,      10);
+        addChar(64,  0x1ffa,     13);
+        addChar(65,  0x21,        6);
+        addChar(66,  0x5d,        7);
+        addChar(67,  0x5e,        7);
+        addChar(68,  0x5f,        7);
+        addChar(69,  0x60,        7);
+        addChar(70,  0x61,        7);
+        addChar(71,  0x62,        7);
+        addChar(72,  0x63,        7);
+        addChar(73,  0x64,        7);
+        addChar(74,  0x65,        7);
+        addChar(75,  0x66,        7);
+        addChar(76,  0x67,        7);
+        addChar(77,  0x68,        7);
+        addChar(78,  0x69,        7);
+        addChar(79,  0x6a,        7);
+        addChar(80,  0x6b,        7);
+        addChar(81,  0x6c,        7);
+        addChar(82,  0x6d,        7);
+        addChar(83,  0x6e,        7);
+        addChar(84,  0x6f,        7);
+        addChar(85,  0x70,        7);
+        addChar(86,  0x71,        7);
+        addChar(87,  0x72,        7);
+        addChar(88,  0xfc,        8);
+        addChar(89,  0x73,        7);
+        addChar(90,  0xfd,        8);
+        addChar(91,  0x1ffb,     13);
+        addChar(92,  0x7fff0,    19);
+        addChar(93,  0x1ffc,     13);
+        addChar(94,  0x3ffc,     14);
+        addChar(95,  0x22,        6);
+        addChar(96,  0x7ffd,     15);
+        addChar(97,  0x3,         5);
+        addChar(98,  0x23,        6);
+        addChar(99,  0x4,         5);
+        addChar(100, 0x24,        6);
+        addChar(101, 0x5,         5);
+        addChar(102, 0x25,        6);
+        addChar(103, 0x26,        6);
+        addChar(104, 0x27,        6);
+        addChar(105, 0x6,         5);
+        addChar(106, 0x74,        7);
+        addChar(107, 0x75,        7);
+        addChar(108, 0x28,        6);
+        addChar(109, 0x29,        6);
+        addChar(110, 0x2a,        6);
+        addChar(111, 0x7,         5);
+        addChar(112, 0x2b,        6);
+        addChar(113, 0x76,        7);
+        addChar(114, 0x2c,        6);
+        addChar(115, 0x8,         5);
+        addChar(116, 0x9,         5);
+        addChar(117, 0x2d,        6);
+        addChar(118, 0x77,        7);
+        addChar(119, 0x78,        7);
+        addChar(120, 0x79,        7);
+        addChar(121, 0x7a,        7);
+        addChar(122, 0x7b,        7);
+        addChar(123, 0x7ffe,     15);
+        addChar(124, 0x7fc,      11);
+        addChar(125, 0x3ffd,     14);
+        addChar(126, 0x1ffd,     13);
+        addChar(127, 0xffffffc,  28);
+        addChar(128, 0xfffe6,    20);
+        addChar(129, 0x3fffd2,   22);
+        addChar(130, 0xfffe7,    20);
+        addChar(131, 0xfffe8,    20);
+        addChar(132, 0x3fffd3,   22);
+        addChar(133, 0x3fffd4,   22);
+        addChar(134, 0x3fffd5,   22);
+        addChar(135, 0x7fffd9,   23);
+        addChar(136, 0x3fffd6,   22);
+        addChar(137, 0x7fffda,   23);
+        addChar(138, 0x7fffdb,   23);
+        addChar(139, 0x7fffdc,   23);
+        addChar(140, 0x7fffdd,   23);
+        addChar(141, 0x7fffde,   23);
+        addChar(142, 0xffffeb,   24);
+        addChar(143, 0x7fffdf,   23);
+        addChar(144, 0xffffec,   24);
+        addChar(145, 0xffffed,   24);
+        addChar(146, 0x3fffd7,   22);
+        addChar(147, 0x7fffe0,   23);
+        addChar(148, 0xffffee,   24);
+        addChar(149, 0x7fffe1,   23);
+        addChar(150, 0x7fffe2,   23);
+        addChar(151, 0x7fffe3,   23);
+        addChar(152, 0x7fffe4,   23);
+        addChar(153, 0x1fffdc,   21);
+        addChar(154, 0x3fffd8,   22);
+        addChar(155, 0x7fffe5,   23);
+        addChar(156, 0x3fffd9,   22);
+        addChar(157, 0x7fffe6,   23);
+        addChar(158, 0x7fffe7,   23);
+        addChar(159, 0xffffef,   24);
+        addChar(160, 0x3fffda,   22);
+        addChar(161, 0x1fffdd,   21);
+        addChar(162, 0xfffe9,    20);
+        addChar(163, 0x3fffdb,   22);
+        addChar(164, 0x3fffdc,   22);
+        addChar(165, 0x7fffe8,   23);
+        addChar(166, 0x7fffe9,   23);
+        addChar(167, 0x1fffde,   21);
+        addChar(168, 0x7fffea,   23);
+        addChar(169, 0x3fffdd,   22);
+        addChar(170, 0x3fffde,   22);
+        addChar(171, 0xfffff0,   24);
+        addChar(172, 0x1fffdf,   21);
+        addChar(173, 0x3fffdf,   22);
+        addChar(174, 0x7fffeb,   23);
+        addChar(175, 0x7fffec,   23);
+        addChar(176, 0x1fffe0,   21);
+        addChar(177, 0x1fffe1,   21);
+        addChar(178, 0x3fffe0,   22);
+        addChar(179, 0x1fffe2,   21);
+        addChar(180, 0x7fffed,   23);
+        addChar(181, 0x3fffe1,   22);
+        addChar(182, 0x7fffee,   23);
+        addChar(183, 0x7fffef,   23);
+        addChar(184, 0xfffea,    20);
+        addChar(185, 0x3fffe2,   22);
+        addChar(186, 0x3fffe3,   22);
+        addChar(187, 0x3fffe4,   22);
+        addChar(188, 0x7ffff0,   23);
+        addChar(189, 0x3fffe5,   22);
+        addChar(190, 0x3fffe6,   22);
+        addChar(191, 0x7ffff1,   23);
+        addChar(192, 0x3ffffe0,  26);
+        addChar(193, 0x3ffffe1,  26);
+        addChar(194, 0xfffeb,    20);
+        addChar(195, 0x7fff1,    19);
+        addChar(196, 0x3fffe7,   22);
+        addChar(197, 0x7ffff2,   23);
+        addChar(198, 0x3fffe8,   22);
+        addChar(199, 0x1ffffec,  25);
+        addChar(200, 0x3ffffe2,  26);
+        addChar(201, 0x3ffffe3,  26);
+        addChar(202, 0x3ffffe4,  26);
+        addChar(203, 0x7ffffde,  27);
+        addChar(204, 0x7ffffdf,  27);
+        addChar(205, 0x3ffffe5,  26);
+        addChar(206, 0xfffff1,   24);
+        addChar(207, 0x1ffffed,  25);
+        addChar(208, 0x7fff2,    19);
+        addChar(209, 0x1fffe3,   21);
+        addChar(210, 0x3ffffe6,  26);
+        addChar(211, 0x7ffffe0,  27);
+        addChar(212, 0x7ffffe1,  27);
+        addChar(213, 0x3ffffe7,  26);
+        addChar(214, 0x7ffffe2,  27);
+        addChar(215, 0xfffff2,   24);
+        addChar(216, 0x1fffe4,   21);
+        addChar(217, 0x1fffe5,   21);
+        addChar(218, 0x3ffffe8,  26);
+        addChar(219, 0x3ffffe9,  26);
+        addChar(220, 0xffffffd,  28);
+        addChar(221, 0x7ffffe3,  27);
+        addChar(222, 0x7ffffe4,  27);
+        addChar(223, 0x7ffffe5,  27);
+        addChar(224, 0xfffec,    20);
+        addChar(225, 0xfffff3,   24);
+        addChar(226, 0xfffed,    20);
+        addChar(227, 0x1fffe6,   21);
+        addChar(228, 0x3fffe9,   22);
+        addChar(229, 0x1fffe7,   21);
+        addChar(230, 0x1fffe8,   21);
+        addChar(231, 0x7ffff3,   23);
+        addChar(232, 0x3fffea,   22);
+        addChar(233, 0x3fffeb,   22);
+        addChar(234, 0x1ffffee,  25);
+        addChar(235, 0x1ffffef,  25);
+        addChar(236, 0xfffff4,   24);
+        addChar(237, 0xfffff5,   24);
+        addChar(238, 0x3ffffea,  26);
+        addChar(239, 0x7ffff4,   23);
+        addChar(240, 0x3ffffeb,  26);
+        addChar(241, 0x7ffffe6,  27);
+        addChar(242, 0x3ffffec,  26);
+        addChar(243, 0x3ffffed,  26);
+        addChar(244, 0x7ffffe7,  27);
+        addChar(245, 0x7ffffe8,  27);
+        addChar(246, 0x7ffffe9,  27);
+        addChar(247, 0x7ffffea,  27);
+        addChar(248, 0x7ffffeb,  27);
+        addChar(249, 0xffffffe,  28);
+        addChar(250, 0x7ffffec,  27);
+        addChar(251, 0x7ffffed,  27);
+        addChar(252, 0x7ffffee,  27);
+        addChar(253, 0x7ffffef,  27);
+        addChar(254, 0x7fffff0,  27);
+        addChar(255, 0x3ffffee,  26);
+        addEOS (256, EOS.code,   EOS.length);
+        // @formatter:on
+    }
+
+
+    /**
+     * Calculates the number of bytes required to represent the given {@code
+     * CharSequence} with the Huffman coding.
+     *
+     * @param value
+     *         characters
+     *
+     * @return number of bytes
+     *
+     * @throws NullPointerException
+     *         if the value is null
+     */
+    public int lengthOf(CharSequence value) {
+        return lengthOf(value, 0, value.length());
+    }
+
+    /**
+     * Calculates the number of bytes required to represent a subsequence of the
+     * given {@code CharSequence} with the Huffman coding.
+     *
+     * @param value
+     *         characters
+     * @param start
+     *         the start index, inclusive
+     * @param end
+     *         the end index, exclusive
+     *
+     * @return number of bytes
+     *
+     * @throws NullPointerException
+     *         if the value is null
+     * @throws IndexOutOfBoundsException
+     *         if any invocation of {@code value.charAt(i)}, where
+     *         {@code start <= i < end} would throw an IndexOutOfBoundsException
+     */
+    public int lengthOf(CharSequence value, int start, int end) {
+        int len = 0;
+        for (int i = start; i < end; i++) {
+            char c = value.charAt(i);
+            len += INSTANCE.codeOf(c).length;
+        }
+        // Integer division with ceiling, assumption:
+        assert (len / 8 + (len % 8 != 0 ? 1 : 0)) == (len + 7) / 8 : len;
+        return (len + 7) / 8;
+    }
+
+    private void addChar(int c, int code, int bitLength) {
+        addLeaf(c, code, bitLength, false);
+        codes[c] = new Code(code, bitLength);
+    }
+
+    private void addEOS(int c, int code, int bitLength) {
+        addLeaf(c, code, bitLength, true);
+        codes[c] = new Code(code, bitLength);
+    }
+
+    private void addLeaf(int c, int code, int bitLength, boolean isEOS) {
+        if (bitLength < 1) {
+            throw new IllegalArgumentException("bitLength < 1");
+        }
+        Node curr = root;
+        for (int p = 1 << bitLength - 1; p != 0 && !curr.isLeaf(); p = p >> 1) {
+            curr.isEOSPath |= isEOS; // If it's already true, it can't become false
+            curr = curr.addChildIfAbsent(p & code);
+        }
+        curr.isEOSPath |= isEOS; // The last one needs to have this property as well
+        if (curr.isLeaf()) {
+            throw new IllegalStateException("Specified code is already taken");
+        }
+        curr.setChar((char) c);
+    }
+
+    private Code codeOf(char c) {
+        if (c > 255) {
+            throw new IllegalArgumentException("char=" + ((int) c));
+        }
+        return codes[c];
+    }
+
+    //
+    // For debugging/testing purposes
+    //
+    Node getRoot() {
+        return root;
+    }
+
+    //
+    // Guarantees:
+    //
+    //  if (isLeaf() == true) => getChar() is a legal call
+    //  if (isLeaf() == false) => getChild(i) is a legal call (though it can
+    //                                                           return null)
+    //
+    static class Node {
+
+        Node left;
+        Node right;
+        boolean isEOSPath;
+
+        boolean charIsSet;
+        char c;
+
+        Node getChild(int selector) {
+            if (isLeaf()) {
+                throw new IllegalStateException("This is a leaf node");
+            }
+            Node result = selector == 0 ? left : right;
+            if (result == null) {
+                throw new IllegalStateException(format(
+                        "Node doesn't have a child (selector=%s)", selector));
+            }
+            return result;
+        }
+
+        boolean isLeaf() {
+            return charIsSet;
+        }
+
+        char getChar() {
+            if (!isLeaf()) {
+                throw new IllegalStateException("This node is not a leaf node");
+            }
+            return c;
+        }
+
+        void setChar(char c) {
+            if (charIsSet) {
+                throw new IllegalStateException(
+                        "This node has been taken already");
+            }
+            if (left != null || right != null) {
+                throw new IllegalStateException("The node cannot be made "
+                        + "a leaf as it's already has a child");
+            }
+            this.c = c;
+            charIsSet = true;
+        }
+
+        Node addChildIfAbsent(int i) {
+            if (charIsSet) {
+                throw new IllegalStateException("The node cannot have a child "
+                        + "as it's already a leaf node");
+            }
+            Node child;
+            if (i == 0) {
+                if ((child = left) == null) {
+                    child = left = new Node();
+                }
+            } else {
+                if ((child = right) == null) {
+                    child = right = new Node();
+                }
+            }
+            return child;
+        }
+
+        @Override
+        public String toString() {
+            if (isLeaf()) {
+                if (isEOSPath) {
+                    return "EOS";
+                } else {
+                    return format("char: (%3s) '%s'", (int) c, c);
+                }
+            }
+            return "/\\";
+        }
+    }
+
+    // TODO: value-based class?
+    // FIXME: can we re-use Node instead of this class?
+    private static final class Code {
+
+        final int code;
+        final int length;
+
+        private Code(int code, int length) {
+            this.code = code;
+            this.length = length;
+        }
+
+        public int getCode() {
+            return code;
+        }
+
+        public int getLength() {
+            return length;
+        }
+
+        @Override
+        public String toString() {
+            long p = 1 << length;
+            return Long.toBinaryString(code + p).substring(1)
+                    + ", length=" + length;
+        }
+    }
+}