23 * questions. |
23 * questions. |
24 */ |
24 */ |
25 |
25 |
26 package jdk.internal.net.http.hpack; |
26 package jdk.internal.net.http.hpack; |
27 |
27 |
|
28 import jdk.internal.net.http.hpack.HPACK.BufferUpdateConsumer; |
|
29 |
28 import java.io.IOException; |
30 import java.io.IOException; |
29 import java.nio.ByteBuffer; |
31 import java.nio.ByteBuffer; |
30 import java.util.List; |
32 import java.util.List; |
31 import java.util.Objects; |
33 import java.util.Objects; |
32 |
34 |
|
35 import static jdk.internal.net.http.hpack.HPACK.bytesForBits; |
|
36 |
33 public final class QuickHuffman { |
37 public final class QuickHuffman { |
34 |
38 |
35 /* |
39 /* |
36 * Huffman codes for encoding. |
40 * Mapping of characters to their code information. Code information |
37 * |
41 * consists of a code value and a code length. Both components are packed in |
38 * EOS will never be matched, since there is no symbol for it, thus no need |
42 * a single long as follows: |
39 * to store it in the array. Thus, the length of the array is 256, not 257. |
|
40 * Code information for each character is encoded as follows: |
|
41 * |
43 * |
42 * MSB LSB |
44 * MSB LSB |
43 * +----------------+----------------+ |
45 * +----------------+----------------+ |
44 * |~code | length~| |
46 * |~code | length~| |
45 * +----------------+----------------+ |
47 * +----------------+----------------+ |
46 * |<----- 32 ----->|<----- 32 ----->| |
48 * |<----- 32 ----->|<----- 32 ----->| |
47 * |<------------- 64 -------------->| |
49 * |<------------- 64 -------------->| |
48 * |
50 * |
49 * The leftmost 32 bits hold the code value. This value is aligned left |
51 * The leftmost 32 bits hold the code value. This value is aligned left (or |
50 * (or MSB). The rightmost 32 bits hold the length of the code value. |
52 * MSB). The rightmost 32 bits hold the code length. This length is aligned |
51 * This length is aligned right (or LSB). |
53 * right (or LSB). Such structure is possible thanks to codes being up to 30 |
|
54 * bits long and their lengths being up to 5 bits long (length = 5..30). |
|
55 * This allows both components to fit into long and, thus, better locality. |
|
56 * |
|
57 * Input strings never contain EOS. Thus there's no need to provide EOS |
|
58 * mapping. Hence, the length of the array is 256, not 257. |
52 */ |
59 */ |
53 private static final long[] codes = new long[256]; |
60 private static final long[] codes = new long[256]; |
54 |
61 |
55 private static long codeValueOf(char c) { |
62 private static long codeValueOf(char c) { |
56 return codes[c] & 0xffffffff00000000L; |
63 return codes[c] & 0xffffffff00000000L; |
60 return codes[c] & 0x00000000ffffffffL; |
67 return codes[c] & 0x00000000ffffffffL; |
61 } |
68 } |
62 |
69 |
63 private static final int EOS_LENGTH = 30; |
70 private static final int EOS_LENGTH = 30; |
64 private static final int EOS_LSB = 0x3fffffff; |
71 private static final int EOS_LSB = 0x3fffffff; |
65 private static final long EOS_MSB = EOS_LSB << (64 - EOS_LENGTH); |
72 // EOS_LSB is casted to long before the shift, to allow long shift (6 bits) |
|
73 // instead of int shift (5 bits): |
|
74 private static final long EOS_MSB = ((long) EOS_LSB) << (64 - EOS_LENGTH); |
66 |
75 |
67 /* |
76 /* |
68 * Huffman codes for decoding. |
77 * Huffman trie. |
69 * |
78 * |
70 * Root node contains 257 descendant nodes, including EOS. |
79 * The root node leads to 257 descendant leaf nodes, of which 256 nodes |
|
80 * correspond to characters and 1 node correspond to EOS. |
71 */ |
81 */ |
72 private static final Node root; |
82 private static final Node root = buildTrie(); |
73 |
83 |
74 static { |
84 private static Node buildTrie() { |
75 TemporaryNode tmpRoot = new TemporaryNode(); |
85 TemporaryNode tmpRoot = new TemporaryNode(); |
76 addChar(tmpRoot, 0, 0x1ff8, 13); |
86 addChar(tmpRoot, 0, 0x1ff8, 13); |
77 addChar(tmpRoot, 1, 0x7fffd8, 23); |
87 addChar(tmpRoot, 1, 0x7fffd8, 23); |
78 addChar(tmpRoot, 2, 0xfffffe2, 28); |
88 addChar(tmpRoot, 2, 0xfffffe2, 28); |
79 addChar(tmpRoot, 3, 0xfffffe3, 28); |
89 addChar(tmpRoot, 3, 0xfffffe3, 28); |
331 addChar(tmpRoot, 255, 0x3ffffee, 26); |
341 addChar(tmpRoot, 255, 0x3ffffee, 26); |
332 addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH); |
342 addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH); |
333 |
343 |
334 // The difference in performance can always be checked by not using |
344 // The difference in performance can always be checked by not using |
335 // the immutable trie: |
345 // the immutable trie: |
336 // root = tmpRoot; |
346 // return tmpRoot; |
337 root = ImmutableNode.copyOf(tmpRoot); |
347 return ImmutableNode.copyOf(tmpRoot); |
338 } |
348 } |
339 |
349 |
340 private QuickHuffman() { } |
350 private QuickHuffman() { } |
341 |
351 |
342 private static void addChar(Node root, int symbol, int code, int bitLength) |
352 private static void addChar(Node root, int symbol, int code, int bitLength) |
603 } |
613 } |
604 } |
614 } |
605 |
615 |
606 static final class Reader implements Huffman.Reader { |
616 static final class Reader implements Huffman.Reader { |
607 |
617 |
|
618 private final BufferUpdateConsumer UPDATER = |
|
619 (buf, bufLen) -> { |
|
620 buffer = buf; |
|
621 bufferLen = bufLen; |
|
622 }; |
|
623 |
608 private Node curr = root; // current position in the trie |
624 private Node curr = root; // current position in the trie |
609 private long buffer; // bits left from the previous match (aligned to the left, or MSB) |
625 private long buffer; // bits left from the previous match (aligned to the left, or MSB) |
610 private int bufferLen; // number of bits in the buffer |
626 private int bufferLen; // number of bits in the buffer |
611 private int len; // length (in bits) of path to curr |
627 private int len; // length (in bits) of path to curr |
612 private boolean done; |
628 private boolean done; |
614 @Override |
630 @Override |
615 public void read(ByteBuffer source, |
631 public void read(ByteBuffer source, |
616 Appendable destination, |
632 Appendable destination, |
617 boolean isLast) throws IOException |
633 boolean isLast) throws IOException |
618 { |
634 { |
619 read(source, destination, true, isLast); |
|
620 } |
|
621 |
|
622 @Override |
|
623 public void reset() { |
|
624 curr = root; |
|
625 len = 0; |
|
626 buffer = 0; |
|
627 bufferLen = 0; |
|
628 done = false; |
|
629 } |
|
630 |
|
631 @SuppressWarnings("fallthrough") |
|
632 void read(ByteBuffer source, |
|
633 Appendable destination, |
|
634 boolean reportEOS, /* reportEOS is exposed for tests */ |
|
635 boolean isLast) throws IOException |
|
636 { |
|
637 while (!done) { |
635 while (!done) { |
638 // read as much as possible (up to 8 bytes) |
|
639 int remaining = source.remaining(); |
636 int remaining = source.remaining(); |
640 int nBytes = Math.min((64 - bufferLen) >> 3, remaining); |
637 int nBytes = HPACK.read(source, buffer, bufferLen, UPDATER); |
641 switch (nBytes) { |
|
642 case 0: |
|
643 break; |
|
644 case 3: |
|
645 readByte(source); |
|
646 case 2: |
|
647 readByte(source); |
|
648 case 1: |
|
649 readByte(source); |
|
650 break; |
|
651 case 7: |
|
652 readByte(source); |
|
653 case 6: |
|
654 readByte(source); |
|
655 case 5: |
|
656 readByte(source); |
|
657 case 4: |
|
658 readInt(source); |
|
659 break; |
|
660 case 8: |
|
661 readLong(source); |
|
662 break; |
|
663 default: |
|
664 throw new InternalError(String.valueOf(nBytes)); |
|
665 } |
|
666 // write as much as possible |
638 // write as much as possible |
667 while (true) { |
639 while (true) { |
668 if (bufferLen < 8) { |
640 if (bufferLen < 8) { |
669 if (nBytes < remaining) { // read again |
641 if (nBytes < remaining) { // read again |
670 break; |
642 break; |
671 } else if (!isLast) { // exit the method to accept more input |
643 } else if (!isLast) { // exit the method to accept more input |
672 return; |
644 return; |
673 } else if (bufferLen > 0) { // no more data is expected, pad |
645 } else if (bufferLen > 0) { // no more data is expected, pad |
674 // (this padding may be done more than once) |
646 // (this padding may be done more than once) |
675 buffer |= ((0xff00000000000000L >>> bufferLen) |
647 buffer |= ((0xff00000000000000L >>> bufferLen) |
676 & 0xff00000000000000L); |
648 & 0xff00000000000000L); |
677 // do not update bufferLen, since all those ones are |
649 // do not update bufferLen, since all those ones are |
678 // synthetic and are appended merely for the sake of |
650 // synthetic and are appended merely for the sake of |
679 // lookup |
651 // lookup |
690 if (node.isLeaf()) { |
662 if (node.isLeaf()) { |
691 if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding) |
663 if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding) |
692 throw new IOException( |
664 throw new IOException( |
693 "Not a EOS prefix padding or unexpected end of data"); |
665 "Not a EOS prefix padding or unexpected end of data"); |
694 } |
666 } |
695 if (reportEOS && node.isEOSPath()) { |
667 if (node.isEOSPath()) { |
696 throw new IOException("Encountered EOS"); |
668 throw new IOException("Encountered EOS"); |
697 } |
669 } |
698 destination.append(node.getSymbol()); |
670 destination.append(node.getSymbol()); |
699 curr = root; |
671 curr = root; |
700 len = 0; |
672 len = 0; |
713 + "or unexpected end of data"); |
685 + "or unexpected end of data"); |
714 } |
686 } |
715 } |
687 } |
716 } |
688 } |
717 |
689 |
718 private void readLong(ByteBuffer source) { |
690 @Override |
719 buffer = source.getLong(); |
691 public void reset() { |
720 bufferLen = 64; |
692 curr = root; |
721 } |
693 len = 0; |
722 |
694 buffer = 0; |
723 private void readInt(ByteBuffer source) { |
695 bufferLen = 0; |
724 long b; |
696 done = false; |
725 b = source.getInt() & 0x00000000ffffffffL; |
|
726 buffer |= (b << (32 - bufferLen)); |
|
727 bufferLen += 32; |
|
728 } |
|
729 |
|
730 private void readByte(ByteBuffer source) { |
|
731 long b = source.get() & 0x00000000000000ffL; |
|
732 buffer |= (b << (56 - bufferLen)); |
|
733 bufferLen += 8; |
|
734 } |
697 } |
735 } |
698 } |
736 |
699 |
737 static final class Writer implements Huffman.Writer { |
700 static final class Writer implements Huffman.Writer { |
|
701 |
|
702 private final BufferUpdateConsumer UPDATER = |
|
703 (buf, bufLen) -> { |
|
704 buffer = buf; |
|
705 bufferLen = bufLen; |
|
706 }; |
738 |
707 |
739 private CharSequence source; |
708 private CharSequence source; |
740 private boolean padded; |
709 private boolean padded; |
741 private int pos; |
710 private int pos; |
742 private int end; |
711 private int end; |
750 this.end = end; |
719 this.end = end; |
751 this.source = input; |
720 this.source = input; |
752 return this; |
721 return this; |
753 } |
722 } |
754 |
723 |
755 @SuppressWarnings("fallthrough") |
|
756 @Override |
724 @Override |
757 public boolean write(ByteBuffer destination) { |
725 public boolean write(ByteBuffer destination) { |
758 while (true) { |
726 while (true) { |
759 while (bufferLen < 32 && pos < end) { |
727 while (true) { // stuff codes into long |
760 char c = source.charAt(pos++); |
728 if (pos >= end) { |
761 buffer |= (codeValueOf(c) >>> bufferLen); // append |
729 break; |
762 bufferLen += codeLengthOf(c); |
730 } |
|
731 char c = source.charAt(pos); |
|
732 if (c > 255) { |
|
733 throw new IllegalArgumentException("char=" + ((int) c)); |
|
734 } |
|
735 long len = codeLengthOf(c); |
|
736 if (bufferLen + len <= 64) { |
|
737 buffer |= (codeValueOf(c) >>> bufferLen); // append |
|
738 bufferLen += len; |
|
739 pos++; |
|
740 } else { |
|
741 break; |
|
742 } |
763 } |
743 } |
764 if (bufferLen == 0) { |
744 if (bufferLen == 0) { |
765 return true; |
745 return true; |
766 } |
746 } |
767 if (pos >= end && !padded) { // no more chars, pad |
747 if (pos >= end && !padded) { // no more input chars are expected, pad |
768 padded = true; |
748 padded = true; |
769 buffer |= (EOS_MSB >>> bufferLen); |
749 // A long shift to 64 will result in the same long, not |
770 bufferLen = bytesForBits(bufferLen) << 3; |
750 // necessarily 0L. In which case padding will be performed |
|
751 // incorrectly. If bufferLen is equal to 64, the shift (and |
|
752 // padding) in not performed. |
|
753 // (see https://docs.oracle.com/javase/specs/jls/se10/html/jls-15.html#jls-15.19) |
|
754 if (bufferLen != 64) { |
|
755 buffer |= (EOS_MSB >>> bufferLen); |
|
756 bufferLen = bytesForBits(bufferLen) << 3; |
|
757 } |
771 } |
758 } |
772 // The number of bytes that can be written at once |
759 int nBytes = HPACK.write(buffer, bufferLen, UPDATER, destination); |
773 // (calculating in bytes, not bits, since |
760 if (nBytes == 0) { |
774 // destination.remaining() * 8 might overflow) |
761 return false; |
775 |
|
776 int nBytes = Math.min(bytesForBits(bufferLen), destination.remaining()); // ceil? |
|
777 switch (nBytes) { |
|
778 case 0: |
|
779 return false; |
|
780 case 1: |
|
781 case 2: |
|
782 case 3: |
|
783 destination.put((byte) (buffer >>> 56)); |
|
784 buffer <<= 8; |
|
785 bufferLen -= 8; |
|
786 break; |
|
787 default: |
|
788 destination.putInt((int) (buffer >>> 32)); |
|
789 buffer <<= 32; |
|
790 bufferLen -= 32; |
|
791 break; |
|
792 } |
762 } |
793 } |
763 } |
794 } |
764 } |
795 |
765 |
796 @Override |
766 @Override |