src/java.net.http/share/classes/jdk/internal/net/http/hpack/QuickHuffman.java
changeset 50681 4254bed3c09d
parent 49944 4690a2871b44
child 56795 03ece2518428
equal deleted inserted replaced
50678:818a23db260c 50681:4254bed3c09d
    23  * questions.
    23  * questions.
    24  */
    24  */
    25 
    25 
    26 package jdk.internal.net.http.hpack;
    26 package jdk.internal.net.http.hpack;
    27 
    27 
       
    28 import jdk.internal.net.http.hpack.HPACK.BufferUpdateConsumer;
       
    29 
    28 import java.io.IOException;
    30 import java.io.IOException;
    29 import java.nio.ByteBuffer;
    31 import java.nio.ByteBuffer;
    30 import java.util.List;
    32 import java.util.List;
    31 import java.util.Objects;
    33 import java.util.Objects;
    32 
    34 
       
    35 import static jdk.internal.net.http.hpack.HPACK.bytesForBits;
       
    36 
    33 public final class QuickHuffman {
    37 public final class QuickHuffman {
    34 
    38 
    35     /*
    39     /*
    36      * Huffman codes for encoding.
    40      * Mapping of characters to their code information. Code information
    37      *
    41      * consists of a code value and a code length. Both components are packed in
    38      * EOS will never be matched, since there is no symbol for it, thus no need
    42      * a single long as follows:
    39      * to store it in the array. Thus, the length of the array is 256, not 257.
       
    40      * Code information for each character is encoded as follows:
       
    41      *
    43      *
    42      *     MSB                             LSB
    44      *     MSB                             LSB
    43      *     +----------------+----------------+
    45      *     +----------------+----------------+
    44      *     |~code           |         length~|
    46      *     |~code           |         length~|
    45      *     +----------------+----------------+
    47      *     +----------------+----------------+
    46      *     |<----- 32 ----->|<----- 32 ----->|
    48      *     |<----- 32 ----->|<----- 32 ----->|
    47      *     |<------------- 64 -------------->|
    49      *     |<------------- 64 -------------->|
    48      *
    50      *
    49      * The leftmost 32 bits hold the code value. This value is aligned left
    51      * The leftmost 32 bits hold the code value. This value is aligned left (or
    50      * (or MSB). The rightmost 32 bits hold the length of the code value.
    52      * MSB). The rightmost 32 bits hold the code length. This length is aligned
    51      * This length is aligned right (or LSB).
    53      * right (or LSB). Such structure is possible thanks to codes being up to 30
       
    54      * bits long and their lengths being up to 5 bits long (length = 5..30).
       
    55      * This allows both components to fit into long and, thus, better locality.
       
    56      *
       
    57      * Input strings never contain EOS. Thus there's no need to provide EOS
       
    58      * mapping. Hence, the length of the array is 256, not 257.
    52      */
    59      */
    53     private static final long[] codes = new long[256];
    60     private static final long[] codes = new long[256];
    54 
    61 
    55     private static long codeValueOf(char c) {
    62     private static long codeValueOf(char c) {
    56         return codes[c] & 0xffffffff00000000L;
    63         return codes[c] & 0xffffffff00000000L;
    60         return codes[c] & 0x00000000ffffffffL;
    67         return codes[c] & 0x00000000ffffffffL;
    61     }
    68     }
    62 
    69 
    63     private static final int EOS_LENGTH = 30;
    70     private static final int EOS_LENGTH = 30;
    64     private static final int EOS_LSB = 0x3fffffff;
    71     private static final int EOS_LSB = 0x3fffffff;
    65     private static final long EOS_MSB = EOS_LSB << (64 - EOS_LENGTH);
    72     // EOS_LSB is casted to long before the shift, to allow long shift (6 bits)
       
    73     // instead of int shift (5 bits):
       
    74     private static final long EOS_MSB = ((long) EOS_LSB) << (64 - EOS_LENGTH);
    66 
    75 
    67     /*
    76     /*
    68      * Huffman codes for decoding.
    77      * Huffman trie.
    69      *
    78      *
    70      * Root node contains 257 descendant nodes, including EOS.
    79      * The root node leads to 257 descendant leaf nodes, of which 256 nodes
       
    80      * correspond to characters and 1 node correspond to EOS.
    71      */
    81      */
    72     private static final Node root;
    82     private static final Node root = buildTrie();
    73 
    83 
    74     static {
    84     private static Node buildTrie() {
    75         TemporaryNode tmpRoot = new TemporaryNode();
    85         TemporaryNode tmpRoot = new TemporaryNode();
    76         addChar(tmpRoot,   0, 0x1ff8,     13);
    86         addChar(tmpRoot,   0, 0x1ff8,     13);
    77         addChar(tmpRoot,   1, 0x7fffd8,   23);
    87         addChar(tmpRoot,   1, 0x7fffd8,   23);
    78         addChar(tmpRoot,   2, 0xfffffe2,  28);
    88         addChar(tmpRoot,   2, 0xfffffe2,  28);
    79         addChar(tmpRoot,   3, 0xfffffe3,  28);
    89         addChar(tmpRoot,   3, 0xfffffe3,  28);
   331         addChar(tmpRoot, 255, 0x3ffffee,  26);
   341         addChar(tmpRoot, 255, 0x3ffffee,  26);
   332         addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
   342         addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
   333 
   343 
   334         // The difference in performance can always be checked by not using
   344         // The difference in performance can always be checked by not using
   335         // the immutable trie:
   345         // the immutable trie:
   336         //     root = tmpRoot;
   346         //     return tmpRoot;
   337         root = ImmutableNode.copyOf(tmpRoot);
   347         return ImmutableNode.copyOf(tmpRoot);
   338     }
   348     }
   339 
   349 
   340     private QuickHuffman() { }
   350     private QuickHuffman() { }
   341 
   351 
   342     private static void addChar(Node root, int symbol, int code, int bitLength)
   352     private static void addChar(Node root, int symbol, int code, int bitLength)
   603         }
   613         }
   604     }
   614     }
   605 
   615 
   606     static final class Reader implements Huffman.Reader {
   616     static final class Reader implements Huffman.Reader {
   607 
   617 
       
   618         private final BufferUpdateConsumer UPDATER =
       
   619                 (buf, bufLen) -> {
       
   620                     buffer = buf;
       
   621                     bufferLen = bufLen;
       
   622                 };
       
   623 
   608         private Node curr = root;  // current position in the trie
   624         private Node curr = root;  // current position in the trie
   609         private long buffer;       // bits left from the previous match (aligned to the left, or MSB)
   625         private long buffer;       // bits left from the previous match (aligned to the left, or MSB)
   610         private int bufferLen;     // number of bits in the buffer
   626         private int bufferLen;     // number of bits in the buffer
   611         private int len;           // length (in bits) of path to curr
   627         private int len;           // length (in bits) of path to curr
   612         private boolean done;
   628         private boolean done;
   614         @Override
   630         @Override
   615         public void read(ByteBuffer source,
   631         public void read(ByteBuffer source,
   616                          Appendable destination,
   632                          Appendable destination,
   617                          boolean isLast) throws IOException
   633                          boolean isLast) throws IOException
   618         {
   634         {
   619             read(source, destination, true, isLast);
       
   620         }
       
   621 
       
   622         @Override
       
   623         public void reset() {
       
   624             curr = root;
       
   625             len = 0;
       
   626             buffer = 0;
       
   627             bufferLen = 0;
       
   628             done = false;
       
   629         }
       
   630 
       
   631         @SuppressWarnings("fallthrough")
       
   632         void read(ByteBuffer source,
       
   633                   Appendable destination,
       
   634                   boolean reportEOS, /* reportEOS is exposed for tests */
       
   635                   boolean isLast) throws IOException
       
   636         {
       
   637             while (!done) {
   635             while (!done) {
   638                 // read as much as possible (up to 8 bytes)
       
   639                 int remaining = source.remaining();
   636                 int remaining = source.remaining();
   640                 int nBytes = Math.min((64 - bufferLen) >> 3, remaining);
   637                 int nBytes = HPACK.read(source, buffer, bufferLen, UPDATER);
   641                 switch (nBytes) {
       
   642                     case 0:
       
   643                         break;
       
   644                     case 3:
       
   645                         readByte(source);
       
   646                     case 2:
       
   647                         readByte(source);
       
   648                     case 1:
       
   649                         readByte(source);
       
   650                         break;
       
   651                     case 7:
       
   652                         readByte(source);
       
   653                     case 6:
       
   654                         readByte(source);
       
   655                     case 5:
       
   656                         readByte(source);
       
   657                     case 4:
       
   658                         readInt(source);
       
   659                         break;
       
   660                     case 8:
       
   661                         readLong(source);
       
   662                         break;
       
   663                     default:
       
   664                         throw new InternalError(String.valueOf(nBytes));
       
   665                 }
       
   666                 // write as much as possible
   638                 // write as much as possible
   667                 while (true) {
   639                 while (true) {
   668                     if (bufferLen < 8) {
   640                     if (bufferLen < 8) {
   669                         if (nBytes < remaining) { // read again
   641                         if (nBytes < remaining) { // read again
   670                             break;
   642                             break;
   671                         } else if (!isLast) { // exit the method to accept more input
   643                         } else if (!isLast) { // exit the method to accept more input
   672                             return;
   644                             return;
   673                         } else if (bufferLen > 0) { // no more data is expected, pad
   645                         } else if (bufferLen > 0) { // no more data is expected, pad
   674                             // (this padding may be done more than once)
   646                                                     // (this padding may be done more than once)
   675                             buffer |= ((0xff00000000000000L >>> bufferLen)
   647                             buffer |= ((0xff00000000000000L >>> bufferLen)
   676                                     & 0xff00000000000000L);
   648                                     & 0xff00000000000000L);
   677                             // do not update bufferLen, since all those ones are
   649                             // do not update bufferLen, since all those ones are
   678                             // synthetic and are appended merely for the sake of
   650                             // synthetic and are appended merely for the sake of
   679                             // lookup
   651                             // lookup
   690                     if (node.isLeaf()) {
   662                     if (node.isLeaf()) {
   691                         if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
   663                         if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
   692                             throw new IOException(
   664                             throw new IOException(
   693                                     "Not a EOS prefix padding or unexpected end of data");
   665                                     "Not a EOS prefix padding or unexpected end of data");
   694                         }
   666                         }
   695                         if (reportEOS && node.isEOSPath()) {
   667                         if (node.isEOSPath()) {
   696                             throw new IOException("Encountered EOS");
   668                             throw new IOException("Encountered EOS");
   697                         }
   669                         }
   698                         destination.append(node.getSymbol());
   670                         destination.append(node.getSymbol());
   699                         curr = root;
   671                         curr = root;
   700                         len = 0;
   672                         len = 0;
   713                                     + "or unexpected end of data");
   685                                     + "or unexpected end of data");
   714                 }
   686                 }
   715             }
   687             }
   716         }
   688         }
   717 
   689 
   718         private void readLong(ByteBuffer source) {
   690         @Override
   719             buffer = source.getLong();
   691         public void reset() {
   720             bufferLen = 64;
   692             curr = root;
   721         }
   693             len = 0;
   722 
   694             buffer = 0;
   723         private void readInt(ByteBuffer source) {
   695             bufferLen = 0;
   724             long b;
   696             done = false;
   725             b = source.getInt() & 0x00000000ffffffffL;
       
   726             buffer |= (b << (32 - bufferLen));
       
   727             bufferLen += 32;
       
   728         }
       
   729 
       
   730         private void readByte(ByteBuffer source) {
       
   731             long b = source.get() & 0x00000000000000ffL;
       
   732             buffer |= (b << (56 - bufferLen));
       
   733             bufferLen += 8;
       
   734         }
   697         }
   735     }
   698     }
   736 
   699 
   737     static final class Writer implements Huffman.Writer {
   700     static final class Writer implements Huffman.Writer {
       
   701 
       
   702         private final BufferUpdateConsumer UPDATER =
       
   703                 (buf, bufLen) -> {
       
   704                     buffer = buf;
       
   705                     bufferLen = bufLen;
       
   706                 };
   738 
   707 
   739         private CharSequence source;
   708         private CharSequence source;
   740         private boolean padded;
   709         private boolean padded;
   741         private int pos;
   710         private int pos;
   742         private int end;
   711         private int end;
   750             this.end = end;
   719             this.end = end;
   751             this.source = input;
   720             this.source = input;
   752             return this;
   721             return this;
   753         }
   722         }
   754 
   723 
   755         @SuppressWarnings("fallthrough")
       
   756         @Override
   724         @Override
   757         public boolean write(ByteBuffer destination) {
   725         public boolean write(ByteBuffer destination) {
   758             while (true) {
   726             while (true) {
   759                 while (bufferLen < 32 && pos < end) {
   727                 while (true) { // stuff codes into long
   760                     char c = source.charAt(pos++);
   728                     if (pos >= end) {
   761                     buffer |= (codeValueOf(c) >>> bufferLen); // append
   729                         break;
   762                     bufferLen += codeLengthOf(c);
   730                     }
       
   731                     char c = source.charAt(pos);
       
   732                     if (c > 255) {
       
   733                         throw new IllegalArgumentException("char=" + ((int) c));
       
   734                     }
       
   735                     long len = codeLengthOf(c);
       
   736                     if (bufferLen + len <= 64) {
       
   737                         buffer |= (codeValueOf(c) >>> bufferLen); // append
       
   738                         bufferLen += len;
       
   739                         pos++;
       
   740                     } else {
       
   741                         break;
       
   742                     }
   763                 }
   743                 }
   764                 if (bufferLen == 0) {
   744                 if (bufferLen == 0) {
   765                     return true;
   745                     return true;
   766                 }
   746                 }
   767                 if (pos >= end && !padded) { // no more chars, pad
   747                 if (pos >= end && !padded) { // no more input chars are expected, pad
   768                     padded = true;
   748                     padded = true;
   769                     buffer |= (EOS_MSB >>> bufferLen);
   749                     // A long shift to 64 will result in the same long, not
   770                     bufferLen = bytesForBits(bufferLen) << 3;
   750                     // necessarily 0L. In which case padding will be performed
       
   751                     // incorrectly. If bufferLen is equal to 64, the shift (and
       
   752                     // padding) in not performed.
       
   753                     // (see https://docs.oracle.com/javase/specs/jls/se10/html/jls-15.html#jls-15.19)
       
   754                     if (bufferLen != 64) {
       
   755                         buffer |= (EOS_MSB >>> bufferLen);
       
   756                         bufferLen = bytesForBits(bufferLen) << 3;
       
   757                     }
   771                 }
   758                 }
   772                 // The number of bytes that can be written at once
   759                 int nBytes = HPACK.write(buffer, bufferLen, UPDATER, destination);
   773                 // (calculating in bytes, not bits, since
   760                 if (nBytes == 0) {
   774                 //  destination.remaining() * 8 might overflow)
   761                     return false;
   775 
       
   776                 int nBytes = Math.min(bytesForBits(bufferLen), destination.remaining()); // ceil?
       
   777                 switch (nBytes) {
       
   778                     case 0:
       
   779                         return false;
       
   780                     case 1:
       
   781                     case 2:
       
   782                     case 3:
       
   783                         destination.put((byte) (buffer >>> 56));
       
   784                         buffer <<= 8;
       
   785                         bufferLen -= 8;
       
   786                         break;
       
   787                     default:
       
   788                         destination.putInt((int) (buffer >>> 32));
       
   789                         buffer <<= 32;
       
   790                         bufferLen -= 32;
       
   791                         break;
       
   792                 }
   762                 }
   793             }
   763             }
   794         }
   764         }
   795 
   765 
   796         @Override
   766         @Override
   812                 len += codeLengthOf(c);
   782                 len += codeLengthOf(c);
   813             }
   783             }
   814             return bytesForBits(len);
   784             return bytesForBits(len);
   815         }
   785         }
   816     }
   786     }
   817 
       
   818     /*
       
   819      * Returns the number of bytes the given number of bits constitute.
       
   820      */
       
   821     private static int bytesForBits(int n) {
       
   822         assert (n / 8 + (n % 8 != 0 ? 1 : 0)) == (n + 7) / 8
       
   823                 && (n + 7) / 8 == ((n + 7) >> 3) : n;
       
   824         return (n + 7) >> 3;
       
   825     }
       
   826 }
   787 }