src/java.net.http/share/classes/jdk/internal/net/http/hpack/Huffman.java
branchhttp-client-branch
changeset 56092 fd85b2bf2b0d
parent 56089 42208b2f224e
child 56451 9585061fdb04
equal deleted inserted replaced
56091:aedd6133e7a0 56092:fd85b2bf2b0d
       
     1 /*
       
     2  * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 package jdk.internal.net.http.hpack;
       
    26 
       
    27 import java.io.IOException;
       
    28 import java.nio.ByteBuffer;
       
    29 
       
    30 import static java.lang.String.format;
       
    31 
       
    32 /**
       
    33  * Huffman coding table.
       
    34  *
       
    35  * <p> Instances of this class are safe for use by multiple threads.
       
    36  *
       
    37  * @since 9
       
    38  */
       
    39 public final class Huffman {
       
    40 
       
    41     // TODO: check if reset is done in both reader and writer
       
    42 
       
    43     static final class Reader {
       
    44 
       
    45         private Node curr; // position in the trie
       
    46         private int len;   // length of the path from the root to 'curr'
       
    47         private int p;     // byte probe
       
    48 
       
    49         {
       
    50             reset();
       
    51         }
       
    52 
       
    53         public void read(ByteBuffer source,
       
    54                          Appendable destination,
       
    55                          boolean isLast) throws IOException {
       
    56             read(source, destination, true, isLast);
       
    57         }
       
    58 
       
    59         // Takes 'isLast' rather than returns whether the reading is done or
       
    60         // not, for more informative exceptions.
       
    61         void read(ByteBuffer source,
       
    62                   Appendable destination,
       
    63                   boolean reportEOS, /* reportEOS is exposed for tests */
       
    64                   boolean isLast) throws IOException {
       
    65             Node c = curr;
       
    66             int l = len;
       
    67             /*
       
    68                Since ByteBuffer is itself stateful, its position is
       
    69                remembered here NOT as a part of Reader's state,
       
    70                but to set it back in the case of a failure
       
    71              */
       
    72             int pos = source.position();
       
    73 
       
    74             while (source.hasRemaining()) {
       
    75                 int d = source.get();
       
    76                 for (; p != 0; p >>= 1) {
       
    77                     c = c.getChild(p & d);
       
    78                     l++;
       
    79                     if (c.isLeaf()) {
       
    80                         if (reportEOS && c.isEOSPath) {
       
    81                             throw new IOException("Encountered EOS");
       
    82                         }
       
    83                         char ch;
       
    84                         try {
       
    85                             ch = c.getChar();
       
    86                         } catch (IllegalStateException e) {
       
    87                             source.position(pos); // do we need this?
       
    88                             throw new IOException(e);
       
    89                         }
       
    90                         try {
       
    91                             destination.append(ch);
       
    92                         } catch (IOException e) {
       
    93                             source.position(pos); // do we need this?
       
    94                             throw e;
       
    95                         }
       
    96                         c = INSTANCE.root;
       
    97                         l = 0;
       
    98                     }
       
    99                     curr = c;
       
   100                     len = l;
       
   101                 }
       
   102                 resetProbe();
       
   103                 pos++;
       
   104             }
       
   105             if (!isLast) {
       
   106                 return; // it's too early to jump to any conclusions, let's wait
       
   107             }
       
   108             if (c.isLeaf()) {
       
   109                 return; // it's perfectly ok, no extra padding bits
       
   110             }
       
   111             if (c.isEOSPath && len <= 7) {
       
   112                 return; // it's ok, some extra padding bits
       
   113             }
       
   114             if (c.isEOSPath) {
       
   115                 throw new IOException(
       
   116                         "Padding is too long (len=" + len + ") " +
       
   117                                 "or unexpected end of data");
       
   118             }
       
   119             throw new IOException(
       
   120                     "Not a EOS prefix padding or unexpected end of data");
       
   121         }
       
   122 
       
   123         public void reset() {
       
   124             curr = INSTANCE.root;
       
   125             len = 0;
       
   126             resetProbe();
       
   127         }
       
   128 
       
   129         private void resetProbe() {
       
   130             p = 0x80;
       
   131         }
       
   132     }
       
   133 
       
   134     static final class Writer {
       
   135 
       
   136         private int pos;       // position in 'source'
       
   137         private int avail = 8; // number of least significant bits available in 'curr'
       
   138         private int curr;      // next byte to put to the destination
       
   139         private int rem;       // number of least significant bits in 'code' yet to be processed
       
   140         private int code;      // current code being written
       
   141 
       
   142         private CharSequence source;
       
   143         private int end;
       
   144 
       
   145         public Writer from(CharSequence input, int start, int end) {
       
   146             if (start < 0 || end < 0 || end > input.length() || start > end) {
       
   147                 throw new IndexOutOfBoundsException(
       
   148                         String.format("input.length()=%s, start=%s, end=%s",
       
   149                                 input.length(), start, end));
       
   150             }
       
   151             pos = start;
       
   152             this.end = end;
       
   153             this.source = input;
       
   154             return this;
       
   155         }
       
   156 
       
   157         public boolean write(ByteBuffer destination) {
       
   158             for (; pos < end; pos++) {
       
   159                 if (rem == 0) {
       
   160                     Code desc = INSTANCE.codeOf(source.charAt(pos));
       
   161                     rem = desc.length;
       
   162                     code = desc.code;
       
   163                 }
       
   164                 while (rem > 0) {
       
   165                     if (rem < avail) {
       
   166                         curr |= (code << (avail - rem));
       
   167                         avail -= rem;
       
   168                         rem = 0;
       
   169                     } else {
       
   170                         int c = (curr | (code >>> (rem - avail)));
       
   171                         if (destination.hasRemaining()) {
       
   172                             destination.put((byte) c);
       
   173                         } else {
       
   174                             return false;
       
   175                         }
       
   176                         curr = c;
       
   177                         code <<= (32 - rem + avail);  // throw written bits off the cliff (is this Sparta?)
       
   178                         code >>>= (32 - rem + avail); // return to the position
       
   179                         rem -= avail;
       
   180                         curr = 0;
       
   181                         avail = 8;
       
   182                     }
       
   183                 }
       
   184             }
       
   185 
       
   186             if (avail < 8) { // have to pad
       
   187                 if (destination.hasRemaining()) {
       
   188                     destination.put((byte) (curr | (INSTANCE.EOS.code >>> (INSTANCE.EOS.length - avail))));
       
   189                     avail = 8;
       
   190                 } else {
       
   191                     return false;
       
   192                 }
       
   193             }
       
   194 
       
   195             return true;
       
   196         }
       
   197 
       
   198         public Writer reset() {
       
   199             source = null;
       
   200             end = -1;
       
   201             pos = -1;
       
   202             avail = 8;
       
   203             curr = 0;
       
   204             code = 0;
       
   205             return this;
       
   206         }
       
   207     }
       
   208 
       
   209     /**
       
   210      * Shared instance.
       
   211      */
       
   212     public static final Huffman INSTANCE = new Huffman();
       
   213 
       
   214     private final Code EOS = new Code(0x3fffffff, 30);
       
   215     private final Code[] codes = new Code[257];
       
   216     private final Node root = new Node() {
       
   217         @Override
       
   218         public String toString() { return "root"; }
       
   219     };
       
   220 
       
   221     // TODO: consider builder and immutable trie
       
   222     private Huffman() {
       
   223         // @formatter:off
       
   224         addChar(0,   0x1ff8,     13);
       
   225         addChar(1,   0x7fffd8,   23);
       
   226         addChar(2,   0xfffffe2,  28);
       
   227         addChar(3,   0xfffffe3,  28);
       
   228         addChar(4,   0xfffffe4,  28);
       
   229         addChar(5,   0xfffffe5,  28);
       
   230         addChar(6,   0xfffffe6,  28);
       
   231         addChar(7,   0xfffffe7,  28);
       
   232         addChar(8,   0xfffffe8,  28);
       
   233         addChar(9,   0xffffea,   24);
       
   234         addChar(10,  0x3ffffffc, 30);
       
   235         addChar(11,  0xfffffe9,  28);
       
   236         addChar(12,  0xfffffea,  28);
       
   237         addChar(13,  0x3ffffffd, 30);
       
   238         addChar(14,  0xfffffeb,  28);
       
   239         addChar(15,  0xfffffec,  28);
       
   240         addChar(16,  0xfffffed,  28);
       
   241         addChar(17,  0xfffffee,  28);
       
   242         addChar(18,  0xfffffef,  28);
       
   243         addChar(19,  0xffffff0,  28);
       
   244         addChar(20,  0xffffff1,  28);
       
   245         addChar(21,  0xffffff2,  28);
       
   246         addChar(22,  0x3ffffffe, 30);
       
   247         addChar(23,  0xffffff3,  28);
       
   248         addChar(24,  0xffffff4,  28);
       
   249         addChar(25,  0xffffff5,  28);
       
   250         addChar(26,  0xffffff6,  28);
       
   251         addChar(27,  0xffffff7,  28);
       
   252         addChar(28,  0xffffff8,  28);
       
   253         addChar(29,  0xffffff9,  28);
       
   254         addChar(30,  0xffffffa,  28);
       
   255         addChar(31,  0xffffffb,  28);
       
   256         addChar(32,  0x14,        6);
       
   257         addChar(33,  0x3f8,      10);
       
   258         addChar(34,  0x3f9,      10);
       
   259         addChar(35,  0xffa,      12);
       
   260         addChar(36,  0x1ff9,     13);
       
   261         addChar(37,  0x15,        6);
       
   262         addChar(38,  0xf8,        8);
       
   263         addChar(39,  0x7fa,      11);
       
   264         addChar(40,  0x3fa,      10);
       
   265         addChar(41,  0x3fb,      10);
       
   266         addChar(42,  0xf9,        8);
       
   267         addChar(43,  0x7fb,      11);
       
   268         addChar(44,  0xfa,        8);
       
   269         addChar(45,  0x16,        6);
       
   270         addChar(46,  0x17,        6);
       
   271         addChar(47,  0x18,        6);
       
   272         addChar(48,  0x0,         5);
       
   273         addChar(49,  0x1,         5);
       
   274         addChar(50,  0x2,         5);
       
   275         addChar(51,  0x19,        6);
       
   276         addChar(52,  0x1a,        6);
       
   277         addChar(53,  0x1b,        6);
       
   278         addChar(54,  0x1c,        6);
       
   279         addChar(55,  0x1d,        6);
       
   280         addChar(56,  0x1e,        6);
       
   281         addChar(57,  0x1f,        6);
       
   282         addChar(58,  0x5c,        7);
       
   283         addChar(59,  0xfb,        8);
       
   284         addChar(60,  0x7ffc,     15);
       
   285         addChar(61,  0x20,        6);
       
   286         addChar(62,  0xffb,      12);
       
   287         addChar(63,  0x3fc,      10);
       
   288         addChar(64,  0x1ffa,     13);
       
   289         addChar(65,  0x21,        6);
       
   290         addChar(66,  0x5d,        7);
       
   291         addChar(67,  0x5e,        7);
       
   292         addChar(68,  0x5f,        7);
       
   293         addChar(69,  0x60,        7);
       
   294         addChar(70,  0x61,        7);
       
   295         addChar(71,  0x62,        7);
       
   296         addChar(72,  0x63,        7);
       
   297         addChar(73,  0x64,        7);
       
   298         addChar(74,  0x65,        7);
       
   299         addChar(75,  0x66,        7);
       
   300         addChar(76,  0x67,        7);
       
   301         addChar(77,  0x68,        7);
       
   302         addChar(78,  0x69,        7);
       
   303         addChar(79,  0x6a,        7);
       
   304         addChar(80,  0x6b,        7);
       
   305         addChar(81,  0x6c,        7);
       
   306         addChar(82,  0x6d,        7);
       
   307         addChar(83,  0x6e,        7);
       
   308         addChar(84,  0x6f,        7);
       
   309         addChar(85,  0x70,        7);
       
   310         addChar(86,  0x71,        7);
       
   311         addChar(87,  0x72,        7);
       
   312         addChar(88,  0xfc,        8);
       
   313         addChar(89,  0x73,        7);
       
   314         addChar(90,  0xfd,        8);
       
   315         addChar(91,  0x1ffb,     13);
       
   316         addChar(92,  0x7fff0,    19);
       
   317         addChar(93,  0x1ffc,     13);
       
   318         addChar(94,  0x3ffc,     14);
       
   319         addChar(95,  0x22,        6);
       
   320         addChar(96,  0x7ffd,     15);
       
   321         addChar(97,  0x3,         5);
       
   322         addChar(98,  0x23,        6);
       
   323         addChar(99,  0x4,         5);
       
   324         addChar(100, 0x24,        6);
       
   325         addChar(101, 0x5,         5);
       
   326         addChar(102, 0x25,        6);
       
   327         addChar(103, 0x26,        6);
       
   328         addChar(104, 0x27,        6);
       
   329         addChar(105, 0x6,         5);
       
   330         addChar(106, 0x74,        7);
       
   331         addChar(107, 0x75,        7);
       
   332         addChar(108, 0x28,        6);
       
   333         addChar(109, 0x29,        6);
       
   334         addChar(110, 0x2a,        6);
       
   335         addChar(111, 0x7,         5);
       
   336         addChar(112, 0x2b,        6);
       
   337         addChar(113, 0x76,        7);
       
   338         addChar(114, 0x2c,        6);
       
   339         addChar(115, 0x8,         5);
       
   340         addChar(116, 0x9,         5);
       
   341         addChar(117, 0x2d,        6);
       
   342         addChar(118, 0x77,        7);
       
   343         addChar(119, 0x78,        7);
       
   344         addChar(120, 0x79,        7);
       
   345         addChar(121, 0x7a,        7);
       
   346         addChar(122, 0x7b,        7);
       
   347         addChar(123, 0x7ffe,     15);
       
   348         addChar(124, 0x7fc,      11);
       
   349         addChar(125, 0x3ffd,     14);
       
   350         addChar(126, 0x1ffd,     13);
       
   351         addChar(127, 0xffffffc,  28);
       
   352         addChar(128, 0xfffe6,    20);
       
   353         addChar(129, 0x3fffd2,   22);
       
   354         addChar(130, 0xfffe7,    20);
       
   355         addChar(131, 0xfffe8,    20);
       
   356         addChar(132, 0x3fffd3,   22);
       
   357         addChar(133, 0x3fffd4,   22);
       
   358         addChar(134, 0x3fffd5,   22);
       
   359         addChar(135, 0x7fffd9,   23);
       
   360         addChar(136, 0x3fffd6,   22);
       
   361         addChar(137, 0x7fffda,   23);
       
   362         addChar(138, 0x7fffdb,   23);
       
   363         addChar(139, 0x7fffdc,   23);
       
   364         addChar(140, 0x7fffdd,   23);
       
   365         addChar(141, 0x7fffde,   23);
       
   366         addChar(142, 0xffffeb,   24);
       
   367         addChar(143, 0x7fffdf,   23);
       
   368         addChar(144, 0xffffec,   24);
       
   369         addChar(145, 0xffffed,   24);
       
   370         addChar(146, 0x3fffd7,   22);
       
   371         addChar(147, 0x7fffe0,   23);
       
   372         addChar(148, 0xffffee,   24);
       
   373         addChar(149, 0x7fffe1,   23);
       
   374         addChar(150, 0x7fffe2,   23);
       
   375         addChar(151, 0x7fffe3,   23);
       
   376         addChar(152, 0x7fffe4,   23);
       
   377         addChar(153, 0x1fffdc,   21);
       
   378         addChar(154, 0x3fffd8,   22);
       
   379         addChar(155, 0x7fffe5,   23);
       
   380         addChar(156, 0x3fffd9,   22);
       
   381         addChar(157, 0x7fffe6,   23);
       
   382         addChar(158, 0x7fffe7,   23);
       
   383         addChar(159, 0xffffef,   24);
       
   384         addChar(160, 0x3fffda,   22);
       
   385         addChar(161, 0x1fffdd,   21);
       
   386         addChar(162, 0xfffe9,    20);
       
   387         addChar(163, 0x3fffdb,   22);
       
   388         addChar(164, 0x3fffdc,   22);
       
   389         addChar(165, 0x7fffe8,   23);
       
   390         addChar(166, 0x7fffe9,   23);
       
   391         addChar(167, 0x1fffde,   21);
       
   392         addChar(168, 0x7fffea,   23);
       
   393         addChar(169, 0x3fffdd,   22);
       
   394         addChar(170, 0x3fffde,   22);
       
   395         addChar(171, 0xfffff0,   24);
       
   396         addChar(172, 0x1fffdf,   21);
       
   397         addChar(173, 0x3fffdf,   22);
       
   398         addChar(174, 0x7fffeb,   23);
       
   399         addChar(175, 0x7fffec,   23);
       
   400         addChar(176, 0x1fffe0,   21);
       
   401         addChar(177, 0x1fffe1,   21);
       
   402         addChar(178, 0x3fffe0,   22);
       
   403         addChar(179, 0x1fffe2,   21);
       
   404         addChar(180, 0x7fffed,   23);
       
   405         addChar(181, 0x3fffe1,   22);
       
   406         addChar(182, 0x7fffee,   23);
       
   407         addChar(183, 0x7fffef,   23);
       
   408         addChar(184, 0xfffea,    20);
       
   409         addChar(185, 0x3fffe2,   22);
       
   410         addChar(186, 0x3fffe3,   22);
       
   411         addChar(187, 0x3fffe4,   22);
       
   412         addChar(188, 0x7ffff0,   23);
       
   413         addChar(189, 0x3fffe5,   22);
       
   414         addChar(190, 0x3fffe6,   22);
       
   415         addChar(191, 0x7ffff1,   23);
       
   416         addChar(192, 0x3ffffe0,  26);
       
   417         addChar(193, 0x3ffffe1,  26);
       
   418         addChar(194, 0xfffeb,    20);
       
   419         addChar(195, 0x7fff1,    19);
       
   420         addChar(196, 0x3fffe7,   22);
       
   421         addChar(197, 0x7ffff2,   23);
       
   422         addChar(198, 0x3fffe8,   22);
       
   423         addChar(199, 0x1ffffec,  25);
       
   424         addChar(200, 0x3ffffe2,  26);
       
   425         addChar(201, 0x3ffffe3,  26);
       
   426         addChar(202, 0x3ffffe4,  26);
       
   427         addChar(203, 0x7ffffde,  27);
       
   428         addChar(204, 0x7ffffdf,  27);
       
   429         addChar(205, 0x3ffffe5,  26);
       
   430         addChar(206, 0xfffff1,   24);
       
   431         addChar(207, 0x1ffffed,  25);
       
   432         addChar(208, 0x7fff2,    19);
       
   433         addChar(209, 0x1fffe3,   21);
       
   434         addChar(210, 0x3ffffe6,  26);
       
   435         addChar(211, 0x7ffffe0,  27);
       
   436         addChar(212, 0x7ffffe1,  27);
       
   437         addChar(213, 0x3ffffe7,  26);
       
   438         addChar(214, 0x7ffffe2,  27);
       
   439         addChar(215, 0xfffff2,   24);
       
   440         addChar(216, 0x1fffe4,   21);
       
   441         addChar(217, 0x1fffe5,   21);
       
   442         addChar(218, 0x3ffffe8,  26);
       
   443         addChar(219, 0x3ffffe9,  26);
       
   444         addChar(220, 0xffffffd,  28);
       
   445         addChar(221, 0x7ffffe3,  27);
       
   446         addChar(222, 0x7ffffe4,  27);
       
   447         addChar(223, 0x7ffffe5,  27);
       
   448         addChar(224, 0xfffec,    20);
       
   449         addChar(225, 0xfffff3,   24);
       
   450         addChar(226, 0xfffed,    20);
       
   451         addChar(227, 0x1fffe6,   21);
       
   452         addChar(228, 0x3fffe9,   22);
       
   453         addChar(229, 0x1fffe7,   21);
       
   454         addChar(230, 0x1fffe8,   21);
       
   455         addChar(231, 0x7ffff3,   23);
       
   456         addChar(232, 0x3fffea,   22);
       
   457         addChar(233, 0x3fffeb,   22);
       
   458         addChar(234, 0x1ffffee,  25);
       
   459         addChar(235, 0x1ffffef,  25);
       
   460         addChar(236, 0xfffff4,   24);
       
   461         addChar(237, 0xfffff5,   24);
       
   462         addChar(238, 0x3ffffea,  26);
       
   463         addChar(239, 0x7ffff4,   23);
       
   464         addChar(240, 0x3ffffeb,  26);
       
   465         addChar(241, 0x7ffffe6,  27);
       
   466         addChar(242, 0x3ffffec,  26);
       
   467         addChar(243, 0x3ffffed,  26);
       
   468         addChar(244, 0x7ffffe7,  27);
       
   469         addChar(245, 0x7ffffe8,  27);
       
   470         addChar(246, 0x7ffffe9,  27);
       
   471         addChar(247, 0x7ffffea,  27);
       
   472         addChar(248, 0x7ffffeb,  27);
       
   473         addChar(249, 0xffffffe,  28);
       
   474         addChar(250, 0x7ffffec,  27);
       
   475         addChar(251, 0x7ffffed,  27);
       
   476         addChar(252, 0x7ffffee,  27);
       
   477         addChar(253, 0x7ffffef,  27);
       
   478         addChar(254, 0x7fffff0,  27);
       
   479         addChar(255, 0x3ffffee,  26);
       
   480         addEOS (256, EOS.code,   EOS.length);
       
   481         // @formatter:on
       
   482     }
       
   483 
       
   484 
       
   485     /**
       
   486      * Calculates the number of bytes required to represent the given {@code
       
   487      * CharSequence} with the Huffman coding.
       
   488      *
       
   489      * @param value
       
   490      *         characters
       
   491      *
       
   492      * @return number of bytes
       
   493      *
       
   494      * @throws NullPointerException
       
   495      *         if the value is null
       
   496      */
       
   497     public int lengthOf(CharSequence value) {
       
   498         return lengthOf(value, 0, value.length());
       
   499     }
       
   500 
       
   501     /**
       
   502      * Calculates the number of bytes required to represent a subsequence of the
       
   503      * given {@code CharSequence} with the Huffman coding.
       
   504      *
       
   505      * @param value
       
   506      *         characters
       
   507      * @param start
       
   508      *         the start index, inclusive
       
   509      * @param end
       
   510      *         the end index, exclusive
       
   511      *
       
   512      * @return number of bytes
       
   513      *
       
   514      * @throws NullPointerException
       
   515      *         if the value is null
       
   516      * @throws IndexOutOfBoundsException
       
   517      *         if any invocation of {@code value.charAt(i)}, where
       
   518      *         {@code start <= i < end} would throw an IndexOutOfBoundsException
       
   519      */
       
   520     public int lengthOf(CharSequence value, int start, int end) {
       
   521         int len = 0;
       
   522         for (int i = start; i < end; i++) {
       
   523             char c = value.charAt(i);
       
   524             len += INSTANCE.codeOf(c).length;
       
   525         }
       
   526         // Integer division with ceiling, assumption:
       
   527         assert (len / 8 + (len % 8 != 0 ? 1 : 0)) == (len + 7) / 8 : len;
       
   528         return (len + 7) / 8;
       
   529     }
       
   530 
       
   531     private void addChar(int c, int code, int bitLength) {
       
   532         addLeaf(c, code, bitLength, false);
       
   533         codes[c] = new Code(code, bitLength);
       
   534     }
       
   535 
       
   536     private void addEOS(int c, int code, int bitLength) {
       
   537         addLeaf(c, code, bitLength, true);
       
   538         codes[c] = new Code(code, bitLength);
       
   539     }
       
   540 
       
   541     private void addLeaf(int c, int code, int bitLength, boolean isEOS) {
       
   542         if (bitLength < 1) {
       
   543             throw new IllegalArgumentException("bitLength < 1");
       
   544         }
       
   545         Node curr = root;
       
   546         for (int p = 1 << bitLength - 1; p != 0 && !curr.isLeaf(); p = p >> 1) {
       
   547             curr.isEOSPath |= isEOS; // If it's already true, it can't become false
       
   548             curr = curr.addChildIfAbsent(p & code);
       
   549         }
       
   550         curr.isEOSPath |= isEOS; // The last one needs to have this property as well
       
   551         if (curr.isLeaf()) {
       
   552             throw new IllegalStateException("Specified code is already taken");
       
   553         }
       
   554         curr.setChar((char) c);
       
   555     }
       
   556 
       
   557     private Code codeOf(char c) {
       
   558         if (c > 255) {
       
   559             throw new IllegalArgumentException("char=" + ((int) c));
       
   560         }
       
   561         return codes[c];
       
   562     }
       
   563 
       
   564     //
       
   565     // For debugging/testing purposes
       
   566     //
       
   567     Node getRoot() {
       
   568         return root;
       
   569     }
       
   570 
       
   571     //
       
   572     // Guarantees:
       
   573     //
       
   574     //  if (isLeaf() == true) => getChar() is a legal call
       
   575     //  if (isLeaf() == false) => getChild(i) is a legal call (though it can
       
   576     //                                                           return null)
       
   577     //
       
   578     static class Node {
       
   579 
       
   580         Node left;
       
   581         Node right;
       
   582         boolean isEOSPath;
       
   583 
       
   584         boolean charIsSet;
       
   585         char c;
       
   586 
       
   587         Node getChild(int selector) {
       
   588             if (isLeaf()) {
       
   589                 throw new IllegalStateException("This is a leaf node");
       
   590             }
       
   591             Node result = selector == 0 ? left : right;
       
   592             if (result == null) {
       
   593                 throw new IllegalStateException(format(
       
   594                         "Node doesn't have a child (selector=%s)", selector));
       
   595             }
       
   596             return result;
       
   597         }
       
   598 
       
   599         boolean isLeaf() {
       
   600             return charIsSet;
       
   601         }
       
   602 
       
   603         char getChar() {
       
   604             if (!isLeaf()) {
       
   605                 throw new IllegalStateException("This node is not a leaf node");
       
   606             }
       
   607             return c;
       
   608         }
       
   609 
       
   610         void setChar(char c) {
       
   611             if (charIsSet) {
       
   612                 throw new IllegalStateException(
       
   613                         "This node has been taken already");
       
   614             }
       
   615             if (left != null || right != null) {
       
   616                 throw new IllegalStateException("The node cannot be made "
       
   617                         + "a leaf as it's already has a child");
       
   618             }
       
   619             this.c = c;
       
   620             charIsSet = true;
       
   621         }
       
   622 
       
   623         Node addChildIfAbsent(int i) {
       
   624             if (charIsSet) {
       
   625                 throw new IllegalStateException("The node cannot have a child "
       
   626                         + "as it's already a leaf node");
       
   627             }
       
   628             Node child;
       
   629             if (i == 0) {
       
   630                 if ((child = left) == null) {
       
   631                     child = left = new Node();
       
   632                 }
       
   633             } else {
       
   634                 if ((child = right) == null) {
       
   635                     child = right = new Node();
       
   636                 }
       
   637             }
       
   638             return child;
       
   639         }
       
   640 
       
   641         @Override
       
   642         public String toString() {
       
   643             if (isLeaf()) {
       
   644                 if (isEOSPath) {
       
   645                     return "EOS";
       
   646                 } else {
       
   647                     return format("char: (%3s) '%s'", (int) c, c);
       
   648                 }
       
   649             }
       
   650             return "/\\";
       
   651         }
       
   652     }
       
   653 
       
   654     // TODO: value-based class?
       
   655     // FIXME: can we re-use Node instead of this class?
       
   656     private static final class Code {
       
   657 
       
   658         final int code;
       
   659         final int length;
       
   660 
       
   661         private Code(int code, int length) {
       
   662             this.code = code;
       
   663             this.length = length;
       
   664         }
       
   665 
       
   666         public int getCode() {
       
   667             return code;
       
   668         }
       
   669 
       
   670         public int getLength() {
       
   671             return length;
       
   672         }
       
   673 
       
   674         @Override
       
   675         public String toString() {
       
   676             long p = 1 << length;
       
   677             return Long.toBinaryString(code + p).substring(1)
       
   678                     + ", length=" + length;
       
   679         }
       
   680     }
       
   681 }