49944
|
1 |
/*
|
|
2 |
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
|
24 |
*/
|
|
25 |
|
|
26 |
package jdk.internal.net.http.hpack;
|
|
27 |
|
50681
|
28 |
import jdk.internal.net.http.hpack.HPACK.BufferUpdateConsumer;
|
|
29 |
|
49944
|
30 |
import java.io.IOException;
|
|
31 |
import java.nio.ByteBuffer;
|
|
32 |
import java.util.List;
|
|
33 |
import java.util.Objects;
|
|
34 |
|
50681
|
35 |
import static jdk.internal.net.http.hpack.HPACK.bytesForBits;
|
|
36 |
|
49944
|
37 |
public final class QuickHuffman {
|
|
38 |
|
|
39 |
/*
|
50681
|
40 |
* Mapping of characters to their code information. Code information
|
|
41 |
* consists of a code value and a code length. Both components are packed in
|
|
42 |
* a single long as follows:
|
49944
|
43 |
*
|
|
44 |
* MSB LSB
|
|
45 |
* +----------------+----------------+
|
|
46 |
* |~code | length~|
|
|
47 |
* +----------------+----------------+
|
|
48 |
* |<----- 32 ----->|<----- 32 ----->|
|
|
49 |
* |<------------- 64 -------------->|
|
|
50 |
*
|
50681
|
51 |
* The leftmost 32 bits hold the code value. This value is aligned left (or
|
|
52 |
* MSB). The rightmost 32 bits hold the code length. This length is aligned
|
|
53 |
* right (or LSB). Such structure is possible thanks to codes being up to 30
|
|
54 |
* bits long and their lengths being up to 5 bits long (length = 5..30).
|
|
55 |
* This allows both components to fit into long and, thus, better locality.
|
|
56 |
*
|
|
57 |
* Input strings never contain EOS. Thus there's no need to provide EOS
|
|
58 |
* mapping. Hence, the length of the array is 256, not 257.
|
49944
|
59 |
*/
|
|
60 |
private static final long[] codes = new long[256];
|
|
61 |
|
|
62 |
private static long codeValueOf(char c) {
|
|
63 |
return codes[c] & 0xffffffff00000000L;
|
|
64 |
}
|
|
65 |
|
|
66 |
private static long codeLengthOf(char c) {
|
|
67 |
return codes[c] & 0x00000000ffffffffL;
|
|
68 |
}
|
|
69 |
|
|
70 |
private static final int EOS_LENGTH = 30;
|
|
71 |
private static final int EOS_LSB = 0x3fffffff;
|
50681
|
72 |
// EOS_LSB is casted to long before the shift, to allow long shift (6 bits)
|
|
73 |
// instead of int shift (5 bits):
|
|
74 |
private static final long EOS_MSB = ((long) EOS_LSB) << (64 - EOS_LENGTH);
|
49944
|
75 |
|
|
76 |
/*
|
50681
|
77 |
* Huffman trie.
|
49944
|
78 |
*
|
50681
|
79 |
* The root node leads to 257 descendant leaf nodes, of which 256 nodes
|
|
80 |
* correspond to characters and 1 node correspond to EOS.
|
49944
|
81 |
*/
|
50681
|
82 |
private static final Node root = buildTrie();
|
49944
|
83 |
|
50681
|
84 |
private static Node buildTrie() {
|
49944
|
85 |
TemporaryNode tmpRoot = new TemporaryNode();
|
|
86 |
addChar(tmpRoot, 0, 0x1ff8, 13);
|
|
87 |
addChar(tmpRoot, 1, 0x7fffd8, 23);
|
|
88 |
addChar(tmpRoot, 2, 0xfffffe2, 28);
|
|
89 |
addChar(tmpRoot, 3, 0xfffffe3, 28);
|
|
90 |
addChar(tmpRoot, 4, 0xfffffe4, 28);
|
|
91 |
addChar(tmpRoot, 5, 0xfffffe5, 28);
|
|
92 |
addChar(tmpRoot, 6, 0xfffffe6, 28);
|
|
93 |
addChar(tmpRoot, 7, 0xfffffe7, 28);
|
|
94 |
addChar(tmpRoot, 8, 0xfffffe8, 28);
|
|
95 |
addChar(tmpRoot, 9, 0xffffea, 24);
|
|
96 |
addChar(tmpRoot, 10, 0x3ffffffc, 30);
|
|
97 |
addChar(tmpRoot, 11, 0xfffffe9, 28);
|
|
98 |
addChar(tmpRoot, 12, 0xfffffea, 28);
|
|
99 |
addChar(tmpRoot, 13, 0x3ffffffd, 30);
|
|
100 |
addChar(tmpRoot, 14, 0xfffffeb, 28);
|
|
101 |
addChar(tmpRoot, 15, 0xfffffec, 28);
|
|
102 |
addChar(tmpRoot, 16, 0xfffffed, 28);
|
|
103 |
addChar(tmpRoot, 17, 0xfffffee, 28);
|
|
104 |
addChar(tmpRoot, 18, 0xfffffef, 28);
|
|
105 |
addChar(tmpRoot, 19, 0xffffff0, 28);
|
|
106 |
addChar(tmpRoot, 20, 0xffffff1, 28);
|
|
107 |
addChar(tmpRoot, 21, 0xffffff2, 28);
|
|
108 |
addChar(tmpRoot, 22, 0x3ffffffe, 30);
|
|
109 |
addChar(tmpRoot, 23, 0xffffff3, 28);
|
|
110 |
addChar(tmpRoot, 24, 0xffffff4, 28);
|
|
111 |
addChar(tmpRoot, 25, 0xffffff5, 28);
|
|
112 |
addChar(tmpRoot, 26, 0xffffff6, 28);
|
|
113 |
addChar(tmpRoot, 27, 0xffffff7, 28);
|
|
114 |
addChar(tmpRoot, 28, 0xffffff8, 28);
|
|
115 |
addChar(tmpRoot, 29, 0xffffff9, 28);
|
|
116 |
addChar(tmpRoot, 30, 0xffffffa, 28);
|
|
117 |
addChar(tmpRoot, 31, 0xffffffb, 28);
|
|
118 |
addChar(tmpRoot, 32, 0x14, 6);
|
|
119 |
addChar(tmpRoot, 33, 0x3f8, 10);
|
|
120 |
addChar(tmpRoot, 34, 0x3f9, 10);
|
|
121 |
addChar(tmpRoot, 35, 0xffa, 12);
|
|
122 |
addChar(tmpRoot, 36, 0x1ff9, 13);
|
|
123 |
addChar(tmpRoot, 37, 0x15, 6);
|
|
124 |
addChar(tmpRoot, 38, 0xf8, 8);
|
|
125 |
addChar(tmpRoot, 39, 0x7fa, 11);
|
|
126 |
addChar(tmpRoot, 40, 0x3fa, 10);
|
|
127 |
addChar(tmpRoot, 41, 0x3fb, 10);
|
|
128 |
addChar(tmpRoot, 42, 0xf9, 8);
|
|
129 |
addChar(tmpRoot, 43, 0x7fb, 11);
|
|
130 |
addChar(tmpRoot, 44, 0xfa, 8);
|
|
131 |
addChar(tmpRoot, 45, 0x16, 6);
|
|
132 |
addChar(tmpRoot, 46, 0x17, 6);
|
|
133 |
addChar(tmpRoot, 47, 0x18, 6);
|
|
134 |
addChar(tmpRoot, 48, 0x0, 5);
|
|
135 |
addChar(tmpRoot, 49, 0x1, 5);
|
|
136 |
addChar(tmpRoot, 50, 0x2, 5);
|
|
137 |
addChar(tmpRoot, 51, 0x19, 6);
|
|
138 |
addChar(tmpRoot, 52, 0x1a, 6);
|
|
139 |
addChar(tmpRoot, 53, 0x1b, 6);
|
|
140 |
addChar(tmpRoot, 54, 0x1c, 6);
|
|
141 |
addChar(tmpRoot, 55, 0x1d, 6);
|
|
142 |
addChar(tmpRoot, 56, 0x1e, 6);
|
|
143 |
addChar(tmpRoot, 57, 0x1f, 6);
|
|
144 |
addChar(tmpRoot, 58, 0x5c, 7);
|
|
145 |
addChar(tmpRoot, 59, 0xfb, 8);
|
|
146 |
addChar(tmpRoot, 60, 0x7ffc, 15);
|
|
147 |
addChar(tmpRoot, 61, 0x20, 6);
|
|
148 |
addChar(tmpRoot, 62, 0xffb, 12);
|
|
149 |
addChar(tmpRoot, 63, 0x3fc, 10);
|
|
150 |
addChar(tmpRoot, 64, 0x1ffa, 13);
|
|
151 |
addChar(tmpRoot, 65, 0x21, 6);
|
|
152 |
addChar(tmpRoot, 66, 0x5d, 7);
|
|
153 |
addChar(tmpRoot, 67, 0x5e, 7);
|
|
154 |
addChar(tmpRoot, 68, 0x5f, 7);
|
|
155 |
addChar(tmpRoot, 69, 0x60, 7);
|
|
156 |
addChar(tmpRoot, 70, 0x61, 7);
|
|
157 |
addChar(tmpRoot, 71, 0x62, 7);
|
|
158 |
addChar(tmpRoot, 72, 0x63, 7);
|
|
159 |
addChar(tmpRoot, 73, 0x64, 7);
|
|
160 |
addChar(tmpRoot, 74, 0x65, 7);
|
|
161 |
addChar(tmpRoot, 75, 0x66, 7);
|
|
162 |
addChar(tmpRoot, 76, 0x67, 7);
|
|
163 |
addChar(tmpRoot, 77, 0x68, 7);
|
|
164 |
addChar(tmpRoot, 78, 0x69, 7);
|
|
165 |
addChar(tmpRoot, 79, 0x6a, 7);
|
|
166 |
addChar(tmpRoot, 80, 0x6b, 7);
|
|
167 |
addChar(tmpRoot, 81, 0x6c, 7);
|
|
168 |
addChar(tmpRoot, 82, 0x6d, 7);
|
|
169 |
addChar(tmpRoot, 83, 0x6e, 7);
|
|
170 |
addChar(tmpRoot, 84, 0x6f, 7);
|
|
171 |
addChar(tmpRoot, 85, 0x70, 7);
|
|
172 |
addChar(tmpRoot, 86, 0x71, 7);
|
|
173 |
addChar(tmpRoot, 87, 0x72, 7);
|
|
174 |
addChar(tmpRoot, 88, 0xfc, 8);
|
|
175 |
addChar(tmpRoot, 89, 0x73, 7);
|
|
176 |
addChar(tmpRoot, 90, 0xfd, 8);
|
|
177 |
addChar(tmpRoot, 91, 0x1ffb, 13);
|
|
178 |
addChar(tmpRoot, 92, 0x7fff0, 19);
|
|
179 |
addChar(tmpRoot, 93, 0x1ffc, 13);
|
|
180 |
addChar(tmpRoot, 94, 0x3ffc, 14);
|
|
181 |
addChar(tmpRoot, 95, 0x22, 6);
|
|
182 |
addChar(tmpRoot, 96, 0x7ffd, 15);
|
|
183 |
addChar(tmpRoot, 97, 0x3, 5);
|
|
184 |
addChar(tmpRoot, 98, 0x23, 6);
|
|
185 |
addChar(tmpRoot, 99, 0x4, 5);
|
|
186 |
addChar(tmpRoot, 100, 0x24, 6);
|
|
187 |
addChar(tmpRoot, 101, 0x5, 5);
|
|
188 |
addChar(tmpRoot, 102, 0x25, 6);
|
|
189 |
addChar(tmpRoot, 103, 0x26, 6);
|
|
190 |
addChar(tmpRoot, 104, 0x27, 6);
|
|
191 |
addChar(tmpRoot, 105, 0x6, 5);
|
|
192 |
addChar(tmpRoot, 106, 0x74, 7);
|
|
193 |
addChar(tmpRoot, 107, 0x75, 7);
|
|
194 |
addChar(tmpRoot, 108, 0x28, 6);
|
|
195 |
addChar(tmpRoot, 109, 0x29, 6);
|
|
196 |
addChar(tmpRoot, 110, 0x2a, 6);
|
|
197 |
addChar(tmpRoot, 111, 0x7, 5);
|
|
198 |
addChar(tmpRoot, 112, 0x2b, 6);
|
|
199 |
addChar(tmpRoot, 113, 0x76, 7);
|
|
200 |
addChar(tmpRoot, 114, 0x2c, 6);
|
|
201 |
addChar(tmpRoot, 115, 0x8, 5);
|
|
202 |
addChar(tmpRoot, 116, 0x9, 5);
|
|
203 |
addChar(tmpRoot, 117, 0x2d, 6);
|
|
204 |
addChar(tmpRoot, 118, 0x77, 7);
|
|
205 |
addChar(tmpRoot, 119, 0x78, 7);
|
|
206 |
addChar(tmpRoot, 120, 0x79, 7);
|
|
207 |
addChar(tmpRoot, 121, 0x7a, 7);
|
|
208 |
addChar(tmpRoot, 122, 0x7b, 7);
|
|
209 |
addChar(tmpRoot, 123, 0x7ffe, 15);
|
|
210 |
addChar(tmpRoot, 124, 0x7fc, 11);
|
|
211 |
addChar(tmpRoot, 125, 0x3ffd, 14);
|
|
212 |
addChar(tmpRoot, 126, 0x1ffd, 13);
|
|
213 |
addChar(tmpRoot, 127, 0xffffffc, 28);
|
|
214 |
addChar(tmpRoot, 128, 0xfffe6, 20);
|
|
215 |
addChar(tmpRoot, 129, 0x3fffd2, 22);
|
|
216 |
addChar(tmpRoot, 130, 0xfffe7, 20);
|
|
217 |
addChar(tmpRoot, 131, 0xfffe8, 20);
|
|
218 |
addChar(tmpRoot, 132, 0x3fffd3, 22);
|
|
219 |
addChar(tmpRoot, 133, 0x3fffd4, 22);
|
|
220 |
addChar(tmpRoot, 134, 0x3fffd5, 22);
|
|
221 |
addChar(tmpRoot, 135, 0x7fffd9, 23);
|
|
222 |
addChar(tmpRoot, 136, 0x3fffd6, 22);
|
|
223 |
addChar(tmpRoot, 137, 0x7fffda, 23);
|
|
224 |
addChar(tmpRoot, 138, 0x7fffdb, 23);
|
|
225 |
addChar(tmpRoot, 139, 0x7fffdc, 23);
|
|
226 |
addChar(tmpRoot, 140, 0x7fffdd, 23);
|
|
227 |
addChar(tmpRoot, 141, 0x7fffde, 23);
|
|
228 |
addChar(tmpRoot, 142, 0xffffeb, 24);
|
|
229 |
addChar(tmpRoot, 143, 0x7fffdf, 23);
|
|
230 |
addChar(tmpRoot, 144, 0xffffec, 24);
|
|
231 |
addChar(tmpRoot, 145, 0xffffed, 24);
|
|
232 |
addChar(tmpRoot, 146, 0x3fffd7, 22);
|
|
233 |
addChar(tmpRoot, 147, 0x7fffe0, 23);
|
|
234 |
addChar(tmpRoot, 148, 0xffffee, 24);
|
|
235 |
addChar(tmpRoot, 149, 0x7fffe1, 23);
|
|
236 |
addChar(tmpRoot, 150, 0x7fffe2, 23);
|
|
237 |
addChar(tmpRoot, 151, 0x7fffe3, 23);
|
|
238 |
addChar(tmpRoot, 152, 0x7fffe4, 23);
|
|
239 |
addChar(tmpRoot, 153, 0x1fffdc, 21);
|
|
240 |
addChar(tmpRoot, 154, 0x3fffd8, 22);
|
|
241 |
addChar(tmpRoot, 155, 0x7fffe5, 23);
|
|
242 |
addChar(tmpRoot, 156, 0x3fffd9, 22);
|
|
243 |
addChar(tmpRoot, 157, 0x7fffe6, 23);
|
|
244 |
addChar(tmpRoot, 158, 0x7fffe7, 23);
|
|
245 |
addChar(tmpRoot, 159, 0xffffef, 24);
|
|
246 |
addChar(tmpRoot, 160, 0x3fffda, 22);
|
|
247 |
addChar(tmpRoot, 161, 0x1fffdd, 21);
|
|
248 |
addChar(tmpRoot, 162, 0xfffe9, 20);
|
|
249 |
addChar(tmpRoot, 163, 0x3fffdb, 22);
|
|
250 |
addChar(tmpRoot, 164, 0x3fffdc, 22);
|
|
251 |
addChar(tmpRoot, 165, 0x7fffe8, 23);
|
|
252 |
addChar(tmpRoot, 166, 0x7fffe9, 23);
|
|
253 |
addChar(tmpRoot, 167, 0x1fffde, 21);
|
|
254 |
addChar(tmpRoot, 168, 0x7fffea, 23);
|
|
255 |
addChar(tmpRoot, 169, 0x3fffdd, 22);
|
|
256 |
addChar(tmpRoot, 170, 0x3fffde, 22);
|
|
257 |
addChar(tmpRoot, 171, 0xfffff0, 24);
|
|
258 |
addChar(tmpRoot, 172, 0x1fffdf, 21);
|
|
259 |
addChar(tmpRoot, 173, 0x3fffdf, 22);
|
|
260 |
addChar(tmpRoot, 174, 0x7fffeb, 23);
|
|
261 |
addChar(tmpRoot, 175, 0x7fffec, 23);
|
|
262 |
addChar(tmpRoot, 176, 0x1fffe0, 21);
|
|
263 |
addChar(tmpRoot, 177, 0x1fffe1, 21);
|
|
264 |
addChar(tmpRoot, 178, 0x3fffe0, 22);
|
|
265 |
addChar(tmpRoot, 179, 0x1fffe2, 21);
|
|
266 |
addChar(tmpRoot, 180, 0x7fffed, 23);
|
|
267 |
addChar(tmpRoot, 181, 0x3fffe1, 22);
|
|
268 |
addChar(tmpRoot, 182, 0x7fffee, 23);
|
|
269 |
addChar(tmpRoot, 183, 0x7fffef, 23);
|
|
270 |
addChar(tmpRoot, 184, 0xfffea, 20);
|
|
271 |
addChar(tmpRoot, 185, 0x3fffe2, 22);
|
|
272 |
addChar(tmpRoot, 186, 0x3fffe3, 22);
|
|
273 |
addChar(tmpRoot, 187, 0x3fffe4, 22);
|
|
274 |
addChar(tmpRoot, 188, 0x7ffff0, 23);
|
|
275 |
addChar(tmpRoot, 189, 0x3fffe5, 22);
|
|
276 |
addChar(tmpRoot, 190, 0x3fffe6, 22);
|
|
277 |
addChar(tmpRoot, 191, 0x7ffff1, 23);
|
|
278 |
addChar(tmpRoot, 192, 0x3ffffe0, 26);
|
|
279 |
addChar(tmpRoot, 193, 0x3ffffe1, 26);
|
|
280 |
addChar(tmpRoot, 194, 0xfffeb, 20);
|
|
281 |
addChar(tmpRoot, 195, 0x7fff1, 19);
|
|
282 |
addChar(tmpRoot, 196, 0x3fffe7, 22);
|
|
283 |
addChar(tmpRoot, 197, 0x7ffff2, 23);
|
|
284 |
addChar(tmpRoot, 198, 0x3fffe8, 22);
|
|
285 |
addChar(tmpRoot, 199, 0x1ffffec, 25);
|
|
286 |
addChar(tmpRoot, 200, 0x3ffffe2, 26);
|
|
287 |
addChar(tmpRoot, 201, 0x3ffffe3, 26);
|
|
288 |
addChar(tmpRoot, 202, 0x3ffffe4, 26);
|
|
289 |
addChar(tmpRoot, 203, 0x7ffffde, 27);
|
|
290 |
addChar(tmpRoot, 204, 0x7ffffdf, 27);
|
|
291 |
addChar(tmpRoot, 205, 0x3ffffe5, 26);
|
|
292 |
addChar(tmpRoot, 206, 0xfffff1, 24);
|
|
293 |
addChar(tmpRoot, 207, 0x1ffffed, 25);
|
|
294 |
addChar(tmpRoot, 208, 0x7fff2, 19);
|
|
295 |
addChar(tmpRoot, 209, 0x1fffe3, 21);
|
|
296 |
addChar(tmpRoot, 210, 0x3ffffe6, 26);
|
|
297 |
addChar(tmpRoot, 211, 0x7ffffe0, 27);
|
|
298 |
addChar(tmpRoot, 212, 0x7ffffe1, 27);
|
|
299 |
addChar(tmpRoot, 213, 0x3ffffe7, 26);
|
|
300 |
addChar(tmpRoot, 214, 0x7ffffe2, 27);
|
|
301 |
addChar(tmpRoot, 215, 0xfffff2, 24);
|
|
302 |
addChar(tmpRoot, 216, 0x1fffe4, 21);
|
|
303 |
addChar(tmpRoot, 217, 0x1fffe5, 21);
|
|
304 |
addChar(tmpRoot, 218, 0x3ffffe8, 26);
|
|
305 |
addChar(tmpRoot, 219, 0x3ffffe9, 26);
|
|
306 |
addChar(tmpRoot, 220, 0xffffffd, 28);
|
|
307 |
addChar(tmpRoot, 221, 0x7ffffe3, 27);
|
|
308 |
addChar(tmpRoot, 222, 0x7ffffe4, 27);
|
|
309 |
addChar(tmpRoot, 223, 0x7ffffe5, 27);
|
|
310 |
addChar(tmpRoot, 224, 0xfffec, 20);
|
|
311 |
addChar(tmpRoot, 225, 0xfffff3, 24);
|
|
312 |
addChar(tmpRoot, 226, 0xfffed, 20);
|
|
313 |
addChar(tmpRoot, 227, 0x1fffe6, 21);
|
|
314 |
addChar(tmpRoot, 228, 0x3fffe9, 22);
|
|
315 |
addChar(tmpRoot, 229, 0x1fffe7, 21);
|
|
316 |
addChar(tmpRoot, 230, 0x1fffe8, 21);
|
|
317 |
addChar(tmpRoot, 231, 0x7ffff3, 23);
|
|
318 |
addChar(tmpRoot, 232, 0x3fffea, 22);
|
|
319 |
addChar(tmpRoot, 233, 0x3fffeb, 22);
|
|
320 |
addChar(tmpRoot, 234, 0x1ffffee, 25);
|
|
321 |
addChar(tmpRoot, 235, 0x1ffffef, 25);
|
|
322 |
addChar(tmpRoot, 236, 0xfffff4, 24);
|
|
323 |
addChar(tmpRoot, 237, 0xfffff5, 24);
|
|
324 |
addChar(tmpRoot, 238, 0x3ffffea, 26);
|
|
325 |
addChar(tmpRoot, 239, 0x7ffff4, 23);
|
|
326 |
addChar(tmpRoot, 240, 0x3ffffeb, 26);
|
|
327 |
addChar(tmpRoot, 241, 0x7ffffe6, 27);
|
|
328 |
addChar(tmpRoot, 242, 0x3ffffec, 26);
|
|
329 |
addChar(tmpRoot, 243, 0x3ffffed, 26);
|
|
330 |
addChar(tmpRoot, 244, 0x7ffffe7, 27);
|
|
331 |
addChar(tmpRoot, 245, 0x7ffffe8, 27);
|
|
332 |
addChar(tmpRoot, 246, 0x7ffffe9, 27);
|
|
333 |
addChar(tmpRoot, 247, 0x7ffffea, 27);
|
|
334 |
addChar(tmpRoot, 248, 0x7ffffeb, 27);
|
|
335 |
addChar(tmpRoot, 249, 0xffffffe, 28);
|
|
336 |
addChar(tmpRoot, 250, 0x7ffffec, 27);
|
|
337 |
addChar(tmpRoot, 251, 0x7ffffed, 27);
|
|
338 |
addChar(tmpRoot, 252, 0x7ffffee, 27);
|
|
339 |
addChar(tmpRoot, 253, 0x7ffffef, 27);
|
|
340 |
addChar(tmpRoot, 254, 0x7fffff0, 27);
|
|
341 |
addChar(tmpRoot, 255, 0x3ffffee, 26);
|
|
342 |
addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
|
|
343 |
|
|
344 |
// The difference in performance can always be checked by not using
|
|
345 |
// the immutable trie:
|
50681
|
346 |
// return tmpRoot;
|
|
347 |
return ImmutableNode.copyOf(tmpRoot);
|
49944
|
348 |
}
|
|
349 |
|
|
350 |
private QuickHuffman() { }
|
|
351 |
|
|
352 |
private static void addChar(Node root, int symbol, int code, int bitLength)
|
|
353 |
{
|
|
354 |
addLeaf(root, (char) symbol, code, bitLength, false);
|
|
355 |
long value = ((long) code) << (64 - bitLength); // re-align MSB <- LSB
|
|
356 |
codes[symbol] = value | bitLength;
|
|
357 |
}
|
|
358 |
|
|
359 |
private static void addEOS(Node root, int symbol, int code, int bitLength)
|
|
360 |
{
|
|
361 |
addLeaf(root, (char) symbol, code, bitLength, true);
|
|
362 |
}
|
|
363 |
|
|
364 |
private static void addLeaf(Node root,
|
|
365 |
char symbol,
|
|
366 |
int code,
|
|
367 |
int bitLength,
|
|
368 |
boolean isEOS)
|
|
369 |
{
|
|
370 |
assert 0 < bitLength && bitLength <= 32 : bitLength;
|
|
371 |
Node curr = root;
|
|
372 |
int nBytes = bytesForBits(bitLength);
|
|
373 |
// The number of bits the code needs to be shifted to the left in order
|
|
374 |
// to align with the byte #nBytes boundary:
|
|
375 |
int align = (nBytes << 3) - bitLength;
|
|
376 |
code <<= align;
|
|
377 |
// descend the trie until the last element
|
|
378 |
int l = 0;
|
|
379 |
for (int i = 0, probe = 0xff << ((nBytes - 1) << 3);
|
|
380 |
i < nBytes - 1;
|
|
381 |
i++, probe >>>= 8)
|
|
382 |
{
|
|
383 |
curr.setEOSPath(curr.isEOSPath() | isEOS);
|
|
384 |
int idx = (code & probe) >>> ((nBytes - 1 - i) << 3);
|
|
385 |
curr = curr.getOrCreateChild(idx);
|
|
386 |
curr.setLength(8);
|
|
387 |
l += 8;
|
|
388 |
}
|
|
389 |
// Assign the same char to all byte variants. For example, if the code
|
|
390 |
// and its length are 00011b and 5 respectively (letter 'a') then, in
|
|
391 |
// order to be able to match any byte starting with 00011b prefix,
|
|
392 |
// the following nodes need to be created:
|
|
393 |
//
|
|
394 |
// 00011000b, 00011001b, 00011010b, 00011011b, 00011100b, 00011101b,
|
|
395 |
// 00011110b and 00011111b
|
|
396 |
int idx = code & 0xff;
|
|
397 |
curr.setEOSPath(curr.isEOSPath() | isEOS);
|
|
398 |
for (int i = 0; i < (1 << align); i++) {
|
|
399 |
Node child = curr.getOrCreateChild(idx | i);
|
|
400 |
child.setSymbol(symbol);
|
|
401 |
child.setEOSPath(child.isEOSPath() | isEOS);
|
|
402 |
child.setLength(bitLength - l);
|
|
403 |
}
|
|
404 |
}
|
|
405 |
|
|
406 |
/*
|
|
407 |
* A node in the Huffman trie.
|
|
408 |
*/
|
|
409 |
interface Node {
|
|
410 |
|
|
411 |
boolean isEOSPath();
|
|
412 |
|
|
413 |
void setEOSPath(boolean value);
|
|
414 |
|
|
415 |
boolean isLeaf();
|
|
416 |
|
|
417 |
Node getChild(int index);
|
|
418 |
|
|
419 |
Node getOrCreateChild(int index);
|
|
420 |
|
|
421 |
Node[] getChildren();
|
|
422 |
|
|
423 |
char getSymbol();
|
|
424 |
|
|
425 |
void setSymbol(char symbol);
|
|
426 |
|
|
427 |
int getLength();
|
|
428 |
|
|
429 |
void setLength(int value);
|
|
430 |
}
|
|
431 |
|
|
432 |
/*
|
|
433 |
* Mutable nodes used for initial construction of the Huffman trie.
|
|
434 |
* (These nodes are perfectly ok to be used after that.)
|
|
435 |
*/
|
|
436 |
static final class TemporaryNode implements Node {
|
|
437 |
|
|
438 |
private char symbol;
|
|
439 |
private boolean eosPath;
|
|
440 |
private TemporaryNode[] children;
|
|
441 |
private int length;
|
|
442 |
|
|
443 |
@Override
|
|
444 |
public TemporaryNode getOrCreateChild(int index) {
|
|
445 |
ensureChildrenExist();
|
|
446 |
if (children[index] == null) {
|
|
447 |
children[index] = new TemporaryNode();
|
|
448 |
}
|
|
449 |
return children[index];
|
|
450 |
}
|
|
451 |
|
|
452 |
private void ensureChildrenExist() {
|
|
453 |
if (children == null) {
|
|
454 |
children = new TemporaryNode[256];
|
|
455 |
}
|
|
456 |
}
|
|
457 |
|
|
458 |
@Override
|
|
459 |
public boolean isLeaf() {
|
|
460 |
return children == null;
|
|
461 |
}
|
|
462 |
|
|
463 |
@Override
|
|
464 |
public boolean isEOSPath() {
|
|
465 |
return eosPath;
|
|
466 |
}
|
|
467 |
|
|
468 |
@Override
|
|
469 |
public void setEOSPath(boolean value) {
|
|
470 |
eosPath = value;
|
|
471 |
}
|
|
472 |
|
|
473 |
@Override
|
|
474 |
public TemporaryNode getChild(int index) {
|
|
475 |
ensureChildrenExist();
|
|
476 |
return children[index];
|
|
477 |
}
|
|
478 |
|
|
479 |
@Override
|
|
480 |
public Node[] getChildren() {
|
|
481 |
if (children == null) {
|
|
482 |
return new Node[0];
|
|
483 |
}
|
|
484 |
return children;
|
|
485 |
}
|
|
486 |
|
|
487 |
@Override
|
|
488 |
public char getSymbol() {
|
|
489 |
return symbol;
|
|
490 |
}
|
|
491 |
|
|
492 |
@Override
|
|
493 |
public int getLength() {
|
|
494 |
return length;
|
|
495 |
}
|
|
496 |
|
|
497 |
@Override
|
|
498 |
public void setSymbol(char value) {
|
|
499 |
this.symbol = value;
|
|
500 |
}
|
|
501 |
|
|
502 |
@Override
|
|
503 |
public void setLength(int value) {
|
|
504 |
this.length = value;
|
|
505 |
}
|
|
506 |
}
|
|
507 |
|
|
508 |
/*
|
|
509 |
* Immutable node used to construct traversal-only Huffman trie.
|
|
510 |
*
|
|
511 |
* Once the trie has been built, the support of modifications is no longer
|
|
512 |
* required. An immutable trie should be used. Not only it will help to
|
|
513 |
* catch possible bugs, but hopefully speedup the traversal operations.
|
|
514 |
*/
|
|
515 |
static final class ImmutableNode implements Node {
|
|
516 |
|
|
517 |
private final char symbol;
|
|
518 |
private final boolean eosPath;
|
|
519 |
private final int length;
|
|
520 |
private final List<ImmutableNode> children;
|
|
521 |
|
|
522 |
public static ImmutableNode copyOf(Node node) {
|
|
523 |
if (node.isLeaf()) {
|
|
524 |
return new ImmutableNode(node.getSymbol(), node.isEOSPath(),
|
|
525 |
node.getLength());
|
|
526 |
}
|
|
527 |
Node[] children = node.getChildren();
|
|
528 |
ImmutableNode[] immutableChildren = new ImmutableNode[children.length];
|
|
529 |
for (int i = 0; i < children.length; i++) {
|
|
530 |
immutableChildren[i] = copyOf(children[i]);
|
|
531 |
}
|
|
532 |
return new ImmutableNode(node.isEOSPath(), node.getLength(),
|
|
533 |
immutableChildren);
|
|
534 |
}
|
|
535 |
|
|
536 |
/* Creates a leaf node */
|
|
537 |
private ImmutableNode(char symbol,
|
|
538 |
boolean eosPath,
|
|
539 |
int length) {
|
|
540 |
this.symbol = symbol;
|
|
541 |
this.eosPath = eosPath;
|
|
542 |
this.length = length;
|
|
543 |
this.children = List.of();
|
|
544 |
}
|
|
545 |
|
|
546 |
/* Creates a node with children */
|
|
547 |
private ImmutableNode(boolean eosPath,
|
|
548 |
int length,
|
|
549 |
ImmutableNode[] children)
|
|
550 |
{
|
|
551 |
this.symbol = 0;
|
|
552 |
this.eosPath = eosPath;
|
|
553 |
this.length = length;
|
|
554 |
if (children.length == 0) {
|
|
555 |
throw new IllegalArgumentException();
|
|
556 |
}
|
|
557 |
// A list produced by List.of should not be slower than array for
|
|
558 |
// accessing elements by index, and hopefully use additional
|
|
559 |
// optimizations (e.g. jdk.internal.vm.annotation.Stable)
|
|
560 |
this.children = List.of(children);
|
|
561 |
}
|
|
562 |
|
|
563 |
@Override
|
|
564 |
public boolean isLeaf() {
|
|
565 |
return children.isEmpty();
|
|
566 |
}
|
|
567 |
|
|
568 |
@Override
|
|
569 |
public boolean isEOSPath() {
|
|
570 |
return eosPath;
|
|
571 |
}
|
|
572 |
|
|
573 |
@Override
|
|
574 |
public void setEOSPath(boolean value) {
|
|
575 |
throw new UnsupportedOperationException();
|
|
576 |
}
|
|
577 |
|
|
578 |
@Override
|
|
579 |
public ImmutableNode getChild(int index) {
|
|
580 |
return children.get(index);
|
|
581 |
}
|
|
582 |
|
|
583 |
@Override
|
|
584 |
public ImmutableNode getOrCreateChild(int index) {
|
|
585 |
throw new UnsupportedOperationException();
|
|
586 |
}
|
|
587 |
|
|
588 |
@Override
|
|
589 |
public ImmutableNode[] getChildren() {
|
|
590 |
// This method is not expected to be called on an immutable node.
|
|
591 |
// If it is called, it requires some investigation.
|
|
592 |
throw new UnsupportedOperationException();
|
|
593 |
}
|
|
594 |
|
|
595 |
@Override
|
|
596 |
public char getSymbol() {
|
|
597 |
return symbol;
|
|
598 |
}
|
|
599 |
|
|
600 |
@Override
|
|
601 |
public void setSymbol(char symbol) {
|
|
602 |
throw new UnsupportedOperationException();
|
|
603 |
}
|
|
604 |
|
|
605 |
@Override
|
|
606 |
public int getLength() {
|
|
607 |
return length;
|
|
608 |
}
|
|
609 |
|
|
610 |
@Override
|
|
611 |
public void setLength(int value) {
|
|
612 |
throw new UnsupportedOperationException();
|
|
613 |
}
|
|
614 |
}
|
|
615 |
|
|
616 |
static final class Reader implements Huffman.Reader {
|
|
617 |
|
50681
|
618 |
private final BufferUpdateConsumer UPDATER =
|
|
619 |
(buf, bufLen) -> {
|
|
620 |
buffer = buf;
|
|
621 |
bufferLen = bufLen;
|
|
622 |
};
|
|
623 |
|
49944
|
624 |
private Node curr = root; // current position in the trie
|
|
625 |
private long buffer; // bits left from the previous match (aligned to the left, or MSB)
|
|
626 |
private int bufferLen; // number of bits in the buffer
|
|
627 |
private int len; // length (in bits) of path to curr
|
|
628 |
private boolean done;
|
|
629 |
|
|
630 |
@Override
|
|
631 |
public void read(ByteBuffer source,
|
|
632 |
Appendable destination,
|
|
633 |
boolean isLast) throws IOException
|
|
634 |
{
|
|
635 |
while (!done) {
|
|
636 |
int remaining = source.remaining();
|
50681
|
637 |
int nBytes = HPACK.read(source, buffer, bufferLen, UPDATER);
|
49944
|
638 |
// write as much as possible
|
|
639 |
while (true) {
|
|
640 |
if (bufferLen < 8) {
|
|
641 |
if (nBytes < remaining) { // read again
|
|
642 |
break;
|
|
643 |
} else if (!isLast) { // exit the method to accept more input
|
|
644 |
return;
|
|
645 |
} else if (bufferLen > 0) { // no more data is expected, pad
|
50681
|
646 |
// (this padding may be done more than once)
|
49944
|
647 |
buffer |= ((0xff00000000000000L >>> bufferLen)
|
|
648 |
& 0xff00000000000000L);
|
|
649 |
// do not update bufferLen, since all those ones are
|
|
650 |
// synthetic and are appended merely for the sake of
|
|
651 |
// lookup
|
|
652 |
} else {
|
|
653 |
done = true;
|
|
654 |
break;
|
|
655 |
}
|
|
656 |
}
|
|
657 |
int idx = (int) (buffer >>> 56);
|
|
658 |
Node node = curr.getChild(idx);
|
|
659 |
if (node == null) { // TODO: TEST
|
|
660 |
throw new IOException("Unexpected byte");
|
|
661 |
}
|
|
662 |
if (node.isLeaf()) {
|
|
663 |
if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
|
|
664 |
throw new IOException(
|
|
665 |
"Not a EOS prefix padding or unexpected end of data");
|
|
666 |
}
|
50681
|
667 |
if (node.isEOSPath()) {
|
49944
|
668 |
throw new IOException("Encountered EOS");
|
|
669 |
}
|
|
670 |
destination.append(node.getSymbol());
|
|
671 |
curr = root;
|
|
672 |
len = 0;
|
|
673 |
} else {
|
|
674 |
curr = node;
|
|
675 |
// because of the padding, we can't match more bits than
|
|
676 |
// there are currently in the buffer
|
|
677 |
len += Math.min(bufferLen, node.getLength());
|
|
678 |
}
|
|
679 |
buffer <<= node.getLength();
|
|
680 |
bufferLen -= node.getLength();
|
|
681 |
}
|
|
682 |
if (done && (curr.isEOSPath() && len > 7)) {
|
|
683 |
throw new IOException(
|
|
684 |
"Padding is too long (len=" + len + ") "
|
|
685 |
+ "or unexpected end of data");
|
|
686 |
}
|
|
687 |
}
|
|
688 |
}
|
|
689 |
|
50681
|
690 |
@Override
|
|
691 |
public void reset() {
|
|
692 |
curr = root;
|
|
693 |
len = 0;
|
|
694 |
buffer = 0;
|
|
695 |
bufferLen = 0;
|
|
696 |
done = false;
|
49944
|
697 |
}
|
|
698 |
}
|
|
699 |
|
|
700 |
static final class Writer implements Huffman.Writer {
|
|
701 |
|
50681
|
702 |
private final BufferUpdateConsumer UPDATER =
|
|
703 |
(buf, bufLen) -> {
|
|
704 |
buffer = buf;
|
|
705 |
bufferLen = bufLen;
|
|
706 |
};
|
|
707 |
|
49944
|
708 |
private CharSequence source;
|
|
709 |
private boolean padded;
|
|
710 |
private int pos;
|
|
711 |
private int end;
|
|
712 |
private long buffer;
|
|
713 |
private int bufferLen;
|
|
714 |
|
|
715 |
@Override
|
|
716 |
public QuickHuffman.Writer from(CharSequence input, int start, int end) {
|
|
717 |
Objects.checkFromToIndex(start, end, input.length());
|
|
718 |
this.pos = start;
|
|
719 |
this.end = end;
|
|
720 |
this.source = input;
|
|
721 |
return this;
|
|
722 |
}
|
|
723 |
|
|
724 |
@Override
|
|
725 |
public boolean write(ByteBuffer destination) {
|
|
726 |
while (true) {
|
50681
|
727 |
while (true) { // stuff codes into long
|
|
728 |
if (pos >= end) {
|
|
729 |
break;
|
|
730 |
}
|
|
731 |
char c = source.charAt(pos);
|
|
732 |
if (c > 255) {
|
|
733 |
throw new IllegalArgumentException("char=" + ((int) c));
|
|
734 |
}
|
|
735 |
long len = codeLengthOf(c);
|
|
736 |
if (bufferLen + len <= 64) {
|
|
737 |
buffer |= (codeValueOf(c) >>> bufferLen); // append
|
|
738 |
bufferLen += len;
|
|
739 |
pos++;
|
|
740 |
} else {
|
|
741 |
break;
|
|
742 |
}
|
49944
|
743 |
}
|
|
744 |
if (bufferLen == 0) {
|
|
745 |
return true;
|
|
746 |
}
|
50681
|
747 |
if (pos >= end && !padded) { // no more input chars are expected, pad
|
49944
|
748 |
padded = true;
|
50681
|
749 |
// A long shift to 64 will result in the same long, not
|
|
750 |
// necessarily 0L. In which case padding will be performed
|
|
751 |
// incorrectly. If bufferLen is equal to 64, the shift (and
|
|
752 |
// padding) in not performed.
|
|
753 |
// (see https://docs.oracle.com/javase/specs/jls/se10/html/jls-15.html#jls-15.19)
|
|
754 |
if (bufferLen != 64) {
|
|
755 |
buffer |= (EOS_MSB >>> bufferLen);
|
|
756 |
bufferLen = bytesForBits(bufferLen) << 3;
|
|
757 |
}
|
49944
|
758 |
}
|
50681
|
759 |
int nBytes = HPACK.write(buffer, bufferLen, UPDATER, destination);
|
|
760 |
if (nBytes == 0) {
|
|
761 |
return false;
|
49944
|
762 |
}
|
|
763 |
}
|
|
764 |
}
|
|
765 |
|
|
766 |
@Override
|
|
767 |
public QuickHuffman.Writer reset() {
|
|
768 |
source = null;
|
|
769 |
buffer = 0;
|
|
770 |
bufferLen = 0;
|
|
771 |
end = 0;
|
|
772 |
pos = 0;
|
|
773 |
padded = false;
|
|
774 |
return this;
|
|
775 |
}
|
|
776 |
|
|
777 |
@Override
|
|
778 |
public int lengthOf(CharSequence value, int start, int end) {
|
|
779 |
int len = 0;
|
|
780 |
for (int i = start; i < end; i++) {
|
|
781 |
char c = value.charAt(i);
|
|
782 |
len += codeLengthOf(c);
|
|
783 |
}
|
|
784 |
return bytesForBits(len);
|
|
785 |
}
|
|
786 |
}
|
|
787 |
}
|