49944
|
1 |
/*
|
|
2 |
* Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
|
24 |
*/
|
|
25 |
package jdk.internal.net.http.hpack;
|
|
26 |
|
|
27 |
import java.io.IOException;
|
|
28 |
import java.nio.ByteBuffer;
|
|
29 |
|
|
30 |
import static java.lang.String.format;
|
|
31 |
|
|
32 |
/**
|
|
33 |
* Huffman coding table.
|
|
34 |
*
|
|
35 |
* <p> Instances of this class are safe for use by multiple threads.
|
|
36 |
*
|
|
37 |
* @since 9
|
|
38 |
*/
|
|
39 |
public final class NaiveHuffman {
|
|
40 |
|
|
41 |
// TODO: check if reset is done in both reader and writer
|
|
42 |
|
|
43 |
static final class Reader implements Huffman.Reader {
|
|
44 |
|
|
45 |
private Node curr; // position in the trie
|
|
46 |
private int len; // length of the path from the root to 'curr'
|
|
47 |
private int p; // byte probe
|
|
48 |
|
|
49 |
{
|
|
50 |
reset();
|
|
51 |
}
|
|
52 |
|
|
53 |
@Override
|
|
54 |
public void read(ByteBuffer source,
|
|
55 |
Appendable destination,
|
|
56 |
boolean isLast) throws IOException {
|
|
57 |
read(source, destination, true, isLast);
|
|
58 |
}
|
|
59 |
|
|
60 |
// Takes 'isLast' rather than returns whether the reading is done or
|
|
61 |
// not, for more informative exceptions.
|
|
62 |
void read(ByteBuffer source,
|
|
63 |
Appendable destination,
|
|
64 |
boolean reportEOS, /* reportEOS is exposed for tests */
|
|
65 |
boolean isLast) throws IOException {
|
|
66 |
Node c = curr;
|
|
67 |
int l = len;
|
|
68 |
/*
|
|
69 |
Since ByteBuffer is itself stateful, its position is
|
|
70 |
remembered here NOT as a part of Reader's state,
|
|
71 |
but to set it back in the case of a failure
|
|
72 |
*/
|
|
73 |
int pos = source.position();
|
|
74 |
|
|
75 |
while (source.hasRemaining()) {
|
|
76 |
int d = source.get();
|
|
77 |
for (; p != 0; p >>= 1) {
|
|
78 |
c = c.getChild(p & d);
|
|
79 |
l++;
|
|
80 |
if (c.isLeaf()) {
|
|
81 |
if (reportEOS && c.isEOSPath) {
|
|
82 |
throw new IOException("Encountered EOS");
|
|
83 |
}
|
|
84 |
char ch;
|
|
85 |
try {
|
|
86 |
ch = c.getChar();
|
|
87 |
} catch (IllegalStateException e) {
|
|
88 |
source.position(pos); // do we need this?
|
|
89 |
throw new IOException(e);
|
|
90 |
}
|
|
91 |
try {
|
|
92 |
destination.append(ch);
|
|
93 |
} catch (IOException e) {
|
|
94 |
source.position(pos); // do we need this?
|
|
95 |
throw e;
|
|
96 |
}
|
|
97 |
c = INSTANCE.root;
|
|
98 |
l = 0;
|
|
99 |
}
|
|
100 |
curr = c;
|
|
101 |
len = l;
|
|
102 |
}
|
|
103 |
resetProbe();
|
|
104 |
pos++;
|
|
105 |
}
|
|
106 |
if (!isLast) {
|
|
107 |
return; // it's too early to jump to any conclusions, let's wait
|
|
108 |
}
|
|
109 |
if (c.isLeaf()) {
|
|
110 |
return; // it's perfectly ok, no extra padding bits
|
|
111 |
}
|
|
112 |
if (c.isEOSPath && len <= 7) {
|
|
113 |
return; // it's ok, some extra padding bits
|
|
114 |
}
|
|
115 |
if (c.isEOSPath) {
|
|
116 |
throw new IOException(
|
|
117 |
"Padding is too long (len=" + len + ") " +
|
|
118 |
"or unexpected end of data");
|
|
119 |
}
|
|
120 |
throw new IOException(
|
|
121 |
"Not a EOS prefix padding or unexpected end of data");
|
|
122 |
}
|
|
123 |
|
|
124 |
@Override
|
|
125 |
public void reset() {
|
|
126 |
curr = INSTANCE.root;
|
|
127 |
len = 0;
|
|
128 |
resetProbe();
|
|
129 |
}
|
|
130 |
|
|
131 |
private void resetProbe() {
|
|
132 |
p = 0x80;
|
|
133 |
}
|
|
134 |
}
|
|
135 |
|
|
136 |
static final class Writer implements Huffman.Writer {
|
|
137 |
|
|
138 |
private int pos; // position in 'source'
|
|
139 |
private int avail = 8; // number of least significant bits available in 'curr'
|
|
140 |
private int curr; // next byte to put to the destination
|
|
141 |
private int rem; // number of least significant bits in 'code' yet to be processed
|
|
142 |
private int code; // current code being written
|
|
143 |
|
|
144 |
private CharSequence source;
|
|
145 |
private int end;
|
|
146 |
|
|
147 |
@Override
|
|
148 |
public Writer from(CharSequence input, int start, int end) {
|
|
149 |
if (start < 0 || end < 0 || end > input.length() || start > end) {
|
|
150 |
throw new IndexOutOfBoundsException(
|
|
151 |
String.format("input.length()=%s, start=%s, end=%s",
|
|
152 |
input.length(), start, end));
|
|
153 |
}
|
|
154 |
pos = start;
|
|
155 |
this.end = end;
|
|
156 |
this.source = input;
|
|
157 |
return this;
|
|
158 |
}
|
|
159 |
|
|
160 |
@Override
|
|
161 |
public boolean write(ByteBuffer destination) {
|
|
162 |
for (; pos < end; pos++) {
|
|
163 |
if (rem == 0) {
|
|
164 |
Code desc = INSTANCE.codeOf(source.charAt(pos));
|
|
165 |
rem = desc.length;
|
|
166 |
code = desc.code;
|
|
167 |
}
|
|
168 |
while (rem > 0) {
|
|
169 |
if (rem < avail) {
|
|
170 |
curr |= (code << (avail - rem));
|
|
171 |
avail -= rem;
|
|
172 |
rem = 0;
|
|
173 |
} else {
|
|
174 |
int c = (curr | (code >>> (rem - avail)));
|
|
175 |
if (destination.hasRemaining()) {
|
|
176 |
destination.put((byte) c);
|
|
177 |
} else {
|
|
178 |
return false;
|
|
179 |
}
|
|
180 |
curr = c;
|
|
181 |
code <<= (32 - rem + avail); // throw written bits off the cliff (is this Sparta?)
|
|
182 |
code >>>= (32 - rem + avail); // return to the position
|
|
183 |
rem -= avail;
|
|
184 |
curr = 0;
|
|
185 |
avail = 8;
|
|
186 |
}
|
|
187 |
}
|
|
188 |
}
|
|
189 |
|
|
190 |
if (avail < 8) { // have to pad
|
|
191 |
if (destination.hasRemaining()) {
|
|
192 |
destination.put((byte) (curr | (INSTANCE.EOS.code >>> (INSTANCE.EOS.length - avail))));
|
|
193 |
avail = 8;
|
|
194 |
} else {
|
|
195 |
return false;
|
|
196 |
}
|
|
197 |
}
|
|
198 |
|
|
199 |
return true;
|
|
200 |
}
|
|
201 |
|
|
202 |
@Override
|
|
203 |
public Writer reset() {
|
|
204 |
source = null;
|
|
205 |
end = -1;
|
|
206 |
pos = -1;
|
|
207 |
avail = 8;
|
|
208 |
curr = 0;
|
|
209 |
code = 0;
|
|
210 |
return this;
|
|
211 |
}
|
|
212 |
|
|
213 |
@Override
|
|
214 |
public int lengthOf(CharSequence value, int start, int end) {
|
|
215 |
return INSTANCE.lengthOf(value, start, end);
|
|
216 |
}
|
|
217 |
}
|
|
218 |
|
|
219 |
/**
|
|
220 |
* Shared instance.
|
|
221 |
*/
|
|
222 |
public static final NaiveHuffman INSTANCE = new NaiveHuffman();
|
|
223 |
|
|
224 |
private final Code EOS = new Code(0x3fffffff, 30);
|
|
225 |
private final Code[] codes = new Code[257];
|
|
226 |
private final Node root = new Node() {
|
|
227 |
@Override
|
|
228 |
public String toString() { return "root"; }
|
|
229 |
};
|
|
230 |
|
|
231 |
// TODO: consider builder and immutable trie
|
|
232 |
private NaiveHuffman() {
|
|
233 |
// @formatter:off
|
|
234 |
addChar(0, 0x1ff8, 13);
|
|
235 |
addChar(1, 0x7fffd8, 23);
|
|
236 |
addChar(2, 0xfffffe2, 28);
|
|
237 |
addChar(3, 0xfffffe3, 28);
|
|
238 |
addChar(4, 0xfffffe4, 28);
|
|
239 |
addChar(5, 0xfffffe5, 28);
|
|
240 |
addChar(6, 0xfffffe6, 28);
|
|
241 |
addChar(7, 0xfffffe7, 28);
|
|
242 |
addChar(8, 0xfffffe8, 28);
|
|
243 |
addChar(9, 0xffffea, 24);
|
|
244 |
addChar(10, 0x3ffffffc, 30);
|
|
245 |
addChar(11, 0xfffffe9, 28);
|
|
246 |
addChar(12, 0xfffffea, 28);
|
|
247 |
addChar(13, 0x3ffffffd, 30);
|
|
248 |
addChar(14, 0xfffffeb, 28);
|
|
249 |
addChar(15, 0xfffffec, 28);
|
|
250 |
addChar(16, 0xfffffed, 28);
|
|
251 |
addChar(17, 0xfffffee, 28);
|
|
252 |
addChar(18, 0xfffffef, 28);
|
|
253 |
addChar(19, 0xffffff0, 28);
|
|
254 |
addChar(20, 0xffffff1, 28);
|
|
255 |
addChar(21, 0xffffff2, 28);
|
|
256 |
addChar(22, 0x3ffffffe, 30);
|
|
257 |
addChar(23, 0xffffff3, 28);
|
|
258 |
addChar(24, 0xffffff4, 28);
|
|
259 |
addChar(25, 0xffffff5, 28);
|
|
260 |
addChar(26, 0xffffff6, 28);
|
|
261 |
addChar(27, 0xffffff7, 28);
|
|
262 |
addChar(28, 0xffffff8, 28);
|
|
263 |
addChar(29, 0xffffff9, 28);
|
|
264 |
addChar(30, 0xffffffa, 28);
|
|
265 |
addChar(31, 0xffffffb, 28);
|
|
266 |
addChar(32, 0x14, 6);
|
|
267 |
addChar(33, 0x3f8, 10);
|
|
268 |
addChar(34, 0x3f9, 10);
|
|
269 |
addChar(35, 0xffa, 12);
|
|
270 |
addChar(36, 0x1ff9, 13);
|
|
271 |
addChar(37, 0x15, 6);
|
|
272 |
addChar(38, 0xf8, 8);
|
|
273 |
addChar(39, 0x7fa, 11);
|
|
274 |
addChar(40, 0x3fa, 10);
|
|
275 |
addChar(41, 0x3fb, 10);
|
|
276 |
addChar(42, 0xf9, 8);
|
|
277 |
addChar(43, 0x7fb, 11);
|
|
278 |
addChar(44, 0xfa, 8);
|
|
279 |
addChar(45, 0x16, 6);
|
|
280 |
addChar(46, 0x17, 6);
|
|
281 |
addChar(47, 0x18, 6);
|
|
282 |
addChar(48, 0x0, 5);
|
|
283 |
addChar(49, 0x1, 5);
|
|
284 |
addChar(50, 0x2, 5);
|
|
285 |
addChar(51, 0x19, 6);
|
|
286 |
addChar(52, 0x1a, 6);
|
|
287 |
addChar(53, 0x1b, 6);
|
|
288 |
addChar(54, 0x1c, 6);
|
|
289 |
addChar(55, 0x1d, 6);
|
|
290 |
addChar(56, 0x1e, 6);
|
|
291 |
addChar(57, 0x1f, 6);
|
|
292 |
addChar(58, 0x5c, 7);
|
|
293 |
addChar(59, 0xfb, 8);
|
|
294 |
addChar(60, 0x7ffc, 15);
|
|
295 |
addChar(61, 0x20, 6);
|
|
296 |
addChar(62, 0xffb, 12);
|
|
297 |
addChar(63, 0x3fc, 10);
|
|
298 |
addChar(64, 0x1ffa, 13);
|
|
299 |
addChar(65, 0x21, 6);
|
|
300 |
addChar(66, 0x5d, 7);
|
|
301 |
addChar(67, 0x5e, 7);
|
|
302 |
addChar(68, 0x5f, 7);
|
|
303 |
addChar(69, 0x60, 7);
|
|
304 |
addChar(70, 0x61, 7);
|
|
305 |
addChar(71, 0x62, 7);
|
|
306 |
addChar(72, 0x63, 7);
|
|
307 |
addChar(73, 0x64, 7);
|
|
308 |
addChar(74, 0x65, 7);
|
|
309 |
addChar(75, 0x66, 7);
|
|
310 |
addChar(76, 0x67, 7);
|
|
311 |
addChar(77, 0x68, 7);
|
|
312 |
addChar(78, 0x69, 7);
|
|
313 |
addChar(79, 0x6a, 7);
|
|
314 |
addChar(80, 0x6b, 7);
|
|
315 |
addChar(81, 0x6c, 7);
|
|
316 |
addChar(82, 0x6d, 7);
|
|
317 |
addChar(83, 0x6e, 7);
|
|
318 |
addChar(84, 0x6f, 7);
|
|
319 |
addChar(85, 0x70, 7);
|
|
320 |
addChar(86, 0x71, 7);
|
|
321 |
addChar(87, 0x72, 7);
|
|
322 |
addChar(88, 0xfc, 8);
|
|
323 |
addChar(89, 0x73, 7);
|
|
324 |
addChar(90, 0xfd, 8);
|
|
325 |
addChar(91, 0x1ffb, 13);
|
|
326 |
addChar(92, 0x7fff0, 19);
|
|
327 |
addChar(93, 0x1ffc, 13);
|
|
328 |
addChar(94, 0x3ffc, 14);
|
|
329 |
addChar(95, 0x22, 6);
|
|
330 |
addChar(96, 0x7ffd, 15);
|
|
331 |
addChar(97, 0x3, 5);
|
|
332 |
addChar(98, 0x23, 6);
|
|
333 |
addChar(99, 0x4, 5);
|
|
334 |
addChar(100, 0x24, 6);
|
|
335 |
addChar(101, 0x5, 5);
|
|
336 |
addChar(102, 0x25, 6);
|
|
337 |
addChar(103, 0x26, 6);
|
|
338 |
addChar(104, 0x27, 6);
|
|
339 |
addChar(105, 0x6, 5);
|
|
340 |
addChar(106, 0x74, 7);
|
|
341 |
addChar(107, 0x75, 7);
|
|
342 |
addChar(108, 0x28, 6);
|
|
343 |
addChar(109, 0x29, 6);
|
|
344 |
addChar(110, 0x2a, 6);
|
|
345 |
addChar(111, 0x7, 5);
|
|
346 |
addChar(112, 0x2b, 6);
|
|
347 |
addChar(113, 0x76, 7);
|
|
348 |
addChar(114, 0x2c, 6);
|
|
349 |
addChar(115, 0x8, 5);
|
|
350 |
addChar(116, 0x9, 5);
|
|
351 |
addChar(117, 0x2d, 6);
|
|
352 |
addChar(118, 0x77, 7);
|
|
353 |
addChar(119, 0x78, 7);
|
|
354 |
addChar(120, 0x79, 7);
|
|
355 |
addChar(121, 0x7a, 7);
|
|
356 |
addChar(122, 0x7b, 7);
|
|
357 |
addChar(123, 0x7ffe, 15);
|
|
358 |
addChar(124, 0x7fc, 11);
|
|
359 |
addChar(125, 0x3ffd, 14);
|
|
360 |
addChar(126, 0x1ffd, 13);
|
|
361 |
addChar(127, 0xffffffc, 28);
|
|
362 |
addChar(128, 0xfffe6, 20);
|
|
363 |
addChar(129, 0x3fffd2, 22);
|
|
364 |
addChar(130, 0xfffe7, 20);
|
|
365 |
addChar(131, 0xfffe8, 20);
|
|
366 |
addChar(132, 0x3fffd3, 22);
|
|
367 |
addChar(133, 0x3fffd4, 22);
|
|
368 |
addChar(134, 0x3fffd5, 22);
|
|
369 |
addChar(135, 0x7fffd9, 23);
|
|
370 |
addChar(136, 0x3fffd6, 22);
|
|
371 |
addChar(137, 0x7fffda, 23);
|
|
372 |
addChar(138, 0x7fffdb, 23);
|
|
373 |
addChar(139, 0x7fffdc, 23);
|
|
374 |
addChar(140, 0x7fffdd, 23);
|
|
375 |
addChar(141, 0x7fffde, 23);
|
|
376 |
addChar(142, 0xffffeb, 24);
|
|
377 |
addChar(143, 0x7fffdf, 23);
|
|
378 |
addChar(144, 0xffffec, 24);
|
|
379 |
addChar(145, 0xffffed, 24);
|
|
380 |
addChar(146, 0x3fffd7, 22);
|
|
381 |
addChar(147, 0x7fffe0, 23);
|
|
382 |
addChar(148, 0xffffee, 24);
|
|
383 |
addChar(149, 0x7fffe1, 23);
|
|
384 |
addChar(150, 0x7fffe2, 23);
|
|
385 |
addChar(151, 0x7fffe3, 23);
|
|
386 |
addChar(152, 0x7fffe4, 23);
|
|
387 |
addChar(153, 0x1fffdc, 21);
|
|
388 |
addChar(154, 0x3fffd8, 22);
|
|
389 |
addChar(155, 0x7fffe5, 23);
|
|
390 |
addChar(156, 0x3fffd9, 22);
|
|
391 |
addChar(157, 0x7fffe6, 23);
|
|
392 |
addChar(158, 0x7fffe7, 23);
|
|
393 |
addChar(159, 0xffffef, 24);
|
|
394 |
addChar(160, 0x3fffda, 22);
|
|
395 |
addChar(161, 0x1fffdd, 21);
|
|
396 |
addChar(162, 0xfffe9, 20);
|
|
397 |
addChar(163, 0x3fffdb, 22);
|
|
398 |
addChar(164, 0x3fffdc, 22);
|
|
399 |
addChar(165, 0x7fffe8, 23);
|
|
400 |
addChar(166, 0x7fffe9, 23);
|
|
401 |
addChar(167, 0x1fffde, 21);
|
|
402 |
addChar(168, 0x7fffea, 23);
|
|
403 |
addChar(169, 0x3fffdd, 22);
|
|
404 |
addChar(170, 0x3fffde, 22);
|
|
405 |
addChar(171, 0xfffff0, 24);
|
|
406 |
addChar(172, 0x1fffdf, 21);
|
|
407 |
addChar(173, 0x3fffdf, 22);
|
|
408 |
addChar(174, 0x7fffeb, 23);
|
|
409 |
addChar(175, 0x7fffec, 23);
|
|
410 |
addChar(176, 0x1fffe0, 21);
|
|
411 |
addChar(177, 0x1fffe1, 21);
|
|
412 |
addChar(178, 0x3fffe0, 22);
|
|
413 |
addChar(179, 0x1fffe2, 21);
|
|
414 |
addChar(180, 0x7fffed, 23);
|
|
415 |
addChar(181, 0x3fffe1, 22);
|
|
416 |
addChar(182, 0x7fffee, 23);
|
|
417 |
addChar(183, 0x7fffef, 23);
|
|
418 |
addChar(184, 0xfffea, 20);
|
|
419 |
addChar(185, 0x3fffe2, 22);
|
|
420 |
addChar(186, 0x3fffe3, 22);
|
|
421 |
addChar(187, 0x3fffe4, 22);
|
|
422 |
addChar(188, 0x7ffff0, 23);
|
|
423 |
addChar(189, 0x3fffe5, 22);
|
|
424 |
addChar(190, 0x3fffe6, 22);
|
|
425 |
addChar(191, 0x7ffff1, 23);
|
|
426 |
addChar(192, 0x3ffffe0, 26);
|
|
427 |
addChar(193, 0x3ffffe1, 26);
|
|
428 |
addChar(194, 0xfffeb, 20);
|
|
429 |
addChar(195, 0x7fff1, 19);
|
|
430 |
addChar(196, 0x3fffe7, 22);
|
|
431 |
addChar(197, 0x7ffff2, 23);
|
|
432 |
addChar(198, 0x3fffe8, 22);
|
|
433 |
addChar(199, 0x1ffffec, 25);
|
|
434 |
addChar(200, 0x3ffffe2, 26);
|
|
435 |
addChar(201, 0x3ffffe3, 26);
|
|
436 |
addChar(202, 0x3ffffe4, 26);
|
|
437 |
addChar(203, 0x7ffffde, 27);
|
|
438 |
addChar(204, 0x7ffffdf, 27);
|
|
439 |
addChar(205, 0x3ffffe5, 26);
|
|
440 |
addChar(206, 0xfffff1, 24);
|
|
441 |
addChar(207, 0x1ffffed, 25);
|
|
442 |
addChar(208, 0x7fff2, 19);
|
|
443 |
addChar(209, 0x1fffe3, 21);
|
|
444 |
addChar(210, 0x3ffffe6, 26);
|
|
445 |
addChar(211, 0x7ffffe0, 27);
|
|
446 |
addChar(212, 0x7ffffe1, 27);
|
|
447 |
addChar(213, 0x3ffffe7, 26);
|
|
448 |
addChar(214, 0x7ffffe2, 27);
|
|
449 |
addChar(215, 0xfffff2, 24);
|
|
450 |
addChar(216, 0x1fffe4, 21);
|
|
451 |
addChar(217, 0x1fffe5, 21);
|
|
452 |
addChar(218, 0x3ffffe8, 26);
|
|
453 |
addChar(219, 0x3ffffe9, 26);
|
|
454 |
addChar(220, 0xffffffd, 28);
|
|
455 |
addChar(221, 0x7ffffe3, 27);
|
|
456 |
addChar(222, 0x7ffffe4, 27);
|
|
457 |
addChar(223, 0x7ffffe5, 27);
|
|
458 |
addChar(224, 0xfffec, 20);
|
|
459 |
addChar(225, 0xfffff3, 24);
|
|
460 |
addChar(226, 0xfffed, 20);
|
|
461 |
addChar(227, 0x1fffe6, 21);
|
|
462 |
addChar(228, 0x3fffe9, 22);
|
|
463 |
addChar(229, 0x1fffe7, 21);
|
|
464 |
addChar(230, 0x1fffe8, 21);
|
|
465 |
addChar(231, 0x7ffff3, 23);
|
|
466 |
addChar(232, 0x3fffea, 22);
|
|
467 |
addChar(233, 0x3fffeb, 22);
|
|
468 |
addChar(234, 0x1ffffee, 25);
|
|
469 |
addChar(235, 0x1ffffef, 25);
|
|
470 |
addChar(236, 0xfffff4, 24);
|
|
471 |
addChar(237, 0xfffff5, 24);
|
|
472 |
addChar(238, 0x3ffffea, 26);
|
|
473 |
addChar(239, 0x7ffff4, 23);
|
|
474 |
addChar(240, 0x3ffffeb, 26);
|
|
475 |
addChar(241, 0x7ffffe6, 27);
|
|
476 |
addChar(242, 0x3ffffec, 26);
|
|
477 |
addChar(243, 0x3ffffed, 26);
|
|
478 |
addChar(244, 0x7ffffe7, 27);
|
|
479 |
addChar(245, 0x7ffffe8, 27);
|
|
480 |
addChar(246, 0x7ffffe9, 27);
|
|
481 |
addChar(247, 0x7ffffea, 27);
|
|
482 |
addChar(248, 0x7ffffeb, 27);
|
|
483 |
addChar(249, 0xffffffe, 28);
|
|
484 |
addChar(250, 0x7ffffec, 27);
|
|
485 |
addChar(251, 0x7ffffed, 27);
|
|
486 |
addChar(252, 0x7ffffee, 27);
|
|
487 |
addChar(253, 0x7ffffef, 27);
|
|
488 |
addChar(254, 0x7fffff0, 27);
|
|
489 |
addChar(255, 0x3ffffee, 26);
|
|
490 |
addEOS (256, EOS.code, EOS.length);
|
|
491 |
// @formatter:on
|
|
492 |
}
|
|
493 |
|
|
494 |
|
|
495 |
/**
|
|
496 |
* Calculates the number of bytes required to represent the given {@code
|
|
497 |
* CharSequence} with the Huffman coding.
|
|
498 |
*
|
|
499 |
* @param value
|
|
500 |
* characters
|
|
501 |
*
|
|
502 |
* @return number of bytes
|
|
503 |
*
|
|
504 |
* @throws NullPointerException
|
|
505 |
* if the value is null
|
|
506 |
*/
|
|
507 |
public int lengthOf(CharSequence value) {
|
|
508 |
return lengthOf(value, 0, value.length());
|
|
509 |
}
|
|
510 |
|
|
511 |
/**
|
|
512 |
* Calculates the number of bytes required to represent a subsequence of the
|
|
513 |
* given {@code CharSequence} with the Huffman coding.
|
|
514 |
*
|
|
515 |
* @param value
|
|
516 |
* characters
|
|
517 |
* @param start
|
|
518 |
* the start index, inclusive
|
|
519 |
* @param end
|
|
520 |
* the end index, exclusive
|
|
521 |
*
|
|
522 |
* @return number of bytes
|
|
523 |
*
|
|
524 |
* @throws NullPointerException
|
|
525 |
* if the value is null
|
|
526 |
* @throws IndexOutOfBoundsException
|
|
527 |
* if any invocation of {@code value.charAt(i)}, where
|
|
528 |
* {@code start <= i < end} would throw an IndexOutOfBoundsException
|
|
529 |
*/
|
|
530 |
public int lengthOf(CharSequence value, int start, int end) {
|
|
531 |
int len = 0;
|
|
532 |
for (int i = start; i < end; i++) {
|
|
533 |
char c = value.charAt(i);
|
|
534 |
len += INSTANCE.codeOf(c).length;
|
|
535 |
}
|
|
536 |
// Integer division with ceiling, assumption:
|
|
537 |
assert (len / 8 + (len % 8 != 0 ? 1 : 0)) == (len + 7) / 8 : len;
|
|
538 |
return (len + 7) / 8;
|
|
539 |
}
|
|
540 |
|
|
541 |
private void addChar(int c, int code, int bitLength) {
|
|
542 |
addLeaf(c, code, bitLength, false);
|
|
543 |
codes[c] = new Code(code, bitLength);
|
|
544 |
}
|
|
545 |
|
|
546 |
private void addEOS(int c, int code, int bitLength) {
|
|
547 |
addLeaf(c, code, bitLength, true);
|
|
548 |
codes[c] = new Code(code, bitLength);
|
|
549 |
}
|
|
550 |
|
|
551 |
private void addLeaf(int c, int code, int bitLength, boolean isEOS) {
|
|
552 |
if (bitLength < 1) {
|
|
553 |
throw new IllegalArgumentException("bitLength < 1");
|
|
554 |
}
|
|
555 |
Node curr = root;
|
|
556 |
for (int p = 1 << bitLength - 1; p != 0 && !curr.isLeaf(); p = p >> 1) {
|
|
557 |
curr.isEOSPath |= isEOS; // If it's already true, it can't become false
|
|
558 |
curr = curr.addChildIfAbsent(p & code);
|
|
559 |
}
|
|
560 |
curr.isEOSPath |= isEOS; // The last one needs to have this property as well
|
|
561 |
if (curr.isLeaf()) {
|
|
562 |
throw new IllegalStateException("Specified code is already taken");
|
|
563 |
}
|
|
564 |
curr.setChar((char) c);
|
|
565 |
}
|
|
566 |
|
|
567 |
private Code codeOf(char c) {
|
|
568 |
if (c > 255) {
|
|
569 |
throw new IllegalArgumentException("char=" + ((int) c));
|
|
570 |
}
|
|
571 |
return codes[c];
|
|
572 |
}
|
|
573 |
|
|
574 |
//
|
|
575 |
// For debugging/testing purposes
|
|
576 |
//
|
|
577 |
Node getRoot() {
|
|
578 |
return root;
|
|
579 |
}
|
|
580 |
|
|
581 |
//
|
|
582 |
// Guarantees:
|
|
583 |
//
|
|
584 |
// if (isLeaf() == true) => getChar() is a legal call
|
|
585 |
// if (isLeaf() == false) => getChild(i) is a legal call (though it can
|
|
586 |
// return null)
|
|
587 |
//
|
|
588 |
static class Node {
|
|
589 |
|
|
590 |
Node left;
|
|
591 |
Node right;
|
|
592 |
boolean isEOSPath;
|
|
593 |
|
|
594 |
boolean charIsSet;
|
|
595 |
char c;
|
|
596 |
|
|
597 |
Node getChild(int selector) {
|
|
598 |
if (isLeaf()) {
|
|
599 |
throw new IllegalStateException("This is a leaf node");
|
|
600 |
}
|
|
601 |
Node result = selector == 0 ? left : right;
|
|
602 |
if (result == null) {
|
|
603 |
throw new IllegalStateException(format(
|
|
604 |
"Node doesn't have a child (selector=%s)", selector));
|
|
605 |
}
|
|
606 |
return result;
|
|
607 |
}
|
|
608 |
|
|
609 |
boolean isLeaf() {
|
|
610 |
return charIsSet;
|
|
611 |
}
|
|
612 |
|
|
613 |
char getChar() {
|
|
614 |
if (!isLeaf()) {
|
|
615 |
throw new IllegalStateException("This node is not a leaf node");
|
|
616 |
}
|
|
617 |
return c;
|
|
618 |
}
|
|
619 |
|
|
620 |
void setChar(char c) {
|
|
621 |
if (charIsSet) {
|
|
622 |
throw new IllegalStateException(
|
|
623 |
"This node has been taken already");
|
|
624 |
}
|
|
625 |
if (left != null || right != null) {
|
|
626 |
throw new IllegalStateException("The node cannot be made "
|
|
627 |
+ "a leaf as it's already has a child");
|
|
628 |
}
|
|
629 |
this.c = c;
|
|
630 |
charIsSet = true;
|
|
631 |
}
|
|
632 |
|
|
633 |
Node addChildIfAbsent(int i) {
|
|
634 |
if (charIsSet) {
|
|
635 |
throw new IllegalStateException("The node cannot have a child "
|
|
636 |
+ "as it's already a leaf node");
|
|
637 |
}
|
|
638 |
Node child;
|
|
639 |
if (i == 0) {
|
|
640 |
if ((child = left) == null) {
|
|
641 |
child = left = new Node();
|
|
642 |
}
|
|
643 |
} else {
|
|
644 |
if ((child = right) == null) {
|
|
645 |
child = right = new Node();
|
|
646 |
}
|
|
647 |
}
|
|
648 |
return child;
|
|
649 |
}
|
|
650 |
|
|
651 |
@Override
|
|
652 |
public String toString() {
|
|
653 |
if (isLeaf()) {
|
|
654 |
if (isEOSPath) {
|
|
655 |
return "EOS";
|
|
656 |
} else {
|
|
657 |
return format("char: (%3s) '%s'", (int) c, c);
|
|
658 |
}
|
|
659 |
}
|
|
660 |
return "/\\";
|
|
661 |
}
|
|
662 |
}
|
|
663 |
|
|
664 |
// TODO: value-based class?
|
|
665 |
// FIXME: can we re-use Node instead of this class?
|
|
666 |
private static final class Code {
|
|
667 |
|
|
668 |
final int code;
|
|
669 |
final int length;
|
|
670 |
|
|
671 |
private Code(int code, int length) {
|
|
672 |
this.code = code;
|
|
673 |
this.length = length;
|
|
674 |
}
|
|
675 |
|
|
676 |
public int getCode() {
|
|
677 |
return code;
|
|
678 |
}
|
|
679 |
|
|
680 |
public int getLength() {
|
|
681 |
return length;
|
|
682 |
}
|
|
683 |
|
|
684 |
@Override
|
|
685 |
public String toString() {
|
|
686 |
long p = 1 << length;
|
|
687 |
return Long.toBinaryString(code + p).substring(1)
|
|
688 |
+ ", length=" + length;
|
|
689 |
}
|
|
690 |
}
|
|
691 |
}
|