author | mkos |
Sun, 30 Dec 2012 00:00:00 +0100 | |
changeset 22678 | ac1ea46be942 |
parent 12009 | 4abb694f273a |
permissions | -rw-r--r-- |
12009 | 1 |
/* |
22678
ac1ea46be942
8029237: Update copyright year to match last edit in jaxws repository for 2012
mkos
parents:
12009
diff
changeset
|
2 |
* Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. |
12009 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. Oracle designates this |
|
8 |
* particular file as subject to the "Classpath" exception as provided |
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
|
10 |
* |
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
24 |
* |
|
25 |
* THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC. |
|
26 |
*/ |
|
27 |
||
28 |
package com.sun.xml.internal.fastinfoset; |
|
29 |
||
30 |
/**
 * Static lookup tables that map a single octet (0..255) to a decoder state.
 *
 * <p>Each table is described compactly by a {@code *_RANGES} array of
 * {@code { inclusiveEndOctet, state }} pairs listed in ascending order; the
 * class initializer expands every description into a 256-entry {@code int[]}
 * via {@link #constructTable(int[], int[][])}. The public accessor methods
 * simply index those expanded tables.
 *
 * <p>Octets that do not correspond to a recognised state map to
 * {@link #STATE_ILLEGAL}.
 *
 * <p>The class is a non-instantiable holder of constants and static methods.
 */
public class DecoderStateTables {
    /** Position of the inclusive range end within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_END = 0;
    /** Position of the state value within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State stored for octets that are not valid in the given context. */
    public static final int STATE_ILLEGAL                     = 255;
    /** Declared alongside {@link #STATE_ILLEGAL}; not stored in any table of this class. */
    public static final int STATE_UNSUPPORTED                 = 254;

    // EII child states
    public static final int EII_NO_AIIS_INDEX_SMALL           = 0;
    public static final int EII_AIIS_INDEX_SMALL              = 1;
    public static final int EII_INDEX_MEDIUM                  = 2;
    public static final int EII_INDEX_LARGE                   = 3;
    public static final int EII_NAMESPACES                    = 4;
    public static final int EII_LITERAL                       = 5;
    public static final int CII_UTF8_SMALL_LENGTH             = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH            = 7;
    public static final int CII_UTF8_LARGE_LENGTH             = 8;
    public static final int CII_UTF16_SMALL_LENGTH            = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH           = 10;
    public static final int CII_UTF16_LARGE_LENGTH            = 11;
    public static final int CII_RA                            = 12;
    public static final int CII_EA                            = 13;
    public static final int CII_INDEX_SMALL                   = 14;
    public static final int CII_INDEX_MEDIUM                  = 15;
    public static final int CII_INDEX_LARGE                   = 16;
    public static final int CII_INDEX_LARGE_LARGE             = 17;
    public static final int COMMENT_II                        = 18;
    public static final int PROCESSING_INSTRUCTION_II         = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II      = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II    = 21;
    public static final int TERMINATOR_SINGLE                 = 22;
    public static final int TERMINATOR_DOUBLE                 = 23;

    /** Expanded DII table, filled in by the static initializer. */
    private static final int[] DII = new int[256];

    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // i.e. %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000               EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100               EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101               EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110               ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111               EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111  EII medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111  EII large index
        // %01110000               EII very large index
        // i.e. %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000               EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100               EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101               EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110               ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111               EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011  ILLEGAL
        { 0xC3, STATE_ILLEGAL },
        // %11000100 to %11000111  document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },
        // %11001000 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001               processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010               comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000               single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111               double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    /** Expanded EII table, filled in by the static initializer. */
    private static final int[] EII = new int[256];

    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // i.e. %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000               EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100               EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101               EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110               ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111               EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111  EII medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111  EII large index
        // %01110000               EII very large index
        // i.e. %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000               EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100               EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101               EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110               ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111               EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001  CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },
        // %10000010               CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },
        // %10000011               CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101  CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },
        // %10000110               CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },
        // %10000111               CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011  CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111  CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001  CII UTF-8 add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },
        // %10010010               CII UTF-8 add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },
        // %10010011               CII UTF-8 add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101  CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },
        // %10010110               CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },
        // %10010111               CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011  CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111  CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111  CII small index
        { 0xAF, CII_INDEX_SMALL },
        // %10110000 to %10110011  CII medium index
        { 0xB3, CII_INDEX_MEDIUM },
        // %10110100 to %10110111  CII large index
        { 0xB7, CII_INDEX_LARGE },
        // %10111000               CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111  ILLEGAL
        { 0xC7, STATE_ILLEGAL },
        // %11001000 to %11001011  unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },
        // %11001100 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001               processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010               comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000               single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111               double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };


    // AII states
    public static final int AII_INDEX_SMALL                   = 0;
    public static final int AII_INDEX_MEDIUM                  = 1;
    public static final int AII_INDEX_LARGE                   = 2;
    public static final int AII_LITERAL                       = 3;
    public static final int AII_TERMINATOR_SINGLE             = 4;
    public static final int AII_TERMINATOR_DOUBLE             = 5;

    /** Expanded AII table, filled in by the static initializer. */
    private static final int[] AII = new int[256];

    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111  AII small index
        { 0x3F, AII_INDEX_SMALL },
        // %01000000 to %01011111  AII medium index
        { 0x5F, AII_INDEX_MEDIUM },
        // %01100000 to %01101111  AII large index
        { 0x6F, AII_INDEX_LARGE },
        // %01110000 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000               AII literal (no prefix, no namespace)
        // %01111001               AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },
        // %01111010               ILLEGAL
        { 0x7A, STATE_ILLEGAL },
        // %01111011               AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },
        // %01111100 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000               single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },
        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111               double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE }
    };


    // AII value states
    public static final int NISTRING_UTF8_SMALL_LENGTH        = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH       = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH        = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH       = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH      = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH       = 5;
    public static final int NISTRING_RA                       = 6;
    public static final int NISTRING_EA                       = 7;
    public static final int NISTRING_INDEX_SMALL              = 8;
    public static final int NISTRING_INDEX_MEDIUM             = 9;
    public static final int NISTRING_INDEX_LARGE              = 10;
    public static final int NISTRING_EMPTY                    = 11;

    /** Expanded NISTRING table, filled in by the static initializer. */
    private static final int[] NISTRING = new int[256];

    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111  UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },
        // %00001000               UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },
        // %00001001 to %00001011  ILLEGAL
        { 0x0B, STATE_ILLEGAL },
        // %00001100               UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },
        // %00001101 to %00001111  ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111  UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },
        // %00011000               UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },
        // %00011001 to %00011011  ILLEGAL
        { 0x1B, STATE_ILLEGAL },
        // %00011100               UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },
        // %00011101 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111  RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111  EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111  UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },
        // %01001000               UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },
        // %01001001 to %01001011  ILLEGAL
        { 0x4B, STATE_ILLEGAL },
        // %01001100               UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },
        // %01001101 to %01001111  ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111  UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },
        // %01011000               UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },
        // %01011001 to %01011011  ILLEGAL
        { 0x5B, STATE_ILLEGAL },
        // %01011100               UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },
        // %01011101 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111  RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111  EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111  index small
        { 0xBF, NISTRING_INDEX_SMALL },
        // %11000000 to %11011111  index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111  index large
        { 0xEF, NISTRING_INDEX_LARGE },
        // %11110000 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111               Empty value
        { 0xFF, NISTRING_EMPTY },
    };


    // Identifying string states
    /* package */ static final int ISTRING_SMALL_LENGTH        = 0;
    /* package */ static final int ISTRING_MEDIUM_LENGTH       = 1;
    /* package */ static final int ISTRING_LARGE_LENGTH        = 2;
    /* package */ static final int ISTRING_INDEX_SMALL         = 3;
    /* package */ static final int ISTRING_INDEX_MEDIUM        = 4;
    /* package */ static final int ISTRING_INDEX_LARGE         = 5;

    /** Expanded ISTRING table, filled in by the static initializer. */
    private static final int[] ISTRING = new int[256];

    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },
        // %01000000               medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000               large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };


    // Identifying string states specific to prefixes and namespace names.
    // These extend the ISTRING_* values above (0..5) and single out four
    // particular small-length octets and the index-zero octet.
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3   = 6;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5   = 7;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29  = 8;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36  = 9;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /** Expanded ISTRING_PREFIX_NAMESPACE table, filled in by the static initializer. */
    private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001  small length
        { 0x01, ISTRING_SMALL_LENGTH },
        // %00000010               small length, singled out as LENGTH_3
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },
        // %00000011               small length
        { 0x03, ISTRING_SMALL_LENGTH },
        // %00000100               small length, singled out as LENGTH_5
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },
        // %00000101 to %00011011  small length
        { 0x1B, ISTRING_SMALL_LENGTH },
        // %00011100               small length, singled out as LENGTH_29
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },
        // %00011101 to %00100010  small length
        { 0x22, ISTRING_SMALL_LENGTH },
        // %00100011               small length, singled out as LENGTH_36
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },
        // %00100100 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000               medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000               large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000               index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },
        // %10000001 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // UTF-8 states
    /* package */ static final int UTF8_NCNAME_NCNAME         = 0;
    /* package */ static final int UTF8_NCNAME_NCNAME_CHAR    = 1;
    /* package */ static final int UTF8_TWO_BYTES             = 2;
    /* package */ static final int UTF8_THREE_BYTES           = 3;
    /* package */ static final int UTF8_FOUR_BYTES            = 4;

    /** Expanded UTF8_NCNAME table, filled in by the static initializer. */
    private static final int[] UTF8_NCNAME = new int[256];

    private static final int[][] UTF8_NCNAME_RANGES = {

        // Basic Latin

        // %00000000 to %00101100  ILLEGAL
        { 0x2C, STATE_ILLEGAL },
        // '-' '.'
        // %00101101 to %00101110  [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },
        // %00101111               ILLEGAL
        { 0x2F, STATE_ILLEGAL },
        // [0-9]
        // %00110000 to %00111001  [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },
        // %00111010 to %01000000  ILLEGAL
        { 0x40, STATE_ILLEGAL },
        // [A-Z]
        // %01000001 to %01011010  [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },
        // %01011011 to %01011110  ILLEGAL
        { 0x5E, STATE_ILLEGAL },
        // '_'
        // %01011111               [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },
        // %01100000               ILLEGAL
        { 0x60, STATE_ILLEGAL },
        // [a-z]
        // %01100001 to %01111010  [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },
        // %01111011 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    // Shares the value 1 with UTF8_NCNAME_NCNAME_CHAR; the two constants are
    // stored in different tables (UTF8 vs. UTF8_NCNAME).
    /* package */ static final int UTF8_ONE_BYTE              = 1;

    /** Expanded UTF8 table, filled in by the static initializer. */
    private static final int[] UTF8 = new int[256];

    private static final int[][] UTF8_RANGES = {

        // Basic Latin

        // %00000000 to %00001000  ILLEGAL
        { 0x08, STATE_ILLEGAL },
        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010  [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },
        // %00001011 to %00001100  ILLEGAL
        { 0x0C, STATE_ILLEGAL },
        // CARRIAGE RETURN
        // %00001101               [#x000D]
        { 0x0D, UTF8_ONE_BYTE },
        // %00001110 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },
        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },

        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Expands a compact range description into a per-octet lookup table.
     *
     * <p>Each entry of {@code ranges} is an {@code { end, value }} pair. The
     * first range starts at index 0; every later range starts one past the
     * previous range's end. All indices of a range, end inclusive, are set
     * to that range's value.
     *
     * @param table  destination array; must be large enough to hold the
     *               highest {@code end} listed in {@code ranges}
     * @param ranges {@code { inclusiveEnd, value }} pairs in ascending order
     *               of {@code inclusiveEnd}
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (final int[] range : ranges) {
            final int end = range[RANGE_INDEX_END];
            final int value = range[RANGE_INDEX_VALUE];
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            // The next range begins immediately after this one.
            start = end + 1;
        }
    }

    /**
     * Looks up the DII table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int DII(final int index) {
        return DII[index];
    }

    /**
     * Looks up the EII table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int EII(final int index) {
        return EII[index];
    }

    /**
     * Looks up the AII table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int AII(final int index) {
        return AII[index];
    }

    /**
     * Looks up the NISTRING (AII value) table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int NISTRING(final int index) {
        return NISTRING[index];
    }

    /**
     * Looks up the ISTRING (identifying string) table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING(final int index) {
        return ISTRING[index];
    }

    /**
     * Looks up the ISTRING_PREFIX_NAMESPACE table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING_PREFIX_NAMESPACE(final int index) {
        return ISTRING_PREFIX_NAMESPACE[index];
    }

    /**
     * Looks up the UTF8 table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8(final int index) {
        return UTF8[index];
    }

    /**
     * Looks up the UTF8_NCNAME table.
     *
     * @param index table index; must be in the range 0..255
     * @return the state stored for {@code index}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8_NCNAME(final int index) {
        return UTF8_NCNAME[index];
    }

    // Expand every range description into its lookup table. This block must
    // stay textually after all table and *_RANGES declarations so that those
    // arrays are already initialised when it runs.
    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII Value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string, prefix / namespace variant
        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: this class only holds constants and static lookups. */
    private DecoderStateTables() {
    }
}