author | never |
Mon, 12 Jul 2010 22:27:18 -0700 | |
changeset 5926 | a36f90d986b6 |
parent 5506 | 202f599c92aa |
child 9547 | 454881baaca0 |
permissions | -rw-r--r-- |
1091
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
1 |
/* |
5506 | 2 |
* Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. |
1091
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
4 |
* |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
8 |
* |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
13 |
* accompanied this code). |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
14 |
* |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
15 |
* You should have received a copy of the GNU General Public License version |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
18 |
* |
5506 | 19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 |
* or visit www.oracle.com if you need additional information or have any |
|
21 |
* questions. |
|
1091
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
22 |
*/ |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
23 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
24 |
/* |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
25 |
* @test |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
26 |
* @bug 4486841 |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
27 |
* @summary Test UTF-8 charset |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
28 |
*/ |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
29 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
30 |
import java.nio.charset.*; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
31 |
import java.nio.*; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
32 |
import java.util.*; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
33 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
34 |
public class TestUTF8 { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
35 |
static char[] decode(byte[] bb, String csn, boolean testDirect) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
36 |
throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
37 |
CharsetDecoder dec = Charset.forName(csn).newDecoder(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
38 |
ByteBuffer bbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
39 |
CharBuffer cbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
40 |
if (testDirect) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
41 |
bbf = ByteBuffer.allocateDirect(bb.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
42 |
cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
43 |
bbf.put(bb).flip(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
44 |
} else { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
45 |
bbf = ByteBuffer.wrap(bb); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
46 |
cbf = CharBuffer.allocate(bb.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
47 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
48 |
CoderResult cr = dec.decode(bbf, cbf, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
49 |
if (cr != CoderResult.UNDERFLOW) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
50 |
throw new RuntimeException("Decoding err: " + csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
51 |
char[] cc = new char[cbf.position()]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
52 |
cbf.flip(); cbf.get(cc); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
53 |
return cc; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
54 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
55 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
56 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
57 |
static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
58 |
throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
59 |
CharsetDecoder dec = Charset.forName(csn).newDecoder(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
60 |
ByteBuffer bbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
61 |
CharBuffer cbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
62 |
if (testDirect) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
63 |
bbf = ByteBuffer.allocateDirect(bb.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
64 |
cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
65 |
bbf.put(bb).flip(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
66 |
} else { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
67 |
bbf = ByteBuffer.wrap(bb); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
68 |
cbf = CharBuffer.allocate(bb.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
69 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
70 |
return dec.decode(bbf, cbf, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
71 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
72 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
73 |
static byte[] encode(char[] cc, String csn, boolean testDirect) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
74 |
throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
75 |
ByteBuffer bbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
76 |
CharBuffer cbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
77 |
CharsetEncoder enc = Charset.forName(csn).newEncoder(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
78 |
if (testDirect) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
79 |
bbf = ByteBuffer.allocateDirect(cc.length * 4); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
80 |
cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
81 |
cbf.put(cc).flip(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
82 |
} else { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
83 |
bbf = ByteBuffer.allocate(cc.length * 4); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
84 |
cbf = CharBuffer.wrap(cc); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
85 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
86 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
87 |
CoderResult cr = enc.encode(cbf, bbf, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
88 |
if (cr != CoderResult.UNDERFLOW) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
89 |
throw new RuntimeException("Encoding err: " + csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
90 |
byte[] bb = new byte[bbf.position()]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
91 |
bbf.flip(); bbf.get(bb); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
92 |
return bb; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
93 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
94 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
95 |
static CoderResult encodeCR(char[] cc, String csn, boolean testDirect) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
96 |
throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
97 |
ByteBuffer bbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
98 |
CharBuffer cbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
99 |
CharsetEncoder enc = Charset.forName(csn).newEncoder(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
100 |
if (testDirect) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
101 |
bbf = ByteBuffer.allocateDirect(cc.length * 4); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
102 |
cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
103 |
cbf.put(cc).flip(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
104 |
} else { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
105 |
bbf = ByteBuffer.allocate(cc.length * 4); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
106 |
cbf = CharBuffer.wrap(cc); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
107 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
108 |
return enc.encode(cbf, bbf, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
109 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
110 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
111 |
static char[] getUTFChars() { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
112 |
char[] cc = new char[0x10000 - 0xe000 + 0xd800 + //bmp |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
113 |
(0x110000 - 0x10000) * 2]; //supp |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
114 |
int pos = 0; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
115 |
int i = 0; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
116 |
for (i = 0; i < 0xd800; i++) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
117 |
cc[pos++] = (char)i; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
118 |
for (i = 0xe000; i < 0x10000; i++) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
119 |
cc[pos++] = (char)i; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
120 |
for (i = 0x10000; i < 0x110000; i++) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
121 |
pos += Character.toChars(i, cc, pos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
122 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
123 |
return cc; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
124 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
125 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
126 |
static int to3ByteUTF8(char c, byte[] bb, int pos) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
127 |
bb[pos++] = (byte)(0xe0 | ((c >> 12))); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
128 |
bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f)); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
129 |
bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f)); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
130 |
return 3; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
131 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
132 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
133 |
static void checkRoundtrip(String csn) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
134 |
System.out.printf(" Check roundtrip <%s>...", csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
135 |
char[] cc = getUTFChars(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
136 |
byte[] bb = encode(cc, csn, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
137 |
char[] ccO = decode(bb, csn, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
138 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
139 |
if (!Arrays.equals(cc, ccO)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
140 |
System.out.printf(" non-direct failed"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
141 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
142 |
bb = encode(cc, csn, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
143 |
ccO = decode(bb, csn, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
144 |
if (!Arrays.equals(cc, ccO)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
145 |
System.out.printf(" (direct) failed"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
146 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
147 |
System.out.println(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
148 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
149 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
150 |
static void check6ByteSurrs(String csn) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
151 |
System.out.printf(" Check 6-byte Surrogates <%s>...%n", csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
152 |
byte[] bb = new byte[(0x110000 - 0x10000) * 6]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
153 |
char[] cc = new char[(0x110000 - 0x10000) * 2]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
154 |
int bpos = 0; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
155 |
int cpos = 0; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
156 |
for (int i = 0x10000; i < 0x110000; i++) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
157 |
Character.toChars(i, cc, cpos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
158 |
bpos += to3ByteUTF8(cc[cpos], bb, bpos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
159 |
bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
160 |
cpos += 2; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
161 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
162 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
163 |
char[] ccO = decode(bb, csn, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
164 |
if (!Arrays.equals(cc, ccO)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
165 |
System.out.printf(" decoding failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
166 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
167 |
ccO = decode(bb, csn, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
168 |
if (!Arrays.equals(cc, ccO)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
169 |
System.out.printf(" decoding(direct) failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
170 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
171 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
172 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
173 |
static void compare(String csn1, String csn2) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
174 |
System.out.printf(" Diff <%s> <%s>...%n", csn1, csn2); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
175 |
char[] cc = getUTFChars(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
176 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
177 |
byte[] bb1 = encode(cc, csn1, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
178 |
byte[] bb2 = encode(cc, csn2, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
179 |
if (!Arrays.equals(bb1, bb2)) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
180 |
System.out.printf(" encoding failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
181 |
char[] cc1 = decode(bb1, csn1, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
182 |
char[] cc2 = decode(bb1, csn2, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
183 |
if (!Arrays.equals(cc1, cc2)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
184 |
System.out.printf(" decoding failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
185 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
186 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
187 |
bb1 = encode(cc, csn1, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
188 |
bb2 = encode(cc, csn2, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
189 |
if (!Arrays.equals(bb1, bb2)) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
190 |
System.out.printf(" encoding (direct) failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
191 |
cc1 = decode(bb1, csn1, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
192 |
cc2 = decode(bb1, csn2, true); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
193 |
if (!Arrays.equals(cc1, cc2)) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
194 |
System.out.printf(" decoding (direct) failed%n"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
195 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
196 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
197 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
198 |
// Table of malformed UTF-8 inputs used by checkMalformed().
// In each row the first element is the expected malformed-input length
// (compared against CoderResult.length()); the remaining elements are the
// bytes handed to the decoder.
static byte[][] malformed = {
    // One-byte sequences:
    {1, (byte)0xFF },
    {1, (byte)0xC0 },
    {1, (byte)0x80 },

    {1, (byte)0xFF, (byte)0xFF}, // all ones
    {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble

    // Two-byte sequences:
    {1, (byte)0xC0, (byte)0x80}, // invalid first byte
    {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
    {1, (byte)0xC2, (byte)0x00}, // invalid second byte
    {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
    {1, (byte)0xD0, (byte)0x00}, // invalid second byte
    {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
    {1, (byte)0xDF, (byte)0x00}, // invalid second byte
    {1, (byte)0xDF, (byte)0xC0}, // invalid second byte

    // Three-byte sequences:
    {1, (byte)0xE0, (byte)0x80, (byte)0x80}, // 111x first byte first nibble
    {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
    {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
    {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded

    {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
    {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
    {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
    {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
    {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
    {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid second byte (zero-padded form) and invalid third byte

    // Four-byte sequences:
    {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
    {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
    {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
    {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded

    {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
    {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte
    {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
    {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
    {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid fourth byte

    {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
    {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
    {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid fourth byte
    {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
    {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
    {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte

    // Five-byte sequences:
    {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
    {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
    {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
    {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
    {5, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded

    // Five-byte lead followed by one bad continuation byte at each position.
    {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
    {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
    {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
    {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },

    // Six-byte sequences:
    {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
    {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
    {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
    {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
    // NOTE(review): the rows below are six bytes long but lead with 0xF8
    // (the five-byte lead) rather than 0xFC -- confirm this is intended.
    {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
    {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
    {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
    {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
    {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
};
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
273 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
274 |
static void checkMalformed(String csn) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
275 |
boolean failed = false; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
276 |
System.out.printf(" Check malformed <%s>...%n", csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
277 |
for (boolean direct: new boolean[] {false, true}) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
278 |
for (byte[] bins : malformed) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
279 |
int mlen = bins[0]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
280 |
byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
281 |
CoderResult cr = decodeCR(bin, csn, direct); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
282 |
String ashex = ""; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
283 |
for (int i = 0; i < bin.length; i++) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
284 |
if (i > 0) ashex += " "; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
285 |
ashex += Integer.toBinaryString((int)bin[i] & 0xff); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
286 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
287 |
if (!cr.isMalformed()) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
288 |
System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
289 |
failed = true; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
290 |
} else if (cr.length() != mlen) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
291 |
System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
292 |
failed = true; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
293 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
294 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
295 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
296 |
if (failed) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
297 |
throw new RuntimeException("Check malformed failed " + csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
298 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
299 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
300 |
static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
301 |
int inPos = flow[0]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
302 |
int inLen = flow[1]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
303 |
int outPos = flow[2]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
304 |
int outLen = flow[3]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
305 |
int expedInPos = flow[4]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
306 |
int expedOutPos = flow[5]; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
307 |
CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
308 |
:CoderResult.OVERFLOW; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
309 |
ByteBuffer bbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
310 |
CharBuffer cbf; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
311 |
if (direct) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
312 |
bbf = ByteBuffer.allocateDirect(inPos + utf8s.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
313 |
cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
314 |
} else { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
315 |
bbf = ByteBuffer.allocate(inPos + utf8s.length); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
316 |
cbf = CharBuffer.allocate(outPos + outLen); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
317 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
318 |
bbf.position(inPos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
319 |
bbf.put(utf8s).flip().position(inPos).limit(inPos + inLen); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
320 |
cbf.position(outPos); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
321 |
dec.reset(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
322 |
CoderResult cr = dec.decode(bbf, cbf, false); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
323 |
if (cr != expedCR || |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
324 |
bbf.position() != expedInPos || |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
325 |
cbf.position() != expedOutPos) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
326 |
System.out.printf("Expected(direct=%5b): [", direct); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
327 |
for (int i:flow) System.out.print(" " + i); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
328 |
System.out.println("] CR=" + cr + |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
329 |
", inPos=" + bbf.position() + |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
330 |
", outPos=" + cbf.position()); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
331 |
return false; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
332 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
333 |
return true; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
334 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
335 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
336 |
static void checkUnderOverflow(String csn) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
337 |
System.out.printf(" Check under/overflow <%s>...%n", csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
338 |
CharsetDecoder dec = Charset.forName(csn).newDecoder(); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
339 |
boolean failed = false; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
340 |
byte[] utf8s = new String("\u007f\u07ff\ue000\ud800\udc00").getBytes("UTF-8"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
341 |
int inlen = utf8s.length; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
342 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
343 |
for (int inoff = 0; inoff < 20; inoff++) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
344 |
for (int outoff = 0; outoff < 20; outoff++) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
345 |
int[][] Flows = { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
346 |
//inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
347 |
{inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
348 |
{inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
349 |
{inoff, inlen, outoff, 3, inoff + 6, outoff + 3, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
350 |
{inoff, inlen, outoff, 4, inoff + 6, outoff + 3, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
351 |
{inoff, inlen, outoff, 5, inoff + 10,outoff + 5, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
352 |
// underflow |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
353 |
{inoff, 1, outoff, 5, inoff + 1, outoff + 1, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
354 |
{inoff, 2, outoff, 5, inoff + 1, outoff + 1, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
355 |
{inoff, 3, outoff, 5, inoff + 3, outoff + 2, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
356 |
{inoff, 4, outoff, 5, inoff + 3, outoff + 2, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
357 |
{inoff, 5, outoff, 5, inoff + 3, outoff + 2, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
358 |
{inoff, 6, outoff, 5, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
359 |
{inoff, 7, outoff, 5, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
360 |
{inoff, 8, outoff, 5, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
361 |
{inoff, 9, outoff, 5, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
362 |
{inoff, 10, outoff, 5, inoff + 10,outoff + 5, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
363 |
// 2-byte underflow/overflow |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
364 |
{inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
365 |
{inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
366 |
// 3-byte underflow/overflow |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
367 |
{inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
368 |
{inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
369 |
{inoff, 6, outoff, 2, inoff + 3, outoff + 2, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
370 |
// 4-byte underflow/overflow |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
371 |
{inoff, 7, outoff, 4, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
372 |
{inoff, 8, outoff, 4, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
373 |
{inoff, 9, outoff, 4, inoff + 6, outoff + 3, 0}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
374 |
{inoff, 10, outoff, 4, inoff + 6, outoff + 3, 1}, |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
375 |
}; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
376 |
for (boolean direct: new boolean[] {false, true}) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
377 |
for (int[] flow: Flows) { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
378 |
if (!check(dec, utf8s, direct, flow)) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
379 |
failed = true; |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
380 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
381 |
}}} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
382 |
if (failed) |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
383 |
throw new RuntimeException("Check under/overflow failed " + csn); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
384 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
385 |
|
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
386 |
public static void main(String[] args) throws Exception { |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
387 |
checkRoundtrip("UTF-8"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
388 |
check6ByteSurrs("UTF-8"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
389 |
//compare("UTF-8", "UTF-8-OLD"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
390 |
checkMalformed("UTF-8"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
391 |
checkUnderOverflow("UTF-8"); |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
392 |
} |
136d19d6c372
4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff
changeset
|
393 |
} |