author | jiangli |
Wed, 27 Sep 2017 17:55:20 -0400 | |
changeset 47548 | 664b9d44db74 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
796 | 1 |
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
23 |
||
24 |
/* |
|
25 |
* @test |
|
32038
911671c0c520
8022224: Rare bug in JISAutodetect charset detected by FindDecoderBugs test
sherman
parents:
5506
diff
changeset
|
26 |
* @bug 4831163 5053096 5056440 8022224 |
796 | 27 |
* @summary NIO charset basic verification of JISAutodetect decoder |
44115
bb4e971bf5d4
8176195: Fix misc module dependencies in jdk_core tests
xiaofeya
parents:
32038
diff
changeset
|
28 |
* @modules jdk.charsets |
796 | 29 |
* @author Martin Buchholz |
30 |
*/ |
|
31 |
||
32 |
import java.io.*; |
|
33 |
import java.nio.ByteBuffer; |
|
34 |
import java.nio.CharBuffer; |
|
35 |
import java.nio.charset.Charset; |
|
36 |
import java.nio.charset.CharsetDecoder; |
|
37 |
import java.nio.charset.CoderResult; |
|
38 |
import static java.lang.System.*; |
|
39 |
||
40 |
/**
 * Basic verification of the JISAutoDetect charset decoder.
 *
 * <p>Each check records a failure instead of aborting, so a single run
 * reports every problem it finds; {@link #main} throws at the very end if
 * any failure was recorded.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far; inspected at the end of {@link #main}. */
    private static int failures = 0;

    /**
     * Records one failure and prints its message to standard output.
     *
     * @param failureMsg text describing what went wrong
     */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure (and prints a stack trace that locates the caller)
     * when the given condition does not hold.
     *
     * @param cond condition that is expected to be true
     * @param msg  label identifying the check in the failure output
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * a fixed byte sample that the test treats as Shift_JIS text.
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {
            (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2 });
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * a fixed byte sample that the test treats as EUC-JP text.
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {
            (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9 });
    }

    /**
     * Decodes {@code bytes} with a fresh JISAutodetect decoder and returns the
     * name of the charset the decoder reports as detected.
     *
     * <p>Along the way this exercises the detection-related public methods of
     * {@link CharsetDecoder}: {@code isAutoDetecting}, {@code isCharsetDetected}
     * and {@code detectedCharset}.
     *
     * @param bytes input that is expected to let the decoder settle on a charset
     * @return the name of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutodetecting()");
        check(!cd.isCharsetDetected(), "isCharsetDetected");

        // A lone ASCII byte is expected to leave the detection undecided.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(!cd.isCharsetDetected(), "isCharsetDetected");

        // Asking for the detected charset before detection must be rejected.
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // This is the outcome the test wants.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(!cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in order and throws if any of them recorded a failure.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check failed
     */
    public static void main(String[] argv) throws Exception {
        checkSingleByteDecode();
        checkReaderReachesEof();
        checkAllChars();
        checkByteSequences();
        checkSplitFirstChar();
        checkBug8022224();

        if (failures > 0) {
            throw new RuntimeException(failures + " tests failed");
        }
    }

    /** Decodes a one-byte input; this used to throw BufferOverflowException. */
    private static void checkSingleByteDecode() throws Exception {
        System.out.println(new String(new byte[] {0x61}, "JISAutoDetect"));
    }

    /**
     * Reads one line through an InputStreamReader(...JISAutoDetect); this used
     * to loop forever, returning "ABCD" on the second and subsequent reads.
     */
    private static void checkReaderReachesEof() throws Exception {
        // Name the charset explicitly so the input bytes do not depend on the
        // platform default encoding.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(bytes), "JISAutoDetect"));
        // Constant-first equals: a null line is recorded as a failure rather
        // than aborting the whole run with a NullPointerException.
        check("ABCD".equals(reader.readLine()), "first read gets text");
        check(reader.readLine() == null, "second read gets null");
    }

    /**
     * Round-trips every char below U+FFFF through EUC-JP, ISO-2022-JP and SJIS
     * and compares the auto-detecting decoder's output with the originals,
     * then prints how many chars each encoding could represent.
     */
    private static void checkAllChars() throws Exception {
        String sjis = SJISName();
        String eucj = EUCJName();
        System.out.printf("SJIS charset is %s%n", sjis);
        System.out.printf("EUCJ charset is %s%n", eucj);

        int iso2022Count = 0;
        int eucjCount = 0;
        int sjisCount = 0;
        int misdetectedCount = 0;

        // The bound is exclusive: a char counter can never exceed U+FFFF, so
        // an inclusive bound would never terminate.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            // ESC and the em-dash are deliberately left out. NOTE(review):
            // the original only labels them "ESC" and "Em-Dash?"; the reason
            // for skipping them is not stated in this file.
            if (c == '\u001b' || c == '\u2014') {
                continue;
            }
            String s = String.valueOf(c);

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //----------------------------------------------------------------
            byte[] eucjBytes = s.getBytes(eucj);
            String eucjDecoded = new String(eucjBytes, eucj);
            if (!eucjDecoded.equals(s)) {
                continue; // Optimization: skip the remaining encodings too
            }
            eucjCount++;
            String autoDecoded = new String(eucjBytes, "JISAutoDetect");
            if (!autoDecoded.equals(eucjDecoded)) {
                misdetectedCount++;
                String sjisDecoded = new String(eucjBytes, sjis);
                if (!autoDecoded.equals(sjisDecoded)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //----------------------------------------------------------------
            byte[] iso2022Bytes = s.getBytes("ISO-2022-JP");
            if (new String(iso2022Bytes, "ISO-2022-JP").equals(s)) {
                iso2022Count++;
                check(new String(iso2022Bytes, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //----------------------------------------------------------------
            byte[] sjisBytes = s.getBytes(sjis);
            if (new String(sjisBytes, sjis).equals(s)) {
                sjisCount++;
                check(new String(sjisBytes, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }

        System.out.printf("There are %d ISO-2022-JP-encodable characters.%n", iso2022Count);
        System.out.printf("There are %d SJIS-encodable characters.%n", sjisCount);
        System.out.printf("There are %d EUC-JP-encodable characters.%n", eucjCount);
        System.out.printf("There are %d characters that are " +
                          "misdetected as SJIS after being EUC-encoded.%n", misdetectedCount);
    }

    /**
     * Compares the auto-detecting decoder against a named decoder on a fixed
     * set of byte sequences.
     */
    private static void checkByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP", new byte[] {'A', 'B', 'C'});
        test("SJIS", new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20 });

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Checks handling of ambiguous end-of-input in the middle of the first
     * non-ASCII char: the decoder is expected to consume only the leading
     * 'A' until the second byte of the sequence arrives.
     */
    private static void checkSplitFirstChar() throws Exception {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);

        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();
        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input is expected to make no progress.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Supply the missing byte and signal end of input.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * Regression check for bug 8022224: decoding the same input must yield the
     * identical CoderResult whether the output buffer has room for ten chars
     * or for only one.
     */
    private static void checkBug8022224() throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 'a', 0x1b, 0x24, 0x40 });
        CharBuffer cb = CharBuffer.wrap(new char[10]);
        CoderResult cr = cs.newDecoder().decode(bb, cb, false);
        bb.rewind();
        cb.clear().limit(1);
        check(cr == cs.newDecoder().decode(bb, cb, false), "#8022224");
    }

    /**
     * Records a failure unless the given result is an underflow.
     *
     * @param result outcome of a decode call
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the auto-detecting decoder and once with
     * the decoder of {@code expectedCharset}, and records a failure when the
     * two decoded strings differ or either decode does not end in underflow.
     *
     * @param expectedCharset name of the charset whose decoder provides the
     *                        reference output
     * @param input           bytes to decode; must fit in the 128-byte buffer
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        CharsetDecoder autoDetect = Charset.forName("x-JISAutoDetect").newDecoder();
        CharsetDecoder decoder = Charset.forName(expectedCharset).newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        bb.mark(); // remember the start so the same bytes can be decoded twice

        checkCoderResult(autoDetect.decode(bb, charOutput, true));
        charOutput.flip();
        String actual = charOutput.toString();

        bb.reset();

        checkCoderResult(decoder.decode(bb, charExpected, true));
        charExpected.flip();
        String expected = charExpected.toString();

        check(actual.equals(expected),
              String.format("actual=%s expected=%s", actual, expected));
    }
}