author | never |
Mon, 12 Jul 2010 22:27:18 -0700 | |
changeset 5926 | a36f90d986b6 |
parent 5506 | 202f599c92aa |
child 44115 | bb4e971bf5d4 |
permissions | -rw-r--r-- |
796 | 1 |
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
   @bug 4779029 4924625 6392664 6730652
   @summary Test decoding of various permutations of valid ISO-2022-CN byte sequences
 */

/*
 * Regression test for NIO ISO-2022-CN decoder. Passes various valid
 * ISO-2022-CN byte sequences to the decoder using the java.io
 * InputStreamReader API
 */
|
34 |
||
35 |
import java.io.*; |
|
36 |
import java.nio.*; |
|
37 |
import java.nio.charset.*; |
|
38 |
||
39 |
public class TestISO2022CNDecoder
{
    // Charset name handed to InputStreamReader by decodeTest() and
    // negative(). main() reassigns it if the initial probe fails.
    private static String encodingName = "ISO2022CN";

    //
    // Positive tests -- test both output and input processing against
    // various "known good" data
    //
    /**
     * Decodes {@code encoded} three different ways and checks each result
     * against {@code decoded}:
     * <ol>
     *   <li>char by char through an {@code InputStreamReader}, which must
     *       then report EOF,</li>
     *   <li>in one shot through {@code new String(byte[], String)},</li>
     *   <li>through a {@code CharsetDecoder} (REPLACE on malformed and
     *       unmappable input) operating on direct buffers.</li>
     * </ol>
     * Every mismatch is reported on {@code System.err} and makes the
     * method return {@code false}.
     *
     * @param encoded the ISO-2022-CN byte sequence to decode
     * @param decoded the chars the byte sequence is expected to decode to
     * @param label   name of the test case, used as prefix of all messages
     * @return {@code true} only if all three decoding paths produced
     *         exactly {@code decoded}
     */
    private static boolean decodeTest (
        byte encoded[],
        char decoded[],
        String label)
    {
        boolean retval = true;
        int i = 0;

        try {
            //
            // Ensure that reading decodes correctly
            //
            ByteArrayInputStream in;
            InputStreamReader reader;

            in = new ByteArrayInputStream(encoded);
            reader = new InputStreamReader(in, encodingName);

            for (i = 0; i < decoded.length; i++) {
                int c = reader.read();

                if (c != decoded[i]) {
                    System.err.print(label + ": read failed, char " + i);
                    System.err.print(" ... expected 0x"
                            + Integer.toHexString(decoded[i]));
                    if (c == -1)
                        System.err.println(", got EOF");
                    else
                        System.err.println(", got 0x"
                            + Integer.toHexString(c));
                    retval = false;
                    // Nothing left to compare once the reader hit EOF
                    if (c == -1)
                        return retval;
                }
            }

            // The reader must be exhausted after the expected chars
            int testChar;
            if ((testChar = reader.read()) != -1) {
                System.err.println(label + ": read failed, no EOF");
                System.err.println("testChar is " +
                        Integer.toHexString(testChar));
                return false;
            }

            //
            // Ensure that the String constructor decodes correctly
            //
            String decodedString = new String(encoded, "ISO2022CN");
            if (decodedString.length() != decoded.length) {
                System.err.println(label + ": String decode failed, expected "
                        + decoded.length + " chars, got "
                        + decodedString.length());
                retval = false;
            }
            int common = Math.min(decodedString.length(), decoded.length);
            for (i = 0; i < common; i++) {
                if (decodedString.charAt(i) != decoded[i]) {
                    System.err.println(label + ": String decode failed, char " + i);
                    // A mismatch here is a test failure, not just a log line
                    retval = false;
                }
            }

            //
            // Ensure that the decoder works on direct buffers
            //
            CharsetDecoder dec = Charset.forName("ISO2022CN")
                .newDecoder()
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .onMalformedInput(CodingErrorAction.REPLACE);
            ByteBuffer bb = ByteBuffer.allocateDirect(encoded.length).put(encoded);
            bb.flip();
            // allocateDirect() is sized in bytes, hence the factor 2 for chars
            CharBuffer cb = ByteBuffer.allocateDirect(2*encoded.length*(int)dec.maxCharsPerByte())
                .asCharBuffer();
            // This part of the test is only meaningful if neither buffer
            // exposes a backing array
            if (bb.hasArray() || cb.hasArray()) {
                System.err.println(label + ": directBuffer failed, ");
                return false;
            }
            if (!dec.decode(bb, cb, true).isUnderflow()) {
                System.err.println(label + ": decoder's decode() failed!");
                return false;
            }
            cb.flip();
            if (cb.limit() != decoded.length) {
                System.err.println(label + ": decoder failed, expected "
                        + decoded.length + " chars, got " + cb.limit());
                retval = false;
            }
            common = Math.min(cb.limit(), decoded.length);
            for (i = 0; i < common; i++) {
                if (cb.get() != decoded[i]) {
                    System.err.println(label + ": decoder failed, char " + i);
                    // A mismatch here is a test failure, not just a log line
                    retval = false;
                }
            }

        } catch (Exception e) {
            System.err.println(label + ": failed "
                + "(i = " + i + "), "
                + e.getClass().getName()
                + ", " + e.getMessage());
            e.printStackTrace();
            return false;
        }
        return retval;
    }

    /**
     * Compares two coder results by kind and, for error results, by length.
     *
     * {@code CoderResult.length()} is only defined for error results, so
     * non-error results (underflow / overflow) are compared by kind alone
     * and {@code length()} is never called on them.
     *
     * @param a first result
     * @param b second result
     * @return {@code true} if both results describe the same outcome
     */
    private static boolean equal(CoderResult a, CoderResult b) {
        if (!a.isError() || !b.isError()) {
            // At least one side is UNDERFLOW or OVERFLOW: equal only if
            // the other side is the very same kind of non-error result.
            return a.isUnderflow() == b.isUnderflow() &&
                   a.isOverflow() == b.isOverflow();
        }
        return (a.isMalformed() == b.isMalformed()) &&
               (a.isUnmappable() == b.isUnmappable()) &&
               (a.length() == b.length());
    }

    /**
     * Decodes {@code encoded} with a default-configured ISO2022CN decoder,
     * first on heap buffers and then, after a reset, on direct buffers,
     * and checks that decode() returns {@code expected} in both cases.
     *
     * @param encoded  the byte sequence to decode
     * @param expected the coder result decode() is expected to return
     * @param label    name of the test case, used as prefix of all messages
     * @return {@code true} if both decode() calls returned a result equal
     *         to {@code expected}
     */
    private static boolean decodeResultTest (byte encoded[],
                                             CoderResult expected,
                                             String label) {
        CharsetDecoder dec = Charset.forName("ISO2022CN").newDecoder();
        ByteBuffer bb = ByteBuffer.wrap(encoded);
        CharBuffer cb = CharBuffer.allocate(encoded.length*(int)dec.maxCharsPerByte());
        CoderResult result = dec.decode(bb, cb, true);
        if (!equal(result, expected)) {
            System.err.println(label + ": decoder's decode() failed!");
            return false;
        }

        bb = ByteBuffer.allocateDirect(encoded.length).put(encoded);
        bb.flip();
        // allocateDirect() is sized in bytes, hence the factor 2 for chars
        cb = ByteBuffer.allocateDirect(2*encoded.length*(int)dec.maxCharsPerByte())
            .asCharBuffer();
        if (bb.hasArray() || cb.hasArray()) {
            System.err.println(label + ": directBuffer failed, ");
            return false;
        }
        result = dec.reset().decode(bb, cb, true);
        if (!equal(result, expected)) {
            System.err.println(label + ": decoder's decode() - direct failed!");
            return false;
        }
        return true;
    }

    //
    // Negative tests -- only for input processing, make sure that
    // invalid or corrupt characters are rejected.
    //
    /**
     * Reads one char of {@code encoded} through an
     * {@code InputStreamReader} and expects the read to throw a
     * {@code CharConversionException}. Any other outcome (a char, EOF or
     * a different throwable) is reported on {@code System.err}.
     *
     * Note: main() does not currently invoke this helper.
     *
     * @param encoded the invalid byte sequence
     * @param label   name of the test case, used as prefix of all messages
     * @return {@code true} only if a {@code CharConversionException} was
     *         thrown
     */
    private static boolean negative (byte encoded [], String label)
    {
        try {
            ByteArrayInputStream in;
            InputStreamReader reader;
            int c;

            in = new ByteArrayInputStream(encoded);
            reader = new InputStreamReader(in, encodingName);

            c = reader.read();
            System.err.print (label + ": read failed, ");

            if (c == -1)
                System.err.println("reported EOF");
            else
                System.err.println("returned char 0x"
                    + Integer.toHexString(c)
                    + ", expected exception");
            return false;

        } catch (CharConversionException e) {
            return true;

        } catch (Throwable t) {
            System.err.println(label + ": failed, threw "
                + t.getClass().getName()
                + ", " + t.getMessage());
        }
        return false;
    }

    /**
     * Regression check for bug 6392664: decoding the bytes 0x0e 0x42 0x43
     * with a fresh ISO-2022-CN-GB decoder must not throw.
     *
     * @return {@code true} if decode() completed without an exception
     */
    private static boolean decodeTest6392664 () {
        try {
            CharsetDecoder dec = Charset.forName("ISO-2022-CN-GB").newDecoder();
            dec.decode(ByteBuffer.wrap(new byte[] {(byte)0x0e, (byte)0x42, (byte)0x43 }));
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    //
    // TEST #0: 7-bit unshifted values,
    //          shift-out (0x0E) followed by a valid decodable GB2312-80
    //          character and an unmappable GB2312-80 char
    // This is a positive test.
    //
    private static byte test0_bytes[] = {
        (byte)0x00,
        (byte)0x01, (byte)0x02, (byte)0x03,
        (byte)0x0E, (byte)0x21, (byte)0x2f,
        (byte)0x0E, (byte)0xDD, (byte)0x9f
    };

    private static char test0_chars[] = {
        0x0000,
        0x0001, 0x0002, 0x0003,
        0x2019,
        0xFFFD
    };

    private static byte test1_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, (byte)0x21,
        (byte)0x2f };

    private static char test1_chars[] = {
        0x21, 0x2f
    };

    private static byte test2_bytes[] = {
        (byte)0x0e,
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x21, (byte)0x2f };

    private static char test2_chars[] = {
        0x2019
    };

    private static byte test3_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e,
        (byte)0x21, (byte)0x2f };

    private static byte test3a_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x41,
        (byte)0x0e,
        (byte)0x21, (byte)0x2f };

    // Expected output shared by test3_bytes and test3a_bytes
    private static char test3_chars[] = {
        0x2019
    };

    private static byte test4_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0f,
        (byte)0x21, (byte)0x2f };

    private static char test4_chars[] = {
        0x21, 0x2f
    };

    private static byte test5_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x0f, (byte)0x21, (byte)0x2f };

    private static char test5_chars[] = {
        0x2018, 0x21, 0x2f
    };

    private static byte test6_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x21, (byte)0x2f };

    private static char test6_chars[] = {
        0x2018, 0x2019
    };

    private static byte test7_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)'G',
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x21, (byte)0x2f };

    private static char test7_chars[] = {
        0xFE50, 0xFE51
    };

    private static byte test8_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)'G',
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x0f, (byte)0x21, (byte)0x2f };

    private static char test8_chars[] = {
        0xFE50, 0x21, 0x2f
    };

    private static byte test9_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x2a, (byte)'H',
        (byte)0x1b, (byte)0x4e,
        (byte)0x21, (byte)0x2f };

    private static char test9_chars[] = {
        0x4e0e
    };

    /*
     * Plane 3 support provided for compatibility with
     * sun.io ISO2022_CN decoder. Officially ISO-2022-CN
     * just handles planes 1/2 of CNS-11643 (1986)
     * Test case data below verifies this compatibility
     *
     */

    private static byte test10_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)'+', (byte)'I',
        (byte)0x1b, (byte)0x4f,
        (byte)0x21, (byte)0x2f };

    private static char test10_chars[] = {
        0x51e2
    };

    private static byte test11_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, //SO Designator
        (byte)0x0e,                                     //SO
        (byte)0x21, (byte)0x2e,                         //GB2312 char
        (byte)0x1b, (byte)0x24, (byte)0x2a, (byte)'H',  //SS2 Designator
        (byte)0x1b, (byte)0x4e,                         //SS2
        (byte)0x21, (byte)0x2f,                         //CNS-P2 char
        (byte)0x21, (byte)0x2f                          //GB2312 char
    };

    private static char test11_chars[] = {
        0x2018,
        0x4e0e,
        0x2019
    };

    private static byte test12_bytes[] = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, //SO Designator
        (byte)0x0e,                                     //SO
        (byte)0x21, (byte)0x2e,                         //GB2312 char
        (byte)0x1b, (byte)0x24, (byte)'+', (byte)'I',   //SS3 Designator
        (byte)0x1b, (byte)0x4f,                         //SS3
        (byte)0x21, (byte)0x2f,                         //CNS-P3 char
        (byte)0x21, (byte)0x2f                          //GB2312 char
    };

    private static char test12_chars[] = {
        0x2018,
        0x51e2,
        0x2019
    };


    private static byte test13_bytes[] = {
        (byte)0x0f0,   // byte with MSB
    };

    private static char test13_chars[] = {
        0x00f0,
    };

    // Same shifted sequences as the tail of test0_bytes, but checked via
    // decodeResultTest() with a default-configured decoder
    private static byte test14_bytes[] = {
        (byte)0x0E, (byte)0x21, (byte)0x2f,
        (byte)0x0E, (byte)0xDD, (byte)0x9f
    };
    private static CoderResult test14_result = CoderResult.unmappableForLength(2);

    // Current ISO2022CN treats the "out of range" code points as "unmappable"
    private static byte test15_bytes[] = {
        (byte)0x1b, (byte)0x4f,         // SS3
        (byte)0x20, (byte)0x2f,         // "out of range" CNS-P2 char
    };
    private static CoderResult test15_result = CoderResult.unmappableForLength(4);

    /**
     * Regression check for bug 6730652: a string of sample CNS plane 3
     * code points must survive an encode/decode round trip through
     * x-ISO-2022-CN-CNS unchanged.
     *
     * @return {@code true} if the round-tripped string equals the input
     * @throws Exception if the charset name is not supported
     */
    private static boolean encodeTest6730652 () throws Exception {
        //sample p3 codepoints
        String strCNSP3 = "\u4e28\u4e36\u4e3f\u4e85\u4e05\u4e04\u5369\u53b6\u4e2a\u4e87\u4e49\u51e2\u56b8\u56b9\u56c4\u8053\u92b0";
        return strCNSP3.equals(new String(strCNSP3.getBytes("x-ISO-2022-CN-CNS"), "x-ISO-2022-CN-CNS"));
    }

    /**
     * Main program to test ISO2022CN conformance
     *
     * @param argv ignored
     * @throws Exception if any of the checks failed
     */
    public static void main (String argv []) throws Exception
    {
        boolean pass = true;

        System.out.println ("");
        System.out.println ("------ checking ISO2022CN decoder -----");

        // This regtest must be the first one.
        pass &= decodeTest6392664();

        // Probe that the encoding name is accepted by InputStreamReader
        try {
            new InputStreamReader (System.in, "ISO2022CN");
        } catch (Exception e) {
            encodingName = "ISO2022CN";
            System.out.println ("... requires nonstandard encoding name "
                    + encodingName);
            pass &= false;
        }

        //
        // Positive tests -- good data is dealt with correctly
        //
        pass &= decodeTest(test0_bytes, test0_chars, "first batch");
        pass &= decodeTest(test1_bytes, test1_chars, "escapes1");
        pass &= decodeTest(test2_bytes, test2_chars, "escapes2");
        pass &= decodeTest(test3_bytes, test3_chars, "escapes3");
        pass &= decodeTest(test3a_bytes, test3_chars, "escapes3a");
        pass &= decodeTest(test4_bytes, test4_chars, "escapes4");
        pass &= decodeTest(test5_bytes, test5_chars, "escapes5");
        pass &= decodeTest(test6_bytes, test6_chars, "escapes6");
        pass &= decodeTest(test7_bytes, test7_chars, "escapes7");
        pass &= decodeTest(test8_bytes, test8_chars, "escapes8");
        pass &= decodeTest(test9_bytes, test9_chars, "escapes9");
        pass &= decodeTest(test10_bytes, test10_chars, "escapes10");
        pass &= decodeTest(test11_bytes, test11_chars, "escapes11");
        pass &= decodeTest(test12_bytes, test12_chars, "escapes12");
        pass &= decodeTest(test13_bytes, test13_chars, "escapes13");
        pass &= decodeResultTest(test14_bytes, test14_result, "escapes14");
        pass &= decodeResultTest(test15_bytes, test15_result, "escapes15");

        pass &= encodeTest6730652 ();

        // PASS/FAIL status is what the whole thing is about.
        //
        if (! pass) {
            throw new Exception("FAIL -- incorrect ISO-2022-CN");
        }

    }
}