/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
|
23 |
|
|
24 |
/*
 * @test
 * @bug 4831163 5053096 5056440
 * @summary NIO charset basic verification of JISAutodetect decoder
 * @author Martin Buchholz
 */
|
|
30 |
|
|
31 |
import java.io.*;
|
|
32 |
import java.nio.ByteBuffer;
|
|
33 |
import java.nio.CharBuffer;
|
|
34 |
import java.nio.charset.Charset;
|
|
35 |
import java.nio.charset.CharsetDecoder;
|
|
36 |
import java.nio.charset.CoderResult;
|
|
37 |
import static java.lang.System.*;
|
|
38 |
|
|
39 |
/**
 * Basic verification of the JISAutoDetect charset decoder.
 *
 * <p>Individual checks do not abort the run: each failed check prints a
 * message, bumps a failure counter and dumps a stack trace, and
 * {@link #main} throws once at the very end if any failure was recorded.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far; inspected at the end of {@link #main}. */
    private static int failures = 0;

    /**
     * Records one failure and prints its message to standard output.
     *
     * @param failureMsg text describing what went wrong
     */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure (prefixed with {@code "test failed: "}) and prints a
     * stack trace identifying the call site when {@code cond} is false.
     *
     * @param cond the condition that is expected to hold
     * @param msg  short description of the condition, used in the failure text
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * the sample byte sequence this test treats as SJIS text.
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports for
     * the sample byte sequence this test treats as EUC-JP text.
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh JISAutodetect decoder and returns the
     * name of the charset the decoder reports as detected.
     *
     * <p>Along the way this exercises the special public methods of
     * {@link CharsetDecoder}: the decoder must claim to be auto-detecting,
     * must not report a detected charset before or after decoding a single
     * {@code 'A'}, must throw {@link IllegalStateException} from
     * {@code detectedCharset()} at that point, and must report a detected,
     * non-auto-detecting charset after decoding {@code bytes}.
     *
     * @param bytes input expected to let the decoder settle on a charset
     * @return the name of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutodetecting()");
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // This is the outcome the test wants: nothing has been detected yet.
        }
        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every section of the test in order and throws if any check failed.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check recorded a failure
     */
    public static void main(String[] argv) throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        out.println(new String(new byte[] {0x61}, "JISAutoDetect"));

        checkReaderTerminates();
        checkAllChars();
        checkSpecificByteSequences();
        checkAmbiguousEndOfInput();

        if (failures > 0)
            throw new RuntimeException(failures + " tests failed");
    }

    /**
     * InputStreamReader(...JISAutoDetect) used to infloop: reading one line
     * of ASCII text must yield that line once and then end-of-stream.
     */
    private static void checkReaderTerminates() throws IOException {
        // Encode the input explicitly so it does not depend on the
        // platform default charset.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(bytes),
                                  "JISAutoDetect"));
        try {
            // Constant-first equals: a null first line is reported as a
            // failed check instead of a NullPointerException.
            check("ABCD".equals(reader.readLine()), "first read gets text");
            // used to return "ABCD" on second and subsequent reads
            check(reader.readLine() == null, "second read gets null");
        } finally {
            reader.close();
        }
    }

    /**
     * Check all Japanese chars for sanity: every char that round-trips
     * through EUC-JP is fed to the auto-detecting decoder in its EUC-JP,
     * ISO-2022-JP and SJIS encodings (where those round-trip too), and the
     * per-encoding counts are printed afterwards.
     *
     * <p>The loop bound is exclusive, so U+FFFF itself is not visited; an
     * inclusive bound on a {@code char} counter would never terminate.
     */
    private static void checkAllChars() throws Exception {
        String sjisName = SJISName();
        String eucjName = EUCJName();
        out.printf("SJIS charset is %s%n", sjisName);
        out.printf("EUCJ charset is %s%n", eucjName);

        int iso2022Count = 0;
        int eucjCount = 0;
        int sjisCount = 0;
        int misdetectedCount = 0;
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' ||  // ESC
                c == '\u2014')    // Em-Dash?
                continue;
            String s = new String(new char[] {c});

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //----------------------------------------------------------------
            byte[] eucjBytes = s.getBytes(eucjName);
            String eucjDecoded = new String(eucjBytes, eucjName);
            if (! eucjDecoded.equals(s))
                continue; // Optimization: skip chars EUC-JP cannot round-trip

            eucjCount++;
            String autoDecoded = new String(eucjBytes, "JISAutoDetect");
            if (! autoDecoded.equals(eucjDecoded)) {
                misdetectedCount++;
                String sjisDecoded = new String(eucjBytes, sjisName);
                if (! autoDecoded.equals(sjisDecoded)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //----------------------------------------------------------------
            byte[] iso2022Bytes = s.getBytes("ISO-2022-JP");
            if (new String(iso2022Bytes, "ISO-2022-JP").equals(s)) {
                iso2022Count++;
                check(new String(iso2022Bytes, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //----------------------------------------------------------------
            byte[] sjisBytes = s.getBytes(sjisName);
            if (new String(sjisBytes, sjisName).equals(s)) {
                sjisCount++;
                check(new String(sjisBytes, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }
        out.printf("There are %d ISO-2022-JP-encodable characters.%n", iso2022Count);
        out.printf("There are %d SJIS-encodable characters.%n", sjisCount);
        out.printf("There are %d EUC-JP-encodable characters.%n", eucjCount);
        out.printf("There are %d characters that are " +
                   "misdetected as SJIS after being EUC-encoded.%n", misdetectedCount);
    }

    /**
     * Tests for specific byte sequences: each one must decode under
     * auto-detection exactly as it does under the named charset.
     */
    private static void checkSpecificByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Check handling of ambiguous end-of-input in middle of first char.
     *
     * <p>The decoder is first given {@code 'A'} followed by a lone
     * {@code 0x8f} with more input still to come. The checks expect it to
     * consume and emit only the {@code 'A'}, and to stay put when asked
     * again. Once {@code 0xa1} is appended and end-of-input is signalled,
     * the checks expect both remaining bytes to be consumed and exactly one
     * more char to be produced.
     */
    private static void checkAmbiguousEndOfInput() {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();
        CoderResult res = dc.decode(bb,cb,false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");
        res = dc.decode(bb,cb,false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");
        // Keep the unread 0x8f at the front, then append the next byte.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb,cb,true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * Records a failure unless {@code result} is an underflow, i.e. unless
     * the decoder consumed its input without overflow or error.
     *
     * @param result the result of a {@code decode} call
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the auto-detecting decoder and once
     * with a decoder for {@code expectedCharset}, and records a failure if
     * either decode does not end in underflow or if the two decoded strings
     * differ.
     *
     * @param expectedCharset name of the charset whose decoding is the reference
     * @param input           bytes to decode; must fit in the 128-byte work buffer
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        CharsetDecoder autoDetect = cs.newDecoder();

        Charset cs2 = Charset.forName(expectedCharset);
        CharsetDecoder decoder = cs2.newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        // Remember the start so the same bytes can be decoded a second time.
        bb.mark();

        CoderResult result = autoDetect.decode(bb, charOutput, true);
        checkCoderResult(result);
        charOutput.flip();
        String actual = charOutput.toString();

        bb.reset();

        result = decoder.decode(bb, charExpected, true);
        checkCoderResult(result);
        charExpected.flip();
        String expected = charExpected.toString();

        check(actual.equals(expected),
              String.format("actual=%s expected=%s", actual, expected));
    }
}
|