|
/*
 * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
|
23 |
|
/*
 * @test
 * @bug 6831794 6229811
 * @summary Test EUC_TW charset
 */
|
29 |
|
30 import java.nio.charset.*; |
|
31 import java.nio.*; |
|
32 import java.util.*; |
|
33 |
|
34 public class TestEUC_TW { |
|
35 |
|
36 static class Time { |
|
37 long t; |
|
38 } |
|
39 static int iteration = 1000; |
|
40 |
|
41 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) |
|
42 throws Exception { |
|
43 String csn = cs.name(); |
|
44 CharsetDecoder dec = cs.newDecoder(); |
|
45 ByteBuffer bbf; |
|
46 CharBuffer cbf; |
|
47 if (testDirect) { |
|
48 bbf = ByteBuffer.allocateDirect(bb.length); |
|
49 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
|
50 bbf.put(bb); |
|
51 } else { |
|
52 bbf = ByteBuffer.wrap(bb); |
|
53 cbf = CharBuffer.allocate(bb.length); |
|
54 } |
|
55 CoderResult cr = null; |
|
56 long t1 = System.nanoTime()/1000; |
|
57 for (int i = 0; i < iteration; i++) { |
|
58 bbf.rewind(); |
|
59 cbf.clear(); |
|
60 dec.reset(); |
|
61 cr = dec.decode(bbf, cbf, true); |
|
62 } |
|
63 long t2 = System.nanoTime()/1000; |
|
64 if (t != null) |
|
65 t.t = (t2 - t1)/iteration; |
|
66 if (cr != CoderResult.UNDERFLOW) { |
|
67 System.out.println("DEC-----------------"); |
|
68 int pos = bbf.position(); |
|
69 System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", |
|
70 cr.toString(), pos, |
|
71 bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff); |
|
72 throw new RuntimeException("Decoding err: " + csn); |
|
73 } |
|
74 char[] cc = new char[cbf.position()]; |
|
75 cbf.flip(); cbf.get(cc); |
|
76 return cc; |
|
77 |
|
78 } |
|
79 |
|
80 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) |
|
81 throws Exception { |
|
82 CharsetDecoder dec = cs.newDecoder(); |
|
83 ByteBuffer bbf; |
|
84 CharBuffer cbf; |
|
85 if (testDirect) { |
|
86 bbf = ByteBuffer.allocateDirect(bb.length); |
|
87 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
|
88 bbf.put(bb).flip(); |
|
89 } else { |
|
90 bbf = ByteBuffer.wrap(bb); |
|
91 cbf = CharBuffer.allocate(bb.length); |
|
92 } |
|
93 return dec.decode(bbf, cbf, true); |
|
94 } |
|
95 |
|
96 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) |
|
97 throws Exception { |
|
98 ByteBuffer bbf; |
|
99 CharBuffer cbf; |
|
100 CharsetEncoder enc = cs.newEncoder(); |
|
101 String csn = cs.name(); |
|
102 if (testDirect) { |
|
103 bbf = ByteBuffer.allocateDirect(cc.length * 4); |
|
104 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
|
105 cbf.put(cc).flip(); |
|
106 } else { |
|
107 bbf = ByteBuffer.allocate(cc.length * 4); |
|
108 cbf = CharBuffer.wrap(cc); |
|
109 } |
|
110 CoderResult cr = null; |
|
111 long t1 = System.nanoTime()/1000; |
|
112 for (int i = 0; i < iteration; i++) { |
|
113 cbf.rewind(); |
|
114 bbf.clear(); |
|
115 enc.reset(); |
|
116 cr = enc.encode(cbf, bbf, true); |
|
117 } |
|
118 long t2 = System.nanoTime()/1000; |
|
119 if (t != null) |
|
120 t.t = (t2 - t1)/iteration; |
|
121 if (cr != CoderResult.UNDERFLOW) { |
|
122 System.out.println("ENC-----------------"); |
|
123 int pos = cbf.position(); |
|
124 System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", |
|
125 cr.toString(), pos, cc[pos]&0xffff); |
|
126 throw new RuntimeException("Encoding err: " + csn); |
|
127 } |
|
128 byte[] bb = new byte[bbf.position()]; |
|
129 bbf.flip(); bbf.get(bb); |
|
130 return bb; |
|
131 } |
|
132 |
|
133 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) |
|
134 throws Exception { |
|
135 ByteBuffer bbf; |
|
136 CharBuffer cbf; |
|
137 CharsetEncoder enc = cs.newEncoder(); |
|
138 if (testDirect) { |
|
139 bbf = ByteBuffer.allocateDirect(cc.length * 4); |
|
140 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
|
141 cbf.put(cc).flip(); |
|
142 } else { |
|
143 bbf = ByteBuffer.allocate(cc.length * 4); |
|
144 cbf = CharBuffer.wrap(cc); |
|
145 } |
|
146 return enc.encode(cbf, bbf, true); |
|
147 } |
|
148 |
|
149 static char[] getEUC_TWChars(boolean skipNR) { |
|
150 //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder(); |
|
151 CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder(); |
|
152 CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder(); |
|
153 char[] cc = new char[0x20000]; |
|
154 char[] c2 = new char[2]; |
|
155 int pos = 0; |
|
156 int i = 0; |
|
157 //bmp |
|
158 for (i = 0; i < 0x10000; i++) { |
|
159 //SKIP these 3 NR codepoints if compared to EUC_TW |
|
160 if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9)) |
|
161 continue; |
|
162 if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) { |
|
163 System.out.printf(" Err i=%x: old=%b new=%b%n", i, |
|
164 encOLD.canEncode((char)i), |
|
165 enc.canEncode((char)i)); |
|
166 throw new RuntimeException("canEncode() err!"); |
|
167 } |
|
168 |
|
169 if (enc.canEncode((char)i)) { |
|
170 cc[pos++] = (char)i; |
|
171 } |
|
172 } |
|
173 |
|
174 //supp |
|
175 CharBuffer cb = CharBuffer.wrap(new char[2]); |
|
176 for (i = 0x20000; i < 0x30000; i++) { |
|
177 Character.toChars(i, c2, 0); |
|
178 cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip(); |
|
179 |
|
180 if (encOLD.canEncode(cb) != enc.canEncode(cb)) { |
|
181 throw new RuntimeException("canEncode() err!"); |
|
182 } |
|
183 |
|
184 if (enc.canEncode(cb)) { |
|
185 //System.out.printf("cp=%x, (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff); |
|
186 cc[pos++] = c2[0]; |
|
187 cc[pos++] = c2[1]; |
|
188 } |
|
189 } |
|
190 |
|
191 return Arrays.copyOf(cc, pos); |
|
192 } |
|
193 |
|
194 static void checkRoundtrip(Charset cs) throws Exception { |
|
195 char[] cc = getEUC_TWChars(false); |
|
196 System.out.printf("Check roundtrip <%s>...", cs.name()); |
|
197 byte[] bb = encode(cc, cs, false, null); |
|
198 char[] ccO = decode(bb, cs, false, null); |
|
199 |
|
200 if (!Arrays.equals(cc, ccO)) { |
|
201 System.out.printf(" non-direct failed"); |
|
202 } |
|
203 bb = encode(cc, cs, true, null); |
|
204 ccO = decode(bb, cs, true, null); |
|
205 if (!Arrays.equals(cc, ccO)) { |
|
206 System.out.printf(" (direct) failed"); |
|
207 } |
|
208 System.out.println(); |
|
209 } |
|
210 |
|
211 static void checkInit(String csn) throws Exception { |
|
212 System.out.printf("Check init <%s>...%n", csn); |
|
213 Charset.forName("Big5"); // load in the ExtendedCharsets |
|
214 long t1 = System.nanoTime()/1000; |
|
215 Charset cs = Charset.forName(csn); |
|
216 long t2 = System.nanoTime()/1000; |
|
217 System.out.printf(" charset :%d%n", t2 - t1); |
|
218 t1 = System.nanoTime()/1000; |
|
219 cs.newDecoder(); |
|
220 t2 = System.nanoTime()/1000; |
|
221 System.out.printf(" new Decoder :%d%n", t2 - t1); |
|
222 |
|
223 t1 = System.nanoTime()/1000; |
|
224 cs.newEncoder(); |
|
225 t2 = System.nanoTime()/1000; |
|
226 System.out.printf(" new Encoder :%d%n", t2 - t1); |
|
227 } |
|
228 |
|
229 static void compare(Charset cs1, Charset cs2) throws Exception { |
|
230 char[] cc = getEUC_TWChars(true); |
|
231 |
|
232 String csn1 = cs1.name(); |
|
233 String csn2 = cs2.name(); |
|
234 System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); |
|
235 |
|
236 Time t1 = new Time(); |
|
237 Time t2 = new Time(); |
|
238 |
|
239 byte[] bb1 = encode(cc, cs1, false, t1); |
|
240 byte[] bb2 = encode(cc, cs2, false, t2); |
|
241 |
|
242 System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n", |
|
243 csn2, csn1, |
|
244 t2.t, t1.t, |
|
245 (double)(t2.t)/(t1.t)); |
|
246 if (!Arrays.equals(bb1, bb2)) { |
|
247 System.out.printf(" encoding failed%n"); |
|
248 } |
|
249 |
|
250 char[] cc2 = decode(bb1, cs2, false, t2); |
|
251 char[] cc1 = decode(bb1, cs1, false, t1); |
|
252 System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n", |
|
253 csn2, csn1, |
|
254 t2.t, t1.t, |
|
255 (double)(t2.t)/(t1.t)); |
|
256 if (!Arrays.equals(cc1, cc2)) { |
|
257 System.out.printf(" decoding failed%n"); |
|
258 } |
|
259 |
|
260 bb1 = encode(cc, cs1, true, t1); |
|
261 bb2 = encode(cc, cs2, true, t2); |
|
262 |
|
263 System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", |
|
264 csn2, csn1, |
|
265 t2.t, t1.t, |
|
266 (double)(t2.t)/(t1.t)); |
|
267 |
|
268 if (!Arrays.equals(bb1, bb2)) |
|
269 System.out.printf(" encoding (direct) failed%n"); |
|
270 |
|
271 cc1 = decode(bb1, cs1, true, t1); |
|
272 cc2 = decode(bb1, cs2, true, t2); |
|
273 System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", |
|
274 csn2, csn1, |
|
275 t2.t, t1.t, |
|
276 (double)(t2.t)/(t1.t)); |
|
277 if (!Arrays.equals(cc1, cc2)) { |
|
278 System.out.printf(" decoding (direct) failed%n"); |
|
279 } |
|
280 } |
|
281 |
|
282 // The first byte is the length of malformed bytes |
|
283 static byte[][] malformed = { |
|
284 //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, |
|
285 }; |
|
286 |
|
287 static void checkMalformed(Charset cs) throws Exception { |
|
288 boolean failed = false; |
|
289 String csn = cs.name(); |
|
290 System.out.printf("Check malformed <%s>...%n", csn); |
|
291 for (boolean direct: new boolean[] {false, true}) { |
|
292 for (byte[] bins : malformed) { |
|
293 int mlen = bins[0]; |
|
294 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); |
|
295 CoderResult cr = decodeCR(bin, cs, direct); |
|
296 String ashex = ""; |
|
297 for (int i = 0; i < bin.length; i++) { |
|
298 if (i > 0) ashex += " "; |
|
299 ashex += Integer.toBinaryString((int)bin[i] & 0xff); |
|
300 } |
|
301 if (!cr.isMalformed()) { |
|
302 System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); |
|
303 failed = true; |
|
304 } else if (cr.length() != mlen) { |
|
305 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); |
|
306 failed = true; |
|
307 } |
|
308 } |
|
309 } |
|
310 if (failed) |
|
311 throw new RuntimeException("Check malformed failed " + csn); |
|
312 } |
|
313 |
|
314 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) { |
|
315 int inPos = flow[0]; |
|
316 int inLen = flow[1]; |
|
317 int outPos = flow[2]; |
|
318 int outLen = flow[3]; |
|
319 int expedInPos = flow[4]; |
|
320 int expedOutPos = flow[5]; |
|
321 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW |
|
322 :CoderResult.OVERFLOW; |
|
323 ByteBuffer bbf; |
|
324 CharBuffer cbf; |
|
325 if (direct) { |
|
326 bbf = ByteBuffer.allocateDirect(inPos + bytes.length); |
|
327 cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); |
|
328 } else { |
|
329 bbf = ByteBuffer.allocate(inPos + bytes.length); |
|
330 cbf = CharBuffer.allocate(outPos + outLen); |
|
331 } |
|
332 bbf.position(inPos); |
|
333 bbf.put(bytes).flip().position(inPos).limit(inPos + inLen); |
|
334 cbf.position(outPos); |
|
335 dec.reset(); |
|
336 CoderResult cr = dec.decode(bbf, cbf, false); |
|
337 if (cr != expedCR || |
|
338 bbf.position() != expedInPos || |
|
339 cbf.position() != expedOutPos) { |
|
340 System.out.printf("Expected(direct=%5b): [", direct); |
|
341 for (int i:flow) System.out.print(" " + i); |
|
342 System.out.println("] CR=" + cr + |
|
343 ", inPos=" + bbf.position() + |
|
344 ", outPos=" + cbf.position()); |
|
345 return false; |
|
346 } |
|
347 return true; |
|
348 } |
|
349 |
|
350 static void checkUnderOverflow(Charset cs) throws Exception { |
|
351 String csn = cs.name(); |
|
352 System.out.printf("Check under/overflow <%s>...%n", csn); |
|
353 CharsetDecoder dec = cs.newDecoder(); |
|
354 boolean failed = false; |
|
355 //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 |
|
356 //0 1 2 3 7 11 |
|
357 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); |
|
358 int inlen = bytes.length; |
|
359 |
|
360 int MAXOFF = 20; |
|
361 for (int inoff = 0; inoff < MAXOFF; inoff++) { |
|
362 for (int outoff = 0; outoff < MAXOFF; outoff++) { |
|
363 int[][] Flows = { |
|
364 //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) |
|
365 //overflow |
|
366 {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, |
|
367 {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, |
|
368 {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, |
|
369 {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, |
|
370 {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, |
|
371 {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, |
|
372 //underflow |
|
373 {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, |
|
374 {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, |
|
375 {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
376 {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
377 {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
378 {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
379 {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
380 {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
381 {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0}, |
|
382 {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0}, |
|
383 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, |
|
384 // 2-byte under/overflow |
|
385 {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, |
|
386 {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, |
|
387 {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, |
|
388 // 4-byte under/overflow |
|
389 {inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0}, |
|
390 {inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0}, |
|
391 {inoff, 6, outoff, 2, inoff + 3, outoff + 2, 0}, |
|
392 {inoff, 7, outoff, 2, inoff + 3, outoff + 2, 1}, |
|
393 {inoff, 7, outoff, 3, inoff + 7, outoff + 3, 0}, |
|
394 // 4-byte under/overflow |
|
395 {inoff, 8, outoff, 3, inoff + 7, outoff + 3, 0}, |
|
396 {inoff, 9, outoff, 3, inoff + 7, outoff + 3, 0}, |
|
397 {inoff, 10, outoff, 3, inoff + 7, outoff + 3, 0}, |
|
398 {inoff, 11, outoff, 3, inoff + 7, outoff + 3, 1}, |
|
399 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, |
|
400 // 4-byte/supp under/overflow |
|
401 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, |
|
402 {inoff, 12, outoff, 4, inoff +11, outoff + 4, 0}, |
|
403 {inoff, 13, outoff, 4, inoff +11, outoff + 4, 0}, |
|
404 {inoff, 14, outoff, 4, inoff +11, outoff + 4, 0}, |
|
405 {inoff, 15, outoff, 4, inoff +11, outoff + 4, 1}, |
|
406 {inoff, 15, outoff, 5, inoff +11, outoff + 4, 1}, |
|
407 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, |
|
408 }; |
|
409 for (boolean direct: new boolean[] {false, true}) { |
|
410 for (int[] flow: Flows) { |
|
411 if (!check(dec, bytes, direct, flow)) |
|
412 failed = true; |
|
413 } |
|
414 }}} |
|
415 if (failed) |
|
416 throw new RuntimeException("Check under/overflow failed " + csn); |
|
417 } |
|
418 |
|
419 public static void main(String[] args) throws Exception { |
|
420 // be the first one |
|
421 //checkInit("EUC_TW_OLD"); |
|
422 checkInit("EUC_TW"); |
|
423 Charset euctw = Charset.forName("EUC_TW"); |
|
424 checkRoundtrip(euctw); |
|
425 compare(euctw, new EUC_TW_OLD()); |
|
426 checkMalformed(euctw); |
|
427 checkUnderOverflow(euctw); |
|
428 } |
|
429 } |