author | erikj |
Tue, 12 Sep 2017 19:03:39 +0200 (2017-09-12) | |
changeset 47216 | 71c04702a3d5 |
parent 30820 | jdk/test/sun/nio/cs/OLD/TestIBMDB.java@0d4717a011d3 |
permissions | -rw-r--r-- |
2921 | 1 |
/* |
14342
8435a30053c1
7197491: update copyright year to match last edit in jdk8 jdk repository
alanb
parents:
13366
diff
changeset
|
2 |
* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. |
2921 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
5506 | 19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 |
* or visit www.oracle.com if you need additional information or have any |
|
21 |
* questions. |
|
2921 | 22 |
*/ |
23 |
||
24 |
/* |
|
25 |
* @test |
|
26 |
* @bug 6843578 |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
27 |
* @summary Test old and new implementation of db charsets |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
28 |
* @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD SJIS_OLD PCK_OLD EUC_JP_OLD EUC_JP_LINUX_OLD EUC_JP_Open_OLD |
30820 | 29 |
* @modules java.base/sun.nio.cs jdk.charsets/sun.nio.cs.ext |
12180
90b9045428bd
7152866: Tests not run because they are missing the @run tag
alanb
parents:
5506
diff
changeset
|
30 |
* @run main TestIBMDB |
2921 | 31 |
*/ |
32 |
||
33 |
import java.nio.charset.*; |
|
34 |
import java.nio.*; |
|
35 |
import java.util.*; |
|
36 |
||
37 |
public class TestIBMDB { |
|
38 |
static class Time { |
|
39 |
long t; |
|
40 |
} |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
41 |
static int iteration = 200; |
2921 | 42 |
|
43 |
static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) |
|
44 |
throws Exception { |
|
45 |
String csn = cs.name(); |
|
46 |
CharsetDecoder dec = cs.newDecoder(); |
|
47 |
ByteBuffer bbf; |
|
48 |
CharBuffer cbf; |
|
49 |
if (testDirect) { |
|
50 |
bbf = ByteBuffer.allocateDirect(bb.length); |
|
51 |
cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
|
52 |
bbf.put(bb); |
|
53 |
} else { |
|
54 |
bbf = ByteBuffer.wrap(bb); |
|
55 |
cbf = CharBuffer.allocate(bb.length); |
|
56 |
} |
|
57 |
CoderResult cr = null; |
|
58 |
long t1 = System.nanoTime()/1000; |
|
59 |
for (int i = 0; i < iteration; i++) { |
|
60 |
bbf.rewind(); |
|
61 |
cbf.clear(); |
|
62 |
dec.reset(); |
|
63 |
cr = dec.decode(bbf, cbf, true); |
|
64 |
} |
|
65 |
long t2 = System.nanoTime()/1000; |
|
66 |
t.t = (t2 - t1)/iteration; |
|
67 |
if (cr != CoderResult.UNDERFLOW) { |
|
68 |
System.out.println("DEC-----------------"); |
|
69 |
int pos = bbf.position(); |
|
70 |
System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", |
|
71 |
cr.toString(), pos, |
|
72 |
bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff); |
|
73 |
throw new RuntimeException("Decoding err: " + csn); |
|
74 |
} |
|
75 |
char[] cc = new char[cbf.position()]; |
|
76 |
cbf.flip(); cbf.get(cc); |
|
77 |
return cc; |
|
78 |
||
79 |
} |
|
80 |
||
81 |
static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) |
|
82 |
throws Exception { |
|
83 |
CharsetDecoder dec = cs.newDecoder(); |
|
84 |
ByteBuffer bbf; |
|
85 |
CharBuffer cbf; |
|
86 |
if (testDirect) { |
|
87 |
bbf = ByteBuffer.allocateDirect(bb.length); |
|
88 |
cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); |
|
89 |
bbf.put(bb).flip(); |
|
90 |
} else { |
|
91 |
bbf = ByteBuffer.wrap(bb); |
|
92 |
cbf = CharBuffer.allocate(bb.length); |
|
93 |
} |
|
94 |
CoderResult cr = null; |
|
95 |
for (int i = 0; i < iteration; i++) { |
|
96 |
bbf.rewind(); |
|
97 |
cbf.clear(); |
|
98 |
dec.reset(); |
|
99 |
cr = dec.decode(bbf, cbf, true); |
|
100 |
} |
|
101 |
return cr; |
|
102 |
} |
|
103 |
||
104 |
static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) |
|
105 |
throws Exception { |
|
106 |
ByteBuffer bbf; |
|
107 |
CharBuffer cbf; |
|
108 |
CharsetEncoder enc = cs.newEncoder(); |
|
109 |
String csn = cs.name(); |
|
110 |
if (testDirect) { |
|
111 |
bbf = ByteBuffer.allocateDirect(cc.length * 4); |
|
112 |
cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
|
113 |
cbf.put(cc).flip(); |
|
114 |
} else { |
|
115 |
bbf = ByteBuffer.allocate(cc.length * 4); |
|
116 |
cbf = CharBuffer.wrap(cc); |
|
117 |
} |
|
118 |
CoderResult cr = null; |
|
119 |
long t1 = System.nanoTime()/1000; |
|
120 |
for (int i = 0; i < iteration; i++) { |
|
121 |
cbf.rewind(); |
|
122 |
bbf.clear(); |
|
123 |
enc.reset(); |
|
124 |
cr = enc.encode(cbf, bbf, true); |
|
125 |
} |
|
126 |
long t2 = System.nanoTime()/1000; |
|
127 |
t.t = (t2 - t1)/iteration; |
|
128 |
if (cr != CoderResult.UNDERFLOW) { |
|
129 |
System.out.println("ENC-----------------"); |
|
130 |
int pos = cbf.position(); |
|
131 |
System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", |
|
132 |
cr.toString(), pos, cc[pos]&0xffff); |
|
133 |
throw new RuntimeException("Encoding err: " + csn); |
|
134 |
} |
|
135 |
byte[] bb = new byte[bbf.position()]; |
|
136 |
bbf.flip(); bbf.get(bb); |
|
137 |
return bb; |
|
138 |
} |
|
139 |
||
140 |
static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) |
|
141 |
throws Exception { |
|
142 |
ByteBuffer bbf; |
|
143 |
CharBuffer cbf; |
|
144 |
CharsetEncoder enc = cs.newEncoder(); |
|
145 |
if (testDirect) { |
|
146 |
bbf = ByteBuffer.allocateDirect(cc.length * 4); |
|
147 |
cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); |
|
148 |
cbf.put(cc).flip(); |
|
149 |
} else { |
|
150 |
bbf = ByteBuffer.allocate(cc.length * 4); |
|
151 |
cbf = CharBuffer.wrap(cc); |
|
152 |
} |
|
153 |
CoderResult cr = null; |
|
154 |
for (int i = 0; i < iteration; i++) { |
|
155 |
cbf.rewind(); |
|
156 |
bbf.clear(); |
|
157 |
enc.reset(); |
|
158 |
cr = enc.encode(cbf, bbf, true); |
|
159 |
} |
|
160 |
return cr; |
|
161 |
} |
|
162 |
||
163 |
static void printEntry(char c, Charset cs) { |
|
164 |
byte[] bb = new String(new char[] {c}).getBytes(cs); |
|
165 |
for (byte b:bb) |
|
166 |
System.out.printf("%x", b&0xff); |
|
167 |
System.out.printf(" %x", c & 0xffff); |
|
168 |
String s2 = new String(bb, cs); |
|
169 |
System.out.printf(" %x%n", s2.charAt(0) & 0xffff); |
|
170 |
} |
|
171 |
||
172 |
// check and compare canEncoding/Encoding |
|
173 |
static char[] checkEncoding(Charset oldCS, Charset newCS) |
|
174 |
throws Exception { |
|
175 |
System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name()); |
|
176 |
CharsetEncoder encOLD = oldCS.newEncoder(); |
|
177 |
CharsetEncoder encNew = newCS.newEncoder(); |
|
178 |
char[] cc = new char[0x10000]; |
|
179 |
int pos = 0; |
|
180 |
boolean is970 = "x-IBM970-Old".equals(oldCS.name()); |
|
181 |
||
182 |
for (char c = 0; c < 0xffff; c++) { |
|
183 |
boolean canOld = encOLD.canEncode(c); |
|
184 |
boolean canNew = encNew.canEncode(c); |
|
185 |
||
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
186 |
if (is970 && c == 0x2299) |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
187 |
continue; |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
188 |
|
2921 | 189 |
if (canOld != canNew) { |
190 |
if (canNew) { |
|
191 |
System.out.printf(" NEW(only): "); |
|
192 |
printEntry(c, newCS); |
|
193 |
} else { |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
194 |
if (is970) { |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
195 |
byte[] bb = new String(new char[] {c}).getBytes(oldCS); |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
196 |
if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) { |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
197 |
// we know 970 has bogus nnnn -> a2c1 -> 2299 |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
198 |
continue; |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
199 |
} |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
200 |
} |
2921 | 201 |
System.out.printf(" OLD(only): "); |
202 |
printEntry(c, oldCS); |
|
203 |
} |
|
204 |
} else if (canNew) { |
|
205 |
byte[] bbNew = new String(new char[] {c}).getBytes(newCS); |
|
206 |
byte[] bbOld = new String(new char[] {c}).getBytes(oldCS); |
|
207 |
if (!Arrays.equals(bbNew, bbOld)) { |
|
208 |
System.out.printf(" c->b NEW: "); |
|
209 |
printEntry(c, newCS); |
|
210 |
System.out.printf(" c->b OLD: "); |
|
211 |
printEntry(c, oldCS); |
|
212 |
} else { |
|
213 |
String sNew = new String(bbNew, newCS); |
|
214 |
String sOld = new String(bbOld, oldCS); |
|
215 |
if (!sNew.equals(sOld)) { |
|
216 |
System.out.printf(" b2c NEW (c=%x):", c&0xffff); |
|
217 |
printEntry(sNew.charAt(0), newCS); |
|
218 |
System.out.printf(" b2c OLD:"); |
|
219 |
printEntry(sOld.charAt(0), oldCS); |
|
220 |
} |
|
221 |
} |
|
222 |
} |
|
223 |
if (canNew & canOld) { // added only both for now |
|
224 |
cc[pos++] = c; |
|
225 |
} |
|
226 |
} |
|
227 |
return Arrays.copyOf(cc, pos); |
|
228 |
} |
|
229 |
||
230 |
||
231 |
// check and compare decoding of the old and new charset implementations
|
232 |
static void checkDecoding(Charset oldCS, Charset newCS) |
|
233 |
throws Exception |
|
234 |
{ |
|
235 |
System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name()); |
|
236 |
boolean isEBCDIC = oldCS.name().startsWith("x-IBM93"); |
|
237 |
||
238 |
//Try singlebyte first |
|
239 |
byte[] bb = new byte[1]; |
|
240 |
System.out.printf(" trying SB...%n"); |
|
241 |
for (int b = 0; b < 0x100; b++) { |
|
242 |
bb[0] = (byte)b; |
|
243 |
String sOld = new String(bb, oldCS); |
|
244 |
String sNew = new String(bb, newCS); |
|
245 |
if (!sOld.equals(sNew)) { |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
246 |
System.out.printf(" b=%x: %x/%d(old) %x/%d(new)%n", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
247 |
b& 0xff, |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
248 |
sOld.charAt(0) & 0xffff, sOld.length(), |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
249 |
sNew.charAt(0) & 0xffff, sNew.length()); |
2921 | 250 |
} |
251 |
} |
|
252 |
||
253 |
System.out.printf(" trying DB...%n"); |
|
254 |
bb = new byte[isEBCDIC?4:2]; |
|
255 |
int b1Min = 0x40; |
|
256 |
int b1Max = 0xfe; |
|
257 |
for (int b1 = 0x40; b1 < 0xff; b1++) { |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
258 |
if (!isEBCDIC) { |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
259 |
// decodable singlebyte b1 |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
260 |
bb[0] = (byte)b1; |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
261 |
String sOld = new String(bb, oldCS); |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
262 |
String sNew = new String(bb, newCS); |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
263 |
if (!sOld.equals(sNew)) { |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
264 |
if (sOld.length() != 2 && sOld.charAt(0) != 0) { |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
265 |
// only prints we are NOT expected. above two are known issue |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
266 |
System.out.printf(" b1=%x: %x/%d(old) %x/%d(new)%n", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
267 |
b1 & 0xff, |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
268 |
sOld.charAt(0) & 0xffff, sOld.length(), |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
269 |
sNew.charAt(0) & 0xffff, sNew.length()); |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
270 |
continue; |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
271 |
} |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
272 |
} |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
273 |
} |
2921 | 274 |
for (int b2 = 0x40; b2 < 0xff; b2++) { |
275 |
if (isEBCDIC) { |
|
276 |
bb[0] = 0x0e; |
|
277 |
bb[1] = (byte)b1; |
|
278 |
bb[2] = (byte)b2; |
|
279 |
bb[3] = 0x0f; |
|
280 |
} else { |
|
281 |
bb[0] = (byte)b1; |
|
282 |
bb[1] = (byte)b2; |
|
283 |
} |
|
284 |
String sOld = new String(bb, oldCS); |
|
285 |
String sNew = new String(bb, newCS); |
|
286 |
//if (!sOld.equals(sNew)) { |
|
287 |
if (sOld.charAt(0) != sNew.charAt(0)) { |
|
288 |
||
289 |
if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) |
|
290 |
continue; // known issude in old implementation |
|
291 |
||
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
292 |
System.out.printf(" bb=<%x,%x> c(old)=%x, c(new)=%x%n", |
2921 | 293 |
b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff); |
294 |
} |
|
295 |
} |
|
296 |
} |
|
297 |
} |
|
298 |
||
299 |
static void checkInit(String csn) throws Exception { |
|
300 |
System.out.printf("Check init <%s>...%n", csn); |
|
301 |
Charset.forName("Big5"); // load in the ExtendedCharsets |
|
302 |
long t1 = System.nanoTime()/1000; |
|
303 |
Charset cs = Charset.forName(csn); |
|
304 |
long t2 = System.nanoTime()/1000; |
|
305 |
System.out.printf(" charset :%d%n", t2 - t1); |
|
306 |
t1 = System.nanoTime()/1000; |
|
307 |
cs.newDecoder(); |
|
308 |
t2 = System.nanoTime()/1000; |
|
309 |
System.out.printf(" new Decoder :%d%n", t2 - t1); |
|
310 |
||
311 |
t1 = System.nanoTime()/1000; |
|
312 |
cs.newEncoder(); |
|
313 |
t2 = System.nanoTime()/1000; |
|
314 |
System.out.printf(" new Encoder :%d%n", t2 - t1); |
|
315 |
} |
|
316 |
||
317 |
static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception { |
|
318 |
System.gc(); // enqueue finalizable objects |
|
319 |
Thread.sleep(1000); |
|
320 |
System.gc(); // enqueue finalizable objects |
|
321 |
||
322 |
String csn1 = cs1.name(); |
|
323 |
String csn2 = cs2.name(); |
|
324 |
System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); |
|
325 |
||
326 |
Time t1 = new Time(); |
|
327 |
Time t2 = new Time(); |
|
328 |
||
329 |
byte[] bb1 = encode(cc, cs1, false, t1); |
|
330 |
byte[] bb2 = encode(cc, cs2, false, t2); |
|
331 |
||
332 |
System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n", |
|
333 |
csn2, csn1, |
|
334 |
t2.t, t1.t, |
|
335 |
(double)(t2.t)/(t1.t)); |
|
336 |
if (!Arrays.equals(bb1, bb2)) { |
|
337 |
System.out.printf(" encoding failed%n"); |
|
338 |
} |
|
339 |
||
340 |
char[] cc2 = decode(bb1, cs2, false, t2); |
|
341 |
char[] cc1 = decode(bb1, cs1, false, t1); |
|
342 |
System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n", |
|
343 |
csn2, csn1, |
|
344 |
t2.t, t1.t, |
|
345 |
(double)(t2.t)/(t1.t)); |
|
346 |
if (!Arrays.equals(cc1, cc2)) { |
|
347 |
System.out.printf(" decoding failed%n"); |
|
348 |
} |
|
349 |
||
350 |
bb1 = encode(cc, cs1, true, t1); |
|
351 |
bb2 = encode(cc, cs2, true, t2); |
|
352 |
||
353 |
System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", |
|
354 |
csn2, csn1, |
|
355 |
t2.t, t1.t, |
|
356 |
(double)(t2.t)/(t1.t)); |
|
357 |
||
358 |
if (!Arrays.equals(bb1, bb2)) |
|
359 |
System.out.printf(" encoding (direct) failed%n"); |
|
360 |
||
361 |
cc1 = decode(bb1, cs1, true, t1); |
|
362 |
cc2 = decode(bb1, cs2, true, t2); |
|
363 |
System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", |
|
364 |
csn2, csn1, |
|
365 |
t2.t, t1.t, |
|
366 |
(double)(t2.t)/(t1.t)); |
|
367 |
if (!Arrays.equals(cc1, cc2)) { |
|
368 |
System.out.printf(" decoding (direct) failed%n"); |
|
369 |
} |
|
370 |
} |
|
371 |
||
372 |
/* The first byte is the length of malformed bytes |
|
373 |
byte[][] malformed = { |
|
374 |
{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, |
|
375 |
}; |
|
376 |
*/ |
|
377 |
||
378 |
static void checkMalformed(Charset cs, byte[][] malformed) |
|
379 |
throws Exception |
|
380 |
{ |
|
381 |
boolean failed = false; |
|
382 |
String csn = cs.name(); |
|
383 |
System.out.printf("Check malformed <%s>...%n", csn); |
|
384 |
for (boolean direct: new boolean[] {false, true}) { |
|
385 |
for (byte[] bins : malformed) { |
|
386 |
int mlen = bins[0]; |
|
387 |
byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); |
|
388 |
CoderResult cr = decodeCR(bin, cs, direct); |
|
389 |
String ashex = ""; |
|
390 |
for (int i = 0; i < bin.length; i++) { |
|
391 |
if (i > 0) ashex += " "; |
|
392 |
ashex += Integer.toString((int)bin[i] & 0xff, 16); |
|
393 |
} |
|
394 |
if (!cr.isMalformed()) { |
|
395 |
System.out.printf(" FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString()); |
|
396 |
failed = true; |
|
397 |
} else if (cr.length() != mlen) { |
|
398 |
System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); |
|
399 |
failed = true; |
|
400 |
} |
|
401 |
} |
|
402 |
} |
|
403 |
if (failed) |
|
404 |
throw new RuntimeException("Check malformed failed " + csn); |
|
405 |
} |
|
406 |
||
407 |
static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) { |
|
408 |
int inPos = flow[0]; |
|
409 |
int inLen = flow[1]; |
|
410 |
int outPos = flow[2]; |
|
411 |
int outLen = flow[3]; |
|
412 |
int expedInPos = flow[4]; |
|
413 |
int expedOutPos = flow[5]; |
|
414 |
CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW |
|
415 |
:CoderResult.OVERFLOW; |
|
416 |
ByteBuffer bbf; |
|
417 |
CharBuffer cbf; |
|
418 |
if (direct) { |
|
419 |
bbf = ByteBuffer.allocateDirect(inPos + bytes.length); |
|
420 |
cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); |
|
421 |
} else { |
|
422 |
bbf = ByteBuffer.allocate(inPos + bytes.length); |
|
423 |
cbf = CharBuffer.allocate(outPos + outLen); |
|
424 |
} |
|
425 |
bbf.position(inPos); |
|
426 |
bbf.put(bytes).flip().position(inPos).limit(inPos + inLen); |
|
427 |
cbf.position(outPos); |
|
428 |
dec.reset(); |
|
429 |
CoderResult cr = dec.decode(bbf, cbf, false); |
|
430 |
if (cr != expedCR || |
|
431 |
bbf.position() != expedInPos || |
|
432 |
cbf.position() != expedOutPos) { |
|
433 |
System.out.printf("Expected(direct=%5b): [", direct); |
|
434 |
for (int i:flow) System.out.print(" " + i); |
|
435 |
System.out.println("] CR=" + cr + |
|
436 |
", inPos=" + bbf.position() + |
|
437 |
", outPos=" + cbf.position()); |
|
438 |
return false; |
|
439 |
} |
|
440 |
return true; |
|
441 |
} |
|
442 |
||
443 |
static void checkUnderOverflow(Charset cs) throws Exception { |
|
444 |
String csn = cs.name(); |
|
445 |
System.out.printf("Check under/overflow <%s>...%n", csn); |
|
446 |
CharsetDecoder dec = cs.newDecoder(); |
|
447 |
boolean failed = false; |
|
448 |
||
449 |
//7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 |
|
450 |
//0 1 2 3 7 11 |
|
451 |
byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); |
|
452 |
int inlen = bytes.length; |
|
453 |
||
454 |
int MAXOFF = 20; |
|
455 |
for (int inoff = 0; inoff < MAXOFF; inoff++) { |
|
456 |
for (int outoff = 0; outoff < MAXOFF; outoff++) { |
|
457 |
int[][] Flows = { |
|
458 |
//inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) |
|
459 |
//overflow |
|
460 |
{inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, |
|
461 |
{inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, |
|
462 |
{inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, |
|
463 |
{inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, |
|
464 |
{inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, |
|
465 |
{inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, |
|
466 |
//underflow |
|
467 |
{inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, |
|
468 |
{inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, |
|
469 |
{inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
470 |
{inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
471 |
{inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, |
|
472 |
{inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
473 |
{inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
474 |
{inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, |
|
475 |
{inoff, 11, outoff, 6, inoff +11, outoff + 4, 0}, |
|
476 |
{inoff, 12, outoff, 6, inoff +11, outoff + 4, 0}, |
|
477 |
{inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, |
|
478 |
// 2-byte under/overflow |
|
479 |
{inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, |
|
480 |
{inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, |
|
481 |
{inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, |
|
482 |
}; |
|
483 |
for (boolean direct: new boolean[] {false, true}) { |
|
484 |
for (int[] flow: Flows) { |
|
485 |
if (!check(dec, bytes, direct, flow)) |
|
486 |
failed = true; |
|
487 |
} |
|
488 |
}}} |
|
489 |
if (failed) |
|
490 |
throw new RuntimeException("Check under/overflow failed " + csn); |
|
491 |
} |
|
492 |
||
493 |
static String[] csnames = new String[] { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
494 |
|
2921 | 495 |
"IBM930", |
496 |
"IBM933", |
|
497 |
"IBM935", |
|
498 |
"IBM937", |
|
499 |
"IBM939", |
|
500 |
"IBM942", |
|
501 |
"IBM943", |
|
502 |
"IBM948", |
|
503 |
"IBM949", |
|
504 |
"IBM950", |
|
505 |
"IBM970", |
|
506 |
"IBM942C", |
|
507 |
"IBM943C", |
|
508 |
"IBM949C", |
|
509 |
"IBM1381", |
|
510 |
"IBM1383", |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
511 |
|
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
512 |
"EUC_CN", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
513 |
"EUC_KR", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
514 |
"GBK", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
515 |
"Johab", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
516 |
"MS932", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
517 |
"MS936", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
518 |
"MS949", |
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2921
diff
changeset
|
519 |
"MS950", |
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
520 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
521 |
"EUC_JP", |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
522 |
"EUC_JP_LINUX", |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
523 |
"EUC_JP_Open", |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
524 |
"SJIS", |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
525 |
"PCK", |
2921 | 526 |
}; |
527 |
||
528 |
public static void main(String[] args) throws Exception { |
|
529 |
for (String csname: csnames) { |
|
530 |
System.out.printf("-----------------------------------%n"); |
|
531 |
String oldname = csname + "_OLD"; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
532 |
if ("EUC_JP_Open".equals(csname)) |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
12180
diff
changeset
|
533 |
csname = "eucjp-open"; |
2921 | 534 |
checkInit(csname); |
535 |
Charset csOld = (Charset)Class.forName(oldname).newInstance(); |
|
536 |
Charset csNew = Charset.forName(csname); |
|
537 |
char[] cc = checkEncoding(csOld, csNew); |
|
538 |
checkDecoding(csOld, csNew); |
|
539 |
compare(csNew, csOld, cc); |
|
540 |
||
541 |
if (csname.startsWith("x-IBM93")) { |
|
542 |
//ecdbic |
|
543 |
checkMalformed(csNew, new byte[][] { |
|
544 |
{1, 0x26, 0x0f, 0x27}, // in SBSC, no SI |
|
545 |
{1, 0x0e, 0x41, 0x41, 0xe}, // in DBSC, no SO |
|
546 |
{2, 0x0e, 0x40, 0x41, 0xe}, // illegal DB |
|
547 |
}); |
|
548 |
} else if (csname.equals("x-IBM970") || |
|
549 |
csname.equals("x-IBM1383")) { |
|
550 |
//euc_simple |
|
551 |
checkMalformed(csNew, new byte[][] { |
|
552 |
{1, 0x26, (byte)0x8f, 0x27}, // SS2 |
|
553 |
{1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51}, // SS3 |
|
554 |
}); |
|
555 |
} |
|
556 |
} |
|
557 |
} |
|
558 |
} |