34 import java.nio.charset.*; |
34 import java.nio.charset.*; |
35 |
35 |
36 public class TestStringCoding { |
36 public class TestStringCoding { |
37 public static void main(String[] args) throws Throwable { |
37 public static void main(String[] args) throws Throwable { |
38 |
38 |
|
39 // full bmp first |
|
40 char[] bmp = new char[0x10000]; |
|
41 for (int i = 0; i < 0x10000; i++) { |
|
42 bmp[i] = (char)i; |
|
43 } |
|
44 char[] latin = Arrays.copyOf(bmp, 0x100); |
|
45 char[] ascii = Arrays.copyOf(bmp, 0x80); |
|
46 |
|
47 byte[] latinBA = new byte[0x100]; |
|
48 for (int i = 0; i < 0x100; i++) { |
|
49 latinBA[i] = (byte)i; |
|
50 } |
|
51 byte[] asciiBA = Arrays.copyOf(latinBA, 0x80); |
|
52 |
39 for (Boolean hasSM: new boolean[] { false, true }) { |
53 for (Boolean hasSM: new boolean[] { false, true }) { |
40 if (hasSM) |
54 if (hasSM) { |
41 System.setSecurityManager(new PermissiveSecurityManger()); |
55 System.setSecurityManager(new PermissiveSecurityManger()); |
|
56 } |
42 for (Charset cs: Charset.availableCharsets().values()) { |
57 for (Charset cs: Charset.availableCharsets().values()) { |
43 if ("ISO-2022-CN".equals(cs.name()) || |
58 if ("ISO-2022-CN".equals(cs.name()) || |
44 "x-COMPOUND_TEXT".equals(cs.name()) || |
59 "x-COMPOUND_TEXT".equals(cs.name()) || |
45 "x-JISAutoDetect".equals(cs.name())) |
60 "x-JISAutoDetect".equals(cs.name())) |
46 continue; |
61 continue; |
47 System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM); |
62 System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM); |
48 // full bmp first |
63 |
49 char[] bmpCA = new char[0x10000]; |
64 testNewString(cs, testGetBytes(cs, new String(bmp))); |
50 for (int i = 0; i < 0x10000; i++) { |
65 testNewString(cs, testGetBytes(cs, new String(latin))); |
51 bmpCA[i] = (char)i; |
66 testNewString(cs, testGetBytes(cs, new String(ascii))); |
52 } |
67 testGetBytes(cs, testNewString(cs, latinBA)); |
53 byte[] sbBA = new byte[0x100]; |
68 testGetBytes(cs, testNewString(cs, asciiBA)); |
54 for (int i = 0; i < 0x100; i++) { |
69 |
55 sbBA[i] = (byte)i; |
|
56 } |
|
57 test(cs, bmpCA, sbBA); |
|
58 // "randomed" sizes |
70 // "randomed" sizes |
59 Random rnd = new Random(); |
71 Random rnd = new Random(); |
60 for (int i = 0; i < 10; i++) { |
72 for (int i = 0; i < 10; i++) { |
61 int clen = rnd.nextInt(0x10000); |
|
62 int blen = rnd.nextInt(0x100); |
|
63 //System.out.printf(" blen=%d, clen=%d%n", blen, clen); |
73 //System.out.printf(" blen=%d, clen=%d%n", blen, clen); |
64 test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen)); |
74 char[] bmp0 = Arrays.copyOf(bmp, rnd.nextInt(0x10000)); |
|
75 testNewString(cs, testGetBytes(cs, new String(bmp0))); |
65 //add a pair of surrogates |
76 //add a pair of surrogates |
66 int pos = clen / 2; |
77 int pos = bmp0.length / 2; |
67 if ((pos + 1) < blen) { |
78 if ((pos + 1) < bmp0.length) { |
68 bmpCA[pos] = '\uD800'; |
79 bmp0[pos] = '\uD800'; |
69 bmpCA[pos+1] = '\uDC00'; |
80 bmp0[pos+1] = '\uDC00'; |
70 } |
81 } |
71 test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen)); |
82 testNewString(cs, testGetBytes(cs, new String(bmp0))); |
|
83 |
|
84 char[] latin0 = Arrays.copyOf(latin, rnd.nextInt(0x100)); |
|
85 char[] ascii0 = Arrays.copyOf(ascii, rnd.nextInt(0x80)); |
|
86 byte[] latinBA0 = Arrays.copyOf(latinBA, rnd.nextInt(0x100)); |
|
87 byte[] asciiBA0 = Arrays.copyOf(asciiBA, rnd.nextInt(0x80)); |
|
88 testNewString(cs, testGetBytes(cs, new String(latin0))); |
|
89 testNewString(cs, testGetBytes(cs, new String(ascii0))); |
|
90 testGetBytes(cs, testNewString(cs, latinBA0)); |
|
91 testGetBytes(cs, testNewString(cs, asciiBA0)); |
72 } |
92 } |
73 |
93 testSurrogates(cs); |
74 testMixed(cs); |
94 testMixed(cs); |
75 System.out.println("done!"); |
95 System.out.println("done!"); |
76 } |
96 } |
77 } |
97 } |
78 } |
98 } |
107 throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); |
127 throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); |
108 } |
128 } |
109 |
129 |
110 //getBytes(cs); |
130 //getBytes(cs); |
111 bmpBA = bmpStr.getBytes(cs); |
131 bmpBA = bmpStr.getBytes(cs); |
112 if (!Arrays.equals(bmpBA, baNIO)) |
132 if (!Arrays.equals(bmpBA, baNIO)) { |
113 throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); |
133 throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); |
|
134 } |
114 |
135 |
115 //new String(csn); |
136 //new String(csn); |
116 String strSC = new String(bmpBA, cs.name()); |
137 String strSC = new String(bmpBA, cs.name()); |
117 String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString(); |
138 String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString(); |
118 if(!strNIO.equals(strSC)) { |
139 if(!strNIO.equals(strSC)) { |
119 throw new RuntimeException("new String(csn) failed -> " + cs.name()); |
140 throw new RuntimeException("new String(csn) failed -> " + cs.name()); |
120 } |
141 } |
121 |
|
122 //new String(cs); |
142 //new String(cs); |
123 strSC = new String(bmpBA, cs); |
143 strSC = new String(bmpBA, cs); |
124 if (!strNIO.equals(strSC)) |
144 if (!strNIO.equals(strSC)) { |
125 throw new RuntimeException("new String(cs) failed -> " + cs.name()); |
145 throw new RuntimeException("new String(cs) failed -> " + cs.name()); |
126 |
146 } |
127 } |
147 } |
128 |
148 |
129 static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable { |
149 static byte[] getBytes(CharsetEncoder enc, String str) throws Throwable { |
130 String bmpStr = new String(bmpCA); |
150 ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(str.toCharArray())); |
|
151 byte[] ba = new byte[bf.limit()]; |
|
152 bf.get(ba, 0, ba.length); |
|
153 return ba; |
|
154 } |
|
155 |
|
156 static byte[] testGetBytes(Charset cs, String str) throws Throwable { |
|
157 CharsetEncoder enc = cs.newEncoder() |
|
158 .onMalformedInput(CodingErrorAction.REPLACE) |
|
159 .onUnmappableCharacter(CodingErrorAction.REPLACE); |
|
160 //getBytes(csn); |
|
161 byte[] baSC = str.getBytes(cs.name()); |
|
162 byte[] baNIO = getBytes(enc, str); |
|
163 if (!Arrays.equals(baSC, baNIO)) { |
|
164 throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); |
|
165 } |
|
166 //getBytes(cs); |
|
167 baSC = str.getBytes(cs); |
|
168 if (!Arrays.equals(baSC, baNIO)) { |
|
169 throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); |
|
170 } |
|
171 return baSC; |
|
172 } |
|
173 |
|
174 static String testNewString(Charset cs, byte[] ba) throws Throwable { |
131 CharsetDecoder dec = cs.newDecoder() |
175 CharsetDecoder dec = cs.newDecoder() |
132 .onMalformedInput(CodingErrorAction.REPLACE) |
176 .onMalformedInput(CodingErrorAction.REPLACE) |
133 .onUnmappableCharacter(CodingErrorAction.REPLACE); |
177 .onUnmappableCharacter(CodingErrorAction.REPLACE); |
|
178 //new String(csn); |
|
179 String strSC = new String(ba, cs.name()); |
|
180 String strNIO = dec.reset().decode(ByteBuffer.wrap(ba)).toString(); |
|
181 if(!strNIO.equals(strSC)) { |
|
182 throw new RuntimeException("new String(csn) failed -> " + cs.name()); |
|
183 } |
|
184 //new String(cs); |
|
185 strSC = new String(ba, cs); |
|
186 if (!strNIO.equals(strSC)) { |
|
187 throw new RuntimeException("new String(cs)/bmp failed -> " + cs.name()); |
|
188 } |
|
189 return strSC; |
|
190 } |
|
191 |
|
192 static void testSurrogates(Charset cs) throws Throwable { |
|
193 //encode unmappable surrogates |
134 CharsetEncoder enc = cs.newEncoder() |
194 CharsetEncoder enc = cs.newEncoder() |
135 .onMalformedInput(CodingErrorAction.REPLACE) |
195 .onMalformedInput(CodingErrorAction.REPLACE) |
136 .onUnmappableCharacter(CodingErrorAction.REPLACE); |
196 .onUnmappableCharacter(CodingErrorAction.REPLACE); |
137 |
|
138 //getBytes(csn); |
|
139 byte[] baSC = bmpStr.getBytes(cs.name()); |
|
140 ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA)); |
|
141 byte[] baNIO = new byte[bf.limit()]; |
|
142 bf.get(baNIO, 0, baNIO.length); |
|
143 if (!Arrays.equals(baSC, baNIO)) |
|
144 throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); |
|
145 |
|
146 //getBytes(cs); |
|
147 baSC = bmpStr.getBytes(cs); |
|
148 if (!Arrays.equals(baSC, baNIO)) |
|
149 throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); |
|
150 |
|
151 //new String(csn); |
|
152 String strSC = new String(sbBA, cs.name()); |
|
153 String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString(); |
|
154 |
|
155 if(!strNIO.equals(strSC)) |
|
156 throw new RuntimeException("new String(csn) failed -> " + cs.name()); |
|
157 |
|
158 //new String(cs); |
|
159 strSC = new String(sbBA, cs); |
|
160 if (!strNIO.equals(strSC)) |
|
161 throw new RuntimeException("new String(cs) failed -> " + cs.name()); |
|
162 |
|
163 //encode unmappable surrogates |
|
164 if (enc instanceof sun.nio.cs.ArrayEncoder && |
197 if (enc instanceof sun.nio.cs.ArrayEncoder && |
165 cs.contains(Charset.forName("ASCII"))) { |
198 cs.contains(Charset.forName("ASCII"))) { |
166 if (cs.name().equals("UTF-8") || // utf8 handles surrogates |
199 if (cs.name().equals("UTF-8") || // utf8 handles surrogates |
167 cs.name().equals("CESU-8")) // utf8 handles surrogates |
200 cs.name().equals("CESU-8")) // utf8 handles surrogates |
168 return; |
201 return; |