|
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
23 |
|
/* @test
 * @bug 4328178
 * @summary Performs baseline and regression test on the ISCII91 charset
 * @modules jdk.charsets
 */
|
29 |
|
30 import java.io.*; |
|
31 |
|
/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * <p>The test decodes individual ISCII91 bytes (and a few two-byte
 * sequences) and compares the result with the expected Unicode characters.
 * Single-byte mappings are additionally encoded back and compared with the
 * original byte. Any mismatch, or the charset being unavailable, is
 * reported by throwing an exception out of {@link #main}.
 */
public class ISCIITest {

    /** Charset name used for every decode and for the ASCII round trip. */
    private static final String ISCII91 = "ISCII91";

    /**
     * Second charset name, used when encoding in {@link #checkRange}.
     * NOTE(review): presumably a registered alias of ISCII91, kept so that
     * the alias lookup stays exercised -- confirm against the charset
     * provider.
     */
    private static final String ISCII_ALIAS = "ISCII";

    /** Formats a byte value for failure messages, e.g. {@code 0xa1}. */
    private static String hex(int value) {
        return "0x" + Integer.toHexString(value);
    }

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the
     * character with the same numeric value and encodes back to the same
     * byte.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label included in failure messages
     * @throws UnsupportedEncodingException if ISCII91 is not available
     * @throws Exception if a byte does not map to itself in either direction
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // An unsupported charset is deliberately NOT caught here: it
            // propagates to test(), which turns it into a test failure.
            String unicodeStr = new String(singleByte, ISCII91);

            if (i != (int) unicodeStr.charAt(0)) {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + testName + ", decoding error for input byte " + hex(i));
            }

            byte[] encoded = unicodeStr.getBytes(ISCII91);

            if (encoded[0] != singleByte[0]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + testName + ", encoding error for input byte " + hex(i));
            }
        }
    }

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the
     * replacement character U+FFFD.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label included in failure messages
     * @throws UnsupportedEncodingException if ISCII91 is not available
     * @throws Exception if a byte decodes to anything other than U+FFFD
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, ISCII91);

            if (unicodeStr.charAt(0) != '\uFFFD') {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + testName + ", input byte " + hex(i)
                    + " did not decode to U+FFFD");
            }
        }
    }

    /**
     * Verifies that the bytes {@code start..end} decode, in order, to the
     * characters in {@code expectChars}, and that each decoded character
     * encodes back to the single byte it came from.
     *
     * @param start       first byte value to check (inclusive)
     * @param end         last byte value to check (inclusive)
     * @param expectChars expected character for each byte, in byte order;
     *                    must contain exactly {@code end - start + 1} entries
     * @param testName    label included in failure messages
     * @throws IllegalArgumentException if {@code expectChars} has the wrong
     *         length for the range
     * @throws UnsupportedEncodingException if the charset is not available
     * @throws Exception if a mapping differs in either direction
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
        throws Exception
    {
        if (expectChars.length != end - start + 1) {
            throw new IllegalArgumentException(testName + ": expected "
                + (end - start + 1) + " characters for range " + hex(start)
                + "-" + hex(end) + " but got " + expectChars.length);
        }

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, ISCII91);

            if (unicodeStr.charAt(0) != expectChars[i - start]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + testName + ", decoding error for input byte " + hex(i));
            }

            // The original computed this value but never looked at it, so
            // the encode direction went untested for all Indic ranges.
            byte[] encoded = unicodeStr.getBytes(ISCII_ALIAS);

            if (encoded.length != 1 || encoded[0] != singleByte[0]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + testName + ", encoding error for input byte " + hex(i));
            }
        }
    }

    /**
     * Checks the Nukta code-extension sequences.
     *
     * <p>ISCII uses a number of code extension techniques to access lesser
     * used characters. The Nukta character, which ordinarily signifies a
     * diacritic, is used in combination with existing characters to escape
     * them to a different character value.
     *
     * @throws UnsupportedEncodingException if ISCII91 is not available
     * @throws Exception if a sequence decodes to an unexpected character
     */
    private static void checkNuktaExtensions() throws Exception {
        byte[] codeExtensionBytes = {
            (byte)0xa1, (byte)0xe9,   // Chandrabindu + Nukta
                                      // => DEVANAGARI OM SIGN
            (byte)0xa6, (byte)0xe9,   // Vowel I + Nukta
                                      // => DEVANAGARI VOCALIC L
            (byte)0xa7, (byte)0xe9,   // Vowel II + Nukta
                                      // => DEVANAGARI VOCALIC LL
            (byte)0xaa, (byte)0xe9,   // Vowel RI + Nukta
                                      // => DEVANAGARI VOCALIC RR
            (byte)0xdb, (byte)0xe9,   // Vowel sign I + Nukta
                                      // => DEVANAGARI VOWEL SIGN VOCALIC L
            (byte)0xdc, (byte)0xe9,   // Vowel sign II + Nukta
                                      // => DEVANAGARI VOWEL SIGN VOCALIC LL
            (byte)0xdf, (byte)0xe9,   // Vowel sign Vocalic R + Nukta
                                      // => DEVANAGARI VOWEL SIGN VOCALIC RR
            (byte)0xea, (byte)0xe9    // Full stop/Phrase separator + Nukta
                                      // => DEVANAGARI SIGN AVAGRAHA
        };

        // One expected character per byte pair above, in the same order.
        char[] expectNuktaSub = {
            '\u0950',
            '\u090c',
            '\u0961',
            '\u0960',
            '\u0962',
            '\u0963',
            '\u0944',
            '\u093d'
        };

        byte[] bytePair = new byte[2];

        for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
            bytePair[0] = codeExtensionBytes[2 * i];
            bytePair[1] = codeExtensionBytes[2 * i + 1];

            String unicodeStr = new String(bytePair, ISCII91);
            if (unicodeStr.charAt(0) != expectNuktaSub[i]) {
                throw new Exception("Failed Nukta Sub: byte pair "
                    + hex(bytePair[0] & 0xff) + " "
                    + hex(bytePair[1] & 0xff));
            }
        }
    }

    /**
     * Checks the two-byte Halant sequences, each of which is expected to
     * decode to two characters.
     *
     * @throws UnsupportedEncodingException if ISCII91 is not available
     * @throws Exception if either decoded character is wrong or missing
     */
    private static void checkHalantCombinations() throws Exception {
        byte[] comboBytes = {
            (byte)0xe8, (byte)0xe8,   // HALANT + HALANT
            (byte)0xe8, (byte)0xe9    // HALANT + NUKTA aka. Soft Halant
        };

        // Two expected characters per byte pair above, in the same order.
        char[] expectCombChars = {
            '\u094d',
            '\u200c',
            '\u094d',
            '\u200d'
        };

        byte[] bytePair = new byte[2];

        for (int i = 0; i < comboBytes.length / 2; i++) {
            int first = 2 * i;
            int second = first + 1;
            bytePair[0] = comboBytes[first];
            bytePair[1] = comboBytes[second];

            String unicodeStr = new String(bytePair, ISCII91);

            // Fail when EITHER character is wrong. The original joined the
            // two mismatch tests with &&, which only failed when both
            // characters were wrong at the same time.
            if (unicodeStr.length() < 2
                    || unicodeStr.charAt(0) != expectCombChars[first]
                    || unicodeStr.charAt(1) != expectCombChars[second]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                    + "byte pair " + hex(bytePair[0] & 0xff) + " "
                    + hex(bytePair[1] & 0xff));
            }
        }
    }

    /*
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     */
    private static void test() throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars = {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars = {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            // Multi-byte sequences.
            checkNuktaExtensions();
            checkHalantCombinations();

        } catch (UnsupportedEncodingException e) {
            System.err.println("ISCII91 encoding not supported");
            // Keep the cause so the report shows which lookup failed.
            throw new Exception("Failed ISCII91 Regression Test", e);
        }
    }

    /**
     * Test entry point; completes normally only when every check passes.
     *
     * @param args ignored
     * @throws Exception if any mapping check fails or the charset is
     *         unavailable
     */
    public static void main(String[] args) throws Exception {
        test();
    }
}