test/jdk/sun/nio/cs/ISCIITest.java
changeset 47216 71c04702a3d5
parent 44115 bb4e971bf5d4
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 /*
       
     2  * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  */
       
    23 
       
    24 /* @test
       
    25  * @bug 4328178
       
    26  * @summary Performs baseline and regression test on the ISCII91 charset
       
    27  * @modules jdk.charsets
       
    28  */
       
    29 
       
    30 import java.io.*;
       
    31 
       
    32 public class ISCIITest {
       
    33 
       
    34     private static void failureReport() {
       
    35         System.err.println ("Failed ISCII91 Regression Test");
       
    36     }
       
    37 
       
    38     private static void mapEquiv(int start,
       
    39                                  int end,
       
    40                                  String testName)
       
    41     throws Exception
       
    42     {
       
    43         byte[] singleByte = new byte[1];
       
    44         byte[] encoded = new byte[1];
       
    45 
       
    46         for (int i = start; i <= end; i++ ) {
       
    47             singleByte[0] = (byte) i;
       
    48             try {
       
    49                 String unicodeStr =
       
    50                         new String (singleByte,"ISCII91");
       
    51 
       
    52                 if (i != (int)unicodeStr.charAt(0)) {
       
    53                     System.err.println ("FAILED ISCII91 Regression test"
       
    54                                         + "input byte is " + i );
       
    55                     throw new Exception("");
       
    56                 }
       
    57                 encoded = unicodeStr.getBytes("ISCII91");
       
    58 
       
    59                 if (encoded[0] != singleByte[0]) {
       
    60                    System.err.println("Encoding error " + testName);
       
    61                    throw new Exception("Failed ISCII91 Regression test");
       
    62                 }
       
    63 
       
    64             } catch (UnsupportedEncodingException e) {
       
    65                 failureReport();
       
    66             }
       
    67         }
       
    68         return;
       
    69     }
       
    70 
       
    71     private static void checkUnmapped(int start,
       
    72                                       int end,
       
    73                                       String testName)
       
    74     throws Exception {
       
    75 
       
    76         byte[] singleByte = new byte[1];
       
    77 
       
    78         for (int i = start; i <= end; i++ ) {
       
    79             singleByte[0] = (byte) i;
       
    80             try {
       
    81                 String unicodeStr = new String (singleByte, "ISCII91");
       
    82 
       
    83                 if (unicodeStr.charAt(0) != '\uFFFD') {
       
    84                     System.err.println("FAILED " + testName +
       
    85                                         "input byte is " + i );
       
    86                     throw new Exception ("Failed ISCII91 regression test");
       
    87                 }
       
    88             } catch (UnsupportedEncodingException e) {
       
    89                 System.err.println("Unsupported character encoding");
       
    90             }
       
    91         }
       
    92         return;
       
    93     }
       
    94 
       
    95     /*
       
    96      *
       
    97      */
       
    98     private static void checkRange(int start, int end,
       
    99                                    char[] expectChars,
       
   100                                    String testName)
       
   101                                    throws Exception {
       
   102         byte[] singleByte = new byte[1];
       
   103         byte[] encoded = new byte[1];
       
   104         int lookupOffset = 0;
       
   105 
       
   106         for (int i=start; i <= end; i++ ) {
       
   107             singleByte[0] = (byte) i;
       
   108             String unicodeStr = new String (singleByte, "ISCII91");
       
   109             if (unicodeStr.charAt(0) != expectChars[lookupOffset++]) {
       
   110                 throw new Exception ("Failed ISCII91 Regression Test");
       
   111             }
       
   112             encoded = unicodeStr.getBytes("ISCII");
       
   113         }
       
   114         return;
       
   115     }
       
   116 
       
   117     /*
       
   118      * Tests the ISCII91 Indic character encoding
       
   119      * as per IS 13194:1991 Bureau of Indian Standards.
       
   120      */
       
   121 
       
   122     private static void test () throws Exception {
       
   123 
       
   124         try {
       
   125 
       
   126 
       
   127             // ISCII91 is an 8-byte encoding which retains the ASCII
       
   128             // mappings in the lower half.
       
   129 
       
   130             mapEquiv(0, 0x7f, "7 bit ASCII range");
       
   131 
       
   132             // Checks a range of characters which are unmappable according
       
   133             // to the standards.
       
   134 
       
   135             checkUnmapped(0x81, 0x9f, "UNMAPPED");
       
   136 
       
   137             // Vowel Modifier chars can be used to modify the vowel
       
   138             // sound of the preceding consonant, vowel or matra character.
       
   139 
       
   140             byte[] testByte = new byte[1];
       
   141             char[] vowelModChars = {
       
   142                 '\u0901', // Vowel modifier Chandrabindu
       
   143                 '\u0902', // Vowel modifier Anuswar
       
   144                 '\u0903'  // Vowel modifier Visarg
       
   145             };
       
   146 
       
   147             checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");
       
   148 
       
   149             char[] expectChars = {
       
   150                 '\u0905', // a4 -- Vowel A
       
   151                 '\u0906', // a5 -- Vowel AA
       
   152                 '\u0907', // a6 -- Vowel I
       
   153                 '\u0908', // a7 -- Vowel II
       
   154                 '\u0909', // a8 -- Vowel U
       
   155                 '\u090a', // a9 -- Vowel UU
       
   156                 '\u090b', // aa -- Vowel RI
       
   157                 '\u090e', // ab -- Vowel E ( Southern Scripts )
       
   158                 '\u090f', // ac -- Vowel EY
       
   159                 '\u0910', // ad -- Vowel AI
       
   160                 '\u090d', // ae -- Vowel AYE ( Devanagari Script )
       
   161                 '\u0912', // af -- Vowel O ( Southern Scripts )
       
   162                 '\u0913', // b0 -- Vowel OW
       
   163                 '\u0914', // b1 -- Vowel AU
       
   164                 '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
       
   165             };
       
   166 
       
   167             checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");
       
   168 
       
   169             char[] expectConsChars =
       
   170             {
       
   171                 '\u0915', // b3 -- Consonant KA
       
   172                 '\u0916', // b4 -- Consonant KHA
       
   173                 '\u0917', // b5 -- Consonant GA
       
   174                 '\u0918', // b6 -- Consonant GHA
       
   175                 '\u0919', // b7 -- Consonant NGA
       
   176                 '\u091a', // b8 -- Consonant CHA
       
   177                 '\u091b', // b9 -- Consonant CHHA
       
   178                 '\u091c', // ba -- Consonant JA
       
   179                 '\u091d', // bb -- Consonant JHA
       
   180                 '\u091e', // bc -- Consonant JNA
       
   181                 '\u091f', // bd -- Consonant Hard TA
       
   182                 '\u0920', // be -- Consonant Hard THA
       
   183                 '\u0921', // bf -- Consonant Hard DA
       
   184                 '\u0922', // c0 -- Consonant Hard DHA
       
   185                 '\u0923', // c1 -- Consonant Hard NA
       
   186                 '\u0924', // c2 -- Consonant Soft TA
       
   187                 '\u0925', // c3 -- Consonant Soft THA
       
   188                 '\u0926', // c4 -- Consonant Soft DA
       
   189                 '\u0927', // c5 -- Consonant Soft DHA
       
   190                 '\u0928', // c6 -- Consonant Soft NA
       
   191                 '\u0929', // c7 -- Consonant NA ( Tamil )
       
   192                 '\u092a', // c8 -- Consonant PA
       
   193                 '\u092b', // c9 -- Consonant PHA
       
   194                 '\u092c', // ca -- Consonant BA
       
   195                 '\u092d', // cb -- Consonant BHA
       
   196                 '\u092e', // cc -- Consonant MA
       
   197                 '\u092f', // cd -- Consonant YA
       
   198                 '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
       
   199                 '\u0930', // cf -- Consonant RA
       
   200                 '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
       
   201                 '\u0932', // d1 -- Consonant LA
       
   202                 '\u0933', // d2 -- Consonant Hard LA
       
   203                 '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
       
   204                 '\u0935', // d4 -- Consonant VA
       
   205                 '\u0936', // d5 -- Consonant SHA
       
   206                 '\u0937', // d6 -- Consonant Hard SHA
       
   207                 '\u0938', // d7 -- Consonant SA
       
   208                 '\u0939', // d8 -- Consonant HA
       
   209             };
       
   210 
       
   211             checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");
       
   212 
       
   213             char[] matraChars = {
       
   214                 '\u093e', // da -- Vowel Sign AA
       
   215                 '\u093f', // db -- Vowel Sign I
       
   216                 '\u0940', // dc -- Vowel Sign II
       
   217                 '\u0941', // dd -- Vowel Sign U
       
   218                 '\u0942', // de -- Vowel Sign UU
       
   219                 '\u0943', // df -- Vowel Sign RI
       
   220                 '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
       
   221                 '\u0947', // e1 -- Vowel Sign EY
       
   222                 '\u0948', // e2 -- Vowel Sign AI
       
   223                 '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
       
   224                 '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
       
   225                 '\u094b', // e5 -- Vowel Sign OW
       
   226                 '\u094c', // e6 -- Vowel Sign AU
       
   227                 '\u0949' // e7 -- Vowel Sign AWE ( Devanagari Script )
       
   228             };
       
   229 
       
   230             // Matras or Vowel signs alter the implicit
       
   231             // vowel sound associated with an Indic consonant.
       
   232 
       
   233             checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");
       
   234 
       
   235             char[] loneContextModifierChars = {
       
   236             '\u094d', // e8 -- Vowel Omission Sign ( Halant )
       
   237             '\u093c', // e9 -- Diacritic Sign ( Nukta )
       
   238             '\u0964' // ea -- Full Stop ( Viram, Northern Scripts )
       
   239             };
       
   240 
       
   241             checkRange(0xe8, 0xea,
       
   242                        loneContextModifierChars, "LONE INDIC CONTEXT CHARS");
       
   243 
       
   244 
       
   245             // Test Indic script numeral chars
       
   246             // (as opposed to international numerals)
       
   247 
       
   248             char[] expectNumeralChars =
       
   249             {
       
   250                 '\u0966', // f1 -- Digit 0
       
   251                 '\u0967', // f2 -- Digit 1
       
   252                 '\u0968', // f3 -- Digit 2
       
   253                 '\u0969', // f4 -- Digit 3
       
   254                 '\u096a', // f5 -- Digit 4
       
   255                 '\u096b', // f6 -- Digit 5
       
   256                 '\u096c', // f7 -- Digit 6
       
   257                 '\u096d', // f8 -- Digit 7
       
   258                 '\u096e', // f9 -- Digit 8
       
   259                 '\u096f'  // fa -- Digit 9
       
   260             };
       
   261 
       
   262             checkRange(0xf1, 0xfa,
       
   263                        expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");
       
   264             int lookupOffset = 0;
       
   265 
       
   266             char[] expectNuktaSub = {
       
   267                 '\u0950',
       
   268                 '\u090c',
       
   269                 '\u0961',
       
   270                 '\u0960',
       
   271                 '\u0962',
       
   272                 '\u0963',
       
   273                 '\u0944',
       
   274                 '\u093d'
       
   275             };
       
   276 
       
   277             /*
       
   278              * ISCII uses a number of code extension techniques
       
   279              * to access a number of lesser used characters.
       
   280              * The Nukta character which ordinarily signifies
       
   281              * a diacritic is used in combination with existing
       
   282              * characters to escape them to a different character.
       
   283              * value.
       
   284             */
       
   285 
       
   286             byte[] codeExtensionBytes = {
       
   287                 (byte)0xa1 , (byte)0xe9, // Chandrabindu + Nukta
       
   288                                          // =>DEVANAGARI OM SIGN
       
   289                 (byte)0xa6 , (byte)0xe9, // Vowel I + Nukta
       
   290                                          // => DEVANAGARI VOCALIC L
       
   291                 (byte)0xa7 , (byte)0xe9, // Vowel II + Nukta
       
   292                                          // => DEVANAGARI VOCALIC LL
       
   293                 (byte)0xaa , (byte)0xe9, // Vowel RI + Nukta
       
   294                                          // => DEVANAGARI VOCALIC RR
       
   295                 (byte)0xdb , (byte)0xe9, //  Vowel sign I + Nukta
       
   296                                          // => DEVANAGARI VOWEL SIGN VOCALIC L
       
   297                 (byte)0xdc , (byte)0xe9, // Vowel sign II + Nukta
       
   298                                          // => DEVANAGARI VOWEL SIGN VOCALIC LL
       
   299 
       
   300                 (byte)0xdf , (byte)0xe9, // Vowel sign Vocalic R + Nukta
       
   301                                          // => DEVANAGARI VOWEL SIGN VOCALIC RR
       
   302                 (byte)0xea , (byte)0xe9  // Full stop/Phrase separator + Nukta
       
   303                                          // => DEVANAGARI SIGN AVAGRAHA
       
   304             };
       
   305 
       
   306             lookupOffset = 0;
       
   307             byte[] bytePair = new byte[2];
       
   308 
       
   309             for (int i=0; i < (codeExtensionBytes.length)/2; i++ ) {
       
   310                 bytePair[0] = (byte) codeExtensionBytes[lookupOffset++];
       
   311                 bytePair[1] = (byte) codeExtensionBytes[lookupOffset++];
       
   312 
       
   313                 String unicodeStr = new String (bytePair,"ISCII91");
       
   314                 if (unicodeStr.charAt(0) != expectNuktaSub[i]) {
       
   315                     throw new Exception("Failed Nukta Sub");
       
   316                 }
       
   317             }
       
   318 
       
   319             lookupOffset = 0;
       
   320             byte[] comboBytes = {
       
   321                 (byte)0xe8 , (byte)0xe8, //HALANT + HALANT
       
   322                 (byte)0xe8 , (byte)0xe9  //HALANT + NUKTA    aka. Soft Halant
       
   323             };
       
   324             char[] expectCombChars = {
       
   325                 '\u094d',
       
   326                 '\u200c',
       
   327                 '\u094d',
       
   328                 '\u200d'
       
   329             };
       
   330 
       
   331             for (int i=0; i < (comboBytes.length)/2; i++ ) {
       
   332                 bytePair[0] = (byte) comboBytes[lookupOffset++];
       
   333                 bytePair[1] = (byte) comboBytes[lookupOffset];
       
   334                 String unicodeStr = new String (bytePair, "ISCII91");
       
   335                 if (unicodeStr.charAt(0) != expectCombChars[lookupOffset-1]
       
   336                     && unicodeStr.charAt(1) != expectCombChars[lookupOffset]) {
       
   337                     throw new Exception("Failed ISCII91 Regression Test");
       
   338                 }
       
   339                 lookupOffset++;
       
   340             }
       
   341 
       
   342         } catch (UnsupportedEncodingException e) {
       
   343              System.err.println ("ISCII91 encoding not supported");
       
   344              throw new Exception ("Failed ISCII91 Regression Test");
       
   345         }
       
   346     }
       
   347 
       
   348     public static void main (String[] args) throws Exception {
       
   349         test();
       
   350     }
       
   351 }