jdk/src/share/classes/sun/io/ByteToCharISCII91.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1999, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 
       
    28 import sun.nio.cs.ext.ISCII91;
       
    29 
       
    30 /*
       
    31  * Copyright (c) 1998 International Business Machines.
       
    32  * All Rights Reserved.
       
    33  *
       
    34  * Author : Sunanda Bera, C. Thirumalesh
       
    35  * Last Modified : 23,November,1998
       
    36  *
       
    37  * Purpose : Defines class ByteToCharISCII91.
       
    38  *
       
    39  *
       
    40  * Revision History
       
    41  * ======== =======
       
    42  *
       
    43  * Date        By            Description
       
    44  * ----        --            -----------
       
    45  *
       
    46  *
       
    47  */
       
    48 
       
    49 /**
       
    50  * Converter class. Converts between Unicode encoding and ISCII91 encoding.
       
    51  * ISCII91 is the character encoding as defined in Indian Standard document
       
    52  * IS 13194:1991 ( Indian Script Code for Information Interchange ).
       
    53  *
       
    54  * @see sun.io.ByteToCharConverter
       
    55  */
       
    56 public class ByteToCharISCII91 extends ByteToCharConverter {
       
    57 
       
    58     private static final char[] directMapTable = ISCII91.getDirectMapTable();
       
    59 
       
    60     private static final char NUKTA_CHAR = '\u093c';
       
    61     private static final char HALANT_CHAR = '\u094d';
       
    62     private static final char ZWNJ_CHAR = '\u200c';
       
    63     private static final char ZWJ_CHAR = '\u200d';
       
    64     private static final char INVALID_CHAR = '\uffff';
       
    65 
       
    66     private char contextChar = INVALID_CHAR;
       
    67     private boolean needFlushing = false;
       
    68 
       
    69 /**
       
    70  * Converts ISCII91 characters to Unicode.
       
    71  * @see sun.io.ByteToCharConverter#convert
       
    72  */
       
    73     public int convert(byte input[], int inStart, int inEnd,
       
    74                         char output[], int outStart, int outEnd)
       
    75     throws ConversionBufferFullException, UnknownCharacterException {
       
    76         /*Rules:
       
    77          * 1)ATR,EXT,following character to be replaced with '\ufffd'
       
    78          * 2)Halant + Halant => '\u094d' (Virama) + '\u200c'(ZWNJ)
       
    79          * 3)Halant + Nukta => '\u094d' (Virama) + '\u200d'(ZWJ)
       
    80          */
       
    81         charOff = outStart;
       
    82         byteOff = inStart;
       
    83         while (byteOff < inEnd) {
       
    84             if (charOff >= outEnd) {
       
    85                 throw new ConversionBufferFullException();
       
    86             }
       
    87             int index = input[byteOff++];
       
    88             index = ( index < 0 )? ( index + 255 ):index;
       
    89             char currentChar = directMapTable[index];
       
    90 
       
    91             // if the contextChar is either ATR || EXT set the output to '\ufffd'
       
    92             if(contextChar == '\ufffd') {
       
    93                 output[charOff++] = '\ufffd';
       
    94                 contextChar = INVALID_CHAR;
       
    95                 needFlushing = false;
       
    96                 continue;
       
    97             }
       
    98 
       
    99             switch(currentChar) {
       
   100             case '\u0901':
       
   101             case '\u0907':
       
   102             case '\u0908':
       
   103             case '\u090b':
       
   104             case '\u093f':
       
   105             case '\u0940':
       
   106             case '\u0943':
       
   107             case '\u0964':
       
   108                 if(needFlushing) {
       
   109                     output[charOff++] = contextChar;
       
   110                     contextChar = currentChar;
       
   111                     continue;
       
   112                 }
       
   113                 contextChar = currentChar;
       
   114                 needFlushing = true;
       
   115                 continue;
       
   116             case NUKTA_CHAR:
       
   117                 switch(contextChar) {
       
   118                 case '\u0901':
       
   119                     output[charOff] = '\u0950';
       
   120                     break;
       
   121                 case '\u0907':
       
   122                     output[charOff] = '\u090c';
       
   123                     break;
       
   124                 case '\u0908':
       
   125                     output[charOff] = '\u0961';
       
   126                     break;
       
   127                 case '\u090b':
       
   128                     output[charOff] = '\u0960';
       
   129                     break;
       
   130                 case '\u093f':
       
   131                     output[charOff] = '\u0962';
       
   132                     break;
       
   133                 case '\u0940':
       
   134                     output[charOff] = '\u0963';
       
   135                     break;
       
   136                 case '\u0943':
       
   137                     output[charOff] = '\u0944';
       
   138                     break;
       
   139                 case '\u0964':
       
   140                     output[charOff] = '\u093d';
       
   141                     break;
       
   142                 case HALANT_CHAR:
       
   143                     if(needFlushing) {
       
   144                         output[charOff++] = contextChar;
       
   145                         contextChar = currentChar;
       
   146                         continue;
       
   147                     }
       
   148                     output[charOff] = ZWJ_CHAR;
       
   149                     break;
       
   150                 default:
       
   151                     if(needFlushing) {
       
   152                         output[charOff++] = contextChar;
       
   153                         contextChar = currentChar;
       
   154                         continue;
       
   155                     }
       
   156                     output[charOff] = NUKTA_CHAR;
       
   157                 }
       
   158                 break;
       
   159             case HALANT_CHAR:
       
   160                 if(needFlushing) {
       
   161                     output[charOff++] = contextChar;
       
   162                     contextChar = currentChar;
       
   163                     continue;
       
   164                 }
       
   165                 if(contextChar == HALANT_CHAR) {
       
   166                     output[charOff] = ZWNJ_CHAR;
       
   167                     break;
       
   168                 }
       
   169                 output[charOff] = HALANT_CHAR;
       
   170                 break;
       
   171             case INVALID_CHAR:
       
   172                 if(needFlushing) {
       
   173                     output[charOff++] = contextChar;
       
   174                     contextChar = currentChar;
       
   175                     continue;
       
   176                 }
       
   177                 if(subMode) {
       
   178                     output[charOff] = subChars[0];
       
   179                     break;
       
   180                 } else {
       
   181                     contextChar = INVALID_CHAR;
       
   182                     throw new UnknownCharacterException();
       
   183                 }
       
   184             default:
       
   185                 if(needFlushing) {
       
   186                     output[charOff++] = contextChar;
       
   187                     contextChar = currentChar;
       
   188                     continue;
       
   189                 }
       
   190                 output[charOff] = currentChar;
       
   191                 break;
       
   192         }//end switch
       
   193 
       
   194         contextChar = currentChar;
       
   195         needFlushing = false;
       
   196         charOff++;
       
   197         }//end while
       
   198         return charOff - outStart;
       
   199     } //convert()
       
   200 
       
   201 /**
       
   202  * @see sun.io.ByteToCharConverter#flush
       
   203  */
       
   204     public  int flush( char[] output, int outStart, int outEnd )
       
   205     throws MalformedInputException, ConversionBufferFullException
       
   206     {
       
   207         int charsWritten = 0;
       
   208         //if the last char was not flushed, flush it!
       
   209         if(needFlushing) {
       
   210             output[outStart] = contextChar;
       
   211             charsWritten = 1;
       
   212         }
       
   213         contextChar = INVALID_CHAR;
       
   214         needFlushing = false;
       
   215         byteOff = charOff = 0;
       
   216         return charsWritten;
       
   217     }//flush()
       
   218 /**
       
   219  * Returns the character set id for the conversion.
       
   220  */
       
   221     public String getCharacterEncoding()
       
   222     {
       
   223         return "ISCII91";
       
   224     }//getCharacterEncoding()
       
   225 /**
       
   226  * @see sun.io.ByteToCharConverter#reset
       
   227  */
       
   228     public void reset()
       
   229     {
       
   230         byteOff = charOff = 0;
       
   231     }//reset()
       
   232 
       
   233 }//end of class definition