jdk/src/share/classes/sun/io/ByteToCharGB18030.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 /*
       
    27  */
       
    28 
       
    29 package sun.io;
       
    30 
       
    31 import sun.nio.cs.ext.GB18030;
       
    32 
       
    33 public class ByteToCharGB18030 extends ByteToCharGB18030DB {
       
    34 
       
    35     private static final int GB18030_SINGLE_BYTE = 1;
       
    36     private static final int GB18030_DOUBLE_BYTE = 2;
       
    37     private static final int GB18030_FOUR_BYTE = 3;
       
    38     private static short[] decoderIndex1;
       
    39     private static String[] decoderIndex2;
       
    40 
       
    41     private int currentState;
       
    42     private int savedSize;
       
    43     private byte[] savedBytes;
       
    44 
       
    45     public ByteToCharGB18030() {
       
    46         super();
       
    47         GB18030 nioCoder = new GB18030();
       
    48         savedBytes = new byte[3];
       
    49         currentState = GB18030_DOUBLE_BYTE;
       
    50         decoderIndex1 = nioCoder.getDecoderIndex1();
       
    51         decoderIndex2 = nioCoder.getDecoderIndex2();
       
    52         savedSize = 0;
       
    53     }
       
    54 
       
    55     public short[] getOuter() {
       
    56         return(index1);
       
    57     }
       
    58 
       
    59     public String[] getInner() {
       
    60         return(index2);
       
    61     }
       
    62 
       
    63     public short[] getDBIndex1() {
       
    64         return(super.index1);
       
    65     }
       
    66 
       
    67     public String[] getDBIndex2() {
       
    68         return(super.index2);
       
    69     }
       
    70 
       
    71     public int flush(char [] output, int outStart, int outEnd)
       
    72         throws MalformedInputException
       
    73     {
       
    74         if (savedSize != 0) {
       
    75             savedSize = 0;
       
    76             currentState = GB18030_DOUBLE_BYTE;
       
    77             badInputLength = 0;
       
    78             throw new MalformedInputException();
       
    79         }
       
    80         byteOff = charOff = 0;
       
    81         return 0;
       
    82     }
       
    83 
       
    84 
       
    85     /**
       
    86      * Character conversion
       
    87      */
       
    88     public int convert(byte[] input, int inOff, int inEnd,
       
    89                        char[] output, int outOff, int outEnd)
       
    90         throws UnknownCharacterException, MalformedInputException,
       
    91                ConversionBufferFullException
       
    92     {
       
    93         int inputSize = 0;
       
    94         char outputChar = '\uFFFD';
       
    95         // readOff keeps the actual buffer's pointer.
       
    96         // byteOff keeps original buffer's pointer.
       
    97         int readOff = byteOff = inOff;
       
    98 
       
    99         if (savedSize != 0) {
       
   100             // Filter illegal bytes when they are detected in saved
       
   101             // partial input from a previous conversion attempt.
       
   102             if (((savedBytes[0] & 0xFF) < 0x81 || savedBytes[0] > 0xFE) ||
       
   103                  (savedSize > 1 &&
       
   104                  (savedBytes[1] & 0xFF) < 0x30 ) ||
       
   105                  (savedSize > 2 &&
       
   106                  ((savedBytes[2] & 0xFF) < 0x81 ||
       
   107                  (savedBytes[2] & 0xFF) > 0xFE ))) {
       
   108                     badInputLength = 0;
       
   109                     throw new MalformedInputException();
       
   110             }
       
   111 
       
   112             byte[] newBuf = new byte[inEnd - inOff + savedSize];
       
   113             for (int i = 0; i < savedSize; i++) {
       
   114                 newBuf[i] = savedBytes[i];
       
   115             }
       
   116             System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
       
   117             byteOff -= savedSize;
       
   118             input = newBuf;
       
   119             inOff = 0;
       
   120             inEnd = newBuf.length;
       
   121             savedSize = 0;
       
   122         }
       
   123 
       
   124         charOff = outOff;
       
   125         readOff = inOff;
       
   126 
       
   127         while(readOff < inEnd) {
       
   128             int byte1 = 0 , byte2 = 0, byte3 = 0, byte4 = 0;
       
   129 
       
   130             // Is there room in the output buffer for the result?
       
   131             if (charOff >= outEnd) {
       
   132                 throw new ConversionBufferFullException();
       
   133             }
       
   134 
       
   135             // Get the input byte
       
   136             byte1 = input[readOff++] & 0xFF;
       
   137             inputSize = 1;
       
   138 
       
   139             if ((byte1 & (byte)0x80) == 0){ // US-ASCII range
       
   140                 outputChar = (char)byte1;
       
   141                 currentState = GB18030_SINGLE_BYTE;
       
   142             }
       
   143 
       
   144             else if (byte1 < 0x81 || byte1 > 0xfe) {
       
   145                 if (subMode)
       
   146                     outputChar = subChars[0];
       
   147                 else {
       
   148                     badInputLength = 1;
       
   149                     throw new UnknownCharacterException();
       
   150                 }
       
   151             }
       
   152             else {
       
   153                 // Either 2 or 4 byte sequence follows
       
   154                 // If an underrun is detected save for later
       
   155                 // replay.
       
   156 
       
   157                 if (readOff + inputSize > inEnd) {
       
   158                     savedBytes[0]=(byte)byte1;
       
   159                     savedSize = 1;
       
   160                     break;
       
   161                 }
       
   162 
       
   163                 byte2 = input[readOff++] & 0xFF;
       
   164                 inputSize = 2;
       
   165 
       
   166                 if (byte2 < 0x30) {
       
   167                     badInputLength = 1;
       
   168                     throw new MalformedInputException();
       
   169                 }
       
   170                 else if (byte2 >= 0x30 && byte2 <= 0x39) {
       
   171                     currentState = GB18030_FOUR_BYTE;
       
   172                     inputSize = 4;
       
   173 
       
   174                     if (readOff + 2 > inEnd) {
       
   175                         if (readOff + 1 > inEnd) {
       
   176                             savedBytes[0] = (byte)byte1;
       
   177                             savedBytes[1] = (byte)byte2;
       
   178                             savedSize = 2;
       
   179                         }
       
   180                         else {
       
   181                             savedBytes[0] = (byte)byte1;
       
   182                             savedBytes[1] = (byte)byte2;
       
   183                             savedBytes[2] = input[readOff++];
       
   184                             savedSize = 3;
       
   185                         }
       
   186                         break;
       
   187                     }
       
   188                     byte3 = input[readOff++] & 0xFF;
       
   189                     if (byte3 < 0x81 || byte3 > 0xfe) {
       
   190                         badInputLength = 3;
       
   191                         throw new MalformedInputException();
       
   192                     }
       
   193 
       
   194                     byte4 = input[readOff++] & 0xFF;
       
   195 
       
   196                     if (byte4 < 0x30 || byte4 > 0x39) {
       
   197                         badInputLength = 4;
       
   198                         throw new MalformedInputException();
       
   199                     }
       
   200                 }
       
   201                 else if (byte2 == 0x7f || byte2 == 0xff ||
       
   202                         (byte2 < 0x40 )) {
       
   203                    badInputLength = 2;
       
   204                    throw new MalformedInputException();
       
   205                 }
       
   206                 else
       
   207                     currentState = GB18030_DOUBLE_BYTE;
       
   208             }
       
   209 
       
   210             switch (currentState){
       
   211                 case GB18030_SINGLE_BYTE:
       
   212                     output[charOff++] = (char)(byte1);
       
   213                     break;
       
   214                 case GB18030_DOUBLE_BYTE:
       
   215                     output[charOff++] = super.getUnicode(byte1, byte2);
       
   216                     break;
       
   217                 case GB18030_FOUR_BYTE:
       
   218                     int offset = (((byte1 - 0x81) * 10 +
       
   219                                    (byte2 - 0x30)) * 126 +
       
   220                                     byte3 - 0x81) * 10 + byte4 - 0x30;
       
   221                     int hiByte = (offset >>8) & 0xFF;
       
   222                     int lowByte = (offset & 0xFF);
       
   223 
       
   224                 // Mixture of table lookups and algorithmic calculation
       
   225                 // of character values.
       
   226 
       
   227                 // BMP Ranges
       
   228 
       
   229                 if (offset <= 0x4A62)
       
   230                     output[charOff++] = getChar(offset);
       
   231                 else if (offset > 0x4A62 && offset <= 0x82BC)
       
   232                     output[charOff++] = (char) (offset + 0x5543);
       
   233                 else if (offset >= 0x82BD && offset <= 0x830D)
       
   234                     output[charOff++] = getChar(offset);
       
   235                 else if (offset >= 0x830D && offset <= 0x93A8)
       
   236                     output[charOff++] = (char) (offset + 0x6557);
       
   237                 else if (offset >= 0x93A9 && offset <= 0x99FB)
       
   238                     output[charOff++] = getChar(offset);
       
   239                 // Supplemental UCS planes handled via surrogates
       
   240                 else if (offset >= 0x2E248 && offset < 0x12E248) {
       
   241                     if (offset >= 0x12E248) {
       
   242                         if (subMode)
       
   243                            return subChars[0];
       
   244                         else {
       
   245                            badInputLength = 4;
       
   246                            throw new UnknownCharacterException();
       
   247                         }
       
   248                     }
       
   249 
       
   250                     if (charOff +2 > outEnd) {
       
   251                         throw new ConversionBufferFullException();
       
   252                     }
       
   253                     offset -= 0x1e248;
       
   254                     char highSurr = (char) ((offset - 0x10000) / 0x400 + 0xD800);
       
   255                     char lowSurr = (char) ((offset - 0x10000) % 0x400 + 0xDC00);
       
   256                     output[charOff++] = highSurr;
       
   257                     output[charOff++] = lowSurr;
       
   258                 }
       
   259                 else {
       
   260                     badInputLength = 4;
       
   261                     throw new MalformedInputException();
       
   262                     }
       
   263                 break;
       
   264               }
       
   265         byteOff += inputSize;
       
   266         }
       
   267 
       
   268         byteOff += savedSize;
       
   269         return charOff - outOff;
       
   270     }
       
   271 
       
   272     public void reset() {
       
   273         byteOff = charOff = 0;
       
   274         currentState = GB18030_DOUBLE_BYTE;
       
   275         savedSize = 0;
       
   276     }
       
   277 
       
   278     public String getCharacterEncoding() {
       
   279         return "GB18030";
       
   280     }
       
   281 
       
   282     private char getChar(int offset) throws UnknownCharacterException {
       
   283         int byte1 = (offset >>8) & 0xFF;
       
   284         int byte2 = (offset & 0xFF);
       
   285         int start = 0, end = 0xFF;
       
   286 
       
   287         if (((byte1 < 0) || (byte1 > getOuter().length))
       
   288              || ((byte2 < start) || (byte2 > end))) {
       
   289                 if (subMode)
       
   290                    return subChars[0];
       
   291                 else {
       
   292                    badInputLength = 1;
       
   293                    throw new UnknownCharacterException();
       
   294                 }
       
   295         }
       
   296 
       
   297         int n = (decoderIndex1[byte1] & 0xf) * (end - start + 1) + (byte2 - start);
       
   298         return decoderIndex2[decoderIndex1[byte1] >> 4].charAt(n);
       
   299     }
       
   300 }