jdk/src/share/classes/sun/io/CharToByteGB18030.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 /*
       
    27  */
       
    28 
       
    29 
       
    30 package sun.io;
       
    31 import sun.nio.cs.ext.GB18030;
       
    32 
       
    33 public class CharToByteGB18030 extends CharToByteConverter
       
    34 {
       
    35 
       
    36     private char highHalfZoneCode;
       
    37     boolean flushed = true;
       
    38 
       
    39     private final static int GB18030_SINGLE_BYTE = 1;
       
    40     private final static int GB18030_DOUBLE_BYTE = 2;
       
    41     private final static int GB18030_FOUR_BYTE = 3;
       
    42     private static short[] index1;
       
    43     private static String[] index2;
       
    44     private int currentState;
       
    45 
       
    46     public CharToByteGB18030() {
       
    47         GB18030 nioCoder = new GB18030();
       
    48         currentState = GB18030_DOUBLE_BYTE;
       
    49         subBytes = new byte[1];
       
    50         subBytes[0] = (byte)'?';
       
    51         index1 = nioCoder.getEncoderIndex1();
       
    52         index2 = nioCoder.getEncoderIndex2();
       
    53     }
       
    54 
       
    55     public int flush(byte[] output, int outStart, int outEnd)
       
    56         throws MalformedInputException
       
    57     {
       
    58         if (highHalfZoneCode != 0) {
       
    59             highHalfZoneCode = 0;
       
    60             badInputLength = 0;
       
    61             throw new MalformedInputException();
       
    62         }
       
    63         reset();
       
    64         flushed = true;
       
    65         return 0;
       
    66     }
       
    67 
       
    68     public void reset() {
       
    69         byteOff = charOff = 0;
       
    70         currentState = GB18030_DOUBLE_BYTE;
       
    71     }
       
    72 
       
    73     public boolean canConvert(char c) {
       
    74         // converts all but unpaired surrogates
       
    75         // and illegal chars, U+FFFE & U+FFFF
       
    76 
       
    77         if ((c >= 0xd800 && c <=0xdfff) || (c >= 0xfffe))
       
    78             return false;
       
    79         else
       
    80             return true;
       
    81     }
       
    82 
       
    83     /**
       
    84      * Character conversion
       
    85      */
       
    86     public int convert(char[] input, int inOff, int inEnd,
       
    87                        byte[] output, int outOff, int outEnd)
       
    88         throws UnknownCharacterException, MalformedInputException,
       
    89                ConversionBufferFullException
       
    90     {
       
    91         int linearDiffValue = 0;
       
    92         int hiByte = 0 , loByte = 0;  // low and high order bytes
       
    93         char inputChar;  // Input character to be converted
       
    94         charOff = inOff;
       
    95         byteOff = outOff;
       
    96         int inputSize;  // Size of the input
       
    97         int outputSize; // Size of the output
       
    98 
       
    99         flushed = false;
       
   100 
       
   101         if (highHalfZoneCode != 0) {
       
   102             if (input[inOff] >= 0xDC00 && input[inOff] <= 0xDFFF) {
       
   103 
       
   104                 // This is legal UTF16 sequence, so shunt in the high
       
   105                 // surrogate for conversion by convert() loop.
       
   106 
       
   107                 char[] newBuf = new char[inEnd - inOff + 1];
       
   108                 newBuf[0] = highHalfZoneCode;
       
   109                 System.arraycopy(input, inOff, newBuf, 1, inEnd - inOff);
       
   110                 charOff -= 1;
       
   111                 input = newBuf;
       
   112                 inOff = 0;
       
   113                 inEnd = newBuf.length;
       
   114                 highHalfZoneCode = 0;
       
   115             } else {
       
   116                 // This is illegal UTF16 sequence.
       
   117                 badInputLength = 0;
       
   118                 throw new MalformedInputException();
       
   119             }
       
   120         }
       
   121 
       
   122         // Main encode loop
       
   123 
       
   124         while (charOff < inEnd) {
       
   125             inputChar = input[charOff++];
       
   126 
       
   127             if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
       
   128                 // Is this the last character of the input?
       
   129                 if (charOff + 1 > inEnd) {
       
   130                     highHalfZoneCode = inputChar;
       
   131                     break;
       
   132                 }
       
   133 
       
   134                 char previousChar = inputChar;
       
   135                 inputChar = input[charOff];
       
   136 
       
   137                 // Is there a low surrogate following?
       
   138                 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   139                     inputSize = 2;
       
   140                     charOff++;
       
   141                     linearDiffValue = ( previousChar - 0xD800) * 0x400 +
       
   142                                 ( inputChar - 0xDC00) + 0x2E248;
       
   143 
       
   144                     currentState = GB18030_FOUR_BYTE;
       
   145                 } else {
       
   146                     // We have a malformed surrogate pair
       
   147                     badInputLength = 1;
       
   148                     throw new MalformedInputException();
       
   149                 }
       
   150             }
       
   151             // Is this an unaccompanied low surrogate?
       
   152             else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   153                 badInputLength = 1;
       
   154                 throw new MalformedInputException();
       
   155             }
       
   156 
       
   157             // Not part of a surrogate
       
   158             else if (inputChar >= 0x0000 && inputChar <= 0x007F) {
       
   159                 if (byteOff >= outEnd) {
       
   160                    throw new ConversionBufferFullException();
       
   161                 }
       
   162                 currentState = GB18030_SINGLE_BYTE;
       
   163                 output[byteOff++] = (byte) inputChar;
       
   164             }
       
   165             else if (inputChar <= 0xA4C6 || inputChar >= 0xE000) {
       
   166                 int outByteVal = getGB18030(index1, index2, inputChar);
       
   167 
       
   168                 if (outByteVal == 0xFFFD ) {
       
   169                     if (subMode) {
       
   170                         if (byteOff >= outEnd) {
       
   171                            throw new ConversionBufferFullException();
       
   172                         } else {
       
   173                             output[byteOff++] = subBytes[0];
       
   174                             continue;
       
   175                         }
       
   176                     } else {
       
   177                         badInputLength = 1;
       
   178                         throw new UnknownCharacterException();
       
   179                     }
       
   180                 }
       
   181 
       
   182                 hiByte = (outByteVal & 0xFF00) >> 8;
       
   183                 loByte = (outByteVal & 0xFF);
       
   184 
       
   185                 linearDiffValue = (hiByte - 0x20) * 256 + loByte;
       
   186 
       
   187                 if (inputChar >= 0xE000 && inputChar < 0xF900)
       
   188                         linearDiffValue += 0x82BD;
       
   189                 else if (inputChar >= 0xF900)
       
   190                         linearDiffValue += 0x93A9;
       
   191 
       
   192                 if (hiByte > 0x80)
       
   193                      currentState = GB18030_DOUBLE_BYTE;
       
   194                 else
       
   195                      currentState = GB18030_FOUR_BYTE;
       
   196             }
       
   197             else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) {
       
   198                 linearDiffValue = inputChar - 0x5543;
       
   199                 currentState = GB18030_FOUR_BYTE;
       
   200             }
       
   201             else {
       
   202                 badInputLength = 1;
       
   203                 throw new MalformedInputException();
       
   204             }
       
   205 
       
   206             if (currentState == GB18030_SINGLE_BYTE)
       
   207                 continue;
       
   208 
       
   209             if (currentState == GB18030_DOUBLE_BYTE) {
       
   210                 if (byteOff + 2 > outEnd) {
       
   211                     throw new ConversionBufferFullException();
       
   212                 }
       
   213                 output[byteOff++] = (byte)hiByte;
       
   214                 output[byteOff++] = (byte)loByte;
       
   215             }
       
   216             else { // Four Byte encoding
       
   217                 if (byteOff + 4 > outEnd) {
       
   218                     throw new ConversionBufferFullException();
       
   219                 }
       
   220 
       
   221                 byte b1, b2, b3, b4;
       
   222 
       
   223                 b4 = (byte)((linearDiffValue % 10) + 0x30);
       
   224                 linearDiffValue /= 10;
       
   225                 b3 = (byte)((linearDiffValue % 126) + 0x81);
       
   226                 linearDiffValue /= 126;
       
   227                 b2 = (byte)((linearDiffValue % 10) + 0x30);
       
   228                 b1 = (byte)((linearDiffValue / 10) + 0x81);
       
   229                 output[byteOff++] = b1;
       
   230                 output[byteOff++] = b2;
       
   231                 output[byteOff++] = b3;
       
   232                 output[byteOff++] = b4;
       
   233             }
       
   234         }
       
   235         // Return number of bytes written to the output buffer.
       
   236         return byteOff - outOff;
       
   237     }
       
   238 
       
   239 
       
   240     /**
       
   241      * returns the maximum number of bytes needed to convert a char
       
   242      */
       
   243     public int getMaxBytesPerChar() {
       
   244         return 4;
       
   245     }
       
   246 
       
   247 
       
   248     /**
       
   249      * Return the character set ID
       
   250      */
       
   251     public String getCharacterEncoding() {
       
   252         return "GB18030";
       
   253     }
       
   254 
       
   255     private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) {
       
   256         int offset = outerIndex[((ch & 0xff00) >> 8 )] << 8;
       
   257 
       
   258         return innerIndex[offset >> 12].charAt((offset & 0xfff) + (ch & 0xff));
       
   259     }
       
   260 
       
   261 }