jdk/src/share/classes/sun/io/ByteToCharGB18030.java
author duke
Sat, 01 Dec 2007 00:00:00 +0000
changeset 2 90ce3da70b43
child 5506 202f599c92aa
permissions -rw-r--r--
Initial load

/*
 * Copyright 2001-2003 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

/*
 */

package sun.io;

import sun.nio.cs.ext.GB18030;

/**
 * Byte-to-char converter for the GB18030 encoding.
 *
 * <p>Input is decoded as one-byte (US-ASCII), two-byte, or four-byte
 * sequences. Two-byte sequences are resolved through the inherited
 * double-byte lookup ({@code super.getUnicode}); four-byte sequences are
 * resolved through a mixture of table lookups (tables obtained from
 * {@link GB18030}) and arithmetic, including surrogate-pair output for
 * supplementary code points.
 *
 * <p>A multi-byte sequence that is cut off at the end of the input buffer is
 * stashed in {@code savedBytes} and replayed in front of the input supplied
 * to the next {@link #convert} call.
 */
public class ByteToCharGB18030 extends ByteToCharGB18030DB {

    private static final int GB18030_SINGLE_BYTE = 1;
    private static final int GB18030_DOUBLE_BYTE = 2;
    private static final int GB18030_FOUR_BYTE = 3;

    // Lookup tables for the table-driven four-byte ranges. They are static
    // but are (re)assigned by every constructor invocation.
    private static short[] decoderIndex1;
    private static String[] decoderIndex2;

    // Kind of sequence most recently recognised by convert().
    private int currentState;
    // Number of valid bytes in savedBytes (0 when nothing is pending).
    private int savedSize;
    // Leading bytes of a sequence truncated at the end of the last buffer.
    private byte[] savedBytes;

    /**
     * Creates a converter with no pending input and loads the four-byte
     * decoding tables from a fresh {@link GB18030} instance.
     */
    public ByteToCharGB18030() {
        super();
        GB18030 nioCoder = new GB18030();
        savedBytes = new byte[3];
        currentState = GB18030_DOUBLE_BYTE;
        decoderIndex1 = nioCoder.getDecoderIndex1();
        decoderIndex2 = nioCoder.getDecoderIndex2();
        savedSize = 0;
    }

    /** Returns the inherited {@code index1} table. */
    public short[] getOuter() {
        return index1;
    }

    /** Returns the inherited {@code index2} table. */
    public String[] getInner() {
        return index2;
    }

    /** Returns the superclass {@code index1} table. */
    public short[] getDBIndex1() {
        return super.index1;
    }

    /** Returns the superclass {@code index2} table. */
    public String[] getDBIndex2() {
        return super.index2;
    }

    /**
     * Finishes a conversion. No characters are ever produced here.
     *
     * @return always 0
     * @throws MalformedInputException if a truncated multi-byte sequence is
     *         still pending; the pending bytes are discarded first
     */
    public int flush(char [] output, int outStart, int outEnd)
        throws MalformedInputException
    {
        if (savedSize != 0) {
            savedSize = 0;
            currentState = GB18030_DOUBLE_BYTE;
            badInputLength = 0;
            throw new MalformedInputException();
        }
        byteOff = charOff = 0;
        return 0;
    }


    /**
     * Converts GB18030 bytes from {@code input[inOff..inEnd)} into chars
     * stored in {@code output[outOff..outEnd)}.
     *
     * <p>If the input ends in the middle of a multi-byte sequence, the bytes
     * seen so far are saved and prepended to the input of the next call.
     *
     * @return the number of chars written to {@code output}
     * @throws UnknownCharacterException if an illegal lead byte (0x80 or
     *         0xFF) is met and substitution mode is off
     * @throws MalformedInputException if a trail byte is outside the range
     *         allowed at its position, or a four-byte sequence maps to no
     *         supported range
     * @throws ConversionBufferFullException if {@code output} has no room
     *         for the next character (two chars for a surrogate pair)
     */
    public int convert(byte[] input, int inOff, int inEnd,
                       char[] output, int outOff, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
        int inputSize = 0;
        // readOff is the cursor into the buffer actually being read (which
        // may be a rebuilt copy); byteOff tracks the caller's buffer.
        int readOff = byteOff = inOff;

        if (savedSize != 0) {
            // Validate the partial sequence kept from the previous call.
            // All comparisons are made on the unsigned byte value.
            int saved0 = savedBytes[0] & 0xFF;
            boolean illegal = saved0 < 0x81 || saved0 > 0xFE;
            if (!illegal && savedSize > 1) {
                illegal = (savedBytes[1] & 0xFF) < 0x30;
            }
            if (!illegal && savedSize > 2) {
                int saved2 = savedBytes[2] & 0xFF;
                illegal = saved2 < 0x81 || saved2 > 0xFE;
            }
            if (illegal) {
                // Drop the bad pending bytes (as flush() does) so that a
                // later call does not fail on the same stale state again.
                savedSize = 0;
                currentState = GB18030_DOUBLE_BYTE;
                badInputLength = 0;
                throw new MalformedInputException();
            }

            // Replay: saved bytes first, then the new input.
            byte[] newBuf = new byte[inEnd - inOff + savedSize];
            System.arraycopy(savedBytes, 0, newBuf, 0, savedSize);
            System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
            byteOff -= savedSize;
            input = newBuf;
            inOff = 0;
            inEnd = newBuf.length;
            savedSize = 0;
        }

        charOff = outOff;
        readOff = inOff;

        while (readOff < inEnd) {
            int byte1;
            int byte2 = 0;
            int byte3 = 0;
            int byte4 = 0;

            // Is there room in the output buffer for the result?
            if (charOff >= outEnd) {
                throw new ConversionBufferFullException();
            }

            // Get the input byte
            byte1 = input[readOff++] & 0xFF;
            inputSize = 1;

            if ((byte1 & 0x80) == 0) { // US-ASCII range
                currentState = GB18030_SINGLE_BYTE;
            }
            else if (byte1 < 0x81 || byte1 > 0xfe) {
                // 0x80 and 0xFF are not valid lead bytes.
                if (!subMode) {
                    badInputLength = 1;
                    throw new UnknownCharacterException();
                }
                // Emit the substitution character directly; the state
                // dispatch below must not run for this byte because
                // currentState still describes the previous character.
                output[charOff++] = subChars[0];
                byteOff += inputSize;
                continue;
            }
            else {
                // Either 2 or 4 byte sequence follows
                // If an underrun is detected save for later
                // replay.
                if (readOff >= inEnd) {
                    savedBytes[0] = (byte)byte1;
                    savedSize = 1;
                    break;
                }

                byte2 = input[readOff++] & 0xFF;
                inputSize = 2;

                if (byte2 < 0x30) {
                    badInputLength = 1;
                    throw new MalformedInputException();
                }
                else if (byte2 <= 0x39) {
                    // Second byte 0x30..0x39 marks a four-byte sequence.
                    currentState = GB18030_FOUR_BYTE;
                    inputSize = 4;

                    if (readOff + 2 > inEnd) {
                        // Fewer than two bytes left: stash what we have.
                        savedBytes[0] = (byte)byte1;
                        savedBytes[1] = (byte)byte2;
                        savedSize = 2;
                        if (readOff < inEnd) {
                            // Third byte is stored unchecked; it is
                            // validated when the sequence is replayed.
                            savedBytes[2] = input[readOff++];
                            savedSize = 3;
                        }
                        break;
                    }

                    byte3 = input[readOff++] & 0xFF;
                    if (byte3 < 0x81 || byte3 > 0xfe) {
                        badInputLength = 3;
                        throw new MalformedInputException();
                    }

                    byte4 = input[readOff++] & 0xFF;
                    if (byte4 < 0x30 || byte4 > 0x39) {
                        badInputLength = 4;
                        throw new MalformedInputException();
                    }
                }
                else if (byte2 == 0x7f || byte2 == 0xff || byte2 < 0x40) {
                    badInputLength = 2;
                    throw new MalformedInputException();
                }
                else {
                    currentState = GB18030_DOUBLE_BYTE;
                }
            }

            switch (currentState) {
                case GB18030_SINGLE_BYTE:
                    output[charOff++] = (char)byte1;
                    break;
                case GB18030_DOUBLE_BYTE:
                    output[charOff++] = super.getUnicode(byte1, byte2);
                    break;
                case GB18030_FOUR_BYTE:
                    decodeFourByte(byte1, byte2, byte3, byte4, output, outEnd);
                    break;
            }
            byteOff += inputSize;
        }

        // Bytes stashed for replay count as consumed from the caller's buffer.
        byteOff += savedSize;
        return charOff - outOff;
    }

    /**
     * Decodes one validated four-byte sequence and appends the resulting
     * char (or surrogate pair) to {@code output} at {@code charOff}.
     */
    private void decodeFourByte(int byte1, int byte2, int byte3, int byte4,
                                char[] output, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
        // Linear index of the sequence within the four-byte code space.
        int offset = (((byte1 - 0x81) * 10 +
                       (byte2 - 0x30)) * 126 +
                        byte3 - 0x81) * 10 + byte4 - 0x30;

        // Mixture of table lookups and algorithmic calculation
        // of character values.

        // BMP Ranges
        if (offset <= 0x4A62) {
            output[charOff++] = getChar(offset);
        }
        else if (offset <= 0x82BC) {
            output[charOff++] = (char) (offset + 0x5543);
        }
        else if (offset <= 0x830D) {
            output[charOff++] = getChar(offset);
        }
        else if (offset <= 0x93A8) {
            output[charOff++] = (char) (offset + 0x6557);
        }
        else if (offset <= 0x99FB) {
            output[charOff++] = getChar(offset);
        }
        // Supplemental UCS planes handled via surrogates
        else if (offset >= 0x2E248 && offset < 0x12E248) {
            if (charOff + 2 > outEnd) {
                throw new ConversionBufferFullException();
            }
            // 0x2E248 maps to 0x10000, so after this subtraction the value
            // lies in 0x10000..0x10FFFF and is split into a surrogate pair.
            int codePoint = offset - 0x1e248;
            char highSurr = (char) ((codePoint - 0x10000) / 0x400 + 0xD800);
            char lowSurr = (char) ((codePoint - 0x10000) % 0x400 + 0xDC00);
            output[charOff++] = highSurr;
            output[charOff++] = lowSurr;
        }
        else {
            // Offsets outside every range handled above.
            badInputLength = 4;
            throw new MalformedInputException();
        }
    }

    /** Discards any pending partial sequence and resets all offsets. */
    public void reset() {
        byteOff = charOff = 0;
        currentState = GB18030_DOUBLE_BYTE;
        savedSize = 0;
    }

    /** Returns the name of the encoding handled by this converter. */
    public String getCharacterEncoding() {
        return "GB18030";
    }

    /**
     * Looks up the char for a four-byte linear offset in the decoder tables.
     * The high byte of {@code offset} selects an entry of
     * {@code decoderIndex1}; that entry selects a string in
     * {@code decoderIndex2} and a 256-char row inside it, and the low byte
     * of {@code offset} selects the char within the row.
     *
     * @throws UnknownCharacterException if the high byte is outside
     *         {@code decoderIndex1} and substitution mode is off
     */
    private char getChar(int offset) throws UnknownCharacterException {
        final int rowSize = 0x100;
        int byte1 = (offset >> 8) & 0xFF;
        int byte2 = offset & 0xFF;

        // Both values are masked to 0..255, so the only check that can fail
        // is the upper bound on the table that is actually indexed below.
        if (byte1 >= decoderIndex1.length) {
            if (subMode) {
                return subChars[0];
            }
            // NOTE(review): callers pass offsets of four-byte sequences, so
            // 4 may be the intended length here - confirm before changing.
            badInputLength = 1;
            throw new UnknownCharacterException();
        }

        int entry = decoderIndex1[byte1];
        int n = (entry & 0xf) * rowSize + byte2;
        return decoderIndex2[entry >> 4].charAt(n);
    }
}