jdk/src/share/classes/sun/io/ByteToCharUTF8.java
author sherman
Fri, 19 Jun 2009 14:39:06 -0700
changeset 3052 5c9886498f31
parent 2 90ce3da70b43
child 4818 fd477db6c4ee
permissions -rw-r--r--
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding 4891024: EUC-KR and JOHAB converters need to be updated to include two new characters 4287467: Character converter generator tool Summary: Migrated some of the doublebyte charsets to the new implementation. Reviewed-by: okutsu

/*
 * Copyright 1996-1997 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
package sun.io;


/**
 * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter.
 *
 * <p>Decodes one-, two-, three- and four-byte UTF-8 sequences. A four-byte
 * sequence is emitted as a UTF-16 surrogate pair. A multi-byte sequence that
 * is cut off by the end of the input is remembered and completed by the next
 * call to {@link #convert}.
 *
 * <p>Overlong two- and three-byte forms, and three-byte encodings of
 * surrogate code units, are decoded as-is and not rejected.
 *
 * see CharToByteUTF8.java about UTF-8 format
 */

public class ByteToCharUTF8 extends ByteToCharConverter {

    /** Number of valid bytes currently held in {@link #savedBytes}. */
    private int savedSize;

    /**
     * Leading bytes of a multi-byte sequence that was incomplete at the end
     * of the previous {@link #convert} call. At most three entries are used.
     */
    private final byte[] savedBytes;

    /**
     * Creates a converter with no pending partial sequence.
     */
    public ByteToCharUTF8() {
        super();
        savedSize = 0;
        savedBytes = new byte[5];
    }

    /**
     * Finishes a conversion. Produces no characters.
     *
     * @param output   unused
     * @param outStart unused
     * @param outEnd   unused
     * @return always 0
     * @throws MalformedInputException if an incomplete multi-byte sequence
     *         is still pending; the pending bytes are discarded and
     *         badInputLength is set to 0 before throwing
     */
    public int flush(char[] output, int outStart, int outEnd)
        throws MalformedInputException
    {
        if (savedSize != 0) {
            savedSize = 0;
            badInputLength = 0;
            throw new MalformedInputException();
        }
        byteOff = charOff = 0;
        return 0;
    }

    /**
     * Character conversion.
     *
     * <p>Decodes UTF-8 bytes from {@code input[inOff..inEnd)} into
     * {@code output[outOff..outEnd)}. On return, and whenever an exception is
     * thrown, byteOff is an index into the caller's {@code input} array and
     * charOff is the index just past the last character written.
     *
     * <p>If the input ends in the middle of a multi-byte sequence, the bytes
     * seen so far are saved, counted as consumed, and prepended to the input
     * of the next call.
     *
     * @param input  bytes to decode
     * @param inOff  index of the first byte to decode
     * @param inEnd  index just past the last byte to decode
     * @param output destination for decoded characters
     * @param outOff index at which to store the first character
     * @param outEnd index just past the last usable slot in {@code output}
     * @return number of characters written
     * @throws MalformedInputException if a lead byte is invalid, a
     *         continuation byte does not have the form 10xxxxxx, or a
     *         four-byte sequence decodes to a value outside
     *         U+10000..U+10FFFF; badInputLength holds the length of the
     *         offending sequence
     * @throws ConversionBufferFullException if the next character does not
     *         fit in {@code output}; byteOff then designates the first byte
     *         of that character so the call can be retried from there
     */
    public int convert(byte[] input, int inOff, int inEnd,
                       char[] output, int outOff, int outEnd)
        throws MalformedInputException, ConversionBufferFullException
    {
        int byte1, byte2, byte3, byte4;
        char[] outputChar = new char[2];
        int outputSize;
        // Added to positions in the working buffer to obtain positions in
        // the caller's input array.
        int byteOffAdjustment = 0;
        // Number of bytes from the previous call prepended to this input.
        int carriedOver = 0;

        if (savedSize != 0) {
            // Work on a temporary buffer holding the pending bytes followed
            // by the new input.
            byte[] newBuf = new byte[inEnd - inOff + savedSize];
            System.arraycopy(savedBytes, 0, newBuf, 0, savedSize);
            System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
            // Position p in newBuf corresponds to caller index
            // inOff + (p - savedSize); keep the caller's inOff in the
            // adjustment so byteOff is reported as an absolute index.
            byteOffAdjustment = inOff - savedSize;
            carriedOver = savedSize;
            input = newBuf;
            inOff = 0;
            inEnd = newBuf.length;
            savedSize = 0;
        }

        charOff = outOff;
        byteOff = inOff;
        int startByteOff;

        while (byteOff < inEnd) {

            startByteOff = byteOff;
            byte1 = input[byteOff++] & 0xff;

            if ((byte1 & 0x80) == 0) {
                // 0xxxxxxx: single byte
                outputChar[0] = (char)byte1;
                outputSize = 1;
            } else if ((byte1 & 0xe0) == 0xc0) {
                // 110xxxxx 10xxxxxx
                if (byteOff >= inEnd) {
                    savedSize = 1;
                    savedBytes[0] = (byte)byte1;
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80) {
                    badInputLength = 2;
                    byteOff += byteOffAdjustment;
                    throw new MalformedInputException();
                }
                outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
                outputSize = 1;
            } else if ((byte1 & 0xf0) == 0xe0) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (byteOff + 1 >= inEnd) {
                    // Fewer than two bytes follow: save what is there.
                    savedBytes[0] = (byte)byte1;
                    if (byteOff >= inEnd) {
                        savedSize = 1;
                    } else {
                        savedSize = 2;
                        savedBytes[1] = input[byteOff++];
                    }
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                byte3 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
                    badInputLength = 3;
                    byteOff += byteOffAdjustment;
                    throw new MalformedInputException();
                }
                outputChar[0] = (char)(((byte1 & 0x0f) << 12)
                                       | ((byte2 & 0x3f) << 6)
                                       | (byte3 & 0x3f));
                outputSize = 1;
            } else if ((byte1 & 0xf8) == 0xf0) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                if (byteOff + 2 >= inEnd) {
                    // Fewer than three bytes follow: save what is there.
                    savedBytes[0] = (byte)byte1;
                    if (byteOff >= inEnd) {
                        savedSize = 1;
                    } else if (byteOff + 1 >= inEnd) {
                        savedSize = 2;
                        savedBytes[1] = input[byteOff++];
                    } else {
                        savedSize = 3;
                        savedBytes[1] = input[byteOff++];
                        savedBytes[2] = input[byteOff++];
                    }
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                byte3 = input[byteOff++] & 0xff;
                byte4 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80 ||
                    (byte3 & 0xc0) != 0x80 ||
                    (byte4 & 0xc0) != 0x80) {
                    badInputLength = 4;
                    byteOff += byteOffAdjustment;
                    throw new MalformedInputException();
                }
                int ucs4 = (0x07 & byte1) << 18 |
                           (0x3f & byte2) << 12 |
                           (0x3f & byte3) <<  6 |
                           (0x3f & byte4);
                // The surrogate arithmetic below is only meaningful for
                // supplementary code points. Anything else (an overlong
                // form, or a value past U+10FFFF) would yield chars that
                // are not the surrogate pair of any code point.
                if (ucs4 < 0x10000 || ucs4 > 0x10ffff) {
                    badInputLength = 4;
                    byteOff += byteOffAdjustment;
                    throw new MalformedInputException();
                }
                // this byte sequence is a UTF16 surrogate pair
                outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
                outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
                outputSize = 2;
            } else {
                // Stray continuation byte (10xxxxxx) or lead byte 11111xxx.
                badInputLength = 1;
                byteOff += byteOffAdjustment;
                throw new MalformedInputException();
            }

            if (charOff + outputSize > outEnd) {
                if (startByteOff < carriedOver) {
                    // The character that does not fit starts inside the
                    // bytes carried over from the previous call. Those bytes
                    // are still in savedBytes (nothing has overwritten them
                    // during this call), so mark them pending again and
                    // report that none of the caller's input was consumed.
                    savedSize = carriedOver;
                    byteOff = carriedOver;
                } else {
                    byteOff = startByteOff;
                }
                byteOff += byteOffAdjustment;
                throw new ConversionBufferFullException();
            }

            System.arraycopy(outputChar, 0, output, charOff, outputSize);
            charOff += outputSize;
        }

        byteOff += byteOffAdjustment;
        return charOff - outOff;
    }

    /**
     * Return the character set id.
     *
     * @return the string "UTF8"
     */
    public String getCharacterEncoding() {
        return "UTF8";
    }

    /**
     * Reset after finding bad input: zeroes byteOff and charOff and drops
     * any pending partial sequence.
     */
    public void reset() {
        byteOff = charOff = 0;
        savedSize = 0;
    }
}