diff -r 64f7ee2f31dd -r 2f6d68f22eae jdk/src/share/classes/sun/io/CharToByteUTF8.java --- a/jdk/src/share/classes/sun/io/CharToByteUTF8.java Mon Aug 29 14:27:21 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -/* - * Copyright (c) 1996, 1997, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package sun.io; - - -/** - * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter - * It's represented like below. - * - * # Bits Bit pattern - * 1 7 0xxxxxxx - * 2 11 110xxxxx 10xxxxxx - * 3 16 1110xxxx 10xxxxxx 10xxxxxx - * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - * 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * - * UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6 - */ - -public class CharToByteUTF8 extends CharToByteConverter { - - private char highHalfZoneCode; - - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException - { - if (highHalfZoneCode != 0) { - highHalfZoneCode = 0; - badInputLength = 0; - throw new MalformedInputException(); - } - byteOff = charOff = 0; - return 0; - } - - /** - * Character conversion - */ - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws ConversionBufferFullException, MalformedInputException - { - char inputChar; - byte[] outputByte = new byte[6]; - int inputSize; - int outputSize; - - charOff = inOff; - byteOff = outOff; - - if (highHalfZoneCode != 0) { - inputChar = highHalfZoneCode; - highHalfZoneCode = 0; - if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) { - // This is legal UTF16 sequence. - int ucs4 = (highHalfZoneCode - 0xd800) * 0x400 - + (input[inOff] - 0xdc00) + 0x10000; - output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07); - output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f)); - output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f)); - output[3] = (byte)(0x80 | (ucs4 & 0x3f)); - charOff++; - highHalfZoneCode = 0; - } else { - // This is illegal UTF16 sequence. - badInputLength = 0; - throw new MalformedInputException(); - } - } - - while(charOff < inEnd) { - inputChar = input[charOff]; - if (inputChar < 0x80) { - outputByte[0] = (byte)inputChar; - inputSize = 1; - outputSize = 1; - } else if (inputChar < 0x800) { - outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f)); - outputByte[1] = (byte)(0x80 | (inputChar & 0x3f)); - inputSize = 1; - outputSize = 2; - } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) { - // this is in UTF-16 - if (charOff + 1 >= inEnd) { - highHalfZoneCode = inputChar; - break; - } - // check next char is valid - char lowChar = input[charOff + 1]; - if (lowChar < 0xdc00 || lowChar > 0xdfff) { - badInputLength = 1; - throw new MalformedInputException(); - } - int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00) - + 0x10000; - outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07); - outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f)); - outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f)); - outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f)); - outputSize = 4; - inputSize = 2; - } else { - outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f); - outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f)); - outputByte[2] = (byte)(0x80 | (inputChar & 0x3f)); - inputSize = 1; - outputSize = 3; - } - if (byteOff + outputSize > outEnd) { - throw new ConversionBufferFullException(); - } - for (int i = 0; i < outputSize; i++) { - output[byteOff++] = outputByte[i]; - } - charOff += inputSize; - } - return byteOff - outOff; - } - - public boolean canConvert(char ch) { - return true; - } - - public int getMaxBytesPerChar() { - return 3; - } - - public void reset() { - byteOff = charOff = 0; - highHalfZoneCode = 0; - } - - public String getCharacterEncoding() { - return "UTF8"; - } -}