jdk-sandbox: comparison jdk/src/share/classes/sun/io/CharToByteUTF8.java

equal deleted inserted replaced

-:64f7ee2f31dd
+:2f6d68f22eae
-/*
-* Copyright (c) 1996, 1997, Oracle and/or its affiliates. All rights reserved.
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.  Oracle designates this
-* particular file as subject to the "Classpath" exception as provided
-* by Oracle in the LICENSE file that accompanied this code.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*/
-package sun.io;
-/**
-* Converts UCS2 (UTF16) characters to UCS Transformation Format 8 (UTF-8).
-* The byte layout for each encoded length is shown below.
-*
-* # Bits   Bit pattern
-* 1    7   0xxxxxxx
-* 2   11   110xxxxx 10xxxxxx
-* 3   16   1110xxxx 10xxxxxx 10xxxxxx
-* 4   21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-* 5   26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-* 6   31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-*
-*     UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
-*/
-public class CharToByteUTF8 extends CharToByteConverter {
-    /**
-     * High surrogate (0xd800-0xdbff) that ended the previous convert() call
-     * without its low surrogate, or 0 when nothing is pending.
-     */
-    private char highHalfZoneCode;
-    /**
-     * Finishes a conversion. This converter buffers no output bytes, so
-     * nothing is ever written to <code>output</code>.
-     *
-     * @param output   unused
-     * @param outStart unused
-     * @param outEnd   unused
-     * @return always 0
-     * @throws MalformedInputException if a high surrogate is still waiting
-     *         for its low surrogate; the pending surrogate is discarded
-     */
-    public int flush(byte[] output, int outStart, int outEnd)
-        throws MalformedInputException
-    {
-        if (highHalfZoneCode != 0) {
-            highHalfZoneCode = 0;
-            badInputLength = 0;
-            throw new MalformedInputException();
-        }
-        byteOff = charOff = 0;
-        return 0;
-    }
-    /**
-     * Converts <code>input[inOff..inEnd)</code> to UTF-8 and stores the
-     * result in <code>output[outOff..outEnd)</code>.
-     * <p>
-     * A high surrogate that is the last character of the input is remembered
-     * and combined with the first character of the next call. While the
-     * method runs, <code>charOff</code> and <code>byteOff</code> track the
-     * next unread character and the next free output position; when an
-     * exception is thrown they still point at the character that could not
-     * be converted.
-     *
-     * @param input  characters to convert
-     * @param inOff  index of the first character to convert
-     * @param inEnd  index one past the last character to convert
-     * @param output destination for the UTF-8 bytes
-     * @param outOff index of the first byte that may be written
-     * @param outEnd index one past the last byte that may be written
-     * @return number of bytes written to <code>output</code>
-     * @throws ConversionBufferFullException if the next encoded character
-     *         does not fit before <code>outEnd</code>
-     * @throws MalformedInputException if a high surrogate is not followed
-     *         by a low surrogate
-     */
-    public int convert(char[] input, int inOff, int inEnd,
-                       byte[] output, int outOff, int outEnd)
-        throws ConversionBufferFullException, MalformedInputException
-    {
-        char inputChar;
-        byte[] outputByte = new byte[4];    // longest sequence produced here
-        int inputSize;
-        int outputSize;
-        charOff = inOff;
-        byteOff = outOff;
-        if (highHalfZoneCode != 0) {
-            // Resume the surrogate pair split across two convert() calls.
-            if (inOff >= inEnd) {
-                // No character to pair with yet; keep the surrogate pending
-                // instead of reading past the end of the input range.
-                return 0;
-            }
-            char lowChar = input[inOff];
-            if (lowChar < 0xdc00 || lowChar > 0xdfff) {
-                // This is illegal UTF16 sequence.
-                highHalfZoneCode = 0;
-                badInputLength = 0;
-                throw new MalformedInputException();
-            }
-            if (byteOff + 4 > outEnd) {
-                // The surrogate stays pending so the call can be repeated
-                // with a larger buffer.
-                throw new ConversionBufferFullException();
-            }
-            // This is legal UTF16 sequence. Encode from the saved high
-            // surrogate (before clearing it) and write at byteOff so the
-            // bytes are counted in the return value.
-            encodeSurrogatePair(highHalfZoneCode, lowChar, output, byteOff);
-            byteOff += 4;
-            charOff++;
-            highHalfZoneCode = 0;
-        }
-        while (charOff < inEnd) {
-            inputChar = input[charOff];
-            if (inputChar < 0x80) {
-                outputByte[0] = (byte)inputChar;
-                inputSize = 1;
-                outputSize = 1;
-            } else if (inputChar < 0x800) {
-                outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f));
-                outputByte[1] = (byte)(0x80 | (inputChar & 0x3f));
-                inputSize = 1;
-                outputSize = 2;
-            } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
-                // this is <high-half zone code> in UTF-16
-                if (charOff + 1 >= inEnd) {
-                    // NOTE(review): charOff is left pointing at the saved
-                    // surrogate; confirm callers do not resubmit it.
-                    highHalfZoneCode = inputChar;
-                    break;
-                }
-                // check next char is valid <low-half zone code>
-                char lowChar = input[charOff + 1];
-                if (lowChar < 0xdc00 || lowChar > 0xdfff) {
-                    badInputLength = 1;
-                    throw new MalformedInputException();
-                }
-                encodeSurrogatePair(inputChar, lowChar, outputByte, 0);
-                outputSize = 4;
-                inputSize = 2;
-            } else {
-                // Everything else in 0x0800-0xffff, including a low
-                // surrogate with no preceding high surrogate, takes the
-                // three-byte form.
-                outputByte[0] = (byte)(0xe0 | ((inputChar >> 12) & 0x0f));
-                outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f));
-                outputByte[2] = (byte)(0x80 | (inputChar & 0x3f));
-                inputSize = 1;
-                outputSize = 3;
-            }
-            // Check capacity before writing so a character is never emitted
-            // partially.
-            if (byteOff + outputSize > outEnd) {
-                throw new ConversionBufferFullException();
-            }
-            for (int i = 0; i < outputSize; i++) {
-                output[byteOff++] = outputByte[i];
-            }
-            charOff += inputSize;
-        }
-        return byteOff - outOff;
-    }
-    /**
-     * Combines a surrogate pair into one code point and stores its four-byte
-     * UTF-8 form in <code>dst[off..off+3]</code>. The caller must have
-     * validated both surrogates and the available space.
-     */
-    private static void encodeSurrogatePair(char high, char low,
-                                            byte[] dst, int off)
-    {
-        int ucs4 = (high - 0xd800) * 0x400 + (low - 0xdc00) + 0x10000;
-        dst[off]     = (byte)(0xf0 | ((ucs4 >> 18) & 0x07));
-        dst[off + 1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
-        dst[off + 2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
-        dst[off + 3] = (byte)(0x80 | (ucs4 & 0x3f));
-    }
-    /**
-     * Reports whether a character can be converted.
-     *
-     * @param ch the character in question
-     * @return always <code>true</code>
-     */
-    public boolean canConvert(char ch) {
-        return true;
-    }
-    /**
-     * Returns the largest number of bytes produced per input character.
-     * convert() emits at most three bytes for a single character and four
-     * bytes for a two-character surrogate pair, so three is the bound.
-     *
-     * @return 3
-     */
-    public int getMaxBytesPerChar() {
-        return 3;
-    }
-    /**
-     * Resets the offsets to zero and forgets any pending high surrogate.
-     */
-    public void reset() {
-        byteOff = charOff = 0;
-        highHalfZoneCode = 0;
-    }
-    /**
-     * Returns the name of the encoding this converter produces.
-     *
-     * @return the string "UTF8"
-     */
-    public String getCharacterEncoding() {
-        return "UTF8";
-    }
-}

changeset 10372	2f6d68f22eae
parent 10321	64f7ee2f31dd
parent 10371	7da2112e4236
child 10373	d4c5e59b82f8