6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
4891024: EUC-KR and JOHAB converters need to be updated to include two new characters
4287467: Character converter generator tool
Summary: Migrated some of the doublebyte charsets to the new implementation.
Reviewed-by: okutsu
/*
* Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.io;
/**
 * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter.
 *
 * Decodes one- to four-byte UTF-8 sequences. Four-byte sequences are
 * emitted as a UTF-16 surrogate pair. A multi-byte sequence that is cut
 * off by the end of the input is remembered and completed by the next
 * call to convert(); flush() reports it as malformed if it is never
 * completed.
 *
 * NOTE(review): byteOff, charOff and badInputLength are not declared in
 * this class; they are presumably inherited from ByteToCharConverter.
 *
 * see CharToByteUTF8.java about UTF-8 format
 */
public class ByteToCharUTF8 extends ByteToCharConverter {

    /** Number of valid bytes currently held in savedBytes (0 when none). */
    private int savedSize;

    /** Leading bytes of a sequence that was truncated by the end of input. */
    private byte[] savedBytes;

    public ByteToCharUTF8() {
        super();
        savedSize = 0;
        savedBytes = new byte[5];
    }

    /**
     * Finishes the conversion. If an incomplete multi-byte sequence is
     * still pending, it is discarded and reported as malformed input.
     *
     * @return always 0; no characters are written to output
     * @throws MalformedInputException if a truncated sequence was pending
     */
    public int flush(char[] output, int outStart, int outEnd)
        throws MalformedInputException
    {
        if (savedSize != 0) {
            savedSize = 0;
            badInputLength = 0;
            throw new MalformedInputException();
        }
        byteOff = charOff = 0;
        return 0;
    }

    /**
     * Character conversion.
     *
     * Converts the bytes input[inOff..inEnd) into chars stored at
     * output[outOff..outEnd). On return, and when an exception is thrown,
     * byteOff is expressed as an index into the caller's input array and
     * charOff as an index into output.
     *
     * @param input   bytes to decode
     * @param inOff   index of the first byte to decode
     * @param inEnd   index just past the last byte to decode
     * @param output  destination for the decoded chars
     * @param outOff  index of the first char to write
     * @param outEnd  index just past the last writable char
     * @return the number of chars written to output
     * @throws MalformedInputException if a lead byte is invalid, a
     *         continuation byte is missing, or a four-byte sequence encodes
     *         a value outside U+10000..U+10FFFF; badInputLength is set to
     *         the number of bytes examined
     * @throws ConversionBufferFullException if output has no room for the
     *         next character; byteOff then points at the first byte of
     *         that character
     */
    public int convert(byte[] input, int inOff, int inEnd,
                       char[] output, int outOff, int outEnd)
        throws MalformedInputException, ConversionBufferFullException
    {
        int byte1, byte2, byte3, byte4;
        char[] outputChar = new char[2];
        int outputSize;
        int byteOffAdjustment = 0;

        // Caller's view of the input, kept so that byteOff can be reported
        // relative to the caller's array even after input is swapped below.
        final int callerInOff = inOff;
        int carriedOver = 0;

        if (savedSize != 0) {
            // Prepend the bytes left over from the previous call so that the
            // truncated sequence can be decoded as one contiguous run.
            byte[] newBuf = new byte[inEnd - inOff + savedSize];
            System.arraycopy(savedBytes, 0, newBuf, 0, savedSize);
            System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);

            carriedOver = savedSize;
            // Maps an index in newBuf back to an index in the caller's
            // array: drop the prepended bytes, then re-add the caller's
            // starting offset.
            byteOffAdjustment = inOff - savedSize;

            input = newBuf;
            inOff = 0;
            inEnd = newBuf.length;
            savedSize = 0;
        }

        charOff = outOff;
        byteOff = inOff;
        int startByteOff;

        while (byteOff < inEnd) {
            startByteOff = byteOff;
            byte1 = input[byteOff++] & 0xff;

            if ((byte1 & 0x80) == 0) {
                // 0xxxxxxx: single-byte sequence
                outputChar[0] = (char)byte1;
                outputSize = 1;
            } else if ((byte1 & 0xe0) == 0xc0) {
                // 110xxxxx 10xxxxxx: two-byte sequence
                if (byteOff >= inEnd) {
                    savedSize = 1;
                    savedBytes[0] = (byte)byte1;
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80) {
                    throw malformed(2, byteOffAdjustment);
                }
                outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
                outputSize = 1;
            } else if ((byte1 & 0xf0) == 0xe0) {
                // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence
                if (byteOff + 1 >= inEnd) {
                    // Fewer than two continuation bytes remain: save what
                    // is available and wait for the next call.
                    savedBytes[0] = (byte)byte1;
                    if (byteOff >= inEnd) {
                        savedSize = 1;
                    } else {
                        savedSize = 2;
                        savedBytes[1] = input[byteOff++];
                    }
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                byte3 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
                    throw malformed(3, byteOffAdjustment);
                }
                outputChar[0] = (char)(((byte1 & 0x0f) << 12)
                                       | ((byte2 & 0x3f) << 6)
                                       | (byte3 & 0x3f));
                outputSize = 1;
            } else if ((byte1 & 0xf8) == 0xf0) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence
                if (byteOff + 2 >= inEnd) {
                    // Fewer than three continuation bytes remain.
                    savedBytes[0] = (byte)byte1;
                    if (byteOff >= inEnd) {
                        savedSize = 1;
                    } else if (byteOff + 1 >= inEnd) {
                        savedSize = 2;
                        savedBytes[1] = input[byteOff++];
                    } else {
                        savedSize = 3;
                        savedBytes[1] = input[byteOff++];
                        savedBytes[2] = input[byteOff++];
                    }
                    break;
                }
                byte2 = input[byteOff++] & 0xff;
                byte3 = input[byteOff++] & 0xff;
                byte4 = input[byteOff++] & 0xff;
                if ((byte2 & 0xc0) != 0x80 ||
                    (byte3 & 0xc0) != 0x80 ||
                    (byte4 & 0xc0) != 0x80) {
                    throw malformed(4, byteOffAdjustment);
                }
                int ucs4 = (0x07 & byte1) << 18 |
                           (0x3f & byte2) << 12 |
                           (0x3f & byte3) << 6 |
                           (0x3f & byte4);
                // Only U+10000..U+10FFFF can be expressed as a surrogate
                // pair. Anything else (an overlong form, or a value past
                // the end of Unicode) would make the arithmetic below
                // produce chars that are not surrogates.
                if (ucs4 < 0x10000 || ucs4 > 0x10ffff) {
                    throw malformed(4, byteOffAdjustment);
                }
                // this byte sequence is UTF16 character
                outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
                outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
                outputSize = 2;
            } else {
                // Stray continuation byte or a lead byte of the form
                // 11111xxx.
                throw malformed(1, byteOffAdjustment);
            }

            if (charOff + outputSize > outEnd) {
                if (startByteOff < carriedOver) {
                    // The character that does not fit starts in the bytes
                    // carried over from the previous call. Those bytes are
                    // not part of the caller's array, so keep them pending
                    // (savedBytes still holds them) and report that no
                    // caller bytes were consumed.
                    savedSize = carriedOver;
                    byteOff = callerInOff;
                } else {
                    byteOff = startByteOff + byteOffAdjustment;
                }
                throw new ConversionBufferFullException();
            }

            for (int i = 0; i < outputSize; i++) {
                output[charOff + i] = outputChar[i];
            }
            charOff += outputSize;
        }

        byteOff += byteOffAdjustment;
        return charOff - outOff;
    }

    /*
     * Records the length of the offending input, translates byteOff back
     * into the caller's index space and builds the exception to throw.
     */
    private MalformedInputException malformed(int badLength, int adjustment) {
        badInputLength = badLength;
        byteOff += adjustment;
        return new MalformedInputException();
    }

    /*
     * Return the character set id
     */
    public String getCharacterEncoding() {
        return "UTF8";
    }

    /*
     * Reset after finding bad input: clears the offsets and drops any
     * pending truncated sequence.
     */
    public void reset() {
        byteOff = charOff = 0;
        savedSize = 0;
    }
}