jdk-sandbox: jdk/src/share/classes/sun/io/CharToByteUTF8.java@90ce3da70b43 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	7	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	9	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
90ce3da70b43 Initial load duke parents: diff changeset	21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	22	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	23	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25	package sun.io;
90ce3da70b43 Initial load duke parents: diff changeset	26
90ce3da70b43 Initial load duke parents: diff changeset	27
90ce3da70b43 Initial load duke parents: diff changeset	28	/**
90ce3da70b43 Initial load duke parents: diff changeset	29	* UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter
90ce3da70b43 Initial load duke parents: diff changeset	30	* It's represented like below.
90ce3da70b43 Initial load duke parents: diff changeset	31	*
90ce3da70b43 Initial load duke parents: diff changeset	32	* # Bits Bit pattern
90ce3da70b43 Initial load duke parents: diff changeset	33	* 1 7 0xxxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	34	* 2 11 110xxxxx 10xxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	35	* 3 16 1110xxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	36	* 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	37	* 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	38	* 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load duke parents: diff changeset	39	*
90ce3da70b43 Initial load duke parents: diff changeset	40	* UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
90ce3da70b43 Initial load duke parents: diff changeset	41	*/
90ce3da70b43 Initial load duke parents: diff changeset	42
90ce3da70b43 Initial load duke parents: diff changeset	43	public class CharToByteUTF8 extends CharToByteConverter {
90ce3da70b43 Initial load duke parents: diff changeset	44
90ce3da70b43 Initial load duke parents: diff changeset	45	private char highHalfZoneCode;
90ce3da70b43 Initial load duke parents: diff changeset	46
90ce3da70b43 Initial load duke parents: diff changeset	47	public int flush(byte[] output, int outStart, int outEnd)
90ce3da70b43 Initial load duke parents: diff changeset	48	throws MalformedInputException
90ce3da70b43 Initial load duke parents: diff changeset	49	{
90ce3da70b43 Initial load duke parents: diff changeset	50	if (highHalfZoneCode != 0) {
90ce3da70b43 Initial load duke parents: diff changeset	51	highHalfZoneCode = 0;
90ce3da70b43 Initial load duke parents: diff changeset	52	badInputLength = 0;
90ce3da70b43 Initial load duke parents: diff changeset	53	throw new MalformedInputException();
90ce3da70b43 Initial load duke parents: diff changeset	54	}
90ce3da70b43 Initial load duke parents: diff changeset	55	byteOff = charOff = 0;
90ce3da70b43 Initial load duke parents: diff changeset	56	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	57	}
90ce3da70b43 Initial load duke parents: diff changeset	58
90ce3da70b43 Initial load duke parents: diff changeset	59	/**
90ce3da70b43 Initial load duke parents: diff changeset	60	* Character conversion
90ce3da70b43 Initial load duke parents: diff changeset	61	*/
90ce3da70b43 Initial load duke parents: diff changeset	62	public int convert(char[] input, int inOff, int inEnd,
90ce3da70b43 Initial load duke parents: diff changeset	63	byte[] output, int outOff, int outEnd)
90ce3da70b43 Initial load duke parents: diff changeset	64	throws ConversionBufferFullException, MalformedInputException
90ce3da70b43 Initial load duke parents: diff changeset	65	{
90ce3da70b43 Initial load duke parents: diff changeset	66	char inputChar;
90ce3da70b43 Initial load duke parents: diff changeset	67	byte[] outputByte = new byte[6];
90ce3da70b43 Initial load duke parents: diff changeset	68	int inputSize;
90ce3da70b43 Initial load duke parents: diff changeset	69	int outputSize;
90ce3da70b43 Initial load duke parents: diff changeset	70
90ce3da70b43 Initial load duke parents: diff changeset	71	charOff = inOff;
90ce3da70b43 Initial load duke parents: diff changeset	72	byteOff = outOff;
90ce3da70b43 Initial load duke parents: diff changeset	73
90ce3da70b43 Initial load duke parents: diff changeset	74	if (highHalfZoneCode != 0) {
90ce3da70b43 Initial load duke parents: diff changeset	75	inputChar = highHalfZoneCode;
90ce3da70b43 Initial load duke parents: diff changeset	76	highHalfZoneCode = 0;
90ce3da70b43 Initial load duke parents: diff changeset	77	if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
90ce3da70b43 Initial load duke parents: diff changeset	78	// This is legal UTF16 sequence.
90ce3da70b43 Initial load duke parents: diff changeset	79	int ucs4 = (highHalfZoneCode - 0xd800) * 0x400
90ce3da70b43 Initial load duke parents: diff changeset	80	+ (input[inOff] - 0xdc00) + 0x10000;
90ce3da70b43 Initial load duke parents: diff changeset	81	output[0] = (byte)(0xf0 \| ((ucs4 >> 18)) & 0x07);
90ce3da70b43 Initial load duke parents: diff changeset	82	output[1] = (byte)(0x80 \| ((ucs4 >> 12) & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	83	output[2] = (byte)(0x80 \| ((ucs4 >> 6) & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	84	output[3] = (byte)(0x80 \| (ucs4 & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	85	charOff++;
90ce3da70b43 Initial load duke parents: diff changeset	86	highHalfZoneCode = 0;
90ce3da70b43 Initial load duke parents: diff changeset	87	} else {
90ce3da70b43 Initial load duke parents: diff changeset	88	// This is illegal UTF16 sequence.
90ce3da70b43 Initial load duke parents: diff changeset	89	badInputLength = 0;
90ce3da70b43 Initial load duke parents: diff changeset	90	throw new MalformedInputException();
90ce3da70b43 Initial load duke parents: diff changeset	91	}
90ce3da70b43 Initial load duke parents: diff changeset	92	}
90ce3da70b43 Initial load duke parents: diff changeset	93
90ce3da70b43 Initial load duke parents: diff changeset	94	while(charOff < inEnd) {
90ce3da70b43 Initial load duke parents: diff changeset	95	inputChar = input[charOff];
90ce3da70b43 Initial load duke parents: diff changeset	96	if (inputChar < 0x80) {
90ce3da70b43 Initial load duke parents: diff changeset	97	outputByte[0] = (byte)inputChar;
90ce3da70b43 Initial load duke parents: diff changeset	98	inputSize = 1;
90ce3da70b43 Initial load duke parents: diff changeset	99	outputSize = 1;
90ce3da70b43 Initial load duke parents: diff changeset	100	} else if (inputChar < 0x800) {
90ce3da70b43 Initial load duke parents: diff changeset	101	outputByte[0] = (byte)(0xc0 \| ((inputChar >> 6) & 0x1f));
90ce3da70b43 Initial load duke parents: diff changeset	102	outputByte[1] = (byte)(0x80 \| (inputChar & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	103	inputSize = 1;
90ce3da70b43 Initial load duke parents: diff changeset	104	outputSize = 2;
90ce3da70b43 Initial load duke parents: diff changeset	105	} else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
90ce3da70b43 Initial load duke parents: diff changeset	106	// this is <high-half zone code> in UTF-16
90ce3da70b43 Initial load duke parents: diff changeset	107	if (charOff + 1 >= inEnd) {
90ce3da70b43 Initial load duke parents: diff changeset	108	highHalfZoneCode = inputChar;
90ce3da70b43 Initial load duke parents: diff changeset	109	break;
90ce3da70b43 Initial load duke parents: diff changeset	110	}
90ce3da70b43 Initial load duke parents: diff changeset	111	// check next char is valid <low-half zone code>
90ce3da70b43 Initial load duke parents: diff changeset	112	char lowChar = input[charOff + 1];
90ce3da70b43 Initial load duke parents: diff changeset	113	if (lowChar < 0xdc00 \|\| lowChar > 0xdfff) {
90ce3da70b43 Initial load duke parents: diff changeset	114	badInputLength = 1;
90ce3da70b43 Initial load duke parents: diff changeset	115	throw new MalformedInputException();
90ce3da70b43 Initial load duke parents: diff changeset	116	}
90ce3da70b43 Initial load duke parents: diff changeset	117	int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)
90ce3da70b43 Initial load duke parents: diff changeset	118	+ 0x10000;
90ce3da70b43 Initial load duke parents: diff changeset	119	outputByte[0] = (byte)(0xf0 \| ((ucs4 >> 18)) & 0x07);
90ce3da70b43 Initial load duke parents: diff changeset	120	outputByte[1] = (byte)(0x80 \| ((ucs4 >> 12) & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	121	outputByte[2] = (byte)(0x80 \| ((ucs4 >> 6) & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	122	outputByte[3] = (byte)(0x80 \| (ucs4 & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	123	outputSize = 4;
90ce3da70b43 Initial load duke parents: diff changeset	124	inputSize = 2;
90ce3da70b43 Initial load duke parents: diff changeset	125	} else {
90ce3da70b43 Initial load duke parents: diff changeset	126	outputByte[0] = (byte)(0xe0 \| ((inputChar >> 12)) & 0x0f);
90ce3da70b43 Initial load duke parents: diff changeset	127	outputByte[1] = (byte)(0x80 \| ((inputChar >> 6) & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	128	outputByte[2] = (byte)(0x80 \| (inputChar & 0x3f));
90ce3da70b43 Initial load duke parents: diff changeset	129	inputSize = 1;
90ce3da70b43 Initial load duke parents: diff changeset	130	outputSize = 3;
90ce3da70b43 Initial load duke parents: diff changeset	131	}
90ce3da70b43 Initial load duke parents: diff changeset	132	if (byteOff + outputSize > outEnd) {
90ce3da70b43 Initial load duke parents: diff changeset	133	throw new ConversionBufferFullException();
90ce3da70b43 Initial load duke parents: diff changeset	134	}
90ce3da70b43 Initial load duke parents: diff changeset	135	for (int i = 0; i < outputSize; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	136	output[byteOff++] = outputByte[i];
90ce3da70b43 Initial load duke parents: diff changeset	137	}
90ce3da70b43 Initial load duke parents: diff changeset	138	charOff += inputSize;
90ce3da70b43 Initial load duke parents: diff changeset	139	}
90ce3da70b43 Initial load duke parents: diff changeset	140	return byteOff - outOff;
90ce3da70b43 Initial load duke parents: diff changeset	141	}
90ce3da70b43 Initial load duke parents: diff changeset	142
90ce3da70b43 Initial load duke parents: diff changeset	143	public boolean canConvert(char ch) {
90ce3da70b43 Initial load duke parents: diff changeset	144	return true;
90ce3da70b43 Initial load duke parents: diff changeset	145	}
90ce3da70b43 Initial load duke parents: diff changeset	146
90ce3da70b43 Initial load duke parents: diff changeset	147	public int getMaxBytesPerChar() {
90ce3da70b43 Initial load duke parents: diff changeset	148	return 3;
90ce3da70b43 Initial load duke parents: diff changeset	149	}
90ce3da70b43 Initial load duke parents: diff changeset	150
90ce3da70b43 Initial load duke parents: diff changeset	151	public void reset() {
90ce3da70b43 Initial load duke parents: diff changeset	152	byteOff = charOff = 0;
90ce3da70b43 Initial load duke parents: diff changeset	153	highHalfZoneCode = 0;
90ce3da70b43 Initial load duke parents: diff changeset	154	}
90ce3da70b43 Initial load duke parents: diff changeset	155
90ce3da70b43 Initial load duke parents: diff changeset	156	public String getCharacterEncoding() {
90ce3da70b43 Initial load duke parents: diff changeset	157	return "UTF8";
90ce3da70b43 Initial load duke parents: diff changeset	158	}
90ce3da70b43 Initial load duke parents: diff changeset	159	}

author	duke
	Sat, 01 Dec 2007 00:00:00 +0000
changeset 2	90ce3da70b43
child 5506	202f599c92aa
permissions	-rw-r--r--