jdk/src/share/classes/sun/io/ByteToCharUTF8.java
changeset 2 90ce3da70b43
child 4818 fd477db6c4ee
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/io/ByteToCharUTF8.java	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,200 @@
+/*
+ * Copyright 1996-1997 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+package sun.io;
+
+
+/**
+ * Converts UCS Transformation Format 8 (UTF-8) bytes to UTF-16 chars.
+ *
+ * See CharToByteUTF8.java for a description of the UTF-8 format.
+ */
+
+public class ByteToCharUTF8 extends ByteToCharConverter {
+
+    private int savedSize;
+    private byte[] savedBytes;
+
+    public ByteToCharUTF8() {
+        super();
+        savedSize = 0;
+        savedBytes = new byte[5];
+    }
+
+    public int flush(char[] output, int outStart, int outEnd)
+        throws MalformedInputException
+    {
+        if (savedSize != 0) {
+            savedSize = 0;
+            badInputLength = 0;
+            throw new MalformedInputException();
+        }
+        byteOff = charOff = 0;
+        return 0;
+    }
+
+    /**
+     * Character converson
+     */
+    public int convert(byte[] input, int inOff, int inEnd,
+                       char[] output, int outOff, int outEnd)
+        throws MalformedInputException, ConversionBufferFullException
+    {
+        int byte1, byte2, byte3, byte4;
+        char[] outputChar = new char[2];
+        int outputSize;
+        int byteOffAdjustment = 0;
+
+        if (savedSize != 0) {
+            byte[] newBuf;
+            newBuf = new byte[inEnd - inOff + savedSize];
+            for (int i = 0; i < savedSize; i++) {
+                newBuf[i] = savedBytes[i];
+            }
+            System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
+            input = newBuf;
+            inOff = 0;
+            inEnd = newBuf.length;
+            byteOffAdjustment = -savedSize;
+            savedSize = 0;
+        }
+
+        charOff = outOff;
+        byteOff = inOff;
+        int startByteOff;
+
+        while(byteOff < inEnd) {
+
+            startByteOff = byteOff;
+            byte1 = input[byteOff++] & 0xff;
+
+            if ((byte1 & 0x80) == 0){
+                outputChar[0] = (char)byte1;
+                outputSize = 1;
+            } else if ((byte1 & 0xe0) == 0xc0) {
+                if (byteOff >= inEnd) {
+                    savedSize = 1;
+                    savedBytes[0] = (byte)byte1;
+                    break;
+                }
+                byte2 = input[byteOff++] & 0xff;
+                if ((byte2 & 0xc0) != 0x80) {
+                    badInputLength = 2;
+                    byteOff += byteOffAdjustment;
+                    throw new MalformedInputException();
+                }
+                outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
+                outputSize = 1;
+            } else if ((byte1 & 0xf0) == 0xe0){
+                if (byteOff + 1 >= inEnd) {
+                        savedBytes[0] = (byte)byte1;
+                    if (byteOff >= inEnd) {
+                        savedSize = 1;
+                    } else {
+                        savedSize = 2;
+                        savedBytes[1] = (byte)input[byteOff++];
+                    }
+                    break;
+                }
+                byte2 = input[byteOff++] & 0xff;
+                byte3 = input[byteOff++] & 0xff;
+                if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
+                    badInputLength = 3;
+                    byteOff += byteOffAdjustment;
+                    throw new MalformedInputException();
+                }
+                outputChar[0] = (char)(((byte1 & 0x0f) << 12)
+                                       | ((byte2 & 0x3f) << 6)
+                                       | (byte3 & 0x3f));
+                outputSize = 1;
+            } else if ((byte1 & 0xf8) == 0xf0) {
+                if (byteOff + 2 >= inEnd) {
+                    savedBytes[0] = (byte)byte1;
+                    if (byteOff >= inEnd) {
+                        savedSize = 1;
+                    } else if (byteOff + 1 >= inEnd) {
+                        savedSize = 2;
+                        savedBytes[1] = (byte)input[byteOff++];
+                    } else {
+                        savedSize = 3;
+                        savedBytes[1] = (byte)input[byteOff++];
+                        savedBytes[2] = (byte)input[byteOff++];
+                    }
+                    break;
+                }
+                byte2 = input[byteOff++] & 0xff;
+                byte3 = input[byteOff++] & 0xff;
+                byte4 = input[byteOff++] & 0xff;
+                if ((byte2 & 0xc0) != 0x80 ||
+                    (byte3 & 0xc0) != 0x80 ||
+                    (byte4 & 0xc0) != 0x80) {
+                    badInputLength = 4;
+                    byteOff += byteOffAdjustment;
+                    throw new MalformedInputException();
+                }
+                // this byte sequence is UTF16 character
+                int ucs4 = (int)(0x07 & byte1) << 18 |
+                           (int)(0x3f & byte2) << 12 |
+                           (int)(0x3f & byte3) <<  6 |
+                           (int)(0x3f & byte4);
+                outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
+                outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
+                outputSize = 2;
+            } else {
+                badInputLength = 1;
+                byteOff += byteOffAdjustment;
+                throw new MalformedInputException();
+            }
+
+            if (charOff + outputSize > outEnd) {
+                byteOff = startByteOff;
+                byteOff += byteOffAdjustment;
+                throw new ConversionBufferFullException();
+            }
+
+            for (int i = 0; i < outputSize; i++) {
+                output[charOff + i] = outputChar[i];
+            }
+            charOff += outputSize;
+        }
+
+        byteOff += byteOffAdjustment;
+        return charOff - outOff;
+    }
+
+    /*
+     * Return the character set id
+     */
+    public String getCharacterEncoding() {
+        return "UTF8";
+    }
+
+    /*
+     *   Reset after finding bad input
+     */
+    public void reset() {
+        byteOff = charOff = 0;
+        savedSize = 0;
+    }
+}