jdk/src/share/classes/sun/io/CharToByteGB18030.java
changeset 2 90ce3da70b43
child 5506 202f599c92aa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/io/CharToByteGB18030.java	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,261 @@
+/*
+ * Copyright 2001-2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ */
+
+
+package sun.io;
+import sun.nio.cs.ext.GB18030;
+
+/**
+ * Converts UTF-16 chars to GB18030 bytes, producing one-, two- or four-byte
+ * sequences per character (or surrogate pair).
+ *
+ * A high surrogate that ends one input chunk is remembered in
+ * highHalfZoneCode and combined with the first char of the next
+ * convert() call.
+ */
+public class CharToByteGB18030 extends CharToByteConverter
+{
+
+    // High surrogate carried over from the previous convert() call,
+    // or 0 when nothing is pending.
+    private char highHalfZoneCode;
+    boolean flushed = true;
+
+    private final static int GB18030_SINGLE_BYTE = 1;
+    private final static int GB18030_DOUBLE_BYTE = 2;
+    private final static int GB18030_FOUR_BYTE = 3;
+    // Two-level encoder lookup tables obtained from the NIO GB18030 coder;
+    // see getGB18030() for how they are indexed.
+    private static short[] index1;
+    private static String[] index2;
+    // Output form selected for the character currently being encoded.
+    private int currentState;
+
+    /**
+     * Creates a converter whose substitution byte is '?' and which takes its
+     * lookup tables from a freshly created sun.nio.cs.ext.GB18030.
+     */
+    public CharToByteGB18030() {
+        GB18030 nioCoder = new GB18030();
+        currentState = GB18030_DOUBLE_BYTE;
+        subBytes = new byte[1];
+        subBytes[0] = (byte)'?';
+        // NOTE(review): these are static fields that every new instance
+        // re-assigns; presumably the tables are identical each time - confirm.
+        index1 = nioCoder.getEncoderIndex1();
+        index2 = nioCoder.getEncoderIndex2();
+    }
+
+    /**
+     * Ends a conversion. Writes no bytes; the output arguments are not used.
+     *
+     * @return always 0
+     * @throws MalformedInputException if a high surrogate is still pending;
+     *         the pending surrogate is discarded and badInputLength is 0
+     */
+    public int flush(byte[] output, int outStart, int outEnd)
+        throws MalformedInputException
+    {
+        if (highHalfZoneCode != 0) {
+            highHalfZoneCode = 0;
+            badInputLength = 0;
+            throw new MalformedInputException();
+        }
+        reset();
+        flushed = true;
+        return 0;
+    }
+
+    /**
+     * Zeroes byteOff and charOff and restores the initial state value.
+     * A pending high surrogate is not cleared here.
+     */
+    public void reset() {
+        byteOff = charOff = 0;
+        currentState = GB18030_DOUBLE_BYTE;
+    }
+
+    /**
+     * Tells whether a single char can be converted.
+     *
+     * @return false for any surrogate (U+D800..U+DFFF) and for
+     *         U+FFFE / U+FFFF, true for every other char
+     */
+    public boolean canConvert(char c) {
+        boolean surrogate = (c >= 0xd800 && c <= 0xdfff);
+        boolean illegal = (c >= 0xfffe);
+        return !(surrogate || illegal);
+    }
+
+    /**
+     * Character conversion.
+     *
+     * Encodes input[inOff..inEnd) into output[outOff..outEnd). charOff and
+     * byteOff are set to inOff and outOff on entry and advanced as chars
+     * are read and bytes are written.
+     *
+     * If the previous call ended on a high surrogate, the first char of
+     * this call must be a low surrogate; the pair is encoded before the
+     * main loop runs. If this call has no input at all, the high surrogate
+     * stays pending and 0 is returned.
+     *
+     * @return the number of bytes written to output
+     * @throws UnknownCharacterException if the lookup table has no mapping
+     *         for a char and substitution mode is off
+     * @throws MalformedInputException on an unpaired surrogate
+     * @throws ConversionBufferFullException if output has no room for the
+     *         next sequence
+     */
+    public int convert(char[] input, int inOff, int inEnd,
+                       byte[] output, int outOff, int outEnd)
+        throws UnknownCharacterException, MalformedInputException,
+               ConversionBufferFullException
+    {
+        int linearDiffValue = 0;
+        int hiByte = 0 , loByte = 0;  // low and high order bytes
+        char inputChar;  // Input character to be converted
+        charOff = inOff;
+        byteOff = outOff;
+
+        flushed = false;
+
+        if (highHalfZoneCode != 0) {
+            if (inOff >= inEnd) {
+                // No char to pair with yet: keep the high surrogate pending
+                // instead of reading outside the caller's range.
+                return 0;
+            }
+
+            char lowHalf = input[inOff];
+            if (lowHalf >= 0xDC00 && lowHalf <= 0xDFFF) {
+
+                // This is legal UTF16 sequence. Encode the pair right here
+                // so that charOff keeps indexing the caller's array; the
+                // earlier approach of copying into a temporary buffer left
+                // charOff at inOff - 1 and indexed that buffer with it.
+
+                if (byteOff + 4 > outEnd) {
+                    // Leave the high surrogate pending so the call can be
+                    // retried with more output space.
+                    throw new ConversionBufferFullException();
+                }
+                currentState = GB18030_FOUR_BYTE;
+                emitFourByteCode(output,
+                    linearFromSurrogates(highHalfZoneCode, lowHalf));
+                highHalfZoneCode = 0;
+                charOff++;
+            } else {
+                // This is illegal UTF16 sequence.
+                badInputLength = 0;
+                throw new MalformedInputException();
+            }
+        }
+
+        // Main encode loop
+        //
+        // NOTE(review): charOff is advanced as soon as a char is read, so
+        // it is already past the current char when an exception is thrown
+        // below - confirm this is what callers expect.
+
+        while (charOff < inEnd) {
+            inputChar = input[charOff++];
+
+            if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
+                // Is this the last character of the input?
+                if (charOff >= inEnd) {
+                    highHalfZoneCode = inputChar;
+                    break;
+                }
+
+                char previousChar = inputChar;
+                inputChar = input[charOff];
+
+                // Is there a low surrogate following?
+                if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
+                    charOff++;
+                    linearDiffValue =
+                        linearFromSurrogates(previousChar, inputChar);
+                    currentState = GB18030_FOUR_BYTE;
+                } else {
+                    // We have a malformed surrogate pair
+                    badInputLength = 1;
+                    throw new MalformedInputException();
+                }
+            }
+            // Is this an unaccompanied low surrogate?
+            else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
+                badInputLength = 1;
+                throw new MalformedInputException();
+            }
+
+            // Not part of a surrogate: 7-bit chars are copied through
+            else if (inputChar <= 0x007F) {
+                if (byteOff >= outEnd) {
+                   throw new ConversionBufferFullException();
+                }
+                currentState = GB18030_SINGLE_BYTE;
+                output[byteOff++] = (byte) inputChar;
+            }
+            // Table-driven ranges
+            else if (inputChar <= 0xA4C6 || inputChar >= 0xE000) {
+                int outByteVal = getGB18030(index1, index2, inputChar);
+
+                // 0xFFFD in the table marks "no mapping"
+                if (outByteVal == 0xFFFD ) {
+                    if (subMode) {
+                        if (byteOff >= outEnd) {
+                           throw new ConversionBufferFullException();
+                        } else {
+                            output[byteOff++] = subBytes[0];
+                            continue;
+                        }
+                    } else {
+                        badInputLength = 1;
+                        throw new UnknownCharacterException();
+                    }
+                }
+
+                hiByte = (outByteVal & 0xFF00) >> 8;
+                loByte = (outByteVal & 0xFF);
+
+                // Only used when the entry turns out to be a four-byte
+                // code (hiByte <= 0x80): the table value is rebased into
+                // a linear four-byte index.
+                linearDiffValue = (hiByte - 0x20) * 256 + loByte;
+
+                if (inputChar >= 0xE000 && inputChar < 0xF900)
+                        linearDiffValue += 0x82BD;
+                else if (inputChar >= 0xF900)
+                        linearDiffValue += 0x93A9;
+
+                if (hiByte > 0x80)
+                     currentState = GB18030_DOUBLE_BYTE;
+                else
+                     currentState = GB18030_FOUR_BYTE;
+            }
+            // Computed range: linear index is a fixed offset from the char
+            else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) {
+                linearDiffValue = inputChar - 0x5543;
+                currentState = GB18030_FOUR_BYTE;
+            }
+            else {
+                badInputLength = 1;
+                throw new MalformedInputException();
+            }
+
+            // Single bytes were already written in their branch above
+            if (currentState == GB18030_SINGLE_BYTE)
+                continue;
+
+            if (currentState == GB18030_DOUBLE_BYTE) {
+                if (byteOff + 2 > outEnd) {
+                    throw new ConversionBufferFullException();
+                }
+                output[byteOff++] = (byte)hiByte;
+                output[byteOff++] = (byte)loByte;
+            }
+            else { // Four Byte encoding
+                if (byteOff + 4 > outEnd) {
+                    throw new ConversionBufferFullException();
+                }
+                emitFourByteCode(output, linearDiffValue);
+            }
+        }
+        // Return number of bytes written to the output buffer.
+        return byteOff - outOff;
+    }
+
+    /**
+     * Maps a surrogate pair to its linear four-byte index. The constant
+     * 0x2E248 is the index that emitFourByteCode() turns into the bytes
+     * 90 30 81 30, so the first pair (D800, DC00) encodes to that sequence.
+     */
+    private static int linearFromSurrogates(char high, char low) {
+        return (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x2E248;
+    }
+
+    /**
+     * Writes a linear index as four bytes at byteOff and advances byteOff
+     * by four. The index is split into mixed-radix digits: bytes 1 and 3
+     * take 126 values starting at 0x81, bytes 2 and 4 take 10 values
+     * starting at 0x30. The caller must already have checked that four
+     * bytes fit.
+     */
+    private void emitFourByteCode(byte[] output, int linear) {
+        byte b4 = (byte)((linear % 10) + 0x30);
+        linear /= 10;
+        byte b3 = (byte)((linear % 126) + 0x81);
+        linear /= 126;
+        byte b2 = (byte)((linear % 10) + 0x30);
+        byte b1 = (byte)((linear / 10) + 0x81);
+        output[byteOff++] = b1;
+        output[byteOff++] = b2;
+        output[byteOff++] = b3;
+        output[byteOff++] = b4;
+    }
+
+
+    /**
+     * returns the maximum number of bytes needed to convert a char
+     */
+    public int getMaxBytesPerChar() {
+        return 4;
+    }
+
+
+    /**
+     * Return the character set ID
+     */
+    public String getCharacterEncoding() {
+        return "GB18030";
+    }
+
+    /**
+     * Two-level table lookup. The high byte of ch selects an entry of
+     * outerIndex, which is shifted left by 8 to form an offset; the bits
+     * above the low 12 of that offset pick a string in innerIndex, and its
+     * low 12 bits plus the low byte of ch pick the char within that string.
+     *
+     * @return the table value for ch (convert() treats 0xFFFD as unmapped)
+     */
+    private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) {
+        int offset = outerIndex[((ch & 0xff00) >> 8 )] << 8;
+
+        return innerIndex[offset >> 12].charAt((offset & 0xfff) + (ch & 0xff));
+    }
+
+}