jdk/src/share/classes/sun/nio/cs/CharsetMapping.java
changeset 400 9f3afee24320
child 688 5ebcfc3cebe4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/nio/cs/CharsetMapping.java	Thu Apr 10 14:45:58 2008 -0700
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.nio.cs;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.*;
+import java.security.*;
+
+public class CharsetMapping {
+    public final static char UNMAPPABLE_DECODING = '\uFFFD';
+    public final static int  UNMAPPABLE_ENCODING = -1;
+
+    char[] b2cSB;                //singlebyte b->c
+    char[] b2cDB1;               //doublebyte b->c /db1
+    char[] b2cDB2;               //doublebyte b->c /db2
+
+    int    b2Min, b2Max;         //min/max(start/end) value of 2nd byte
+    int    b1MinDB1, b1MaxDB1;   //min/Max(start/end) value of 1st byte/db1
+    int    b1MinDB2, b1MaxDB2;   //min/Max(start/end) value of 1st byte/db2
+    int    dbSegSize;            //number of table entries per 1st byte
+
+    char[] c2b;                  //c->b, a slot without mapping holds 0xffff
+    char[] c2bIndex;             //offset into c2b per high byte, or 0xffff
+
+    // Supplementary, each table holds its sorted keys first, then the values
+    char[] b2cSupp;
+    char[] c2bSupp;
+
+    // Composite, b2cComp is sorted by bs and c2bComp by (cp, cp2)
+    Entry[] b2cComp;
+    Entry[] c2bComp;
+
+    // Returns the char for the single byte b; b has to be an index of b2cSB.
+    public char decodeSingle(int b) {
+        return b2cSB[b];
+    }
+
+    // Looks b1/b2 up in db1, else in db2; UNMAPPABLE_DECODING if not covered.
+    public char decodeDouble(int b1, int b2) {
+        // b2Max is a valid 2nd byte itself: a segment has b2Max - b2Min + 1
+        // entries and readDB() creates an encoding for each of them
+        if (b2 < b2Min || b2 > b2Max)
+            return UNMAPPABLE_DECODING;
+        b2 -= b2Min;
+        if (b2cDB1 != null && b1 >= b1MinDB1 && b1 <= b1MaxDB1)
+            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + b2];
+        if (b2cDB2 != null && b1 >= b1MinDB2 && b1 <= b1MaxDB2)
+            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + b2];
+        return UNMAPPABLE_DECODING;
+    }
+
+    // Stores the surrogate pair of the double-byte value db in cc, null if
+    // there is none. For jis0213 all supplementary characters are in 0x2xxxx
+    // range, so only the xxxx part is now stored (should be the codepoint).
+    public char[] decodeSurrogate(int db, char[] cc) {
+        if (b2cSupp == null)
+            return null;
+        int end = b2cSupp.length / 2;
+        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
+        if (i < 0)
+            return null;
+        Character.toChars(b2cSupp[end + i] + SUPP_BASE, cc, 0);
+        return cc;
+    }
+
+    // Stores base char and combining char for the byte sequence comp.bs in
+    // cc and returns cc, or returns null if there is no such composite.
+    public char[] decodeComposite(Entry comp, char[] cc) {
+        int i = (b2cComp == null) ? -1 : findBytes(b2cComp, comp);
+        if (i < 0)
+            return null;
+        cc[0] = (char)b2cComp[i].cp;
+        cc[1] = (char)b2cComp[i].cp2;
+        return cc;
+    }
+
+    // Returns the byte value(s) of ch, or UNMAPPABLE_ENCODING.
+    public int encodeChar(char ch) {
+        int index = c2bIndex[ch >> 8];
+        if (index == NO_C2B)
+            return UNMAPPABLE_ENCODING;
+        // a slot without mapping holds 0xffff, never return that as bytes
+        char b = c2b[index + (ch & 0xff)];
+        return (b == NO_C2B) ? UNMAPPABLE_ENCODING : b;
+    }
+
+    // Returns the double-byte value of the pair hi/lo, or UNMAPPABLE_ENCODING.
+    public int encodeSurrogate(char hi, char lo) {
+        int cp = Character.toCodePoint(hi, lo);
+        // only xxxx of 0x2xxxx is stored, other planes must not alias it
+        if (c2bSupp == null || (cp & ~0xffff) != SUPP_BASE)
+            return UNMAPPABLE_ENCODING;
+        int end = c2bSupp.length / 2;
+        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
+        return (i >= 0) ? c2bSupp[end + i] : UNMAPPABLE_ENCODING;
+    }
+
+    // Tells whether comp.cp is the base char of at least one composite.
+    public boolean isCompositeBase(Entry comp) {
+        if (c2bComp == null || comp.cp < 0xe6 || comp.cp > 0x31f7)
+            return false;
+        return findCP(c2bComp, comp) >= 0;
+    }
+
+    // Returns the byte sequence of comp.cp + comp.cp2, or UNMAPPABLE_ENCODING.
+    public int encodeComposite(Entry comp) {
+        int i = (c2bComp == null) ? -1 : findComp(c2bComp, comp);
+        return (i >= 0) ? c2bComp[i].bs : UNMAPPABLE_ENCODING;
+    }
+
+    // init the CharsetMapping object from the .dat binary file and close "is";
+    // null if an IOException occurs, RuntimeException if the data is corrupted
+    public static CharsetMapping get(final InputStream is) {
+        return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {
+            public CharsetMapping run() {
+                return new CharsetMapping().load(is);
+            }
+        });
+    }
+
+    // a composite mapping, also the key type of the composite lookups
+    public static class Entry {
+        public int bs;   //byte sequence reps
+        public int cp;   //Unicode codepoint
+        public int cp2;  //CC of composite
+    }
+
+    // three-way compare, "a - b" can overflow for arbitrary field values
+    private static int cmp(int a, int b) {
+        return (a < b) ? -1 : ((a == b) ? 0 : 1);
+    }
+
+    static Comparator<Entry> comparatorBytes = new Comparator<Entry>() {
+        public int compare(Entry m1, Entry m2) { return cmp(m1.bs, m2.bs); }
+    };
+
+    static Comparator<Entry> comparatorCP = new Comparator<Entry>() {
+        public int compare(Entry m1, Entry m2) { return cmp(m1.cp, m2.cp); }
+    };
+
+    static Comparator<Entry> comparatorComp = new Comparator<Entry>() {
+        public int compare(Entry m1, Entry m2) {
+            int v = cmp(m1.cp, m2.cp);
+            return (v != 0) ? v : cmp(m1.cp2, m2.cp2);
+        }
+    };
+
+    // binary searches, "a" has to be sorted by the comparator that is used
+    static int findBytes(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
+    }
+    static int findCP(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
+    }
+    static int findComp(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
+    }
+
+    /*****************************************************************************/
+    // tags of different charset mapping tables
+    private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
+    private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
+    private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
+    private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
+    private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
+    private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
+    private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb
+
+    private final static int    SUPP_BASE = 0x20000;  // added to b2cSupp values
+    private final static char   NO_C2B    = (char)UNMAPPABLE_ENCODING; // 0xffff
+    private final static String CORRUPTED = "Corrupted data file";
+
+    // Fills bb[0..N), returns false if the stream ends before that.
+    private static final boolean readNBytes(InputStream in, byte[] bb, int N)
+        throws IOException
+    {
+        int off = 0;
+        while (off < N) {
+            int n = in.read(bb, off, N - off);
+            if (n == -1)
+                return false;
+            off += n;
+        }
+        return true;
+    }
+
+    int off = 0;                 // read position in bb
+    byte[] bb;                   // the data being parsed by load()
+
+    // Returns the big-endian 16 bit value at off and moves off behind it.
+    private int readChar() {
+        int v = ((bb[off] & 0xff) << 8) | (bb[off + 1] & 0xff);
+        off += 2;
+        return v;
+    }
+
+    private char[] readCharArray() {
+        // first 2 bytes are the number of "chars" stored in this table
+        char[] cc = new char[readChar()];
+        for (int i = 0; i < cc.length; i++)
+            cc[i] = (char)readChar();
+        return cc;
+    }
+
+    // Makes b the encoding of c. A char whose high byte has no slots in c2b
+    // is skipped, encodeChar() reports it as unmappable anyway.
+    private void putC2B(char c, int b) {
+        if (c2bIndex == null)              // no MAP_INDEXC2B table so far
+            throw new RuntimeException(CORRUPTED);
+        int index = c2bIndex[c >> 8];
+        if (index != NO_C2B)
+            c2b[index + (c & 0xff)] = (char)b;
+    }
+
+    void readSINGLEBYTE() {
+        b2cSB = readCharArray();
+        for (int i = 0; i < b2cSB.length; i++) {
+            if (b2cSB[i] != UNMAPPABLE_DECODING)
+                putC2B(b2cSB[i], i);
+        }
+    }
+
+    void readINDEXC2B() {
+        char[] map = readCharArray();
+        // c2b ends 256 slots after the largest offset, 0xffff is no offset
+        int max = -1;
+        for (int i = 0; i < map.length; i++) {
+            if (map[i] != NO_C2B && map[i] > max)
+                max = map[i];
+        }
+        if (c2b == null && max >= 0) {
+            c2b = new char[max + 256];
+            Arrays.fill(c2b, NO_C2B);
+        }
+        c2bIndex = map;
+    }
+
+    char[] readDB(int b1Min, int b2Min, int segSize) {
+        char[] map = readCharArray();
+        for (int i = 0; i < map.length; i++) {
+            if (map[i] != UNMAPPABLE_DECODING)
+                putC2B(map[i], (i / segSize + b1Min) * 256 + i % segSize + b2Min);
+        }
+        return map;
+    }
+
+    // Reads b2Min and b2Max, which follow the 1st byte range, and the table.
+    private char[] readDB(int b1Min) {
+        b2Min = readChar();
+        b2Max = readChar();
+        dbSegSize = b2Max - b2Min + 1;
+        return readDB(b1Min, b2Min, dbSegSize);
+    }
+
+    void readDOUBLEBYTE1() {
+        b1MinDB1 = readChar();
+        b1MaxDB1 = readChar();
+        b2cDB1 = readDB(b1MinDB1);
+    }
+
+    void readDOUBLEBYTE2() {
+        b1MinDB2 = readChar();
+        b1MaxDB2 = readChar();
+        b2cDB2 = readDB(b1MinDB2);
+    }
+
+    void readCOMPOSITE() {
+        char[] map = readCharArray();
+        int mLen = map.length/3;
+        b2cComp = new Entry[mLen];
+        c2bComp = new Entry[mLen];
+        for (int i = 0, j= 0; i < mLen; i++) {
+            Entry m = new Entry();
+            m.bs = map[j++];
+            m.cp = map[j++];
+            m.cp2 = map[j++];
+            b2cComp[i] = m;
+            c2bComp[i] = m;
+        }
+        // both get binary searched, so do not rely on the order in the file
+        Arrays.sort(b2cComp, comparatorBytes);
+        Arrays.sort(c2bComp, comparatorComp);
+    }
+
+    // Reads the 4 byte data size and then that many bytes of tagged tables.
+    CharsetMapping load(InputStream in) {
+        try {
+            int len;
+            try {
+                bb = new byte[4];
+                off = 0;
+                if (!readNBytes(in, bb, 4))    // a short header is an error too
+                    throw new RuntimeException(CORRUPTED);
+                len = (readChar() << 16) | readChar();
+                bb = new byte[len];
+                off = 0;
+                if (!readNBytes(in, bb, len))
+                    throw new RuntimeException(CORRUPTED);
+            } finally {
+                in.close();                    // on failure as well
+            }
+            while (off < len) {
+                switch (readChar()) {
+                case MAP_INDEXC2B:       readINDEXC2B();              break;
+                case MAP_SINGLEBYTE:     readSINGLEBYTE();            break;
+                case MAP_DOUBLEBYTE1:    readDOUBLEBYTE1();           break;
+                case MAP_DOUBLEBYTE2:    readDOUBLEBYTE2();           break;
+                case MAP_SUPPLEMENT:     b2cSupp = readCharArray();   break;
+                case MAP_SUPPLEMENT_C2B: c2bSupp = readCharArray();   break;
+                case MAP_COMPOSITE:      readCOMPOSITE();             break;
+                default:
+                    throw new RuntimeException(CORRUPTED);
+                }
+            }
+            bb = null;
+            return this;
+        } catch (IOException x) {
+            x.printStackTrace();
+            return null;
+        }
+    }
+}