--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/nio/cs/CharsetMapping.java Thu Apr 10 14:45:58 2008 -0700
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.nio.cs;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.*;
+import java.security.*;
+
+/**
+ * Byte <-> char mapping tables of a charset, loaded from a binary
+ * ".dat" stream by {@link #get(InputStream)}.
+ *
+ * The stream starts with a 4-byte big-endian length, followed by a
+ * sequence of tables. Each table starts with a 2-byte tag (one of the
+ * MAP_xxx values below). The c->b index table has to precede the
+ * single-byte and double-byte tables, because reading those fills the
+ * c->b table through that index.
+ */
+public class CharsetMapping {
+    /** Returned by the decode methods when a byte sequence has no mapping. */
+    public final static char UNMAPPABLE_DECODING = '\uFFFD';
+    /** Returned by the encode methods when a character has no mapping. */
+    public final static int  UNMAPPABLE_ENCODING = -1;
+
+    // c2bIndex marker for a 256-char row without any mapping; unmapped c2b
+    // slots hold the same value, as (char)UNMAPPABLE_ENCODING == 0xffff.
+    private final static int NO_MAPPING = 0xffff;
+
+    char[] b2cSB;                //singlebyte b->c
+    char[] b2cDB1;               //doublebyte b->c /db1
+    char[] b2cDB2;               //doublebyte b->c /db2
+
+    int b2Min, b2Max;            //min/max(start/end) value of 2nd byte
+    int b1MinDB1, b1MaxDB1;      //min/max(start/end) value of 1st byte/db1
+    int b1MinDB2, b1MaxDB2;      //min/max(start/end) value of 1st byte/db2
+    int dbSegSize;               //2nd-byte slots per 1st byte (b2Max inclusive)
+
+    char[] c2b;                  //c->b values, addressed through c2bIndex
+    char[] c2bIndex;             //offset into c2b for each high byte of a char
+
+    // Supplementary: the keys are in the first half of each array (binary
+    // searched, so they must be sorted), the values in the second half
+    char[] b2cSupp;
+    char[] c2bSupp;
+
+    // Composite
+    Entry[] b2cComp;             //sorted by bs
+    Entry[] c2bComp;             //sorted by (cp, cp2)
+
+    /**
+     * Decodes a single byte.
+     *
+     * @param b the byte value; must be a valid index into the single-byte
+     *          table
+     * @return the char stored for b (UNMAPPABLE_DECODING marks a slot
+     *         without mapping)
+     */
+    public char decodeSingle(int b) {
+        return b2cSB[b];
+    }
+
+    /**
+     * Decodes the double-byte sequence (b1, b2).
+     *
+     * @param b1 the 1st byte
+     * @param b2 the 2nd byte
+     * @return the mapped char, or UNMAPPABLE_DECODING if (b1, b2) is outside
+     *         of the loaded double-byte tables
+     */
+    public char decodeDouble(int b1, int b2) {
+        // b2Max is inclusive (dbSegSize == b2Max - b2Min + 1), so the last
+        // 2nd-byte value has to be accepted as well
+        if (b2 < b2Min || b2 > b2Max)
+            return UNMAPPABLE_DECODING;
+        int col = b2 - b2Min;
+        // a table that is not part of the data file is simply skipped
+        if (b2cDB1 != null && b1 >= b1MinDB1 && b1 <= b1MaxDB1)
+            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + col];
+        if (b2cDB2 != null && b1 >= b1MinDB2 && b1 <= b1MaxDB2)
+            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + col];
+        return UNMAPPABLE_DECODING;
+    }
+
+    // for jis0213 all supplementary characters are in 0x2xxxx range,
+    // so only the xxxx part is now stored, should actually store the
+    // codepoint value instead.
+    /**
+     * Decodes the double-byte value db into a supplementary character.
+     *
+     * @param db the double-byte value
+     * @param cc the array that receives the surrogate pair at index 0 and 1
+     * @return cc, or null if db is not mapped or no supplementary table
+     *         was loaded
+     */
+    public char[] decodeSurrogate(int db, char[] cc) {
+        // (char)db truncates, so a value beyond 16 bits can not be a key
+        if (b2cSupp == null || db < 0 || db > 0xffff)
+            return null;
+        int end = b2cSupp.length / 2;
+        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
+        if (i < 0)
+            return null;
+        Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
+        return cc;
+    }
+
+    /**
+     * Looks up the composite entry whose byte sequence is comp.bs.
+     *
+     * @param comp the key; only its bs field is used
+     * @param cc   the array that receives cp in cc[0] and cp2 in cc[1]
+     * @return cc, or null if comp.bs is not found or no composite table
+     *         was loaded
+     */
+    public char[] decodeComposite(Entry comp, char[] cc) {
+        int i = (b2cComp == null) ? -1 : findBytes(b2cComp, comp);
+        if (i < 0)
+            return null;
+        cc[0] = (char)b2cComp[i].cp;
+        cc[1] = (char)b2cComp[i].cp2;
+        return cc;
+    }
+
+    /**
+     * Encodes the char ch.
+     *
+     * @param ch the char to encode
+     * @return the byte value stored for ch, or UNMAPPABLE_ENCODING if there
+     *         is none
+     */
+    public int encodeChar(char ch) {
+        int index = c2bIndex[ch >> 8];
+        if (index == NO_MAPPING)
+            return UNMAPPABLE_ENCODING;
+        // c2b is a char[]: its filler reads back as 0xffff and has to be
+        // translated, otherwise it never equals UNMAPPABLE_ENCODING
+        int b = c2b[index + (ch & 0xff)];
+        return (b == NO_MAPPING) ? UNMAPPABLE_ENCODING : b;
+    }
+
+    /**
+     * Encodes the supplementary character made of the pair (hi, lo).
+     *
+     * @param hi the high surrogate
+     * @param lo the low surrogate
+     * @return the double-byte value, or UNMAPPABLE_ENCODING if the character
+     *         is not mapped or no supplementary table was loaded
+     */
+    public int encodeSurrogate(char hi, char lo) {
+        int cp = Character.toCodePoint(hi, lo);
+        // only the low 16 bits of 0x2xxxx code points are stored; without
+        // this check the other planes would alias the 0x2xxxx entries
+        if (c2bSupp == null || (cp >>> 16) != 0x2)
+            return UNMAPPABLE_ENCODING;
+        int end = c2bSupp.length / 2;
+        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
+        return (i >= 0) ? c2bSupp[end + i] : UNMAPPABLE_ENCODING;
+    }
+
+    /**
+     * Tells whether comp.cp is the cp of at least one composite entry.
+     * Only code points in the range 0xe6..0x31f7 are looked up.
+     *
+     * @param comp the key; only its cp field is used
+     */
+    public boolean isCompositeBase(Entry comp) {
+        if (c2bComp == null || comp.cp < 0xe6 || comp.cp > 0x31f7)
+            return false;
+        return findCP(c2bComp, comp) >= 0;
+    }
+
+    /**
+     * Encodes the composite (comp.cp, comp.cp2).
+     *
+     * @param comp the key; its cp and cp2 fields are used
+     * @return the byte sequence of the composite, or UNMAPPABLE_ENCODING if
+     *         there is none
+     */
+    public int encodeComposite(Entry comp) {
+        int i = (c2bComp == null) ? -1 : findComp(c2bComp, comp);
+        return (i >= 0) ? c2bComp[i].bs : UNMAPPABLE_ENCODING;
+    }
+
+    /**
+     * Inits a CharsetMapping object from the .dat binary file. The stream
+     * is read, and closed, inside of a privileged action.
+     *
+     * @param is the stream to read the tables from
+     * @return the loaded mapping, or null if reading failed with an
+     *         IOException
+     */
+    public static CharsetMapping get(final InputStream is) {
+        return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {
+            public CharsetMapping run() {
+                return new CharsetMapping().load(is);
+            }
+        });
+    }
+
+    /** One row of the composite table, also used as the lookup key. */
+    public static class Entry {
+        public int bs;   //byte sequence reps
+        public int cp;   //Unicode codepoint
+        public int cp2;  //CC of composite
+    }
+
+    // three-way comparison that can not overflow the way a subtraction does
+    private static int cmp(int x, int y) {
+        return (x < y) ? -1 : ((x == y) ? 0 : 1);
+    }
+
+    // orders entries by byte sequence
+    static final Comparator<Entry> comparatorBytes =
+        new Comparator<Entry>() {
+            public int compare(Entry m1, Entry m2) {
+                return cmp(m1.bs, m2.bs);
+            }
+        };
+
+    // orders entries by cp only
+    static final Comparator<Entry> comparatorCP =
+        new Comparator<Entry>() {
+            public int compare(Entry m1, Entry m2) {
+                return cmp(m1.cp, m2.cp);
+            }
+        };
+
+    // orders entries by cp, then by cp2
+    static final Comparator<Entry> comparatorComp =
+        new Comparator<Entry>() {
+            public int compare(Entry m1, Entry m2) {
+                int v = cmp(m1.cp, m2.cp);
+                return (v != 0) ? v : cmp(m1.cp2, m2.cp2);
+            }
+        };
+
+    // The find methods return the index of a matching entry, or a negative
+    // value if there is none; "a" has to be sorted by the comparator used.
+    static int findBytes(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
+    }
+
+    static int findCP(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
+    }
+
+    static int findComp(Entry[] a, Entry k) {
+        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
+    }
+
+    /*****************************************************************************/
+    // tags of different charset mapping tables
+    private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
+    private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
+    private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
+    private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
+    private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
+    private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
+    private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb
+
+    // fills bb[0..N); returns false if the stream ends before N bytes
+    private static final boolean readNBytes(InputStream in, byte[] bb, int N)
+        throws IOException
+    {
+        int off = 0;
+        while (N > 0) {
+            int n = in.read(bb, off, N);
+            if (n == -1)
+                return false;
+            N = N - n;
+            off += n;
+        }
+        return true;
+    }
+
+    // parsing state of load(): the raw table data and the read position
+    int off = 0;
+    byte[] bb;
+
+    // reads the next big-endian unsigned 16-bit value of bb, advancing off
+    private int readU16() {
+        int hi = bb[off++] & 0xff;
+        return (hi << 8) | (bb[off++] & 0xff);
+    }
+
+    private char[] readCharArray() {
+        // first 2 bytes are the number of "chars" stored in this table
+        int size = readU16();
+        char[] cc = new char[size];
+        for (int i = 0; i < size; i++)
+            cc[i] = (char)readU16();
+        return cc;
+    }
+
+    // records the mapping c -> b; a char whose c2bIndex row is marked as
+    // unmapped is skipped, encodeChar() reports it as unmappable anyway
+    private void putC2B(char c, int b) {
+        if (c2bIndex == null)       // the index table has to be read first
+            throw new RuntimeException("Corrupted data file");
+        int index = c2bIndex[c >> 8];
+        if (index != NO_MAPPING)
+            c2b[index + (c & 0xff)] = (char)b;
+    }
+
+    // reads the single-byte b->c table and enters its reverse into c2b
+    void readSINGLEBYTE() {
+        char[] map = readCharArray();
+        for (int i = 0; i < map.length; i++) {
+            if (map[i] != UNMAPPABLE_DECODING)
+                putC2B(map[i], i);
+        }
+        b2cSB = map;
+    }
+
+    // reads the c->b index table and allocates c2b, filled as "unmapped"
+    void readINDEXC2B() {
+        char[] map = readCharArray();
+        // c2b has to cover the 256 slots of the highest mapped row. The
+        // entries are chars, so the unmapped marker must be tested as 0xffff:
+        // a test against -1 is always true and lets the marker size the table
+        int max = -1;
+        for (int i = 0; i < map.length; i++) {
+            if (map[i] != NO_MAPPING && map[i] > max)
+                max = map[i];
+        }
+        if (c2b == null) {
+            c2b = new char[(max < 0) ? 0 : max + 256];
+            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
+        }
+        c2bIndex = map;
+    }
+
+    // reads a double-byte b->c table and enters its reverse into c2b; slot i
+    // of the table belongs to the bytes (i / segSize + b1Min, i % segSize + b2Min)
+    char[] readDB(int b1Min, int b2Min, int segSize) {
+        char[] map = readCharArray();
+        for (int i = 0; i < map.length; i++) {
+            if (map[i] != UNMAPPABLE_DECODING) {
+                int b1 = i / segSize + b1Min;
+                int b2 = i % segSize + b2Min;
+                putC2B(map[i], b1 * 256 + b2);
+            }
+        }
+        return map;
+    }
+
+    // table layout: b1Min, b1Max, b2Min, b2Max, followed by the char array
+    void readDOUBLEBYTE1() {
+        b1MinDB1 = readU16();
+        b1MaxDB1 = readU16();
+        b2Min = readU16();
+        b2Max = readU16();
+        dbSegSize = b2Max - b2Min + 1;
+        b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
+    }
+
+    // same layout as the DB1 table; b2Min, b2Max and dbSegSize are shared
+    void readDOUBLEBYTE2() {
+        b1MinDB2 = readU16();
+        b1MaxDB2 = readU16();
+        b2Min = readU16();
+        b2Max = readU16();
+        dbSegSize = b2Max - b2Min + 1;
+        b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
+    }
+
+    // reads the (bs, cp, cp2) triples of the composite table
+    void readCOMPOSITE() {
+        char[] map = readCharArray();
+        int mLen = map.length / 3;
+        b2cComp = new Entry[mLen];
+        c2bComp = new Entry[mLen];
+        for (int i = 0, j = 0; i < mLen; i++) {
+            Entry m = new Entry();
+            m.bs = map[j++];
+            m.cp = map[j++];
+            m.cp2 = map[j++];
+            b2cComp[i] = m;
+            c2bComp[i] = m;
+        }
+        // both arrays are binary searched, each must be in its own key order
+        Arrays.sort(b2cComp, comparatorBytes);
+        Arrays.sort(c2bComp, comparatorComp);
+    }
+
+    /**
+     * Reads all tables from the stream into this object and closes the
+     * stream.
+     *
+     * @param in the stream holding the .dat data
+     * @return this object, or null if an IOException occurred (its stack
+     *         trace is printed)
+     * @throws RuntimeException if the stream ends too early, the length
+     *         is negative or a table tag is unknown
+     */
+    CharsetMapping load(InputStream in) {
+        try {
+            // The first 4 bytes are the size of the total data followed in
+            // this .dat file. They are read via readNBytes() so that an
+            // end of stream is noticed instead of being taken as 0xff.
+            byte[] head = new byte[4];
+            if (!readNBytes(in, head, 4))
+                throw new RuntimeException("Corrupted data file");
+            int len = ((head[0]&0xff) << 24) | ((head[1]&0xff) << 16) |
+                      ((head[2]&0xff) << 8)  |  (head[3]&0xff);
+            if (len < 0)
+                throw new RuntimeException("Corrupted data file");
+            bb = new byte[len];
+            off = 0;
+            //System.out.printf("In : Total=%d%n", len);
+            // Read in all bytes
+            if (!readNBytes(in, bb, len))
+                throw new RuntimeException("Corrupted data file");
+
+            while (off < len) {
+                int type = readU16();
+                switch(type) {
+                case MAP_INDEXC2B:
+                    readINDEXC2B();
+                    break;
+                case MAP_SINGLEBYTE:
+                    readSINGLEBYTE();
+                    break;
+                case MAP_DOUBLEBYTE1:
+                    readDOUBLEBYTE1();
+                    break;
+                case MAP_DOUBLEBYTE2:
+                    readDOUBLEBYTE2();
+                    break;
+                case MAP_SUPPLEMENT:
+                    b2cSupp = readCharArray();
+                    break;
+                case MAP_SUPPLEMENT_C2B:
+                    c2bSupp = readCharArray();
+                    break;
+                case MAP_COMPOSITE:
+                    readCOMPOSITE();
+                    break;
+                default:
+                    throw new RuntimeException("Corrupted data file");
+                }
+            }
+            bb = null;
+            return this;
+        } catch (IOException x) {
+            x.printStackTrace();
+            return null;
+        } finally {
+            // always release the stream, also when the data is rejected
+            try {
+                in.close();
+            } catch (IOException ignored) {
+                // the data is already read (or given up), nothing to recover
+            }
+        }
+    }
+}