8073893: Enable charsets build system to configure euc_tw into java.base module/sun.nio.cs
Summary: Enable the charsets build system to configure euc_tw into the java.base module (sun.nio.cs)
Reviewed-by: alanb, mchung
--- a/jdk/make/data/charsetmapping/charsets Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/data/charsetmapping/charsets Wed Feb 25 13:04:31 2015 -0800
@@ -503,7 +503,7 @@
charset x-EUC-TW EUC_TW
package sun.nio.cs.ext
- type source
+ type template
alias euc_tw # JDK historical
alias euctw
alias cns11643
@@ -1816,3 +1816,17 @@
ascii false
minmax 0x21 0x7e 0x21 0x7e
internal true # "internal implementation
+
+########################################################
+#
+# Platform-specific charsets, to be registered into the SPI
+#
+########################################################
+
+charset x-COMPOUND_TEXT COMPOUND_TEXT
+ package sun.nio.cs.ext
+ type source
+ os unix
+ alias COMPOUND_TEXT # JDK historical
+ alias x11-compound_text
+ alias x-compound-text
--- a/jdk/make/data/charsetmapping/stdcs-solaris Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/data/charsetmapping/stdcs-solaris Wed Feb 25 13:04:31 2015 -0800
@@ -9,6 +9,7 @@
EUC_JP
EUC_JP_LINUX
EUC_JP_Open
+EUC_TW
GBK
ISO_8859_11
ISO_8859_3
--- a/jdk/make/gensrc/Gensrc-jdk.charsets.gmk Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/gensrc/Gensrc-jdk.charsets.gmk Wed Feb 25 13:04:31 2015 -0800
@@ -50,7 +50,9 @@
$(TOOL_CHARSETMAPPING) $(CHARSET_DATA_DIR) $(CHARSET_GENSRC_JAVA_DIR_CS) \
extcs charsets $(CHARSET_STANDARD_OS) \
$(CHARSET_EXTENDED_JAVA_TEMPLATES) \
- $(CHARSET_EXTENDED_JAVA_DIR) $(LOG_INFO)
+ $(CHARSET_EXTENDED_JAVA_DIR) \
+ $(CHARSET_COPYRIGHT_HEADER) \
+ $(LOG_INFO)
$(TOUCH) '$@'
$(CHARSET_DONE_CS)-hkscs: $(CHARSET_COPYRIGHT_HEADER)/HKSCS.java \
--- a/jdk/make/gensrc/GensrcCharsetMapping.gmk Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/gensrc/GensrcCharsetMapping.gmk Wed Feb 25 13:04:31 2015 -0800
@@ -32,6 +32,7 @@
CHARSET_EXTSRC_DIR := $(JDK_TOPDIR)/src/jdk.charsets/share/classes/sun/nio/cs/ext
CHARSET_GENSRC_JAVA_DIR_BASE := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/sun/nio/cs
CHARSET_DONE_BASE := $(CHARSET_GENSRC_JAVA_DIR_BASE)/_the.charsetmapping
+CHARSET_COPYRIGHT_HEADER := $(JDK_TOPDIR)/make/src/classes/build/tools/charsetmapping
CHARSET_TEMPLATES := \
$(CHARSET_DATA_DIR)/SingleByte-X.java.template \
$(CHARSET_DATA_DIR)/DoubleByte-X.java.template
@@ -46,7 +47,9 @@
$(MKDIR) -p $(@D)
$(TOOL_CHARSETMAPPING) $(CHARSET_DATA_DIR) $(CHARSET_GENSRC_JAVA_DIR_BASE) \
stdcs charsets $(CHARSET_STANDARD_OS) \
- $(CHARSET_STANDARD_JAVA_TEMPLATES) $(CHARSET_EXTSRC_DIR) $(LOG_INFO)
+ $(CHARSET_STANDARD_JAVA_TEMPLATES) $(CHARSET_EXTSRC_DIR) \
+ $(CHARSET_COPYRIGHT_HEADER) \
+ $(LOG_INFO)
$(TOUCH) '$@'
GENSRC_JAVA_BASE += $(CHARSET_DONE_BASE)-stdcs
--- a/jdk/make/src/classes/build/tools/charsetmapping/Charset.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/src/classes/build/tools/charsetmapping/Charset.java Wed Feb 25 13:04:31 2015 -0800
@@ -31,6 +31,7 @@
String csName;
String hisName;
String type;
+ String os;
boolean isASCII;
int b1Min;
int b1Max;
--- a/jdk/make/src/classes/build/tools/charsetmapping/EUC_TW.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/src/classes/build/tools/charsetmapping/EUC_TW.java Wed Feb 25 13:04:31 2015 -0800
@@ -80,12 +80,12 @@
private static Pattern euctw = Pattern.compile("(?:8ea)?(\\p{XDigit}++)\\s++(\\p{XDigit}++)?\\s*+.*");
- static void genClass(String args[]) throws Exception
+ static void genClass(String pkg, String args[]) throws Exception
{
InputStream is = new FileInputStream(new File(args[0], "euc_tw.map"));
PrintStream ps = new PrintStream(new File(args[1], "EUC_TWMapping.java"),
"ISO-8859-1");
- String copyright = getCopyright(new File(args[3]));
+ String copyright = getCopyright(new File(args[7], "EUC_TW.java"));
// ranges of byte1 and byte2, something should come from a "config" file
@@ -128,7 +128,7 @@
out.format(copyright);
out.format("%n// -- This file was mechanically generated: Do not edit! -- //%n");
- out.format("package sun.nio.cs.ext;%n%n");
+ out.format("package %s;%n%n", pkg);
out.format("class EUC_TWMapping {%n%n");
// boundaries
--- a/jdk/make/src/classes/build/tools/charsetmapping/HKSCS.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/src/classes/build/tools/charsetmapping/HKSCS.java Wed Feb 25 13:04:31 2015 -0800
@@ -42,7 +42,7 @@
private static Pattern hkscs =
Pattern.compile("(?:0x)?+(\\p{XDigit}++)\\s++(?:0x|U\\+)?+(\\p{XDigit}++)?\\s*+(?:0x|U\\+)?(\\p{XDigit}++)?\\s*+.*");
- static void genClass2008(String srcDir, String dstDir, String pkgName)
+ static void genClass2008(String srcDir, String dstDir, String pkgName, File copyright)
throws Exception
{
// hkscs2008
@@ -53,10 +53,11 @@
pkgName,
"HKSCSMapping",
true,
- "");
+ getCopyright(copyright));
+
}
- static void genClassXP(String srcDir, String dstDir, String pkgName)
+ static void genClassXP(String srcDir, String dstDir, String pkgName, File copyright)
throws Exception
{
genClass0(new FileInputStream(new File(srcDir, "HKSCS_XP.map")),
@@ -66,7 +67,7 @@
pkgName,
"HKSCS_XPMapping",
false,
- "");
+ getCopyright(copyright));
}
static void genClass2001(String args[]) throws Exception {
--- a/jdk/make/src/classes/build/tools/charsetmapping/Main.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/src/classes/build/tools/charsetmapping/Main.java Wed Feb 25 13:04:31 2015 -0800
@@ -41,6 +41,7 @@
int OS = 4;
int TEMPLATE = 5;
int EXT_SRC = 6;
+ int COPYRIGHT_SRC = 7;
if (args.length < 3 ) {
System.out.println("Usage: java -jar charsetmapping.jar src dst spiType charsets os [template]");
@@ -54,6 +55,7 @@
String[] osStdcs = getOSStdCSList(new File(args[SRC_DIR], args[OS]));
boolean hasBig5_HKSCS = false;
boolean hasMS950_HKSCS_XP = false;
+ boolean hasEUC_TW = false;
for (String name : osStdcs) {
Charset cs = charsets.get(name);
if (cs != null) {
@@ -63,6 +65,8 @@
hasBig5_HKSCS = true;
} else if (name.equals("MS950_HKSCS_XP")) {
hasMS950_HKSCS_XP = true;
+ } else if (name.equals("EUC_TW")) {
+ hasEUC_TW = true;
}
}
for (Charset cs : charsets.values()) {
@@ -89,19 +93,28 @@
}
}
// provider StandardCharsets.java / ExtendedCharsets.java
- SPI.genClass(args[TYPE], charsets, args[SRC_DIR], args[DST_DIR], args[TEMPLATE]);
+ SPI.genClass(args[TYPE], charsets,
+ args[SRC_DIR], args[DST_DIR],
+ args[TEMPLATE],
+ args[OS].endsWith("windows") ? "windows" : "unix");
// HKSCSMapping2008/XP.java goes together with Big5/MS950XP_HKSCS
if (isStandard && hasBig5_HKSCS || isExtended && !hasBig5_HKSCS) {
HKSCS.genClass2008(args[SRC_DIR], args[DST_DIR],
- isStandard ? "sun.nio.cs" : "sun.nio.cs.ext");
+ isStandard ? "sun.nio.cs" : "sun.nio.cs.ext",
+ new File(args[COPYRIGHT_SRC], "HKSCS.java"));
}
if (isStandard && hasMS950_HKSCS_XP || isExtended && !hasMS950_HKSCS_XP) {
HKSCS.genClassXP(args[SRC_DIR], args[DST_DIR],
- isStandard ? "sun.nio.cs" : "sun.nio.cs.ext");
+ isStandard ? "sun.nio.cs" : "sun.nio.cs.ext",
+ new File(args[COPYRIGHT_SRC], "HKSCS.java"));
}
- } else if ("euctw".equals(args[TYPE])) {
- EUC_TW.genClass(args);
+ if (isStandard && hasEUC_TW) {
+ EUC_TW.genClass("sun.nio.cs", args);
+ }
+ if (!isStandard && !hasEUC_TW) {
+ EUC_TW.genClass("sun.nio.cs.ext", args);
+ }
} else if ("sjis0213".equals(args[TYPE])) {
JIS0213.genClass(args);
} else if ("hkscs".equals(args[TYPE])) {
@@ -157,6 +170,9 @@
case "type":
cs.type = tokens[2];
break;
+ case "os":
+ cs.os = tokens[2];
+ break;
case "hisname":
cs.hisName = tokens[2];
break;
--- a/jdk/make/src/classes/build/tools/charsetmapping/SPI.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/make/src/classes/build/tools/charsetmapping/SPI.java Wed Feb 25 13:04:31 2015 -0800
@@ -33,8 +33,10 @@
public class SPI {
- public static void genClass(String type, LinkedHashMap<String, Charset> charsets,
- String srcDir, String dstDir, String template)
+ public static void genClass(String type,
+ LinkedHashMap<String, Charset> charsets,
+ String srcDir, String dstDir, String template,
+ String os)
throws Exception
{
try (Scanner s = new Scanner(new File(template));
@@ -50,7 +52,8 @@
charsets.values()
.stream()
.filter(cs -> cs.pkgName.equals("sun.nio.cs.ext") &&
- !cs.isInternal)
+ !cs.isInternal &&
+ (cs.os == null || cs.os.equals(os)))
.forEach( cs -> {
out.printf(" charset(\"%s\", \"%s\",%n", cs.csName, cs.clzName);
out.printf(" new String[] {%n");
--- a/jdk/src/java.desktop/unix/classes/sun/awt/motif/X11CNS11643.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/src/java.desktop/unix/classes/sun/awt/motif/X11CNS11643.java Wed Feb 25 13:04:31 2015 -0800
@@ -28,7 +28,8 @@
import java.nio.CharBuffer;
import java.nio.ByteBuffer;
import java.nio.charset.*;
-import sun.nio.cs.ext.EUC_TW;
+import sun.nio.cs.*;
+import sun.nio.cs.ext.*;
public abstract class X11CNS11643 extends Charset {
private final int plane;
--- a/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/EUC_TW.java Wed Feb 25 19:36:29 2015 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,546 +0,0 @@
-/*
- * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package sun.nio.cs.ext;
-
-import java.io.*;
-import java.nio.CharBuffer;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.util.Arrays;
-import sun.nio.cs.HistoricallyNamedCharset;
-import static sun.nio.cs.CharsetMapping.*;
-
-public class EUC_TW extends Charset implements HistoricallyNamedCharset
-{
- private static final int SS2 = 0x8E;
-
- /*
- (1) EUC_TW
- Second byte of EUC_TW for cs2 is in range of
- 0xA1-0xB0 for plane 1-16. According to CJKV /163,
- plane1 is coded in both cs1 and cs2. This impl
- however does not decode the codepoints of plane1
- in cs2, so only p2-p7 and p15 are supported in cs2.
-
- Plane2 0xA2;
- Plane3 0xA3;
- Plane4 0xA4;
- Plane5 0xA5;
- Plane6 0xA6;
- Plane7 0xA7;
- Plane15 0xAF;
-
- (2) Mapping
- The fact that all supplementary characters encoded in EUC_TW are
- in 0x2xxxx range gives us the room to optimize the data tables.
-
- Decoding:
- (1) save the lower 16-bit value of all codepoints of b->c mapping
- in a String array table String[plane] b2c.
- (2) save "codepoint is supplementary" info (one bit) in a
- byte[] b2cIsSupp, so 8 codepoints (same codepoint value, different
- plane No) share one byte.
-
- Encoding:
- (1)c->b mappings are stored in
- char[]c2b/char[]c2bIndex
- char[]c2bSupp/char[]c2bIndexsupp (indexed by lower 16-bit
- (2)byte[] c2bPlane stores the "plane info" of each euc-tw codepoints,
- BMP and Supp share the low/high 4 bits of one byte.
-
- Mapping tables are stored separated in EUC_TWMapping, which
- is generated by tool.
- */
-
- public EUC_TW() {
- super("x-EUC-TW", ExtendedCharsets.aliasesFor("x-EUC-TW"));
- }
-
- public String historicalName() {
- return "EUC_TW";
- }
-
- public boolean contains(Charset cs) {
- return ((cs.name().equals("US-ASCII"))
- || (cs instanceof EUC_TW));
- }
-
- public CharsetDecoder newDecoder() {
- return new Decoder(this);
- }
-
- public CharsetEncoder newEncoder() {
- return new Encoder(this);
- }
-
- public static class Decoder extends CharsetDecoder {
- public Decoder(Charset cs) {
- super(cs, 2.0f, 2.0f);
- }
-
- char[] c1 = new char[1];
- char[] c2 = new char[2];
- public char[] toUnicode(int b1, int b2, int p) {
- return decode(b1, b2, p, c1, c2);
- }
-
- static final String[] b2c = EUC_TWMapping.b2c;
- static final int b1Min = EUC_TWMapping.b1Min;
- static final int b1Max = EUC_TWMapping.b1Max;
- static final int b2Min = EUC_TWMapping.b2Min;
- static final int b2Max = EUC_TWMapping.b2Max;
- static final int dbSegSize = b2Max - b2Min + 1;
- static final byte[] b2cIsSupp;
-
- // adjust from cns planeNo to the plane index of b2c
- static final byte[] cnspToIndex = new byte[0x100];
- static {
- Arrays.fill(cnspToIndex, (byte)-1);
- cnspToIndex[0xa2] = 1; cnspToIndex[0xa3] = 2; cnspToIndex[0xa4] = 3;
- cnspToIndex[0xa5] = 4; cnspToIndex[0xa6] = 5; cnspToIndex[0xa7] = 6;
- cnspToIndex[0xaf] = 7;
- }
-
- //static final BitSet b2cIsSupp;
- static {
- String b2cIsSuppStr = EUC_TWMapping.b2cIsSuppStr;
- // work on a local copy is much faster than operate
- // directly on b2cIsSupp
- byte[] flag = new byte[b2cIsSuppStr.length() << 1];
- int off = 0;
- for (int i = 0; i < b2cIsSuppStr.length(); i++) {
- char c = b2cIsSuppStr.charAt(i);
- flag[off++] = (byte)(c >> 8);
- flag[off++] = (byte)(c & 0xff);
- }
- b2cIsSupp = flag;
- }
-
- static boolean isLegalDB(int b) {
- return b >= b1Min && b <= b1Max;
- }
-
- static char[] decode(int b1, int b2, int p, char[] c1, char[] c2)
- {
- if (b1 < b1Min || b1 > b1Max || b2 < b2Min || b2 > b2Max)
- return null;
- int index = (b1 - b1Min) * dbSegSize + b2 - b2Min;
- char c = b2c[p].charAt(index);
- if (c == UNMAPPABLE_DECODING)
- return null;
- if ((b2cIsSupp[index] & (1 << p)) == 0) {
- c1[0] = c;
- return c1;
- } else {
- c2[0] = Character.highSurrogate(0x20000 + c);
- c2[1] = Character.lowSurrogate(0x20000 + c);
- return c2;
- }
- }
-
- private CoderResult decodeArrayLoop(ByteBuffer src,
- CharBuffer dst)
- {
- byte[] sa = src.array();
- int sp = src.arrayOffset() + src.position();
- int sl = src.arrayOffset() + src.limit();
-
- char[] da = dst.array();
- int dp = dst.arrayOffset() + dst.position();
- int dl = dst.arrayOffset() + dst.limit();
- try {
- while (sp < sl) {
- int byte1 = sa[sp] & 0xff;
- if (byte1 == SS2) { // Codeset 2 G2
- if ( sl - sp < 4)
- return CoderResult.UNDERFLOW;
- int cnsPlane = cnspToIndex[sa[sp + 1] & 0xff];
- if (cnsPlane < 0)
- return CoderResult.malformedForLength(2);
- byte1 = sa[sp + 2] & 0xff;
- int byte2 = sa[sp + 3] & 0xff;
- char[] cc = toUnicode(byte1, byte2, cnsPlane);
- if (cc == null) {
- if (!isLegalDB(byte1) || !isLegalDB(byte2))
- return CoderResult.malformedForLength(4);
- return CoderResult.unmappableForLength(4);
- }
- if (dl - dp < cc.length)
- return CoderResult.OVERFLOW;
- if (cc.length == 1) {
- da[dp++] = cc[0];
- } else {
- da[dp++] = cc[0];
- da[dp++] = cc[1];
- }
- sp += 4;
- } else if (byte1 < 0x80) { // ASCII G0
- if (dl - dp < 1)
- return CoderResult.OVERFLOW;
- da[dp++] = (char) byte1;
- sp++;
- } else { // Codeset 1 G1
- if ( sl - sp < 2)
- return CoderResult.UNDERFLOW;
- int byte2 = sa[sp + 1] & 0xff;
- char[] cc = toUnicode(byte1, byte2, 0);
- if (cc == null) {
- if (!isLegalDB(byte1) || !isLegalDB(byte2))
- return CoderResult.malformedForLength(1);
- return CoderResult.unmappableForLength(2);
- }
- if (dl - dp < 1)
- return CoderResult.OVERFLOW;
- da[dp++] = cc[0];
- sp += 2;
- }
- }
- return CoderResult.UNDERFLOW;
- } finally {
- src.position(sp - src.arrayOffset());
- dst.position(dp - dst.arrayOffset());
- }
- }
-
- private CoderResult decodeBufferLoop(ByteBuffer src,
- CharBuffer dst)
- {
- int mark = src.position();
- try {
- while (src.hasRemaining()) {
- int byte1 = src.get() & 0xff;
- if (byte1 == SS2) { // Codeset 2 G2
- if ( src.remaining() < 3)
- return CoderResult.UNDERFLOW;
- int cnsPlane = cnspToIndex[src.get() & 0xff];
- if (cnsPlane < 0)
- return CoderResult.malformedForLength(2);
- byte1 = src.get() & 0xff;
- int byte2 = src.get() & 0xff;
- char[] cc = toUnicode(byte1, byte2, cnsPlane);
- if (cc == null) {
- if (!isLegalDB(byte1) || !isLegalDB(byte2))
- return CoderResult.malformedForLength(4);
- return CoderResult.unmappableForLength(4);
- }
- if (dst.remaining() < cc.length)
- return CoderResult.OVERFLOW;
- if (cc.length == 1) {
- dst.put(cc[0]);
- } else {
- dst.put(cc[0]);
- dst.put(cc[1]);
- }
- mark += 4;
- } else if (byte1 < 0x80) { // ASCII G0
- if (!dst.hasRemaining())
- return CoderResult.OVERFLOW;
- dst.put((char) byte1);
- mark++;
- } else { // Codeset 1 G1
- if (!src.hasRemaining())
- return CoderResult.UNDERFLOW;
- int byte2 = src.get() & 0xff;
- char[] cc = toUnicode(byte1, byte2, 0);
- if (cc == null) {
- if (!isLegalDB(byte1) || !isLegalDB(byte2))
- return CoderResult.malformedForLength(1);
- return CoderResult.unmappableForLength(2);
- }
- if (!dst.hasRemaining())
- return CoderResult.OVERFLOW;
- dst.put(cc[0]);
- mark +=2;
- }
- }
- return CoderResult.UNDERFLOW;
- } finally {
- src.position(mark);
- }
- }
-
- protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst)
- {
- if (src.hasArray() && dst.hasArray())
- return decodeArrayLoop(src, dst);
- else
- return decodeBufferLoop(src, dst);
- }
- }
-
- public static class Encoder extends CharsetEncoder {
- private byte[] bb = new byte[4];
-
- public Encoder(Charset cs) {
- super(cs, 4.0f, 4.0f);
- }
-
- public boolean canEncode(char c) {
- return (c <= '\u007f' || toEUC(c, bb) != -1);
- }
-
- public boolean canEncode(CharSequence cs) {
- int i = 0;
- while (i < cs.length()) {
- char c = cs.charAt(i++);
- if (Character.isHighSurrogate(c)) {
- if (i == cs.length())
- return false;
- char low = cs.charAt(i++);
- if (!Character.isLowSurrogate(low) || toEUC(c, low, bb) == -1)
- return false;
- } else if (!canEncode(c)) {
- return false;
- }
- }
- return true;
- }
-
- public int toEUC(char hi, char low, byte[] bb) {
- return encode(hi, low, bb);
- }
-
- public int toEUC(char c, byte[] bb) {
- return encode(c, bb);
- }
-
- private CoderResult encodeArrayLoop(CharBuffer src,
- ByteBuffer dst)
- {
- char[] sa = src.array();
- int sp = src.arrayOffset() + src.position();
- int sl = src.arrayOffset() + src.limit();
-
- byte[] da = dst.array();
- int dp = dst.arrayOffset() + dst.position();
- int dl = dst.arrayOffset() + dst.limit();
-
- int inSize;
- int outSize;
-
- try {
- while (sp < sl) {
- char c = sa[sp];
- inSize = 1;
- if (c < 0x80) { // ASCII
- bb[0] = (byte)c;
- outSize = 1;
- } else {
- outSize = toEUC(c, bb);
- if (outSize == -1) {
- // to check surrogates only after BMP failed
- // has the benefit of improving the BMP encoding
- // 10% faster, with the price of the slowdown of
- // supplementary character encoding. given the use
- // of supplementary characters is really rare, this
- // is something worth doing.
- if (Character.isHighSurrogate(c)) {
- if ((sp + 1) == sl)
- return CoderResult.UNDERFLOW;
- if (!Character.isLowSurrogate(sa[sp + 1]))
- return CoderResult.malformedForLength(1);
- outSize = toEUC(c, sa[sp+1], bb);
- inSize = 2;
- } else if (Character.isLowSurrogate(c)) {
- return CoderResult.malformedForLength(1);
- }
- }
- }
- if (outSize == -1)
- return CoderResult.unmappableForLength(inSize);
- if ( dl - dp < outSize)
- return CoderResult.OVERFLOW;
- for (int i = 0; i < outSize; i++)
- da[dp++] = bb[i];
- sp += inSize;
- }
- return CoderResult.UNDERFLOW;
- } finally {
- src.position(sp - src.arrayOffset());
- dst.position(dp - dst.arrayOffset());
- }
- }
-
- private CoderResult encodeBufferLoop(CharBuffer src,
- ByteBuffer dst)
- {
- int outSize;
- int inSize;
- int mark = src.position();
-
- try {
- while (src.hasRemaining()) {
- inSize = 1;
- char c = src.get();
- if (c < 0x80) { // ASCII
- outSize = 1;
- bb[0] = (byte)c;
- } else {
- outSize = toEUC(c, bb);
- if (outSize == -1) {
- if (Character.isHighSurrogate(c)) {
- if (!src.hasRemaining())
- return CoderResult.UNDERFLOW;
- char c2 = src.get();
- if (!Character.isLowSurrogate(c2))
- return CoderResult.malformedForLength(1);
- outSize = toEUC(c, c2, bb);
- inSize = 2;
- } else if (Character.isLowSurrogate(c)) {
- return CoderResult.malformedForLength(1);
- }
- }
- }
- if (outSize == -1)
- return CoderResult.unmappableForLength(inSize);
- if (dst.remaining() < outSize)
- return CoderResult.OVERFLOW;
- for (int i = 0; i < outSize; i++)
- dst.put(bb[i]);
- mark += inSize;
- }
- return CoderResult.UNDERFLOW;
- } finally {
- src.position(mark);
- }
- }
-
- protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst)
- {
- if (src.hasArray() && dst.hasArray())
- return encodeArrayLoop(src, dst);
- else
- return encodeBufferLoop(src, dst);
- }
-
- static int encode(char hi, char low, byte[] bb) {
- int c = Character.toCodePoint(hi, low);
- if ((c & 0xf0000) != 0x20000)
- return -1;
- c -= 0x20000;
- int index = c2bSuppIndex[c >> 8];
- if (index == UNMAPPABLE_ENCODING)
- return -1;
- index = index + (c & 0xff);
- int db = c2bSupp[index];
- if (db == UNMAPPABLE_ENCODING)
- return -1;
- int p = (c2bPlane[index] >> 4) & 0xf;
- bb[0] = (byte)SS2;
- bb[1] = (byte)(0xa0 | p);
- bb[2] = (byte)(db >> 8);
- bb[3] = (byte)db;
- return 4;
- }
-
- static int encode(char c, byte[] bb) {
- int index = c2bIndex[c >> 8];
- if (index == UNMAPPABLE_ENCODING)
- return -1;
- index = index + (c & 0xff);
- int db = c2b[index];
- if (db == UNMAPPABLE_ENCODING)
- return -1;
- int p = c2bPlane[index] & 0xf;
- if (p == 0) {
- bb[0] = (byte)(db >> 8);
- bb[1] = (byte)db;
- return 2;
- } else {
- bb[0] = (byte)SS2;
- bb[1] = (byte)(0xa0 | p);
- bb[2] = (byte)(db >> 8);
- bb[3] = (byte)db;
- return 4;
- }
- }
-
- static final char[] c2b;
- static final char[] c2bIndex;
- static final char[] c2bSupp;
- static final char[] c2bSuppIndex;
- static final byte[] c2bPlane;
- static {
- int b1Min = Decoder.b1Min;
- int b1Max = Decoder.b1Max;
- int b2Min = Decoder.b2Min;
- int b2Max = Decoder.b2Max;
- int dbSegSize = Decoder.dbSegSize;
- String[] b2c = Decoder.b2c;
- byte[] b2cIsSupp = Decoder.b2cIsSupp;
-
- c2bIndex = EUC_TWMapping.c2bIndex;
- c2bSuppIndex = EUC_TWMapping.c2bSuppIndex;
- char[] c2b0 = new char[EUC_TWMapping.C2BSIZE];
- char[] c2bSupp0 = new char[EUC_TWMapping.C2BSUPPSIZE];
- byte[] c2bPlane0 = new byte[Math.max(EUC_TWMapping.C2BSIZE,
- EUC_TWMapping.C2BSUPPSIZE)];
-
- Arrays.fill(c2b0, (char)UNMAPPABLE_ENCODING);
- Arrays.fill(c2bSupp0, (char)UNMAPPABLE_ENCODING);
-
- for (int p = 0; p < b2c.length; p++) {
- String db = b2c[p];
- /*
- adjust the "plane" from 0..7 to 0, 2, 3, 4, 5, 6, 7, 0xf,
- which helps balance between footprint (to save the plane
- info in 4 bits) and runtime performance (to require only
- one operation "0xa0 | plane" to encode the plane byte)
- */
- int plane = p;
- if (plane == 7)
- plane = 0xf;
- else if (plane != 0)
- plane = p + 1;
-
- int off = 0;
- for (int b1 = b1Min; b1 <= b1Max; b1++) {
- for (int b2 = b2Min; b2 <= b2Max; b2++) {
- char c = db.charAt(off);
- if (c != UNMAPPABLE_DECODING) {
- if ((b2cIsSupp[off] & (1 << p)) != 0) {
- int index = c2bSuppIndex[c >> 8] + (c&0xff);
- c2bSupp0[index] = (char)((b1 << 8) + b2);
- c2bPlane0[index] |= (byte)(plane << 4);
- } else {
- int index = c2bIndex[c >> 8] + (c&0xff);
- c2b0[index] = (char)((b1 << 8) + b2);
- c2bPlane0[index] |= (byte)plane;
- }
- }
- off++;
- }
- }
- }
- c2b = c2b0;
- c2bSupp = c2bSupp0;
- c2bPlane = c2bPlane0;
- }
- }
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/EUC_TW.java.template Wed Feb 25 13:04:31 2015 -0800
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package $PACKAGE$;
+
+import java.io.*;
+import java.nio.CharBuffer;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.util.Arrays;
+import sun.nio.cs.HistoricallyNamedCharset;
+import static sun.nio.cs.CharsetMapping.*;
+
+public class EUC_TW extends Charset implements HistoricallyNamedCharset
+{
+ private static final int SS2 = 0x8E;
+
+ /*
+ (1) EUC_TW
+ The second byte of an EUC_TW cs2 sequence is in the range
+ 0xA1-0xB0, selecting planes 1-16. According to CJKV /163,
+ plane 1 is coded in both cs1 and cs2. This implementation,
+ however, does not decode plane 1 code points in cs2, so
+ only planes 2-7 and 15 are supported in cs2.
+
+ Plane2 0xA2;
+ Plane3 0xA3;
+ Plane4 0xA4;
+ Plane5 0xA5;
+ Plane6 0xA6;
+ Plane7 0xA7;
+ Plane15 0xAF;
+
+ (2) Mapping
+ The fact that all supplementary characters encoded in EUC_TW are
+ in 0x2xxxx range gives us the room to optimize the data tables.
+
+ Decoding:
+ (1) save the lower 16 bits of every code point in the b->c mapping
+ in a String array table, String[plane] b2c.
+ (2) save the "code point is supplementary" flag (one bit) in a
+ byte[] b2cIsSupp, so the 8 entries with the same double-byte value
+ (one per plane index) share one byte.
+
+ Encoding:
+ (1) c->b mappings are stored in
+ char[] c2b / char[] c2bIndex
+ char[] c2bSupp / char[] c2bSuppIndex (indexed by the lower 16 bits)
+ (2) byte[] c2bPlane stores the "plane info" of each euc-tw code point;
+ BMP and Supp entries use the low and high 4 bits of one byte, respectively.
+
+ The mapping tables are stored separately in EUC_TWMapping, which
+ is generated by the build tool.
+ */
+
+ public EUC_TW() {
+ super("x-EUC-TW", $ALIASES$);
+ }
+
+ public String historicalName() {
+ return "EUC_TW";
+ }
+
+ public boolean contains(Charset cs) {
+ return ((cs.name().equals("US-ASCII"))
+ || (cs instanceof EUC_TW));
+ }
+
+ public CharsetDecoder newDecoder() {
+ return new Decoder(this);
+ }
+
+ public CharsetEncoder newEncoder() {
+ return new Encoder(this);
+ }
+
+ public static class Decoder extends CharsetDecoder {
+ public Decoder(Charset cs) {
+ super(cs, 2.0f, 2.0f);
+ }
+
+ char[] c1 = new char[1];
+ char[] c2 = new char[2];
+ public char[] toUnicode(int b1, int b2, int p) {
+ return decode(b1, b2, p, c1, c2);
+ }
+
+ static final String[] b2c = EUC_TWMapping.b2c;
+ static final int b1Min = EUC_TWMapping.b1Min;
+ static final int b1Max = EUC_TWMapping.b1Max;
+ static final int b2Min = EUC_TWMapping.b2Min;
+ static final int b2Max = EUC_TWMapping.b2Max;
+ static final int dbSegSize = b2Max - b2Min + 1;
+ static final byte[] b2cIsSupp;
+
+ // adjust from cns planeNo to the plane index of b2c
+ static final byte[] cnspToIndex = new byte[0x100];
+ static {
+ Arrays.fill(cnspToIndex, (byte)-1);
+ cnspToIndex[0xa2] = 1; cnspToIndex[0xa3] = 2; cnspToIndex[0xa4] = 3;
+ cnspToIndex[0xa5] = 4; cnspToIndex[0xa6] = 5; cnspToIndex[0xa7] = 6;
+ cnspToIndex[0xaf] = 7;
+ }
+
+ //static final BitSet b2cIsSupp;
+ static {
+ String b2cIsSuppStr = EUC_TWMapping.b2cIsSuppStr;
+ // work on a local copy is much faster than operate
+ // directly on b2cIsSupp
+ byte[] flag = new byte[b2cIsSuppStr.length() << 1];
+ int off = 0;
+ for (int i = 0; i < b2cIsSuppStr.length(); i++) {
+ char c = b2cIsSuppStr.charAt(i);
+ flag[off++] = (byte)(c >> 8);
+ flag[off++] = (byte)(c & 0xff);
+ }
+ b2cIsSupp = flag;
+ }
+
+ static boolean isLegalDB(int b) {
+ return b >= b1Min && b <= b1Max;
+ }
+
+ static char[] decode(int b1, int b2, int p, char[] c1, char[] c2)
+ {
+ if (b1 < b1Min || b1 > b1Max || b2 < b2Min || b2 > b2Max)
+ return null;
+ int index = (b1 - b1Min) * dbSegSize + b2 - b2Min;
+ char c = b2c[p].charAt(index);
+ if (c == UNMAPPABLE_DECODING)
+ return null;
+ if ((b2cIsSupp[index] & (1 << p)) == 0) {
+ c1[0] = c;
+ return c1;
+ } else {
+ c2[0] = Character.highSurrogate(0x20000 + c);
+ c2[1] = Character.lowSurrogate(0x20000 + c);
+ return c2;
+ }
+ }
+
+ private CoderResult decodeArrayLoop(ByteBuffer src,
+ CharBuffer dst)
+ {
+ byte[] sa = src.array();
+ int sp = src.arrayOffset() + src.position();
+ int sl = src.arrayOffset() + src.limit();
+
+ char[] da = dst.array();
+ int dp = dst.arrayOffset() + dst.position();
+ int dl = dst.arrayOffset() + dst.limit();
+ try {
+ while (sp < sl) {
+ int byte1 = sa[sp] & 0xff;
+ if (byte1 == SS2) { // Codeset 2 G2
+ if ( sl - sp < 4)
+ return CoderResult.UNDERFLOW;
+ int cnsPlane = cnspToIndex[sa[sp + 1] & 0xff];
+ if (cnsPlane < 0)
+ return CoderResult.malformedForLength(2);
+ byte1 = sa[sp + 2] & 0xff;
+ int byte2 = sa[sp + 3] & 0xff;
+ char[] cc = toUnicode(byte1, byte2, cnsPlane);
+ if (cc == null) {
+ if (!isLegalDB(byte1) || !isLegalDB(byte2))
+ return CoderResult.malformedForLength(4);
+ return CoderResult.unmappableForLength(4);
+ }
+ if (dl - dp < cc.length)
+ return CoderResult.OVERFLOW;
+ if (cc.length == 1) {
+ da[dp++] = cc[0];
+ } else {
+ da[dp++] = cc[0];
+ da[dp++] = cc[1];
+ }
+ sp += 4;
+ } else if (byte1 < 0x80) { // ASCII G0
+ if (dl - dp < 1)
+ return CoderResult.OVERFLOW;
+ da[dp++] = (char) byte1;
+ sp++;
+ } else { // Codeset 1 G1
+ if ( sl - sp < 2)
+ return CoderResult.UNDERFLOW;
+ int byte2 = sa[sp + 1] & 0xff;
+ char[] cc = toUnicode(byte1, byte2, 0);
+ if (cc == null) {
+ if (!isLegalDB(byte1) || !isLegalDB(byte2))
+ return CoderResult.malformedForLength(1);
+ return CoderResult.unmappableForLength(2);
+ }
+ if (dl - dp < 1)
+ return CoderResult.OVERFLOW;
+ da[dp++] = cc[0];
+ sp += 2;
+ }
+ }
+ return CoderResult.UNDERFLOW;
+ } finally {
+ src.position(sp - src.arrayOffset());
+ dst.position(dp - dst.arrayOffset());
+ }
+ }
+
+ private CoderResult decodeBufferLoop(ByteBuffer src, // decode path built on relative get()/put(); decodeLoop selects it when src or dst has no backing array
+ CharBuffer dst)
+ {
+ int mark = src.position(); // position just past the last completely decoded sequence; restored in finally
+ try {
+ while (src.hasRemaining()) {
+ int byte1 = src.get() & 0xff; // mask so the byte is treated as unsigned
+ if (byte1 == SS2) { // Codeset 2 G2: SS2, one plane byte, then two data bytes
+ if ( src.remaining() < 3) // SS2 is already consumed; three more bytes are required
+ return CoderResult.UNDERFLOW;
+ int cnsPlane = cnspToIndex[src.get() & 0xff]; // a negative entry means the plane byte is rejected below
+ if (cnsPlane < 0)
+ return CoderResult.malformedForLength(2);
+ byte1 = src.get() & 0xff;
+ int byte2 = src.get() & 0xff;
+ char[] cc = toUnicode(byte1, byte2, cnsPlane); // null is handled as a failed lookup
+ if (cc == null) {
+ if (!isLegalDB(byte1) || !isLegalDB(byte2)) // an illegal data byte is malformed; otherwise the sequence is unmappable
+ return CoderResult.malformedForLength(4);
+ return CoderResult.unmappableForLength(4);
+ }
+ if (dst.remaining() < cc.length) // check room before writing anything so no partial output is produced
+ return CoderResult.OVERFLOW;
+ if (cc.length == 1) {
+ dst.put(cc[0]);
+ } else {
+ dst.put(cc[0]);
+ dst.put(cc[1]);
+ }
+ mark += 4; // commit the whole 4-byte sequence
+ } else if (byte1 < 0x80) { // ASCII G0: the byte is copied through as a char
+ if (!dst.hasRemaining())
+ return CoderResult.OVERFLOW;
+ dst.put((char) byte1);
+ mark++;
+ } else { // Codeset 1 G1: two-byte sequence looked up with plane index 0
+ if (!src.hasRemaining()) // the second byte has not arrived yet
+ return CoderResult.UNDERFLOW;
+ int byte2 = src.get() & 0xff;
+ char[] cc = toUnicode(byte1, byte2, 0);
+ if (cc == null) {
+ if (!isLegalDB(byte1) || !isLegalDB(byte2))
+ return CoderResult.malformedForLength(1); // reported with length 1 on this path
+ return CoderResult.unmappableForLength(2);
+ }
+ if (!dst.hasRemaining())
+ return CoderResult.OVERFLOW;
+ dst.put(cc[0]); // only the first char of the lookup result is written on this path
+ mark +=2;
+ }
+ }
+ return CoderResult.UNDERFLOW; // all input consumed
+ } finally {
+ src.position(mark); // on an early return this rewinds over bytes read from an uncommitted sequence
+ }
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) // chooses between the array-based and the buffer-based decode loop
+ {
+ if (src.hasArray() && dst.hasArray()) // the array loop needs a backing array on both sides
+ return decodeArrayLoop(src, dst);
+ else
+ return decodeBufferLoop(src, dst);
+ }
+ }
+
+ public static class Encoder extends CharsetEncoder { // emits 1 byte (ASCII), 2 bytes, or 4 bytes (SS2 + plane byte + 2 bytes) per character
+ private byte[] bb = new byte[4]; // scratch buffer for one encoded character, shared by the methods of this instance
+
+ public Encoder(Charset cs) {
+ super(cs, 4.0f, 4.0f); // average and maximum bytes-per-char are both declared as 4
+ }
+
+ public boolean canEncode(char c) { // ASCII always; otherwise whatever the single-char table lookup accepts
+ return (c <= '\u007f' || toEUC(c, bb) != -1);
+ }
+
+ public boolean canEncode(CharSequence cs) { // true only if every char, or surrogate pair, is encodable
+ int i = 0;
+ while (i < cs.length()) {
+ char c = cs.charAt(i++);
+ if (Character.isHighSurrogate(c)) {
+ if (i == cs.length()) // high surrogate is the last char: no pair available
+ return false;
+ char low = cs.charAt(i++);
+ if (!Character.isLowSurrogate(low) || toEUC(c, low, bb) == -1)
+ return false;
+ } else if (!canEncode(c)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public int toEUC(char hi, char low, byte[] bb) { // surrogate-pair form; returns the byte count written to bb, or -1
+ return encode(hi, low, bb);
+ }
+
+ public int toEUC(char c, byte[] bb) { // single-char form; returns the byte count written to bb, or -1
+ return encode(c, bb);
+ }
+
+ private CoderResult encodeArrayLoop(CharBuffer src, // encode path that works directly on the buffers' backing arrays
+ ByteBuffer dst)
+ {
+ char[] sa = src.array();
+ int sp = src.arrayOffset() + src.position(); // absolute read index into sa
+ int sl = src.arrayOffset() + src.limit();
+
+ byte[] da = dst.array();
+ int dp = dst.arrayOffset() + dst.position(); // absolute write index into da
+ int dl = dst.arrayOffset() + dst.limit();
+
+ int inSize; // chars consumed for the current character: 1, or 2 for a surrogate pair
+ int outSize; // bytes produced for it, or -1 when no mapping exists
+
+ try {
+ while (sp < sl) {
+ char c = sa[sp];
+ inSize = 1;
+ if (c < 0x80) { // ASCII: single byte, no table lookup
+ bb[0] = (byte)c;
+ outSize = 1;
+ } else {
+ outSize = toEUC(c, bb);
+ if (outSize == -1) {
+ // Surrogates are checked only after the BMP lookup has
+ // failed. This keeps the common BMP path fast (about
+ // 10% faster) at the price of slower encoding of
+ // supplementary characters. Since supplementary
+ // characters are rare in practice, the trade-off is
+ // worthwhile.
+ if (Character.isHighSurrogate(c)) {
+ if ((sp + 1) == sl) // the low surrogate is not in the buffer yet
+ return CoderResult.UNDERFLOW;
+ if (!Character.isLowSurrogate(sa[sp + 1]))
+ return CoderResult.malformedForLength(1);
+ outSize = toEUC(c, sa[sp+1], bb);
+ inSize = 2;
+ } else if (Character.isLowSurrogate(c)) { // low surrogate without a preceding high surrogate
+ return CoderResult.malformedForLength(1);
+ }
+ }
+ }
+ if (outSize == -1)
+ return CoderResult.unmappableForLength(inSize);
+ if ( dl - dp < outSize) // check room first so a character is never written partially
+ return CoderResult.OVERFLOW;
+ for (int i = 0; i < outSize; i++)
+ da[dp++] = bb[i];
+ sp += inSize;
+ }
+ return CoderResult.UNDERFLOW; // all input consumed
+ } finally {
+ src.position(sp - src.arrayOffset()); // write the progress back to both buffers on every exit
+ dst.position(dp - dst.arrayOffset());
+ }
+ }
+
+ private CoderResult encodeBufferLoop(CharBuffer src, // encode path built on relative get()/put(); used when a backing array is missing
+ ByteBuffer dst)
+ {
+ int outSize;
+ int inSize;
+ int mark = src.position(); // position just past the last completely encoded character; restored in finally
+
+ try {
+ while (src.hasRemaining()) {
+ inSize = 1;
+ char c = src.get();
+ if (c < 0x80) { // ASCII: single byte, no table lookup
+ outSize = 1;
+ bb[0] = (byte)c;
+ } else {
+ outSize = toEUC(c, bb);
+ if (outSize == -1) {
+ if (Character.isHighSurrogate(c)) {
+ if (!src.hasRemaining()) // the low surrogate is not in the buffer yet
+ return CoderResult.UNDERFLOW;
+ char c2 = src.get();
+ if (!Character.isLowSurrogate(c2))
+ return CoderResult.malformedForLength(1);
+ outSize = toEUC(c, c2, bb);
+ inSize = 2;
+ } else if (Character.isLowSurrogate(c)) { // low surrogate without a preceding high surrogate
+ return CoderResult.malformedForLength(1);
+ }
+ }
+ }
+ if (outSize == -1)
+ return CoderResult.unmappableForLength(inSize);
+ if (dst.remaining() < outSize)
+ return CoderResult.OVERFLOW;
+ for (int i = 0; i < outSize; i++)
+ dst.put(bb[i]);
+ mark += inSize; // commit the chars just encoded
+ }
+ return CoderResult.UNDERFLOW; // all input consumed
+ } finally {
+ src.position(mark); // on an early return this rewinds over chars read but not committed
+ }
+ }
+
+ protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) // chooses between the array-based and the buffer-based encode loop
+ {
+ if (src.hasArray() && dst.hasArray())
+ return encodeArrayLoop(src, dst);
+ else
+ return encodeBufferLoop(src, dst);
+ }
+
+ static int encode(char hi, char low, byte[] bb) { // encodes a surrogate pair into bb; returns 4, or -1 when there is no mapping
+ int c = Character.toCodePoint(hi, low);
+ if ((c & 0xf0000) != 0x20000) // only code points whose bits 16-19 equal 2 (U+2xxxx) are looked up
+ return -1;
+ c -= 0x20000; // offset within that range
+ int index = c2bSuppIndex[c >> 8]; // first level: selected by the high byte of the offset
+ if (index == UNMAPPABLE_ENCODING)
+ return -1;
+ index = index + (c & 0xff); // second level: low byte selects the entry
+ int db = c2bSupp[index];
+ if (db == UNMAPPABLE_ENCODING)
+ return -1;
+ int p = (c2bPlane[index] >> 4) & 0xf; // plane nibble for supplementary entries is kept in the high 4 bits
+ bb[0] = (byte)SS2;
+ bb[1] = (byte)(0xa0 | p); // plane byte
+ bb[2] = (byte)(db >> 8);
+ bb[3] = (byte)db;
+ return 4;
+ }
+
+ static int encode(char c, byte[] bb) { // encodes one char into bb; returns 2 or 4, or -1 when there is no mapping
+ int index = c2bIndex[c >> 8]; // first level: selected by the high byte of the char
+ if (index == UNMAPPABLE_ENCODING)
+ return -1;
+ index = index + (c & 0xff); // second level: low byte selects the entry
+ int db = c2b[index];
+ if (db == UNMAPPABLE_ENCODING)
+ return -1;
+ int p = c2bPlane[index] & 0xf; // plane nibble for these entries is kept in the low 4 bits
+ if (p == 0) { // nibble 0: plain two-byte form without SS2
+ bb[0] = (byte)(db >> 8);
+ bb[1] = (byte)db;
+ return 2;
+ } else {
+ bb[0] = (byte)SS2;
+ bb[1] = (byte)(0xa0 | p); // plane byte
+ bb[2] = (byte)(db >> 8);
+ bb[3] = (byte)db;
+ return 4;
+ }
+ }
+
+ static final char[] c2b; // double-byte values for chars handled by encode(char, byte[])
+ static final char[] c2bIndex; // first-level index into c2b, keyed by the char's high byte
+ static final char[] c2bSupp; // double-byte values for code points handled by encode(char, char, byte[])
+ static final char[] c2bSuppIndex; // first-level index into c2bSupp
+ static final byte[] c2bPlane; // two plane nibbles per slot: low for c2b entries, high for c2bSupp entries
+ static { // fills the char-to-byte tables by walking the Decoder's byte-to-char tables
+ int b1Min = Decoder.b1Min;
+ int b1Max = Decoder.b1Max;
+ int b2Min = Decoder.b2Min;
+ int b2Max = Decoder.b2Max;
+ int dbSegSize = Decoder.dbSegSize;
+ String[] b2c = Decoder.b2c;
+ byte[] b2cIsSupp = Decoder.b2cIsSupp;
+
+ c2bIndex = EUC_TWMapping.c2bIndex;
+ c2bSuppIndex = EUC_TWMapping.c2bSuppIndex;
+ char[] c2b0 = new char[EUC_TWMapping.C2BSIZE];
+ char[] c2bSupp0 = new char[EUC_TWMapping.C2BSUPPSIZE];
+ byte[] c2bPlane0 = new byte[Math.max(EUC_TWMapping.C2BSIZE, // one array serves both tables, so it is sized for the larger
+ EUC_TWMapping.C2BSUPPSIZE)];
+
+ Arrays.fill(c2b0, (char)UNMAPPABLE_ENCODING); // every slot starts out unmappable
+ Arrays.fill(c2bSupp0, (char)UNMAPPABLE_ENCODING);
+
+ for (int p = 0; p < b2c.length; p++) { // one b2c string per plane index
+ String db = b2c[p];
+ /*
+ adjust the "plane" from 0..7 to 0, 2, 3, 4, 5, 6, 7, 0xf,
+ which helps balance between footprint (to save the plane
+ info in 4 bits) and runtime performance (to require only
+ one operation "0xa0 | plane" to encode the plane byte)
+ */
+ int plane = p;
+ if (plane == 7)
+ plane = 0xf;
+ else if (plane != 0)
+ plane = p + 1;
+
+ int off = 0; // running offset into db, advanced once per (b1, b2) pair
+ for (int b1 = b1Min; b1 <= b1Max; b1++) {
+ for (int b2 = b2Min; b2 <= b2Max; b2++) {
+ char c = db.charAt(off);
+ if (c != UNMAPPABLE_DECODING) {
+ if ((b2cIsSupp[off] & (1 << p)) != 0) { // bit p set: the entry goes into the supplementary table
+ int index = c2bSuppIndex[c >> 8] + (c&0xff);
+ c2bSupp0[index] = (char)((b1 << 8) + b2); // both bytes packed into one char
+ c2bPlane0[index] |= (byte)(plane << 4); // high nibble
+ } else {
+ int index = c2bIndex[c >> 8] + (c&0xff);
+ c2b0[index] = (char)((b1 << 8) + b2); // both bytes packed into one char
+ c2bPlane0[index] |= (byte)plane; // low nibble
+ }
+ }
+ off++;
+ }
+ }
+ }
+ c2b = c2b0;
+ c2bSupp = c2bSupp0;
+ c2bPlane = c2bPlane0;
+ }
+ }
+}
--- a/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/ExtendedCharsets.java.template Wed Feb 25 13:04:31 2015 -0800
@@ -222,16 +222,6 @@
}
}
- String osName = getProperty("os.name");
- if ("SunOS".equals(osName) || "Linux".equals(osName) || "AIX".equals(osName)
- || osName.contains("OS X")) {
- charset("x-COMPOUND_TEXT", "COMPOUND_TEXT",
- new String[] {
- "COMPOUND_TEXT", // JDK historical
- "x11-compound_text",
- "x-compound-text"
- });
- }
initialized = true;
}
--- a/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/ISO2022_CN_CNS.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/src/jdk.charsets/share/classes/sun/nio/cs/ext/ISO2022_CN_CNS.java Wed Feb 25 13:04:31 2015 -0800
@@ -35,6 +35,7 @@
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset;
+import sun.nio.cs.*;
public class ISO2022_CN_CNS extends ISO2022 implements HistoricallyNamedCharset
{
--- a/jdk/test/sun/nio/cs/X11CNS11643.java Wed Feb 25 19:36:29 2015 +0000
+++ b/jdk/test/sun/nio/cs/X11CNS11643.java Wed Feb 25 13:04:31 2015 -0800
@@ -24,7 +24,8 @@
import java.nio.CharBuffer;
import java.nio.ByteBuffer;
import java.nio.charset.*;
-import sun.nio.cs.ext.EUC_TW;
+import sun.nio.cs.*;
+import sun.nio.cs.ext.*;
public abstract class X11CNS11643 extends Charset {
private final int plane;