7040220: java/char_encoding Optimize UTF-8 charset for String.getBytes()/new String(byte[])
Summary: implement sun.nio.cs.ArrayEncoder/ArrayDecoder in utf8
Reviewed-by: alanb
--- a/jdk/src/share/classes/java/lang/StringCoding.java Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/java/lang/StringCoding.java Mon May 02 11:42:52 2011 -0700
@@ -222,13 +222,13 @@
off = 0;
}
}
+ cd.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
if (cd instanceof ArrayDecoder) {
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return safeTrim(ca, clen, cs, isTrusted);
} else {
- cd.onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE)
- .reset();
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
@@ -356,13 +356,13 @@
off = 0;
}
}
+ ce.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
if (ce instanceof ArrayEncoder) {
int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
return safeTrim(ba, blen, cs, isTrusted);
} else {
- ce.onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE)
- .reset();
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca, off, len);
try {
--- a/jdk/src/share/classes/java/util/zip/ZipCoder.java Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/java/util/zip/ZipCoder.java Mon May 02 11:42:52 2011 -0700
@@ -34,6 +34,8 @@
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
+import sun.nio.cs.ArrayDecoder;
+import sun.nio.cs.ArrayEncoder;
/**
* Utility class for zipfile name and comment decoding and encoding
@@ -47,6 +49,15 @@
char[] ca = new char[len];
if (len == 0)
return new String(ca);
+ // UTF-8 only for now. Other ArrayDecoder only handles
+ // CodingErrorAction.REPLACE mode. ZipCoder uses
+ // REPORT mode.
+ if (isUTF8 && cd instanceof ArrayDecoder) {
+ int clen = ((ArrayDecoder)cd).decode(ba, 0, length, ca);
+ if (clen == -1) // malformed
+ throw new IllegalArgumentException("MALFORMED");
+ return new String(ca, 0, clen);
+ }
ByteBuffer bb = ByteBuffer.wrap(ba, 0, length);
CharBuffer cb = CharBuffer.wrap(ca);
CoderResult cr = cd.decode(bb, cb, true);
@@ -69,6 +80,14 @@
byte[] ba = new byte[len];
if (len == 0)
return ba;
+ // UTF-8 only for now. Other ArrayEncoder only handles
+ // CodingErrorAction.REPLACE mode. ZipCoder uses REPORT mode.
+ if (isUTF8 && ce instanceof ArrayEncoder) {
+ int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
+ if (blen == -1) // malformed
+ throw new IllegalArgumentException("MALFORMED");
+ return Arrays.copyOf(ba, blen);
+ }
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca);
CoderResult cr = ce.encode(cb, bb, true);
@@ -85,7 +104,7 @@
// assume invoked only if "this" is not utf8
byte[] getBytesUTF8(String s) {
- if (isutf8)
+ if (isUTF8)
return getBytes(s);
if (utf8 == null)
utf8 = new ZipCoder(StandardCharset.UTF_8);
@@ -94,7 +113,7 @@
String toStringUTF8(byte[] ba, int len) {
- if (isutf8)
+ if (isUTF8)
return toString(ba, len);
if (utf8 == null)
utf8 = new ZipCoder(StandardCharset.UTF_8);
@@ -102,18 +121,18 @@
}
boolean isUTF8() {
- return isutf8;
+ return isUTF8;
}
private Charset cs;
private CharsetDecoder dec;
private CharsetEncoder enc;
- private boolean isutf8;
+ private boolean isUTF8;
private ZipCoder utf8;
private ZipCoder(Charset cs) {
this.cs = cs;
- this.isutf8 = cs.name().equals(StandardCharset.UTF_8.name());
+ this.isUTF8 = cs.name().equals(StandardCharset.UTF_8.name());
}
static ZipCoder get(Charset charset) {
--- a/jdk/src/share/classes/sun/nio/cs/UTF_8.java Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/UTF_8.java Mon May 02 11:42:52 2011 -0700
@@ -32,6 +32,7 @@
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
/* Legal UTF-8 Byte Sequences
*
@@ -77,7 +78,8 @@
dst.position(dp - dst.arrayOffset());
}
- private static class Decoder extends CharsetDecoder {
+ private static class Decoder extends CharsetDecoder
+ implements ArrayDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@@ -353,9 +355,132 @@
else
return decodeBufferLoop(src, dst);
}
+
+ private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
+ {
+ if (bb == null)
+ bb = ByteBuffer.wrap(ba);
+ bb.position(sp);
+ return bb;
+ }
+
+ // returns -1 if the input contains malformed byte(s) and the
+ // "action" for malformed input is not REPLACE.
+ public int decode(byte[] sa, int sp, int len, char[] da) {
+ final int sl = sp + len;
+ int dp = 0;
+ int dlASCII = Math.min(len, da.length);
+ ByteBuffer bb = null; // only necessary if malformed
+
+ // ASCII only optimized loop
+ while (dp < dlASCII && sa[sp] >= 0)
+ da[dp++] = (char) sa[sp++];
+
+ while (sp < sl) {
+ int b1 = sa[sp++];
+ if (b1 >= 0) {
+ // 1 byte, 7 bits: 0xxxxxxx
+ da[dp++] = (char) b1;
+ } else if ((b1 >> 5) == -2) {
+ // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
+ if (sp < sl) {
+ int b2 = sa[sp++];
+ if (isMalformed2(b1, b2)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp--; // malformedN(bb, 2) always returns 1
+ } else {
+ da[dp++] = (char) (((b1 << 6) ^ b2)^
+ (((byte) 0xC0 << 6) ^
+ ((byte) 0x80 << 0)));
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else if ((b1 >> 4) == -2) {
+ // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
+ if (sp + 1 < sl) {
+ int b2 = sa[sp++];
+ int b3 = sa[sp++];
+ if (isMalformed3(b1, b2, b3)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp -=3;
+ bb = getByteBuffer(bb, sa, sp);
+ sp += malformedN(bb, 3).length();
+ } else {
+ da[dp++] = (char)((b1 << 12) ^
+ (b2 << 6) ^
+ (b3 ^
+ (((byte) 0xE0 << 12) ^
+ ((byte) 0x80 << 6) ^
+ ((byte) 0x80 << 0))));
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else if ((b1 >> 3) == -2) {
+ // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ if (sp + 2 < sl) {
+ int b2 = sa[sp++];
+ int b3 = sa[sp++];
+ int b4 = sa[sp++];
+ int uc = ((b1 << 18) ^
+ (b2 << 12) ^
+ (b3 << 6) ^
+ (b4 ^
+ (((byte) 0xF0 << 18) ^
+ ((byte) 0x80 << 12) ^
+ ((byte) 0x80 << 6) ^
+ ((byte) 0x80 << 0))));
+ if (isMalformed4(b2, b3, b4) ||
+ // shortest form check
+ !Character.isSupplementaryCodePoint(uc)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp -= 4;
+ bb = getByteBuffer(bb, sa, sp);
+ sp += malformedN(bb, 4).length();
+ } else {
+ da[dp++] = Character.highSurrogate(uc);
+ da[dp++] = Character.lowSurrogate(uc);
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp--;
+ bb = getByteBuffer(bb, sa, sp);
+ CoderResult cr = malformedN(bb, 1);
+ if (!cr.isError()) {
+ // leading byte of a 5- or 6-byte sequence, but not enough
+ // bytes in buffer to check. Consume the rest as malformed.
+ return dp;
+ }
+ sp += cr.length();
+ }
+ }
+ return dp;
+ }
}
- private static class Encoder extends CharsetEncoder {
+ private static class Encoder extends CharsetEncoder
+ implements ArrayEncoder {
private Encoder(Charset cs) {
super(cs, 1.1f, 3.0f);
@@ -495,5 +620,50 @@
else
return encodeBufferLoop(src, dst);
}
+
+ // returns -1 if the input contains malformed char(s) and the
+ // "action" for malformed input is not REPLACE.
+ public int encode(char[] sa, int sp, int len, byte[] da) {
+ int sl = sp + len;
+ int dp = 0;
+ int dlASCII = dp + Math.min(len, da.length);
+
+ // ASCII only optimized loop
+ while (dp < dlASCII && sa[sp] < '\u0080')
+ da[dp++] = (byte) sa[sp++];
+
+ while (sp < sl) {
+ char c = sa[sp++];
+ if (c < 0x80) {
+ // Have at most seven bits
+ da[dp++] = (byte)c;
+ } else if (c < 0x800) {
+ // 2 bytes, 11 bits
+ da[dp++] = (byte)(0xc0 | (c >> 6));
+ da[dp++] = (byte)(0x80 | (c & 0x3f));
+ } else if (Character.isSurrogate(c)) {
+ if (sgp == null)
+ sgp = new Surrogate.Parser();
+ int uc = sgp.parse(c, sa, sp - 1, sl);
+ if (uc < 0) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement()[0];
+ } else {
+ da[dp++] = (byte)(0xf0 | ((uc >> 18)));
+ da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+ da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
+ da[dp++] = (byte)(0x80 | (uc & 0x3f));
+ sp++; // 2 chars
+ }
+ } else {
+ // 3 bytes, 16 bits
+ da[dp++] = (byte)(0xe0 | ((c >> 12)));
+ da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
+ da[dp++] = (byte)(0x80 | (c & 0x3f));
+ }
+ }
+ return dp;
+ }
}
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/StrCodingBenchmarkUTF8.java Mon May 02 11:42:52 2011 -0700
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class StrCodingBenchmarkUTF8 {
+
+ public static void main(String[] args) throws Throwable {
+
+ final int itrs = Integer.getInteger("iterations", 100000);
+ final int size = 2048;
+ final int subsize = Integer.getInteger("subsize", 128);
+ final Random rnd = new Random();
+ final int maxchar = 0x7f;
+
+ Charset charset = Charset.forName("UTF-8");
+ final String csn = charset.name();
+ final Charset cs = charset;
+
+ int[] starts = new int[] { 0, 0x80, 0x800, 0x10000};
+ for (int nb = 1; nb <= 4; nb++) {
+
+ final CharsetEncoder enc = cs.newEncoder();
+
+ char[] cc = new char[size];
+ int i = 0;
+ while (i < size - 3) {
+ i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i);
+ }
+
+ final String string = new String(cc);
+ final byte[] bytes = string.getBytes(cs);
+
+ System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb);
+ int sz = 12;
+ while (sz < size) {
+ System.out.printf(" [len=%d]%n", sz);
+ final byte[] bs = Arrays.copyOf(bytes, sz);
+ final String str = new String(bs, csn);
+ StrCodingBenchmark.Job[] jobs = {
+ new StrCodingBenchmark.Job("String decode: csn") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ new String(bs, csn);
+ }},
+
+ new StrCodingBenchmark.Job("String decode: cs") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ new String(bs, cs);
+ }},
+
+ new StrCodingBenchmark.Job("String encode: csn") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ str.getBytes(csn);
+ }},
+
+ new StrCodingBenchmark.Job("String encode: cs") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ str.getBytes(cs);
+ }},
+ };
+ StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs));
+ sz <<= 1;
+ }
+ }
+ }
+}
--- a/jdk/test/sun/nio/cs/TestStringCoding.java Mon May 02 10:14:27 2011 -0700
+++ b/jdk/test/sun/nio/cs/TestStringCoding.java Mon May 02 11:42:52 2011 -0700
@@ -24,7 +24,7 @@
*/
/* @test
- @bug 6636323 6636319
+ @bug 6636323 6636319 7040220
@summary Test if StringCoding and NIO result have the same de/encoding result
* @run main/othervm/timeout=2000 TestStringCoding
*/
@@ -111,6 +111,8 @@
//encode unmappable surrogates
if (enc instanceof sun.nio.cs.ArrayEncoder &&
cs.contains(Charset.forName("ASCII"))) {
+ if (cs.name().equals("UTF-8")) // utf8 handles surrogates
+ return;
enc.replaceWith(new byte[] { (byte)'A'});
sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/TestStringCodingUTF8.java Mon May 02 11:42:52 2011 -0700
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+ @bug 7040220
+ @summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8
+ * @run main/othervm/timeout=2000 TestStringCodingUTF8
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestStringCodingUTF8 {
+ public static void main(String[] args) throws Throwable {
+ test();
+ // security manager on
+ System.setSecurityManager(new PermissiveSecurityManger());
+ test();
+ }
+
+ static void test() throws Throwable {
+ Charset cs = Charset.forName("UTF-8");
+ char[] bmp = new char[0x10000];
+ for (int i = 0; i < 0x10000; i++) {
+ bmp[i] = (char)i;
+ }
+ test(cs, bmp, 0, bmp.length);
+
+ ArrayList<Integer> list = new ArrayList<>(0x20000);
+ for (int i = 0; i < 0x20000; i++) {
+ list.add(i, i);
+ }
+ Collections.shuffle(list);
+ int j = 0;
+ char[] bmpsupp = new char[0x30000];
+ for (int i = 0; i < 0x20000; i++) {
+ j += Character.toChars(list.get(i), bmpsupp, j);
+ }
+ assert (j == bmpsupp.length);
+ test(cs, bmpsupp, 0, bmpsupp.length);
+
+ // random "off" and "len" on shuffled data
+ Random rnd = new Random();
+ int maxlen = 1000;
+ int itr = 5000;
+ for (int i = 0; i < itr; i++) {
+ int off = rnd.nextInt(bmpsupp.length - maxlen);
+ int len = rnd.nextInt(maxlen);
+ test(cs, bmpsupp, off, len);
+ }
+
+ // random-length byte arrays, to test the edge/corner cases
+ for (int i = 0; i < itr; i++) {
+ byte[] ba = new byte[rnd.nextInt(maxlen)];
+ rnd.nextBytes(ba);
+ //new String(csn);
+ if (!new String(ba, cs.name()).equals(
+ new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(csn) failed");
+ //new String(cs);
+ if (!new String(ba, cs).equals(
+ new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(cs) failed");
+ }
+ System.out.println("done!");
+ }
+
+ static void test(Charset cs, char[] ca, int off, int len) throws Throwable {
+ String str = new String(ca, off, len);
+ byte[] ba = encode(cs, ca, off, len);
+
+ //getBytes(csn);
+ byte[] baStr = str.getBytes(cs.name());
+ if (!Arrays.equals(ba, baStr))
+ throw new RuntimeException("getBytes(csn) failed");
+
+ //getBytes(cs);
+ baStr = str.getBytes(cs);
+ if (!Arrays.equals(ba, baStr))
+ throw new RuntimeException("getBytes(cs) failed");
+
+ //new String(csn);
+ if (!new String(ba, cs.name()).equals(new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(csn) failed");
+
+ //new String(cs);
+ if (!new String(ba, cs).equals(new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(cs) failed");
+ }
+
+ // copy/paste of the StringCoding.decode()
+ static char[] decode(Charset cs, byte[] ba, int off, int len) {
+ CharsetDecoder cd = cs.newDecoder();
+ int en = (int)(len * cd.maxCharsPerByte());
+ char[] ca = new char[en];
+ if (len == 0)
+ return ca;
+ cd.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+
+ ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+ CharBuffer cb = CharBuffer.wrap(ca);
+ try {
+ CoderResult cr = cd.decode(bb, cb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = cd.flush(cb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ca, cb.position());
+ }
+
+ // copy/paste of the StringCoding.encode()
+ static byte[] encode(Charset cs, char[] ca, int off, int len) {
+ CharsetEncoder ce = cs.newEncoder();
+ int en = (int)(len * ce.maxBytesPerChar());
+ byte[] ba = new byte[en];
+ if (len == 0)
+ return ba;
+ ce.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+ ByteBuffer bb = ByteBuffer.wrap(ba);
+ CharBuffer cb = CharBuffer.wrap(ca, off, len);
+ try {
+ CoderResult cr = ce.encode(cb, bb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = ce.flush(bb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ba, bb.position());
+ }
+
+ static class PermissiveSecurityManger extends SecurityManager {
+ @Override public void checkPermission(java.security.Permission p) {}
+ }
+}
--- a/jdk/test/sun/nio/cs/TestUTF8.java Mon May 02 10:14:27 2011 -0700
+++ b/jdk/test/sun/nio/cs/TestUTF8.java Mon May 02 11:42:52 2011 -0700
@@ -23,7 +23,7 @@
/*
* @test
- * @bug 4486841
+ * @bug 4486841 7040220
* @summary Test UTF-8 charset
*/
@@ -70,6 +70,32 @@
return dec.decode(bbf, cbf, true);
}
+ // copy/paste of the StringCoding.decode()
+ static char[] decode(Charset cs, byte[] ba, int off, int len) {
+ CharsetDecoder cd = cs.newDecoder();
+ int en = (int)(len * cd.maxCharsPerByte());
+ char[] ca = new char[en];
+ if (len == 0)
+ return ca;
+ cd.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+
+ ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+ CharBuffer cb = CharBuffer.wrap(ca);
+ try {
+ CoderResult cr = cd.decode(bb, cb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = cd.flush(cb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ca, cb.position());
+ }
+
static byte[] encode(char[] cc, String csn, boolean testDirect)
throws Exception {
ByteBuffer bbf;
@@ -142,7 +168,14 @@
bb = encode(cc, csn, true);
ccO = decode(bb, csn, true);
if (!Arrays.equals(cc, ccO)) {
- System.out.printf(" (direct) failed");
+ System.out.print(" (direct) failed");
+ }
+ // String.getBytes()/toCharArray() go through the ArrayDecoder/ArrayEncoder path
+ if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
+ System.out.printf(" String.getBytes() failed");
+ }
+ if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+ System.out.printf(" String.toCharArray() failed");
}
System.out.println();
}
@@ -168,6 +201,12 @@
if (!Arrays.equals(cc, ccO)) {
System.out.printf(" decoding(direct) failed%n");
}
+ // new String(bb, csn).getBytes(csn) will not return
+ // the 6-byte surrogate form as in bb, so only test
+ // toCharArray() here.
+ if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+ System.out.printf(" String.toCharArray() failed");
+ }
}
static void compare(String csn1, String csn2) throws Exception {
@@ -274,6 +313,7 @@
static void checkMalformed(String csn) throws Exception {
boolean failed = false;
System.out.printf(" Check malformed <%s>...%n", csn);
+ Charset cs = Charset.forName(csn);
for (boolean direct: new boolean[] {false, true}) {
for (byte[] bins : malformed) {
int mlen = bins[0];
@@ -285,10 +325,15 @@
ashex += Integer.toBinaryString((int)bin[i] & 0xff);
}
if (!cr.isMalformed()) {
- System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
+ System.out.printf(" FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
failed = true;
} else if (cr.length() != mlen) {
- System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
+ System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
+ failed = true;
+ }
+ if (!Arrays.equals(decode(cs, bin, 0, bin.length),
+ new String(bin, csn).toCharArray())) {
+ System.out.printf(" FAIL(new String(bb, %s)) failed%n", csn);
failed = true;
}
}