7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
author sherman
Mon, 02 May 2011 11:42:52 -0700
changeset 9547 454881baaca0
parent 9546 7304634c0ad7
child 9548 225dbdc1cb74
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[]) Summary: implement sun.nio.cs.ArrayEn/Decoder in utf8 Reviewed-by: alanb
jdk/src/share/classes/java/lang/StringCoding.java
jdk/src/share/classes/java/util/zip/ZipCoder.java
jdk/src/share/classes/sun/nio/cs/UTF_8.java
jdk/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
jdk/test/sun/nio/cs/TestStringCoding.java
jdk/test/sun/nio/cs/TestStringCodingUTF8.java
jdk/test/sun/nio/cs/TestUTF8.java
--- a/jdk/src/share/classes/java/lang/StringCoding.java	Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/java/lang/StringCoding.java	Mon May 02 11:42:52 2011 -0700
@@ -222,13 +222,13 @@
                 off = 0;
             }
         }
+        cd.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
         if (cd instanceof ArrayDecoder) {
             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
             return safeTrim(ca, clen, cs, isTrusted);
         } else {
-            cd.onMalformedInput(CodingErrorAction.REPLACE)
-              .onUnmappableCharacter(CodingErrorAction.REPLACE)
-              .reset();
             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
             CharBuffer cb = CharBuffer.wrap(ca);
             try {
@@ -356,13 +356,13 @@
                 off = 0;
             }
         }
+        ce.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
         if (ce instanceof ArrayEncoder) {
             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
             return safeTrim(ba, blen, cs, isTrusted);
         } else {
-            ce.onMalformedInput(CodingErrorAction.REPLACE)
-              .onUnmappableCharacter(CodingErrorAction.REPLACE)
-              .reset();
             ByteBuffer bb = ByteBuffer.wrap(ba);
             CharBuffer cb = CharBuffer.wrap(ca, off, len);
             try {
--- a/jdk/src/share/classes/java/util/zip/ZipCoder.java	Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/java/util/zip/ZipCoder.java	Mon May 02 11:42:52 2011 -0700
@@ -34,6 +34,8 @@
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Arrays;
+import sun.nio.cs.ArrayDecoder;
+import sun.nio.cs.ArrayEncoder;
 
 /**
  * Utility class for zipfile name and comment decoding and encoding
@@ -47,6 +49,15 @@
         char[] ca = new char[len];
         if (len == 0)
             return new String(ca);
+        // UTF-8 only for now. Other ArrayDecoders only handle
+        // CodingErrorAction.REPLACE mode. ZipCoder uses
+        // REPORT mode.
+        if (isUTF8 && cd instanceof ArrayDecoder) {
+            int clen = ((ArrayDecoder)cd).decode(ba, 0, length, ca);
+            if (clen == -1)    // malformed
+                throw new IllegalArgumentException("MALFORMED");
+            return new String(ca, 0, clen);
+        }
         ByteBuffer bb = ByteBuffer.wrap(ba, 0, length);
         CharBuffer cb = CharBuffer.wrap(ca);
         CoderResult cr = cd.decode(bb, cb, true);
@@ -69,6 +80,14 @@
         byte[] ba = new byte[len];
         if (len == 0)
             return ba;
+        // UTF-8 only for now. Other ArrayEncoders only handle
+        // CodingErrorAction.REPLACE mode.
+        if (isUTF8 && ce instanceof ArrayEncoder) {
+            int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
+            if (blen == -1)    // malformed
+                throw new IllegalArgumentException("MALFORMED");
+            return Arrays.copyOf(ba, blen);
+        }
         ByteBuffer bb = ByteBuffer.wrap(ba);
         CharBuffer cb = CharBuffer.wrap(ca);
         CoderResult cr = ce.encode(cb, bb, true);
@@ -85,7 +104,7 @@
 
     // assume invoked only if "this" is not utf8
     byte[] getBytesUTF8(String s) {
-        if (isutf8)
+        if (isUTF8)
             return getBytes(s);
         if (utf8 == null)
             utf8 = new ZipCoder(StandardCharset.UTF_8);
@@ -94,7 +113,7 @@
 
 
     String toStringUTF8(byte[] ba, int len) {
-        if (isutf8)
+        if (isUTF8)
             return toString(ba, len);
         if (utf8 == null)
             utf8 = new ZipCoder(StandardCharset.UTF_8);
@@ -102,18 +121,18 @@
     }
 
     boolean isUTF8() {
-        return isutf8;
+        return isUTF8;
     }
 
     private Charset cs;
     private CharsetDecoder dec;
     private CharsetEncoder enc;
-    private boolean isutf8;
+    private boolean isUTF8;
     private ZipCoder utf8;
 
     private ZipCoder(Charset cs) {
         this.cs = cs;
-        this.isutf8 = cs.name().equals(StandardCharset.UTF_8.name());
+        this.isUTF8 = cs.name().equals(StandardCharset.UTF_8.name());
     }
 
     static ZipCoder get(Charset charset) {
--- a/jdk/src/share/classes/sun/nio/cs/UTF_8.java	Mon May 02 10:14:27 2011 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/UTF_8.java	Mon May 02 11:42:52 2011 -0700
@@ -32,6 +32,7 @@
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
 
 /* Legal UTF-8 Byte Sequences
  *
@@ -77,7 +78,8 @@
         dst.position(dp - dst.arrayOffset());
     }
 
-    private static class Decoder extends CharsetDecoder {
+    private static class Decoder extends CharsetDecoder
+                                 implements ArrayDecoder {
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -353,9 +355,132 @@
             else
                 return decodeBufferLoop(src, dst);
         }
+
+        private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
+        {
+            if (bb == null)
+                bb = ByteBuffer.wrap(ba);
+            bb.position(sp);
+            return bb;
+        }
+
+        // returns -1 if there is malformed byte(s) and the
+        // "action" for malformed input is not REPLACE.
+        public int decode(byte[] sa, int sp, int len, char[] da) {
+            final int sl = sp + len;
+            int dp = 0;
+            int dlASCII = Math.min(len, da.length);
+            ByteBuffer bb = null;  // only necessary if malformed
+
+            // ASCII only optimized loop
+            while (dp < dlASCII && sa[sp] >= 0)
+                da[dp++] = (char) sa[sp++];
+
+            while (sp < sl) {
+                int b1 = sa[sp++];
+                if (b1 >= 0) {
+                    // 1 byte, 7 bits: 0xxxxxxx
+                    da[dp++] = (char) b1;
+                } else if ((b1 >> 5) == -2) {
+                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
+                    if (sp < sl) {
+                        int b2 = sa[sp++];
+                        if (isMalformed2(b1, b2)) {
+                            if (malformedInputAction() != CodingErrorAction.REPLACE)
+                                return -1;
+                            da[dp++] = replacement().charAt(0);
+                            sp--;            // malformedN(bb, 2) always returns 1
+                        } else {
+                            da[dp++] = (char) (((b1 << 6) ^ b2)^
+                                           (((byte) 0xC0 << 6) ^
+                                            ((byte) 0x80 << 0)));
+                        }
+                        continue;
+                    }
+                    if (malformedInputAction() != CodingErrorAction.REPLACE)
+                        return -1;
+                    da[dp++] = replacement().charAt(0);
+                    return dp;
+                } else if ((b1 >> 4) == -2) {
+                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
+                    if (sp + 1 < sl) {
+                        int b2 = sa[sp++];
+                        int b3 = sa[sp++];
+                        if (isMalformed3(b1, b2, b3)) {
+                            if (malformedInputAction() != CodingErrorAction.REPLACE)
+                                return -1;
+                            da[dp++] = replacement().charAt(0);
+                            sp -=3;
+                            bb = getByteBuffer(bb, sa, sp);
+                            sp += malformedN(bb, 3).length();
+                        } else {
+                            da[dp++] = (char)((b1 << 12) ^
+                                              (b2 <<  6) ^
+                                              (b3 ^
+                                              (((byte) 0xE0 << 12) ^
+                                              ((byte) 0x80 <<  6) ^
+                                              ((byte) 0x80 <<  0))));
+                        }
+                        continue;
+                    }
+                    if (malformedInputAction() != CodingErrorAction.REPLACE)
+                        return -1;
+                    da[dp++] = replacement().charAt(0);
+                    return dp;
+                } else if ((b1 >> 3) == -2) {
+                    // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    if (sp + 2 < sl) {
+                        int b2 = sa[sp++];
+                        int b3 = sa[sp++];
+                        int b4 = sa[sp++];
+                        int uc = ((b1 << 18) ^
+                                  (b2 << 12) ^
+                                  (b3 <<  6) ^
+                                  (b4 ^
+                                   (((byte) 0xF0 << 18) ^
+                                   ((byte) 0x80 << 12) ^
+                                   ((byte) 0x80 <<  6) ^
+                                   ((byte) 0x80 <<  0))));
+                        if (isMalformed4(b2, b3, b4) ||
+                            // shortest form check
+                            !Character.isSupplementaryCodePoint(uc)) {
+                            if (malformedInputAction() != CodingErrorAction.REPLACE)
+                                return -1;
+                            da[dp++] = replacement().charAt(0);
+                            sp -= 4;
+                            bb = getByteBuffer(bb, sa, sp);
+                            sp += malformedN(bb, 4).length();
+                        } else {
+                            da[dp++] = Character.highSurrogate(uc);
+                            da[dp++] = Character.lowSurrogate(uc);
+                        }
+                        continue;
+                    }
+                    if (malformedInputAction() != CodingErrorAction.REPLACE)
+                        return -1;
+                    da[dp++] = replacement().charAt(0);
+                    return dp;
+                } else {
+                    if (malformedInputAction() != CodingErrorAction.REPLACE)
+                        return -1;
+                    da[dp++] = replacement().charAt(0);
+                    sp--;
+                    bb = getByteBuffer(bb, sa, sp);
+                    CoderResult cr = malformedN(bb, 1);
+                    if (!cr.isError()) {
+                        // lead byte of a 5- or 6-byte form, but too few bytes
+                        // are left to check it. Consume the rest as malformed.
+                        return dp;
+                    }
+                    sp +=  cr.length();
+                }
+            }
+            return dp;
+        }
     }
 
-    private static class Encoder extends CharsetEncoder {
+    private static class Encoder extends CharsetEncoder
+                                 implements ArrayEncoder {
 
         private Encoder(Charset cs) {
             super(cs, 1.1f, 3.0f);
@@ -495,5 +620,50 @@
             else
                 return encodeBufferLoop(src, dst);
         }
+
+        // returns -1 if there is malformed char(s) and the
+        // "action" for malformed input is not REPLACE.
+        public int encode(char[] sa, int sp, int len, byte[] da) {
+            int sl = sp + len;
+            int dp = 0;
+            int dlASCII = dp + Math.min(len, da.length);
+
+            // ASCII only optimized loop
+            while (dp < dlASCII && sa[sp] < '\u0080')
+                da[dp++] = (byte) sa[sp++];
+
+            while (sp < sl) {
+                char c = sa[sp++];
+                if (c < 0x80) {
+                    // Have at most seven bits
+                    da[dp++] = (byte)c;
+                } else if (c < 0x800) {
+                    // 2 bytes, 11 bits
+                    da[dp++] = (byte)(0xc0 | (c >> 6));
+                    da[dp++] = (byte)(0x80 | (c & 0x3f));
+                } else if (Character.isSurrogate(c)) {
+                    if (sgp == null)
+                        sgp = new Surrogate.Parser();
+                    int uc = sgp.parse(c, sa, sp - 1, sl);
+                    if (uc < 0) {
+                        if (malformedInputAction() != CodingErrorAction.REPLACE)
+                            return -1;
+                        da[dp++] = replacement()[0];
+                    } else {
+                        da[dp++] = (byte)(0xf0 | ((uc >> 18)));
+                        da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+                        da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
+                        da[dp++] = (byte)(0x80 | (uc & 0x3f));
+                        sp++;  // 2 chars
+                    }
+                } else {
+                    // 3 bytes, 16 bits
+                    da[dp++] = (byte)(0xe0 | ((c >> 12)));
+                    da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
+                    da[dp++] = (byte)(0x80 | (c & 0x3f));
+                }
+            }
+            return dp;
+        }
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/StrCodingBenchmarkUTF8.java	Mon May 02 11:42:52 2011 -0700
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class StrCodingBenchmarkUTF8 {
+
+    public static void main(String[] args) throws Throwable {
+
+        final int itrs = Integer.getInteger("iterations", 100000);
+        final int size = 2048;
+        final int subsize    = Integer.getInteger("subsize", 128);
+        final Random rnd = new Random();
+        final int maxchar    = 0x7f;
+
+        Charset charset = Charset.forName("UTF-8");
+        final String csn = charset.name();
+        final Charset cs = charset;
+
+        int[] starts = new int[] { 0, 0x80, 0x800, 0x10000};
+        for (int nb = 1; nb <= 4; nb++) {
+
+            final CharsetEncoder enc = cs.newEncoder();
+
+            char[] cc = new char[size];
+            int i = 0;
+            while (i < size - 3) {
+                i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i);
+            }
+
+            final String string = new String(cc);
+            final byte[] bytes  = string.getBytes(cs);
+
+            System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb);
+            int sz = 12;
+            while (sz < size) {
+                System.out.printf("   [len=%d]%n", sz);
+                final byte[] bs  = Arrays.copyOf(bytes, sz);
+                final String str = new String(bs, csn);
+                StrCodingBenchmark.Job[] jobs = {
+                    new StrCodingBenchmark.Job("String decode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, csn);
+                    }},
+
+                    new StrCodingBenchmark.Job("String decode: cs") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, cs);
+                    }},
+
+                    new StrCodingBenchmark.Job("String encode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                                str.getBytes(csn);
+                    }},
+
+                    new StrCodingBenchmark.Job("String encode: cs") {
+                    public void work() throws Throwable {
+                         for (int i = 0; i < itrs; i++)
+                          str.getBytes(cs);
+                    }},
+                };
+                StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs));
+                sz <<= 1;
+            }
+        }
+    }
+}
--- a/jdk/test/sun/nio/cs/TestStringCoding.java	Mon May 02 10:14:27 2011 -0700
+++ b/jdk/test/sun/nio/cs/TestStringCoding.java	Mon May 02 11:42:52 2011 -0700
@@ -24,7 +24,7 @@
  */
 
 /* @test
-   @bug 6636323 6636319
+   @bug 6636323 6636319 7040220
    @summary Test if StringCoding and NIO result have the same de/encoding result
  * @run main/othervm/timeout=2000 TestStringCoding
  */
@@ -111,6 +111,8 @@
         //encode unmappable surrogates
         if (enc instanceof sun.nio.cs.ArrayEncoder &&
             cs.contains(Charset.forName("ASCII"))) {
+            if (cs.name().equals("UTF-8"))    // utf8 handles surrogates
+                return;
             enc.replaceWith(new byte[] { (byte)'A'});
             sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/TestStringCodingUTF8.java	Mon May 02 11:42:52 2011 -0700
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+   @bug 7040220
+   @summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8
+ * @run main/othervm/timeout=2000 TestStringCodingUTF8
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestStringCodingUTF8 {
+    public static void main(String[] args) throws Throwable {
+        test();
+        // security manager on
+        System.setSecurityManager(new PermissiveSecurityManger());
+        test();
+    }
+
+    static void test() throws Throwable {
+        Charset cs = Charset.forName("UTF-8");
+        char[] bmp = new char[0x10000];
+        for (int i = 0; i < 0x10000; i++) {
+            bmp[i] = (char)i;
+        }
+        test(cs, bmp, 0, bmp.length);
+
+        ArrayList<Integer> list = new ArrayList<>(0x20000);
+        for (int i = 0; i < 0x20000; i++) {
+            list.add(i, i);
+        }
+        Collections.shuffle(list);
+        int j = 0;
+        char[] bmpsupp = new char[0x30000];
+        for (int i = 0; i < 0x20000; i++) {
+            j += Character.toChars(list.get(i), bmpsupp, j);
+        }
+        assert (j == bmpsupp.length);
+        test(cs, bmpsupp, 0, bmpsupp.length);
+
+        // random "off" and "len" on shuffled data
+        Random rnd = new Random();
+        int maxlen = 1000;
+        int itr = 5000;
+        for (int i = 0; i < itr; i++) {
+            int off = rnd.nextInt(bmpsupp.length - maxlen);
+            int len = rnd.nextInt(maxlen);
+            test(cs, bmpsupp, off, len);
+        }
+
+        // random-length byte arrays, to test the edge/corner cases
+        for (int i = 0; i < itr; i++) {
+            byte[] ba = new byte[rnd.nextInt(maxlen)];
+            rnd.nextBytes(ba);
+            //new String(csn);
+            if (!new String(ba, cs.name()).equals(
+                 new String(decode(cs, ba, 0, ba.length))))
+                throw new RuntimeException("new String(csn) failed");
+            //new String(cs);
+            if (!new String(ba, cs).equals(
+                 new String(decode(cs, ba, 0, ba.length))))
+                throw new RuntimeException("new String(cs) failed");
+        }
+        System.out.println("done!");
+    }
+
+    static void test(Charset cs, char[] ca, int off, int len) throws Throwable {
+        String str = new String(ca, off, len);
+        byte[] ba = encode(cs, ca, off, len);
+
+        //getBytes(csn);
+        byte[] baStr = str.getBytes(cs.name());
+        if (!Arrays.equals(ba, baStr))
+            throw new RuntimeException("getBytes(csn) failed");
+
+        //getBytes(cs);
+        baStr = str.getBytes(cs);
+        if (!Arrays.equals(ba, baStr))
+            throw new RuntimeException("getBytes(cs) failed");
+
+        //new String(csn);
+        if (!new String(ba, cs.name()).equals(new String(decode(cs, ba, 0, ba.length))))
+            throw new RuntimeException("new String(csn) failed");
+
+        //new String(cs);
+        if (!new String(ba, cs).equals(new String(decode(cs, ba, 0, ba.length))))
+            throw new RuntimeException("new String(cs) failed");
+    }
+
+    // copy/paste of the StringCoding.decode()
+    static char[] decode(Charset cs, byte[] ba, int off, int len) {
+        CharsetDecoder cd = cs.newDecoder();
+        int en = (int)(len * cd.maxCharsPerByte());
+        char[] ca = new char[en];
+        if (len == 0)
+            return ca;
+        cd.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+
+        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+        CharBuffer cb = CharBuffer.wrap(ca);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ca, cb.position());
+    }
+
+    // copy/paste of the StringCoding.encode()
+    static byte[] encode(Charset cs, char[] ca, int off, int len) {
+        CharsetEncoder ce = cs.newEncoder();
+        int en = (int)(len * ce.maxBytesPerChar());
+        byte[] ba = new byte[en];
+        if (len == 0)
+            return ba;
+        ce.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, off, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ba, bb.position());
+    }
+
+    static class PermissiveSecurityManger extends SecurityManager {
+        @Override public void checkPermission(java.security.Permission p) {}
+    }
+}
--- a/jdk/test/sun/nio/cs/TestUTF8.java	Mon May 02 10:14:27 2011 -0700
+++ b/jdk/test/sun/nio/cs/TestUTF8.java	Mon May 02 11:42:52 2011 -0700
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 4486841
+ * @bug 4486841 7040220
  * @summary Test UTF-8 charset
  */
 
@@ -70,6 +70,32 @@
         return dec.decode(bbf, cbf, true);
     }
 
+    // copy/paste of the StringCoding.decode()
+    static char[] decode(Charset cs, byte[] ba, int off, int len) {
+        CharsetDecoder cd = cs.newDecoder();
+        int en = (int)(len * cd.maxCharsPerByte());
+        char[] ca = new char[en];
+        if (len == 0)
+            return ca;
+        cd.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+
+        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+        CharBuffer cb = CharBuffer.wrap(ca);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ca, cb.position());
+    }
+
     static byte[] encode(char[] cc, String csn, boolean testDirect)
         throws Exception {
         ByteBuffer bbf;
@@ -142,7 +168,14 @@
         bb = encode(cc, csn, true);
         ccO = decode(bb, csn, true);
         if (!Arrays.equals(cc, ccO)) {
-            System.out.printf("    (direct) failed");
+            System.out.print("    (direct) failed");
+        }
+        // String.getBytes()/toCharArray() go through the ArrayDe/Encoder path
+        if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
+            System.out.printf("    String.getBytes() failed");
+        }
+        if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+            System.out.printf("    String.toCharArray() failed");
         }
         System.out.println();
     }
@@ -168,6 +201,12 @@
         if (!Arrays.equals(cc, ccO)) {
             System.out.printf("    decoding(direct) failed%n");
         }
+        // new String(bb, csn).getBytes(csn) will not return
+        // the 6 bytes surrogates as in bb, so only test
+        // toCharArray() here.
+        if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+            System.out.printf("    String.toCharArray() failed");
+        }
     }
 
     static void compare(String csn1, String csn2) throws Exception {
@@ -274,6 +313,7 @@
     static void checkMalformed(String csn) throws Exception {
         boolean failed = false;
         System.out.printf("    Check malformed <%s>...%n", csn);
+        Charset cs = Charset.forName(csn);
         for (boolean direct: new boolean[] {false, true}) {
             for (byte[] bins : malformed) {
                 int mlen = bins[0];
@@ -285,10 +325,15 @@
                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
                 }
                 if (!cr.isMalformed()) {
-                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
+                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
                     failed = true;
                 } else if (cr.length() != mlen) {
-                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
+                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
+                    failed = true;
+                }
+                if (!Arrays.equals(decode(cs, bin, 0, bin.length),
+                                   new String(bin, csn).toCharArray())) {
+                    System.out.printf("        FAIL(new String(bb, %s)) failed%n", csn);
                     failed = true;
                 }
             }