6636323: Optimize handling of builtin charsets
authorsherman
Mon, 23 Mar 2009 09:19:23 -0700
changeset 2294 4259115772f7
parent 2292 7f173614953a
child 2295 61e8b790642b
6636323: Optimize handling of builtin charsets 6636319: Encoders should implement isLegalReplacement(byte[] repl) Summary: optimized new String(byte[], cs/csn) and String.getBytes(cs/csn) for speed and memory consumption in singlebyte case. Reviewed-by: alanb
jdk/make/java/nio/FILES_java.gmk
jdk/src/share/classes/java/lang/StringCoding.java
jdk/src/share/classes/sun/nio/cs/ArrayDecoder.java
jdk/src/share/classes/sun/nio/cs/ArrayEncoder.java
jdk/src/share/classes/sun/nio/cs/ISO_8859_1.java
jdk/src/share/classes/sun/nio/cs/SingleByte.java
jdk/src/share/classes/sun/nio/cs/US_ASCII.java
jdk/test/sun/nio/cs/FindEncoderBugs.java
jdk/test/sun/nio/cs/StrCodingBenchmark.java
jdk/test/sun/nio/cs/TestStringCoding.java
--- a/jdk/make/java/nio/FILES_java.gmk	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/make/java/nio/FILES_java.gmk	Mon Mar 23 09:19:23 2009 -0700
@@ -220,6 +220,8 @@
 	sun/nio/ch/Util.java \
 	\
 	sun/nio/cs/AbstractCharsetProvider.java \
+	sun/nio/cs/ArrayDecoder.java \
+	sun/nio/cs/ArrayEncoder.java \
 	sun/nio/cs/FastCharsetProvider.java \
 	sun/nio/cs/HistoricallyNamedCharset.java \
 	sun/nio/cs/ISO_8859_1.java \
--- a/jdk/src/share/classes/java/lang/StringCoding.java	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/src/share/classes/java/lang/StringCoding.java	Mon Mar 23 09:19:23 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2000-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,13 +25,10 @@
 
 package java.lang;
 
-import java.io.CharConversionException;
 import java.io.UnsupportedEncodingException;
 import java.lang.ref.SoftReference;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
-import java.nio.BufferOverflowException;
-import java.nio.BufferUnderflowException;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
@@ -39,11 +36,12 @@
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.MalformedInputException;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.Arrays;
 import sun.misc.MessageUtils;
 import sun.nio.cs.HistoricallyNamedCharset;
+import sun.nio.cs.ArrayDecoder;
+import sun.nio.cs.ArrayEncoder;
 
 /**
  * Utility class for string encoding and decoding.
@@ -74,10 +72,8 @@
 
     // Trim the given byte array to the given length
     //
-    private static byte[] safeTrim(byte[] ba, int len, Charset cs) {
-        if (len == ba.length
-            && (System.getSecurityManager() == null
-                || cs.getClass().getClassLoader0() == null))
+    private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
+        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
             return ba;
         else
             return Arrays.copyOf(ba, len);
@@ -85,10 +81,9 @@
 
     // Trim the given char array to the given length
     //
-    private static char[] safeTrim(char[] ca, int len, Charset cs) {
-        if (len == ca.length
-            && (System.getSecurityManager() == null
-                || cs.getClass().getClassLoader0() == null))
+    private static char[] safeTrim(char[] ca, int len,
+                                   Charset cs, boolean isTrusted) {
+        if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
             return ca;
         else
             return Arrays.copyOf(ca, len);
@@ -128,6 +123,7 @@
         private final String requestedCharsetName;
         private final Charset cs;
         private final CharsetDecoder cd;
+        private final boolean isTrusted;
 
         private StringDecoder(Charset cs, String rcn) {
             this.requestedCharsetName = rcn;
@@ -135,6 +131,7 @@
             this.cd = cs.newDecoder()
                 .onMalformedInput(CodingErrorAction.REPLACE)
                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            this.isTrusted = (cs.getClass().getClassLoader0() == null);
         }
 
         String charsetName() {
@@ -152,24 +149,28 @@
             char[] ca = new char[en];
             if (len == 0)
                 return ca;
-            cd.reset();
-            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
-            CharBuffer cb = CharBuffer.wrap(ca);
-            try {
-                CoderResult cr = cd.decode(bb, cb, true);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-                cr = cd.flush(cb);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-            } catch (CharacterCodingException x) {
-                // Substitution is always enabled,
-                // so this shouldn't happen
-                throw new Error(x);
+            if (cd instanceof ArrayDecoder) {
+                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
+                return safeTrim(ca, clen, cs, isTrusted);
+            } else {
+                cd.reset();
+                ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+                CharBuffer cb = CharBuffer.wrap(ca);
+                try {
+                    CoderResult cr = cd.decode(bb, cb, true);
+                    if (!cr.isUnderflow())
+                        cr.throwException();
+                    cr = cd.flush(cb);
+                    if (!cr.isUnderflow())
+                        cr.throwException();
+                } catch (CharacterCodingException x) {
+                    // Substitution is always enabled,
+                    // so this shouldn't happen
+                    throw new Error(x);
+                }
+                return safeTrim(ca, cb.position(), cs, isTrusted);
             }
-            return safeTrim(ca, cb.position(), cs);
         }
-
     }
 
     static char[] decode(String charsetName, byte[] ba, int off, int len)
@@ -193,8 +194,57 @@
     }
 
     static char[] decode(Charset cs, byte[] ba, int off, int len) {
-        StringDecoder sd = new StringDecoder(cs, cs.name());
-        return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len);
+        // (1)We never cache the "external" cs, the only benefit of creating
+        // an additional StringDe/Encoder object to wrap it is to share the
+        // de/encode() method. These SD/E objects are short-lifed, the young-gen
+        // gc should be able to take care of them well. But the best approash
+        // is still not to generate them if not really necessary.
+        // (2)The defensive copy of the input byte/char[] has a big performance
+        // impact, as well as the outgoing result byte/char[]. Need to do the
+        // optimization check of (sm==null && classLoader0==null) for both.
+        // (3)getClass().getClassLoader0() is expensive
+        // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
+        // is only chcked (and then isTrusted gets set) when (SM==null). It is
+        // possible that the SM==null for now but then SM is NOT null later
+        // when safeTrim() is invoked...the "safe" way to do is to redundant
+        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
+        // but it then can be argued that the SM is null when the opertaion
+        // is started...
+        CharsetDecoder cd = cs.newDecoder();
+        int en = scale(len, cd.maxCharsPerByte());
+        char[] ca = new char[en];
+        if (len == 0)
+            return ca;
+        boolean isTrusted = false;
+        if (System.getSecurityManager() != null) {
+            if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
+                ba =  Arrays.copyOfRange(ba, off, off + len);
+                off = 0;
+            }
+        }
+        if (cd instanceof ArrayDecoder) {
+            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
+            return safeTrim(ca, clen, cs, isTrusted);
+        } else {
+            cd.onMalformedInput(CodingErrorAction.REPLACE)
+              .onUnmappableCharacter(CodingErrorAction.REPLACE)
+              .reset();
+            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+            CharBuffer cb = CharBuffer.wrap(ca);
+            try {
+                CoderResult cr = cd.decode(bb, cb, true);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+                cr = cd.flush(cb);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+            } catch (CharacterCodingException x) {
+                // Substitution is always enabled,
+                // so this shouldn't happen
+                throw new Error(x);
+            }
+            return safeTrim(ca, cb.position(), cs, isTrusted);
+        }
     }
 
     static char[] decode(byte[] ba, int off, int len) {
@@ -218,14 +268,12 @@
         }
     }
 
-
-
-
     // -- Encoding --
     private static class StringEncoder {
         private Charset cs;
         private CharsetEncoder ce;
         private final String requestedCharsetName;
+        private final boolean isTrusted;
 
         private StringEncoder(Charset cs, String rcn) {
             this.requestedCharsetName = rcn;
@@ -233,6 +281,7 @@
             this.ce = cs.newEncoder()
                 .onMalformedInput(CodingErrorAction.REPLACE)
                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            this.isTrusted = (cs.getClass().getClassLoader0() == null);
         }
 
         String charsetName() {
@@ -250,23 +299,27 @@
             byte[] ba = new byte[en];
             if (len == 0)
                 return ba;
-
-            ce.reset();
-            ByteBuffer bb = ByteBuffer.wrap(ba);
-            CharBuffer cb = CharBuffer.wrap(ca, off, len);
-            try {
-                CoderResult cr = ce.encode(cb, bb, true);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-                cr = ce.flush(bb);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-            } catch (CharacterCodingException x) {
-                // Substitution is always enabled,
-                // so this shouldn't happen
-                throw new Error(x);
+            if (ce instanceof ArrayEncoder) {
+                int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
+                return safeTrim(ba, blen, cs, isTrusted);
+            } else {
+                ce.reset();
+                ByteBuffer bb = ByteBuffer.wrap(ba);
+                CharBuffer cb = CharBuffer.wrap(ca, off, len);
+                try {
+                    CoderResult cr = ce.encode(cb, bb, true);
+                    if (!cr.isUnderflow())
+                        cr.throwException();
+                    cr = ce.flush(bb);
+                    if (!cr.isUnderflow())
+                        cr.throwException();
+                } catch (CharacterCodingException x) {
+                    // Substitution is always enabled,
+                    // so this shouldn't happen
+                    throw new Error(x);
+                }
+                return safeTrim(ba, bb.position(), cs, isTrusted);
             }
-            return safeTrim(ba, bb.position(), cs);
         }
     }
 
@@ -291,8 +344,39 @@
     }
 
     static byte[] encode(Charset cs, char[] ca, int off, int len) {
-        StringEncoder se = new StringEncoder(cs, cs.name());
-        return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len);
+        CharsetEncoder ce = cs.newEncoder();
+        int en = scale(len, ce.maxBytesPerChar());
+        byte[] ba = new byte[en];
+        if (len == 0)
+            return ba;
+        boolean isTrusted = false;
+        if (System.getSecurityManager() != null) {
+            if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
+                ca =  Arrays.copyOfRange(ca, off, off + len);
+                off = 0;
+            }
+        }
+        if (ce instanceof ArrayEncoder) {
+            int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
+            return safeTrim(ba, blen, cs, isTrusted);
+        } else {
+            ce.onMalformedInput(CodingErrorAction.REPLACE)
+              .onUnmappableCharacter(CodingErrorAction.REPLACE)
+              .reset();
+            ByteBuffer bb = ByteBuffer.wrap(ba);
+            CharBuffer cb = CharBuffer.wrap(ca, off, len);
+            try {
+                CoderResult cr = ce.encode(cb, bb, true);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+                cr = ce.flush(bb);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+            } catch (CharacterCodingException x) {
+                throw new Error(x);
+            }
+            return safeTrim(ba, bb.position(), cs, isTrusted);
+        }
     }
 
     static byte[] encode(char[] ca, int off, int len) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/nio/cs/ArrayDecoder.java	Mon Mar 23 09:19:23 2009 -0700
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.nio.cs;
+
+/*
+ * FastPath byte[]->char[] decoder, REPLACE on malformed or
+ * unmappable input.
+ */
+
+public interface ArrayDecoder {
+    int decode(byte[] src, int off, int len, char[] dst);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/nio/cs/ArrayEncoder.java	Mon Mar 23 09:19:23 2009 -0700
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.nio.cs;
+
+/*
+ * FastPath char[]->byte[] encoder, REPLACE on malformed input or
+ * unmappable input.
+ */
+
+public interface ArrayEncoder {
+    int encode(char[] src, int off, int len, byte[] dst);
+}
--- a/jdk/src/share/classes/sun/nio/cs/ISO_8859_1.java	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/ISO_8859_1.java	Mon Mar 23 09:19:23 2009 -0700
@@ -23,9 +23,6 @@
  * have any questions.
  */
 
-/*
- */
-
 package sun.nio.cs;
 
 import java.nio.ByteBuffer;
@@ -34,10 +31,7 @@
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.MalformedInputException;
-import java.nio.charset.UnmappableCharacterException;
-
+import java.util.Arrays;
 
 class ISO_8859_1
     extends Charset
@@ -65,8 +59,8 @@
         return new Encoder(this);
     }
 
-    private static class Decoder extends CharsetDecoder {
-
+    private static class Decoder extends CharsetDecoder
+                                 implements ArrayDecoder {
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -127,10 +121,18 @@
                 return decodeBufferLoop(src, dst);
         }
 
+        public int decode(byte[] src, int sp, int len, char[] dst) {
+            if (len > dst.length)
+                len = dst.length;
+            int dp = 0;
+            while (dp < len)
+                dst[dp++] = (char)(src[sp++] & 0xff);
+            return dp;
+        }
     }
 
-    private static class Encoder extends CharsetEncoder {
-
+    private static class Encoder extends CharsetEncoder
+                                 implements ArrayEncoder {
         private Encoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -139,6 +141,10 @@
             return c <= '\u00FF';
         }
 
+        public boolean isLegalReplacement(byte[] repl) {
+            return (repl.length == 1);  // we accept any byte value
+        }
+
         private final Surrogate.Parser sgp = new Surrogate.Parser();
 
         private CoderResult encodeArrayLoop(CharBuffer src,
@@ -208,5 +214,31 @@
                 return encodeBufferLoop(src, dst);
         }
 
+        private byte repl = (byte)'?';
+        protected void implReplaceWith(byte[] newReplacement) {
+            repl = newReplacement[0];
+        }
+
+        public int encode(char[] src, int sp, int len, byte[] dst) {
+            int dp = 0;
+            int sl = sp + Math.min(len, dst.length);
+            while (sp < sl) {
+                char c = src[sp++];
+                if (c <= '\u00FF') {
+                    dst[dp++] = (byte)c;
+                    continue;
+                }
+                if (Surrogate.isHigh(c) && sp < sl &&
+                    Surrogate.isLow(src[sp])) {
+                    if (len > dst.length) {
+                        sl++;
+                        len--;
+                    }
+                    sp++;
+                }
+                dst[dp++] = repl;
+            }
+            return dp;
+        }
     }
 }
--- a/jdk/src/share/classes/sun/nio/cs/SingleByte.java	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/SingleByte.java	Mon Mar 23 09:19:23 2009 -0700
@@ -32,6 +32,7 @@
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
+import java.util.Arrays;
 import static sun.nio.cs.CharsetMapping.*;
 
 public class SingleByte
@@ -45,7 +46,8 @@
         return cr;
     }
 
-    public static class Decoder extends CharsetDecoder {
+    final public static class Decoder extends CharsetDecoder
+                                      implements ArrayDecoder {
         private final char[] b2c;
 
         public Decoder(Charset cs, char[] b2c) {
@@ -108,9 +110,29 @@
         private final char decode(int b) {
             return b2c[b + 128];
         }
+
+        private char repl = '\uFFFD';
+        protected void implReplaceWith(String newReplacement) {
+            repl = newReplacement.charAt(0);
+        }
+
+        public int decode(byte[] src, int sp, int len, char[] dst) {
+            if (len > dst.length)
+                len = dst.length;
+            int dp = 0;
+            while (dp < len) {
+                dst[dp] = decode(src[sp++]);
+                if (dst[dp] == UNMAPPABLE_DECODING) {
+                    dst[dp] = repl;
+                }
+                dp++;
+            }
+            return dp;
+        }
     }
 
-    public static class Encoder extends CharsetEncoder {
+    final public static class Encoder extends CharsetEncoder
+                                      implements ArrayEncoder {
         private Surrogate.Parser sgp;
         private final char[] c2b;
         private final char[] c2bIndex;
@@ -125,6 +147,11 @@
             return encode(c) != UNMAPPABLE_ENCODING;
         }
 
+        public boolean isLegalReplacement(byte[] repl) {
+            return ((repl.length == 1 && repl[0] == (byte)'?') ||
+                    super.isLegalReplacement(repl));
+        }
+
         private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
             char[] sa = src.array();
             int sp = src.arrayOffset() + src.position();
@@ -200,6 +227,34 @@
                 return UNMAPPABLE_ENCODING;
             return c2b[index + (ch & 0xff)];
         }
+
+        private byte repl = (byte)'?';
+        protected void implReplaceWith(byte[] newReplacement) {
+            repl = newReplacement[0];
+        }
+
+        public int encode(char[] src, int sp, int len, byte[] dst) {
+            int dp = 0;
+            int sl = sp + Math.min(len, dst.length);
+            while (sp < sl) {
+                char c = src[sp++];
+                int b = encode(c);
+                if (b != UNMAPPABLE_ENCODING) {
+                    dst[dp++] = (byte)b;
+                    continue;
+                }
+                if (Surrogate.isHigh(c) && sp < sl &&
+                    Surrogate.isLow(src[sp])) {
+                    if (len > dst.length) {
+                        sl++;
+                        len--;
+                    }
+                    sp++;
+                }
+                dst[dp++] = repl;
+            }
+            return dp;
+        }
     }
 
     // init the c2b and c2bIndex tables from b2c.
--- a/jdk/src/share/classes/sun/nio/cs/US_ASCII.java	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/US_ASCII.java	Mon Mar 23 09:19:23 2009 -0700
@@ -31,10 +31,7 @@
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.MalformedInputException;
-import java.nio.charset.UnmappableCharacterException;
-
+import java.util.Arrays;
 
 public class US_ASCII
     extends Charset
@@ -61,7 +58,8 @@
         return new Encoder(this);
     }
 
-    private static class Decoder extends CharsetDecoder {
+    private static class Decoder extends CharsetDecoder
+                                 implements ArrayDecoder {
 
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
@@ -131,9 +129,27 @@
                 return decodeBufferLoop(src, dst);
         }
 
+        private char repl = '\uFFFD';
+        protected void implReplaceWith(String newReplacement) {
+            repl = newReplacement.charAt(0);
+        }
+
+        public int decode(byte[] src, int sp, int len, char[] dst) {
+            int dp = 0;
+            len = Math.min(len, dst.length);
+            while (dp < len) {
+                byte b = src[sp++];
+                if (b >= 0)
+                    dst[dp++] = (char)b;
+                else
+                    dst[dp++] = repl;
+            }
+            return dp;
+        }
     }
 
-    private static class Encoder extends CharsetEncoder {
+    private static class Encoder extends CharsetEncoder
+                                 implements ArrayEncoder {
 
         private Encoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
@@ -143,8 +159,11 @@
             return c < 0x80;
         }
 
+        public boolean isLegalReplacement(byte[] repl) {
+            return (repl.length == 1 && repl[0] >= 0);
+        }
+
         private final Surrogate.Parser sgp = new Surrogate.Parser();
-
         private CoderResult encodeArrayLoop(CharBuffer src,
                                             ByteBuffer dst)
         {
@@ -213,6 +232,32 @@
                 return encodeBufferLoop(src, dst);
         }
 
+        private byte repl = (byte)'?';
+        protected void implReplaceWith(byte[] newReplacement) {
+            repl = newReplacement[0];
+        }
+
+        public int encode(char[] src, int sp, int len, byte[] dst) {
+            int dp = 0;
+            int sl = sp + Math.min(len, dst.length);
+            while (sp < sl) {
+                char c = src[sp++];
+                if (c < 0x80) {
+                    dst[dp++] = (byte)c;
+                    continue;
+                }
+                if (Surrogate.isHigh(c) && sp < sl &&
+                    Surrogate.isLow(src[sp])) {
+                    if (len > dst.length) {
+                        sl++;
+                        len--;
+                    }
+                    sp++;
+                }
+                dst[dp++] = repl;
+            }
+            return dp;
+        }
     }
 
 }
--- a/jdk/test/sun/nio/cs/FindEncoderBugs.java	Sat Mar 21 13:52:13 2009 -0700
+++ b/jdk/test/sun/nio/cs/FindEncoderBugs.java	Mon Mar 23 09:19:23 2009 -0700
@@ -526,4 +526,3 @@
         System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
         if (failed > 0) throw new AssertionError("Some tests failed");}
 }
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/StrCodingBenchmark.java	Mon Mar 23 09:19:23 2009 -0700
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+import java.util.concurrent.*;
+import java.util.regex.Pattern;
+
+/**
+ * Usage: java StringCodingBenchmark
+ * [-Diterations=N] [-Dsize=N] [-Dsubsize=N] [-Dmaxchar=N]
+ * [-Dfilter=REGEXP] [-DSecurityManager=true]
+ */
+public class StrCodingBenchmark {
+    abstract static class Job {
+        private final String name;
+        public Job(String name) { this.name = name; }
+        public String name() { return name; }
+        public abstract void work() throws Throwable;
+    }
+
+    private static void collectAllGarbage() {
+        final java.util.concurrent.CountDownLatch drained
+            = new java.util.concurrent.CountDownLatch(1);
+        try {
+            System.gc();        // enqueue finalizable objects
+            new Object() { protected void finalize() {
+                drained.countDown(); }};
+            System.gc();        // enqueue detector
+            drained.await();    // wait for finalizer queue to drain
+            System.gc();        // cleanup finalized objects
+        } catch (InterruptedException e) { throw new Error(e); }
+    }
+
+    /**
+     * Runs each job for long enough that all the runtime compilers
+     * have had plenty of time to warm up, i.e. get around to
+     * compiling everything worth compiling.
+     * Returns array of average times per job per run.
+     */
+    public static long[] time0(Job ... jobs) throws Throwable {
+        //final long warmupNanos = 10L * 1000L * 1000L * 1000L;
+        final long warmupNanos = 100L * 100L;
+        long[] nanoss = new long[jobs.length];
+        for (int i = 0; i < jobs.length; i++) {
+            collectAllGarbage();
+            long t0 = System.nanoTime();
+            long t;
+            int j = 0;
+            do { jobs[i].work(); j++; }
+            while ((t = System.nanoTime() - t0) < warmupNanos);
+            nanoss[i] = t/j;
+        }
+        return nanoss;
+    }
+
+    public static void time(Job ... jobs) throws Throwable {
+
+        long[] warmup = time0(jobs); // Warm up run
+        long[] nanoss = time0(jobs); // Real timing run
+        long[] milliss = new long[jobs.length];
+        double[] ratios = new double[jobs.length];
+
+        final String nameHeader   = "Method";
+        final String millisHeader = "Millis";
+        final String ratioHeader  = "Ratio";
+
+        int nameWidth   = nameHeader.length();
+        int millisWidth = millisHeader.length();
+        int ratioWidth  = ratioHeader.length();
+
+        for (int i = 0; i < jobs.length; i++) {
+            nameWidth = Math.max(nameWidth, jobs[i].name().length());
+
+            milliss[i] = nanoss[i]/(1000L * 1000L);
+            millisWidth = Math.max(millisWidth,
+                                   String.format("%d", milliss[i]).length());
+
+            ratios[i] = (double) nanoss[i] / (double) nanoss[0];
+            ratioWidth = Math.max(ratioWidth,
+                                  String.format("%.3f", ratios[i]).length());
+        }
+        String format = String.format("%%-%ds %%%dd %n",
+                                      nameWidth, millisWidth);
+        String headerFormat = String.format("%%-%ds %%%ds%n",
+                                            nameWidth, millisWidth);
+        System.out.printf(headerFormat, "Method", "Millis");
+
+        // Print out absolute and relative times, calibrated against first job
+        for (int i = 0; i < jobs.length; i++)
+            System.out.printf(format, jobs[i].name(), milliss[i], ratios[i]);
+    }
+
+    public static Job[] filter(Pattern filter, Job[] jobs) {
+        if (filter == null) return jobs;
+        Job[] newJobs = new Job[jobs.length];
+        int n = 0;
+        for (Job job : jobs)
+            if (filter.matcher(job.name()).find())
+                newJobs[n++] = job;
+        // Arrays.copyOf not available in JDK 5
+        Job[] ret = new Job[n];
+        System.arraycopy(newJobs, 0, ret, 0, n);
+        return ret;
+    }
+
+    static class PermissiveSecurityManger extends SecurityManager {
+        @Override public void checkPermission(java.security.Permission p) {
+        }
+    }
+
+    public static void main(String[] args) throws Throwable {
+        final int itrs = Integer.getInteger("iterations", 100000);
+        final int size       = Integer.getInteger("size", 2048);
+        final int subsize    = Integer.getInteger("subsize", 128);
+        final int maxchar    = Integer.getInteger("maxchar", 128);
+        final String regex = System.getProperty("filter");
+        final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
+        final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
+        if (useSecurityManager)
+            System.setSecurityManager(new PermissiveSecurityManger());
+        final Random rnd = new Random();
+
+        for (Charset charset:  Charset.availableCharsets().values()) {
+            if (!("ISO-8859-1".equals(charset.name()) ||
+                  "US-ASCII".equals(charset.name()) ||
+                  charset.newDecoder() instanceof sun.nio.cs.SingleByte.Decoder))
+                continue;
+            final String csn = charset.name();
+            final Charset cs = charset;
+            final StringBuilder sb = new StringBuilder();
+            {
+                final CharsetEncoder enc = cs.newEncoder();
+                for (int i = 0; i < size; ) {
+                    char c = (char) rnd.nextInt(maxchar);
+                    if (enc.canEncode(c)) {
+                        sb.append(c);
+                        i++;
+                    }
+                }
+            }
+            final String string = sb.toString();
+            final byte[] bytes  = string.getBytes(cs);
+
+            System.out.printf("%n--------%s---------%n", csn);
+            for (int sz = 4; sz <= 2048; sz *= 2) {
+                System.out.printf("   [len=%d]%n", sz);
+                final byte[] bs  = Arrays.copyOf(bytes, sz);
+                final String str = new String(bs, csn);
+                Job[] jobs = {
+                    new Job("String decode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, csn);
+                    }},
+
+                    new Job("String decode: cs") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, cs);
+                    }},
+
+                    new Job("String encode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                                str.getBytes(csn);
+                    }},
+
+                    new Job("String encode: cs") {
+                    public void work() throws Throwable {
+                         for (int i = 0; i < itrs; i++)
+                          str.getBytes(cs);
+                    }},
+                };
+                time(filter(filter, jobs));
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/TestStringCoding.java	Mon Mar 23 09:19:23 2009 -0700
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2000-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/* @test
+   @bug 6636323 6636319
+   @summary Test if StringCoding and NIO result have the same de/encoding result
+ * @run main/timeout=2000 TestStringCoding
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestStringCoding {
+    public static void main(String[] args) throws Throwable {
+
+        for (Boolean hasSM: new boolean[] { false, true }) {
+            if (hasSM)
+                System.setSecurityManager(new PermissiveSecurityManger());
+            for (Charset cs:  Charset.availableCharsets().values()) {
+                if ("ISO-2022-CN".equals(cs.name()) ||
+                    "x-COMPOUND_TEXT".equals(cs.name()) ||
+                    "x-JISAutoDetect".equals(cs.name()))
+                    continue;
+                System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM);
+                // full bmp first
+                char[] bmpCA = new char[0x10000];
+                for (int i = 0; i < 0x10000; i++) {
+                     bmpCA[i] = (char)i;
+                }
+                byte[] sbBA = new byte[0x100];
+                for (int i = 0; i < 0x100; i++) {
+                    sbBA[i] = (byte)i;
+                }
+                test(cs, bmpCA, sbBA);
+                // "randomed" sizes
+                Random rnd = new Random();
+                for (int i = 0; i < 10; i++) {
+                    int clen = rnd.nextInt(0x10000);
+                    int blen = rnd.nextInt(0x100);
+                    //System.out.printf("    blen=%d, clen=%d%n", blen, clen);
+                    test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
+                    //add a pair of surrogates
+                    int pos = clen / 2;
+                    if ((pos + 1) < blen) {
+                        bmpCA[pos] = '\uD800';
+                        bmpCA[pos+1] = '\uDC00';
+                    }
+                    test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
+                }
+                System.out.println("done!");
+            }
+        }
+    }
+
+    static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
+        String bmpStr = new String(bmpCA);
+        CharsetDecoder dec = cs.newDecoder()
+            .onMalformedInput(CodingErrorAction.REPLACE)
+            .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        CharsetEncoder enc = cs.newEncoder()
+            .onMalformedInput(CodingErrorAction.REPLACE)
+            .onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+        //getBytes(csn);
+        byte[] baSC = bmpStr.getBytes(cs.name());
+        ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
+        byte[] baNIO = new byte[bf.limit()];
+        bf.get(baNIO, 0, baNIO.length);
+        if (!Arrays.equals(baSC, baNIO))
+            throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
+
+        //getBytes(cs);
+        baSC = bmpStr.getBytes(cs);
+        if (!Arrays.equals(baSC, baNIO))
+            throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());
+
+        //new String(csn);
+        String strSC = new String(sbBA, cs.name());
+        String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();
+        if(!strNIO.equals(strSC))
+            throw new RuntimeException("new String(csn) failed  -> " + cs.name());
+
+        //new String(cs);
+        strSC = new String(sbBA, cs);
+        if (!strNIO.equals(strSC))
+            throw new RuntimeException("new String(cs) failed  -> " + cs.name());
+
+        //encode unmappable surrogates
+        if (enc instanceof sun.nio.cs.ArrayEncoder &&
+            cs.contains(Charset.forName("ASCII"))) {
+            enc.replaceWith(new byte[] { (byte)'A'});
+            sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
+
+            String str = "ab\uD800\uDC00\uD800\uDC00cd";
+            byte[] ba = new byte[str.length() - 2];
+            int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
+            if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
+                throw new RuntimeException("encode1(surrogates) failed  -> "
+                                           + cs.name());
+
+            ba = new byte[str.length()];
+            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
+            if (n != 6 || !"abAAcd".equals(new String(ba, 0, n,
+                                                     cs.name())))
+                throw new RuntimeException("encode2(surrogates) failed  -> "
+                                           + cs.name());
+            str = "ab\uD800B\uDC00Bcd";
+            ba = new byte[str.length()];
+            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
+            if (n != 8 || !"abABABcd".equals(new String(ba, 0, n,
+                                                       cs.name())))
+                throw new RuntimeException("encode3(surrogates) failed  -> "
+                                           + cs.name());
+
+            ba = new byte[str.length() - 1];
+            n = cae.encode(str.toCharArray(), 0, str.length(), ba);
+            if (n != 7 || !"abABABc".equals(new String(ba, 0, n,
+                                                      cs.name())))
+                throw new RuntimeException("encode4(surrogates) failed  -> "
+                                           + cs.name());
+        }
+
+    }
+
+    static class PermissiveSecurityManger extends SecurityManager {
+        @Override public void checkPermission(java.security.Permission p) {}
+    }
+}