jdk-sandbox: changeset 48262:daf3b49f4839

--- a/src/java.base/share/classes/java/lang/String.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/java/lang/String.java	Wed Dec 13 07:51:57 2017 -0800
@@ -3046,6 +3046,10 @@
         return COMPACT_STRINGS ? coder : UTF16;
     }
 
+    byte[] value() {
+        return value;
+    }
+
     private boolean isLatin1() {
         return COMPACT_STRINGS && coder == LATIN1;
     }

--- a/src/java.base/share/classes/java/lang/StringCoding.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/java/lang/StringCoding.java	Wed Dec 13 07:51:57 2017 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,6 +47,11 @@
 import static java.lang.String.LATIN1;
 import static java.lang.String.UTF16;
 import static java.lang.String.COMPACT_STRINGS;
+import static java.lang.Character.isSurrogate;
+import static java.lang.Character.highSurrogate;
+import static java.lang.Character.lowSurrogate;
+import static java.lang.Character.isSupplementaryCodePoint;
+import static java.lang.StringUTF16.putChar;
 
 /**
  * Utility class for string encoding and decoding.
@@ -66,8 +71,6 @@
     private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
     private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
 
-    private static boolean warnUnsupportedCharset = true;
-
     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
         SoftReference<T> sr = tl.get();
         if (sr == null)
@@ -80,7 +83,6 @@
     }
 
     // Trim the given byte array to the given length
-    //
     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
             return ba;
@@ -105,17 +107,6 @@
         return null;
     }
 
-    private static void warnUnsupportedCharset(String csn) {
-        if (warnUnsupportedCharset) {
-            // Use err(String) rather than the Logging API or System.err
-            // since this method may be called during VM initialization
-            // before either is available.
-            err("WARNING: Default charset " + csn +
-                " not supported, using ISO-8859-1 instead\n");
-            warnUnsupportedCharset = false;
-        }
-    }
-
     static class Result {
         byte[] value;
         byte coder;
@@ -224,19 +215,6 @@
         }
     }
 
-    private static class StringDecoder8859_1 extends StringDecoder {
-        StringDecoder8859_1(Charset cs, String rcn) {
-            super(cs, rcn);
-        }
-        Result decode(byte[] ba, int off, int len) {
-            if (COMPACT_STRINGS) {
-                return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
-            } else {
-                return result.with(StringLatin1.inflate(ba, off, len), UTF16);
-            }
-        }
-    }
-
     static Result decode(String charsetName, byte[] ba, int off, int len)
         throws UnsupportedEncodingException
     {
@@ -249,12 +227,15 @@
                 Charset cs = lookupCharset(csn);
                 if (cs != null) {
                     if (cs == UTF_8) {
-                        sd = new StringDecoderUTF8(cs, csn);
-                    } else if (cs == ISO_8859_1) {
-                        sd = new StringDecoder8859_1(cs, csn);
-                    } else {
-                        sd = new StringDecoder(cs, csn);
+                        return decodeUTF8(ba, off, len, true);
+                    }
+                    if (cs == ISO_8859_1) {
+                        return decodeLatin1(ba, off, len);
                     }
+                    if (cs == US_ASCII) {
+                        return decodeASCII(ba, off, len);
+                    }
+                    sd = new StringDecoder(cs, csn);
                 }
             } catch (IllegalCharsetNameException x) {}
             if (sd == null)
@@ -265,6 +246,16 @@
     }
 
     static Result decode(Charset cs, byte[] ba, int off, int len) {
+        if (cs == UTF_8) {
+            return decodeUTF8(ba, off, len, true);
+        }
+        if (cs == ISO_8859_1) {
+            return decodeLatin1(ba, off, len);
+        }
+        if (cs == US_ASCII) {
+            return decodeASCII(ba, off, len);
+        }
+
         // (1)We never cache the "external" cs, the only benefit of creating
         // an additional StringDe/Encoder object to wrap it is to share the
         // de/encode() method. These SD/E objects are short-lived, the young-gen
@@ -280,39 +271,29 @@
         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
         // but it then can be argued that the SM is null when the operation
         // is started...
-        if (cs == UTF_8) {
-            return StringDecoderUTF8.decode(ba, off, len, new Result());
-        }
         CharsetDecoder cd = cs.newDecoder();
         // ascii fastpath
-        if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) &&
-                                 ((ArrayDecoder)cd).isASCIICompatible() &&
-                                 !hasNegatives(ba, off, len))) {
-             if (COMPACT_STRINGS) {
-                 return new Result().with(Arrays.copyOfRange(ba, off, off + len),
-                                          LATIN1);
-             } else {
-                 return new Result().with(StringLatin1.inflate(ba, off, len), UTF16);
-             }
+        if ((cd instanceof ArrayDecoder) &&
+            ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
+            return decodeLatin1(ba, off, len);
         }
         int en = scale(len, cd.maxCharsPerByte());
         if (len == 0) {
             return new Result().with();
         }
-        if (cs.getClass().getClassLoader0() != null &&
-            System.getSecurityManager() != null) {
-            ba =  Arrays.copyOfRange(ba, off, off + len);
-            off = 0;
-        }
         cd.onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE)
           .reset();
-
         char[] ca = new char[en];
         if (cd instanceof ArrayDecoder) {
             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
             return new Result().with(ca, 0, clen);
         }
+        if (cs.getClass().getClassLoader0() != null &&
+            System.getSecurityManager() != null) {
+            ba = Arrays.copyOfRange(ba, off, off + len);
+            off = 0;
+        }
         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
         CharBuffer cb = CharBuffer.wrap(ca);
         try {
@@ -331,24 +312,22 @@
     }
 
     static Result decode(byte[] ba, int off, int len) {
-        String csn = Charset.defaultCharset().name();
-        try {
-            // use charset name decode() variant which provides caching.
-            return decode(csn, ba, off, len);
-        } catch (UnsupportedEncodingException x) {
-            warnUnsupportedCharset(csn);
+        Charset cs = Charset.defaultCharset();
+        if (cs == UTF_8) {
+            return decodeUTF8(ba, off, len, true);
+        }
+        if (cs == ISO_8859_1) {
+            return decodeLatin1(ba, off, len);
         }
-        try {
-            return decode("ISO-8859-1", ba, off, len);
-        } catch (UnsupportedEncodingException x) {
-            // If this code is hit during VM initialization, err(String) is
-            // the only way we will be able to get any kind of error message.
-            err("ISO-8859-1 charset not available: " + x.toString() + "\n");
-            // If we can not find ISO-8859-1 (a required encoding) then things
-            // are seriously wrong with the installation.
-            System.exit(1);
-            return null;
+        if (cs == US_ASCII) {
+            return decodeASCII(ba, off, len);
         }
+        StringDecoder sd = deref(decoder);
+        if (sd == null || !cs.name().equals(sd.cs.name())) {
+            sd = new StringDecoder(cs, cs.name());
+            set(decoder, sd);
+        }
+        return sd.decode(ba, off, len);
     }
 
     // -- Encoding --
@@ -393,9 +372,6 @@
                 return ba;
             }
             if (ce instanceof ArrayEncoder) {
-                if (!isTrusted) {
-                    val = Arrays.copyOf(val, val.length);
-                }
                 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
                                               : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
                 if (blen != -1) {
@@ -423,49 +399,140 @@
         }
     }
 
-    @HotSpotIntrinsicCandidate
-    private static int implEncodeISOArray(byte[] sa, int sp,
-                                          byte[] da, int dp, int len) {
-        int i = 0;
-        for (; i < len; i++) {
-            char c = StringUTF16.getChar(sa, sp++);
-            if (c > '\u00FF')
-                break;
-            da[dp++] = (byte)c;
+    static byte[] encode(String charsetName, byte coder, byte[] val)
+        throws UnsupportedEncodingException
+    {
+        StringEncoder se = deref(encoder);
+        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
+        if ((se == null) || !(csn.equals(se.requestedCharsetName())
+                              || csn.equals(se.charsetName()))) {
+            se = null;
+            try {
+                Charset cs = lookupCharset(csn);
+                if (cs != null) {
+                    if (cs == UTF_8) {
+                        return encodeUTF8(coder, val, true);
+                    }
+                    if (cs == ISO_8859_1) {
+                        return encode8859_1(coder, val);
+                    }
+                    if (cs == US_ASCII) {
+                        return encodeASCII(coder, val);
+                    }
+                    se = new StringEncoder(cs, csn);
+                }
+            } catch (IllegalCharsetNameException x) {}
+            if (se == null) {
+                throw new UnsupportedEncodingException (csn);
+            }
+            set(encoder, se);
         }
-        return i;
+        return se.encode(coder, val);
     }
 
-    static byte[] encode8859_1(byte coder, byte[] val) {
-        if (coder == LATIN1) {
+    static byte[] encode(Charset cs, byte coder, byte[] val) {
+        if (cs == UTF_8) {
+            return encodeUTF8(coder, val, true);
+        }
+        if (cs == ISO_8859_1) {
+            return encode8859_1(coder, val);
+        }
+        if (cs == US_ASCII) {
+            return encodeASCII(coder, val);
+        }
+        CharsetEncoder ce = cs.newEncoder();
+        // fastpath for ascii compatible
+        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
+                                 ((ArrayEncoder)ce).isASCIICompatible() &&
+                                 !hasNegatives(val, 0, val.length)))) {
             return Arrays.copyOf(val, val.length);
         }
-        int len = val.length >> 1;
-        byte[] dst = new byte[len];
-        int dp = 0;
-        int sp = 0;
-        int sl = len;
-        while (sp < sl) {
-            int ret = implEncodeISOArray(val, sp, dst, dp, len);
-            sp = sp + ret;
-            dp = dp + ret;
-            if (ret != len) {
-                char c = StringUTF16.getChar(val, sp++);
-                if (Character.isHighSurrogate(c) && sp < sl &&
-                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
-                    sp++;
-                }
-                dst[dp++] = '?';
-                len = sl - sp;
+        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
+        int en = scale(len, ce.maxBytesPerChar());
+        byte[] ba = new byte[en];
+        if (len == 0) {
+            return ba;
+        }
+        ce.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+        if (ce instanceof ArrayEncoder) {
+            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
+                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
+            if (blen != -1) {
+                return safeTrim(ba, blen, true);
             }
         }
-        if (dp == dst.length) {
-            return dst;
+        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
+                            System.getSecurityManager() == null;
+        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+                                       : StringUTF16.toChars(val);
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
         }
-        return Arrays.copyOf(dst, dp);
+        return safeTrim(ba, bb.position(), isTrusted);
     }
 
-    static byte[] encodeASCII(byte coder, byte[] val) {
+    static byte[] encode(byte coder, byte[] val) {
+        Charset cs = Charset.defaultCharset();
+        if (cs == UTF_8) {
+            return encodeUTF8(coder, val, true);
+        }
+        if (cs == ISO_8859_1) {
+            return encode8859_1(coder, val);
+        }
+        if (cs == US_ASCII) {
+            return encodeASCII(coder, val);
+        }
+        StringEncoder se = deref(encoder);
+        if (se == null || !cs.name().equals(se.cs.name())) {
+            se = new StringEncoder(cs, cs.name());
+            set(encoder, se);
+        }
+        return se.encode(coder, val);
+    }
+
+    /**
+     *  Print a message directly to stderr, bypassing all character conversion
+     *  methods.
+     *  @param msg  message to print
+     */
+    private static native void err(String msg);
+
+     /* The cached Result for each thread */
+    private static final ThreadLocal<StringCoding.Result>
+        resultCached = new ThreadLocal<>() {
+            protected StringCoding.Result initialValue() {
+                return new StringCoding.Result();
+            }};
+
+    ////////////////////////// ascii //////////////////////////////
+
+    private static Result decodeASCII(byte[] ba, int off, int len) {
+        Result result = resultCached.get();
+        if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
+            return result.with(Arrays.copyOfRange(ba, off, off + len),
+                               LATIN1);
+        }
+        byte[] dst = new byte[len<<1];
+        int dp = 0;
+        while (dp < len) {
+            int b = ba[off++];
+            putChar(dst, dp++, (b >= 0) ? (char)b : repl);
+        }
+        return result.with(dst, UTF16);
+    }
+
+    private static byte[] encodeASCII(byte coder, byte[] val) {
         if (coder == LATIN1) {
             byte[] dst = new byte[val.length];
             for (int i = 0; i < val.length; i++) {
@@ -498,59 +565,51 @@
         return Arrays.copyOf(dst, dp);
     }
 
-   static byte[] encodeUTF8(byte coder, byte[] val) {
-        int dp = 0;
-        byte[] dst;
+    ////////////////////////// latin1/8859_1 ///////////////////////////
+
+    private static Result decodeLatin1(byte[] ba, int off, int len) {
+       Result result = resultCached.get();
+       if (COMPACT_STRINGS) {
+           return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
+       } else {
+           return result.with(StringLatin1.inflate(ba, off, len), UTF16);
+       }
+    }
+
+    @HotSpotIntrinsicCandidate
+    private static int implEncodeISOArray(byte[] sa, int sp,
+                                          byte[] da, int dp, int len) {
+        int i = 0;
+        for (; i < len; i++) {
+            char c = StringUTF16.getChar(sa, sp++);
+            if (c > '\u00FF')
+                break;
+            da[dp++] = (byte)c;
+        }
+        return i;
+    }
+
+    private static byte[] encode8859_1(byte coder, byte[] val) {
         if (coder == LATIN1) {
-            dst = new byte[val.length << 1];
-            for (int sp = 0; sp < val.length; sp++) {
-                byte c = val[sp];
-                if (c < 0) {
-                    dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
-                    dst[dp++] = (byte)(0x80 | (c & 0x3f));
-                } else {
-                    dst[dp++] = c;
+            return Arrays.copyOf(val, val.length);
+        }
+        int len = val.length >> 1;
+        byte[] dst = new byte[len];
+        int dp = 0;
+        int sp = 0;
+        int sl = len;
+        while (sp < sl) {
+            int ret = implEncodeISOArray(val, sp, dst, dp, len);
+            sp = sp + ret;
+            dp = dp + ret;
+            if (ret != len) {
+                char c = StringUTF16.getChar(val, sp++);
+                if (Character.isHighSurrogate(c) && sp < sl &&
+                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
+                    sp++;
                 }
-            }
-        } else {
-            int sp = 0;
-            int sl = val.length >> 1;
-            dst = new byte[sl * 3];
-            char c;
-            while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
-                // ascii fast loop;
-                dst[dp++] = (byte)c;
-                sp++;
-            }
-            while (sp < sl) {
-                c = StringUTF16.getChar(val, sp++);
-                if (c < 0x80) {
-                    dst[dp++] = (byte)c;
-                } else if (c < 0x800) {
-                    dst[dp++] = (byte)(0xc0 | (c >> 6));
-                    dst[dp++] = (byte)(0x80 | (c & 0x3f));
-                } else if (Character.isSurrogate(c)) {
-                    int uc = -1;
-                    char c2;
-                    if (Character.isHighSurrogate(c) && sp < sl &&
-                        Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
-                        uc = Character.toCodePoint(c, c2);
-                    }
-                    if (uc < 0) {
-                        dst[dp++] = '?';
-                    } else {
-                        dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
-                        dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
-                        dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
-                        dst[dp++] = (byte)(0x80 | (uc & 0x3f));
-                        sp++;  // 2 chars
-                    }
-                } else {
-                    // 3 bytes, 16 bits
-                    dst[dp++] = (byte)(0xe0 | ((c >> 12)));
-                    dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
-                    dst[dp++] = (byte)(0x80 | (c & 0x3f));
-                }
+                dst[dp++] = '?';
+                len = sl - sp;
             }
         }
         if (dp == dst.length) {
@@ -559,113 +618,333 @@
         return Arrays.copyOf(dst, dp);
     }
 
-    static byte[] encode(String charsetName, byte coder, byte[] val)
-        throws UnsupportedEncodingException
-    {
-        StringEncoder se = deref(encoder);
-        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
-        if ((se == null) || !(csn.equals(se.requestedCharsetName())
-                              || csn.equals(se.charsetName()))) {
-            se = null;
-            try {
-                Charset cs = lookupCharset(csn);
-                if (cs != null) {
-                    if (cs == UTF_8) {
-                        return encodeUTF8(coder, val);
-                    } else if (cs == ISO_8859_1) {
-                        return encode8859_1(coder, val);
-                    } else if (cs == US_ASCII) {
-                        return encodeASCII(coder, val);
-                    }
-                    se = new StringEncoder(cs, csn);
-                }
-            } catch (IllegalCharsetNameException x) {}
-            if (se == null) {
-                throw new UnsupportedEncodingException (csn);
-            }
-            set(encoder, se);
+    //////////////////////////////// utf8 ////////////////////////////////////
+
+    private static boolean isNotContinuation(int b) {
+        return (b & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed3(int b1, int b2, int b3) {
+        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+               (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed3_2(int b1, int b2) {
+        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+               (b2 & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed4(int b2, int b3, int b4) {
+        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
+               (b4 & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed4_2(int b1, int b2) {
+        return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
+               (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
+               (b2 & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed4_3(int b3) {
+        return (b3 & 0xc0) != 0x80;
+    }
+
+    // for nb == 3/4; returns how many bytes of the malformed sequence to skip
+    private static int malformedN(byte[] src, int sp, int nb) {
+        if (nb == 3) {
+            int b1 = src[sp++];
+            int b2 = src[sp++];    // no need to lookup b3
+            return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+                    isNotContinuation(b2)) ? 1 : 2;
+        } else if (nb == 4) { // we don't care the speed here
+            int b1 = src[sp++] & 0xff;
+            int b2 = src[sp++] & 0xff;
+            if (b1 > 0xf4 ||
+                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
+                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
+                isNotContinuation(b2))
+                return 1;
+            if (isNotContinuation(src[sp++]))
+                return 2;
+            return 3;
         }
-        return se.encode(coder, val);
+        assert false;
+        return -1;
+    }
+
+    private static void throwMalformed(int off, int nb) {
+        throw new IllegalArgumentException("malformed input off : " + off +
+                                           ", length : " + nb);
+    }
+
+    private static char repl = '\ufffd';
+
+    private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
+        // ascii-biased fast path; the check has a relative cost for non-ascii-only bytes
+        if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
+            return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
+                                           LATIN1);
+        return decodeUTF8_0(src, sp, len, doReplace);
     }
 
-    static byte[] encode(Charset cs, byte coder, byte[] val) {
-        if (cs == UTF_8) {
-            return encodeUTF8(coder, val);
-        } else if (cs == ISO_8859_1) {
-            return encode8859_1(coder, val);
-        } else if (cs == US_ASCII) {
-            return encodeASCII(coder, val);
-        }
-        CharsetEncoder ce = cs.newEncoder();
-        // fastpath for ascii compatible
-        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
-                                 ((ArrayEncoder)ce).isASCIICompatible() &&
-                                 !hasNegatives(val, 0, val.length)))) {
-            return Arrays.copyOf(val, val.length);
-        }
-        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-        int en = scale(len, ce.maxBytesPerChar());
-        byte[] ba = new byte[en];
-        if (len == 0) {
-            return ba;
-        }
-        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
-                            System.getSecurityManager() == null;
-        ce.onMalformedInput(CodingErrorAction.REPLACE)
-          .onUnmappableCharacter(CodingErrorAction.REPLACE)
-          .reset();
-        if (ce instanceof ArrayEncoder) {
-            if (!isTrusted) {
-                val = Arrays.copyOf(val, val.length);
+    private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
+        Result ret = resultCached.get();
+
+        int sl = sp + len;
+        int dp = 0;
+        byte[] dst = new byte[len];
+
+        if (COMPACT_STRINGS) {
+            while (sp < sl) {
+                int b1 = src[sp];
+                if (b1 >= 0) {
+                    dst[dp++] = (byte)b1;
+                    sp++;
+                    continue;
+                }
+                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
+                    sp + 1 < sl) {
+                    int b2 = src[sp + 1];
+                    if (!isNotContinuation(b2)) {
+                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
+                                           (((byte) 0xC0 << 6) ^
+                                           ((byte) 0x80 << 0)));
+                        sp += 2;
+                        continue;
+                    }
+                }
+                // anything that is not latin1, including the repl char,
+                // has to go through the utf16 path below
+                break;
             }
-            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
-                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
-            if (blen != -1) {
-                return safeTrim(ba, blen, isTrusted);
+            if (sp == sl) {
+                if (dp != dst.length) {
+                    dst = Arrays.copyOf(dst, dp);
+                }
+                return ret.with(dst, LATIN1);
             }
         }
-        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-                                       : StringUTF16.toChars(val);
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-        try {
-            CoderResult cr = ce.encode(cb, bb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = ce.flush(bb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new Error(x);
+        if (dp == 0) {
+            dst = new byte[len << 1];
+        } else {
+            byte[] buf = new byte[len << 1];
+            StringLatin1.inflate(dst, 0, buf, 0, dp);
+            dst = buf;
         }
-        return safeTrim(ba, bb.position(), isTrusted);
+        while (sp < sl) {
+            int b1 = src[sp++];
+            if (b1 >= 0) {
+                putChar(dst, dp++, (char) b1);
+            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
+                if (sp < sl) {
+                    int b2 = src[sp++];
+                    if (isNotContinuation(b2)) {
+                        if (!doReplace) {
+                            throwMalformed(sp - 1, 1);
+                        }
+                        putChar(dst, dp++, repl);
+                        sp--;
+                    } else {
+                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
+                                                  (((byte) 0xC0 << 6) ^
+                                                  ((byte) 0x80 << 0))));
+                    }
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(sp, 1);  // underflow()
+                }
+                putChar(dst, dp++, repl);
+                break;
+            } else if ((b1 >> 4) == -2) {
+                if (sp + 1 < sl) {
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
+                    if (isMalformed3(b1, b2, b3)) {
+                        if (!doReplace) {
+                            throwMalformed(sp - 3, 3);
+                        }
+                        putChar(dst, dp++, repl);
+                        sp -= 3;
+                        sp += malformedN(src, sp, 3);
+                    } else {
+                        char c = (char)((b1 << 12) ^
+                                        (b2 <<  6) ^
+                                        (b3 ^
+                                         (((byte) 0xE0 << 12) ^
+                                         ((byte) 0x80 <<  6) ^
+                                         ((byte) 0x80 <<  0))));
+                        if (isSurrogate(c)) {
+                            if (!doReplace) {
+                                throwMalformed(sp - 3, 3);
+                            }
+                            putChar(dst, dp++, repl);
+                        } else {
+                            putChar(dst, dp++, c);
+                        }
+                    }
+                    continue;
+                }
+                if (sp  < sl && isMalformed3_2(b1, src[sp])) {
+                    if (!doReplace) {
+                        throwMalformed(sp - 1, 2);
+                    }
+                    putChar(dst, dp++, repl);
+                    continue;
+                }
+                if (!doReplace){
+                    throwMalformed(sp, 1);
+                }
+                putChar(dst, dp++, repl);
+                break;
+            } else if ((b1 >> 3) == -2) {
+                if (sp + 2 < sl) {
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
+                    int b4 = src[sp++];
+                    int uc = ((b1 << 18) ^
+                              (b2 << 12) ^
+                              (b3 <<  6) ^
+                              (b4 ^
+                               (((byte) 0xF0 << 18) ^
+                               ((byte) 0x80 << 12) ^
+                               ((byte) 0x80 <<  6) ^
+                               ((byte) 0x80 <<  0))));
+                    if (isMalformed4(b2, b3, b4) ||
+                        !isSupplementaryCodePoint(uc)) { // shortest form check
+                        if (!doReplace) {
+                            throwMalformed(sp - 4, 4);
+                        }
+                        putChar(dst, dp++, repl);
+                        sp -= 4;
+                        sp += malformedN(src, sp, 4);
+                    } else {
+                        putChar(dst, dp++, highSurrogate(uc));
+                        putChar(dst, dp++, lowSurrogate(uc));
+                    }
+                    continue;
+                }
+                b1 &= 0xff;
+                if (b1 > 0xf4 ||
+                    sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
+                    if (!doReplace) {
+                        throwMalformed(sp - 1, 1);  // or 2
+                    }
+                    putChar(dst, dp++, repl);
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(sp - 1, 1);
+                }
+                sp++;
+                putChar(dst, dp++, repl);
+                if (sp  < sl && isMalformed4_3(src[sp])) {
+                    continue;
+                }
+                break;
+            } else {
+                if (!doReplace) {
+                    throwMalformed(sp - 1, 1);
+                }
+                putChar(dst, dp++, repl);
+            }
+        }
+        if (dp != len) {
+            dst = Arrays.copyOf(dst, dp << 1);
+        }
+        return ret.with(dst, UTF16);
     }
 
-    static byte[] encode(byte coder, byte[] val) {
-        String csn = Charset.defaultCharset().name();
-        try {
-            // use charset name encode() variant which provides caching.
-            return encode(csn, coder, val);
-        } catch (UnsupportedEncodingException x) {
-            warnUnsupportedCharset(csn);
+    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {  // doReplace is only forwarded to the UTF16 path
+        if (coder == UTF16)  // UTF16-coded value: handled by the dedicated encoder
+            return encodeUTF8_UTF16(val, doReplace);
+
+        if (!hasNegatives(val, 0, val.length))  // presumably "no byte >= 0x80", so val is already valid UTF-8 -- confirm hasNegatives
+            return Arrays.copyOf(val, val.length);
+
+        int dp = 0;
+        byte[] dst = new byte[val.length << 1];  // worst case: every input byte becomes two output bytes
+        for (int sp = 0; sp < val.length; sp++) {
+            byte c = val[sp];
+            if (c < 0) {  // high bit set, i.e. 0x80..0xff as an unsigned value
+                dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));  // lead byte 110000xx: top two bits of c
+                dst[dp++] = (byte)(0x80 | (c & 0x3f));  // continuation byte 10xxxxxx: low six bits of c
+            } else {
+                dst[dp++] = c;  // 7-bit value is copied unchanged
+            }
         }
-        try {
-            return encode("ISO-8859-1", coder, val);
-        } catch (UnsupportedEncodingException x) {
-            // If this code is hit during VM initialization, err(String) is
-            // the only way we will be able to get any kind of error message.
-            err("ISO-8859-1 charset not available: " + x.toString() + "\n");
-            // If we can not find ISO-8859-1 (a required encoding) then things
-            // are seriously wrong with the installation.
-            System.exit(1);
-            return null;
-        }
+        if (dp == dst.length)  // buffer exactly full: return it without a trimming copy
+            return dst;
+        return Arrays.copyOf(dst, dp);
     }
 
-    /**
-     *  Print a message directly to stderr, bypassing all character conversion
-     *  methods.
-     *  @param msg  message to print
+    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
+        int dp = 0;
+        int sp = 0;
+        int sl = val.length >> 1;  // char count, assuming two bytes of val per char
+        byte[] dst = new byte[sl * 3];  // 3 bytes per char worst case; NOTE(review): sl * 3 can overflow int for sl > Integer.MAX_VALUE / 3
+        char c;
+        while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
+            // ascii fast loop;
+            dst[dp++] = (byte)c;
+            sp++;
+        }
+        while (sp < sl) {  // general loop, starting at the first char >= 0x80 (if any)
+            c = StringUTF16.getChar(val, sp++);
+            if (c < 0x80) {
+                dst[dp++] = (byte)c;
+            } else if (c < 0x800) {  // 2 bytes, 11 bits
+                dst[dp++] = (byte)(0xc0 | (c >> 6));
+                dst[dp++] = (byte)(0x80 | (c & 0x3f));
+            } else if (Character.isSurrogate(c)) {
+                int uc = -1;  // stays negative unless a high surrogate is followed by a low surrogate
+                char c2;
+                if (Character.isHighSurrogate(c) && sp < sl &&
+                    Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
+                    uc = Character.toCodePoint(c, c2);
+                }
+                if (uc < 0) {  // unpaired surrogate
+                    if (doReplace) {
+                        dst[dp++] = '?';  // a single '?' byte stands in for the bad char
+                    } else {
+                        throwMalformed(sp - 1, 1); // or 2, does not matter here
+                    }
+                } else {  // 4 bytes, 21 bits
+                    dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
+                    dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+                    dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
+                    dst[dp++] = (byte)(0x80 | (uc & 0x3f));
+                    sp++;  // 2 chars
+                }
+            } else {
+                // 3 bytes, 16 bits
+                dst[dp++] = (byte)(0xe0 | ((c >> 12)));
+                dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
+                dst[dp++] = (byte)(0x80 | (c & 0x3f));
+            }
+        }
+        if (dp == dst.length) {  // buffer exactly full: no trimming copy needed
+            return dst;
+        }
+        return Arrays.copyOf(dst, dp);
+    }
+
+    ////////////////////// for j.u.z.ZipCoder //////////////////////////
+
+    /*
+     * Throws IllegalArgumentException, instead of replacing, if malformed or unmappable.
      */
-    private static native void err(String msg);
+    static String newStringUTF8NoRepl(byte[] src, int off, int len) {
+        if (COMPACT_STRINGS && !hasNegatives(src, off, len))  // fast path; presumably "no byte >= 0x80 in range" -- confirm hasNegatives
+            return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);  // the copied bytes become the LATIN1 value directly
+        Result ret = decodeUTF8_0(src, off, len, false);  // false: presumably doReplace, i.e. report instead of replace -- confirm
+        return new String(ret.value, ret.coder);
+    }
+
+    /*
+     * Throws IllegalArgumentException, instead of replacing, if unmappable.
+     */
+    static byte[] getBytesUTF8NoRepl(String s) {
+        return encodeUTF8(s.coder(), s.value(), false);  // false = doReplace off; s.value() hands over the backing array, not a copy
+    }
 }

--- a/src/java.base/share/classes/java/lang/StringDecoderUTF8.java	Wed Dec 13 15:32:36 2017 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package java.lang;
-
-import java.nio.charset.Charset;
-import java.util.Arrays;
-
-import static java.lang.String.LATIN1;
-import static java.lang.String.UTF16;
-import static java.lang.String.COMPACT_STRINGS;
-import static java.lang.Character.isSurrogate;
-import static java.lang.Character.highSurrogate;
-import static java.lang.Character.lowSurrogate;
-import static java.lang.Character.isSupplementaryCodePoint;
-import static java.lang.StringUTF16.putChar;
-
-class StringDecoderUTF8 extends StringCoding.StringDecoder {
-
-    StringDecoderUTF8(Charset cs, String rcn) {
-        super(cs, rcn);
-    }
-
-    private static boolean isNotContinuation(int b) {
-        return (b & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed3(int b1, int b2, int b3) {
-        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-               (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed3_2(int b1, int b2) {
-        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-               (b2 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4(int b2, int b3, int b4) {
-        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
-               (b4 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4_2(int b1, int b2) {
-        return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
-               (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
-               (b2 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4_3(int b3) {
-        return (b3 & 0xc0) != 0x80;
-    }
-
-    // for nb == 3/4
-    private static int malformedN(byte[] src, int sp, int nb) {
-        if (nb == 3) {
-            int b1 = src[sp++];
-            int b2 = src[sp++];    // no need to lookup b3
-            return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-                    isNotContinuation(b2)) ? 1 : 2;
-        } else if (nb == 4) { // we don't care the speed here
-            int b1 = src[sp++] & 0xff;
-            int b2 = src[sp++] & 0xff;
-            if (b1 > 0xf4 ||
-                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
-                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
-                isNotContinuation(b2))
-                return 1;
-            if (isNotContinuation(src[sp++]))
-                return 2;
-            return 3;
-        }
-        assert false;
-        return -1;
-    }
-
-    private static char repl = '\ufffd';
-
-    StringCoding.Result decode(byte[] src, int sp, int len) {
-        return decode(src, sp, len, result);
-    }
-
-    static StringCoding.Result decode(byte[] src, int sp, int len,
-                                      StringCoding.Result ret) {
-        int sl = sp + len;
-        byte[] dst = new byte[len];
-        int dp = 0;
-        if (COMPACT_STRINGS) {   // Latin1 only loop
-            while (sp < sl) {
-                int b1 = src[sp];
-                if (b1 >= 0) {
-                    dst[dp++] = (byte)b1;
-                    sp++;
-                    continue;
-                }
-                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
-                    sp + 1 < sl) {
-                    int b2 = src[sp + 1];
-                    if (!isNotContinuation(b2)) {
-                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
-                                           (((byte) 0xC0 << 6) ^
-                                           ((byte) 0x80 << 0)));
-                        sp += 2;
-                        continue;
-                    }
-                }
-                // anything not a latin1, including the repl
-                // we have to go with the utf16
-                break;
-            }
-            if (sp == sl) {
-                if (dp != dst.length) {
-                    dst = Arrays.copyOf(dst, dp);
-                }
-                return ret.with(dst, LATIN1);
-            }
-        }
-        if (dp == 0) {
-            dst = new byte[len << 1];
-        } else {
-            byte[] buf = new byte[len << 1];
-            StringLatin1.inflate(dst, 0, buf, 0, dp);
-            dst = buf;
-        }
-        while (sp < sl) {
-            int b1 = src[sp++];
-            if (b1 >= 0) {
-                putChar(dst, dp++, (char) b1);
-            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                if (sp < sl) {
-                    int b2 = src[sp++];
-                    if (isNotContinuation(b2)) {
-                        putChar(dst, dp++, repl);
-                        sp--;
-                    } else {
-                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
-                                                  (((byte) 0xC0 << 6) ^
-                                                  ((byte) 0x80 << 0))));
-                    }
-                    continue;
-                }
-                putChar(dst, dp++, repl);
-                break;
-            } else if ((b1 >> 4) == -2) {
-                if (sp + 1 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    if (isMalformed3(b1, b2, b3)) {
-                        putChar(dst, dp++, repl);
-                        sp -= 3;
-                        sp += malformedN(src, sp, 3);
-                    } else {
-                        char c = (char)((b1 << 12) ^
-                                        (b2 <<  6) ^
-                                        (b3 ^
-                                         (((byte) 0xE0 << 12) ^
-                                         ((byte) 0x80 <<  6) ^
-                                         ((byte) 0x80 <<  0))));
-                        putChar(dst, dp++, isSurrogate(c) ?  repl : c);
-                    }
-                    continue;
-                }
-                if (sp  < sl && isMalformed3_2(b1, src[sp])) {
-                    putChar(dst, dp++, repl);
-                    continue;
-                }
-                putChar(dst, dp++, repl);
-                break;
-            } else if ((b1 >> 3) == -2) {
-                if (sp + 2 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    int b4 = src[sp++];
-                    int uc = ((b1 << 18) ^
-                              (b2 << 12) ^
-                              (b3 <<  6) ^
-                              (b4 ^
-                               (((byte) 0xF0 << 18) ^
-                               ((byte) 0x80 << 12) ^
-                               ((byte) 0x80 <<  6) ^
-                               ((byte) 0x80 <<  0))));
-                    if (isMalformed4(b2, b3, b4) ||
-                        !isSupplementaryCodePoint(uc)) { // shortest form check
-                        putChar(dst, dp++, repl);
-                        sp -= 4;
-                        sp += malformedN(src, sp, 4);
-                    } else {
-                        putChar(dst, dp++, highSurrogate(uc));
-                        putChar(dst, dp++, lowSurrogate(uc));
-                    }
-                    continue;
-                }
-                b1 &= 0xff;
-                if (b1 > 0xf4 ||
-                    sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
-                    putChar(dst, dp++, repl);
-                    continue;
-                }
-                sp++;
-                putChar(dst, dp++, repl);
-                if (sp  < sl && isMalformed4_3(src[sp])) {
-                    continue;
-                }
-                break;
-            } else {
-                putChar(dst, dp++, repl);
-            }
-        }
-        if (dp != len) {
-            dst = Arrays.copyOf(dst, dp << 1);
-        }
-        return ret.with(dst, UTF16);
-    }
-}

--- a/src/java.base/share/classes/java/lang/System.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/java/lang/System.java	Wed Dec 13 07:51:57 2017 -0800
@@ -2184,6 +2184,15 @@
             public Stream<ModuleLayer> layers(ClassLoader loader) {
                 return ModuleLayer.layers(loader);
             }
+
+            public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
+                return StringCoding.newStringUTF8NoRepl(bytes, off, len);
+            }
+
+            public byte[] getBytesUTF8NoRepl(String s) {
+                return StringCoding.getBytesUTF8NoRepl(s);
+            }
+
         });
     }
 }

--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java	Wed Dec 13 07:51:57 2017 -0800
@@ -28,72 +28,60 @@
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CodingErrorAction;
-import java.util.Arrays;
-import sun.nio.cs.ArrayDecoder;
-import sun.nio.cs.ArrayEncoder;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 /**
  * Utility class for zipfile name and comment decoding and encoding
  */
 
-final class ZipCoder {
+class ZipCoder {
+
+    private static final jdk.internal.misc.JavaLangAccess JLA =
+        jdk.internal.misc.SharedSecrets.getJavaLangAccess();
+
+    static final class UTF8 extends ZipCoder {
+
+        UTF8(Charset utf8) {
+            super(utf8);
+        }
 
-    private static boolean isASCII(byte[] ba, int off, int len) {
-        for (int i = off; i < off + len; i++) {
-            if (ba[i] < 0)
-                return false;
+        @Override
+        boolean isUTF8() {
+            return true;
         }
-        return true;
+
+        @Override
+        String toString(byte[] ba, int off, int length) {
+            return JLA.newStringUTF8NoRepl(ba, off, length);
+        }
+
+        @Override
+        byte[] getBytes(String s) {
+            return JLA.getBytesUTF8NoRepl(s);
+        }
     }
 
-    private static boolean hasReplaceChar(byte[] ba) {
-        for (int i = 0; i < ba.length; i++) {
-            if (ba[i] == (byte)'?')
-                return true;
-        }
-        return false;
+    // The UTF8 coder only delegates to JavaLangAccess and uses no decoder/encoder of its own, so one shared instance suffices.
+    private static final ZipCoder utf8 = new UTF8(UTF_8);
+
+    public static ZipCoder get(Charset charset) {
+        if (charset == UTF_8)  // identity check against StandardCharsets.UTF_8; any other Charset object gets a fresh generic coder
+            return utf8;
+        return new ZipCoder(charset);
     }
 
     String toString(byte[] ba, int off, int length) {
-
-        // fastpath for UTF-8 cs and ascii only name, leverage the
-        // compact string impl to avoid the unnecessary char[] copy/
-        // paste. A temporary workaround before we have better approach,
-        // such as a String constructor that throws exception for
-        // malformed and/or unmappable characters, instead of silently
-        // replacing with repl char
-        if (isUTF8 && isASCII(ba, off, length)) {
-            return new String(ba, off, length, cs);
-        }
+        try {
+              return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
 
-        CharsetDecoder cd = decoder().reset();
-        int len = (int)(length * cd.maxCharsPerByte());
-        char[] ca = new char[len];
-        if (len == 0)
-            return new String(ca);
-        // UTF-8 only for now. Other ArrayDeocder only handles
-        // CodingErrorAction.REPLACE mode. ZipCoder uses
-        // REPORT mode.
-        if (isUTF8 && cd instanceof ArrayDecoder) {
-            int clen = ((ArrayDecoder)cd).decode(ba, off, length, ca);
-            if (clen == -1)    // malformed
-                throw new IllegalArgumentException("MALFORMED");
-            return new String(ca, 0, clen);
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);
         }
-        ByteBuffer bb = ByteBuffer.wrap(ba, off, length);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        CoderResult cr = cd.decode(bb, cb, true);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        cr = cd.flush(cb);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        return new String(ca, 0, cb.position());
     }
 
     String toString(byte[] ba, int length) {
@@ -105,84 +93,47 @@
     }
 
     byte[] getBytes(String s) {
-        if (isUTF8) {
-            // fastpath for UTF8. should only occur when the string
-            // has malformed surrogates. A postscan should still be
-            // faster and use less memory.
-            byte[] ba = s.getBytes(cs);
-            if (!hasReplaceChar(ba)) {
-                return ba;
+        try {
+            ByteBuffer bb = encoder().encode(CharBuffer.wrap(s));
+            int pos = bb.position();
+            int limit = bb.limit();
+            if (bb.hasArray() && pos == 0 && limit == bb.capacity()) {
+                return bb.array();
             }
+            byte[] bytes = new byte[bb.limit() - bb.position()];
+            bb.get(bytes);
+            return bytes;
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);
         }
-        CharsetEncoder ce = encoder().reset();
-        char[] ca = s.toCharArray();
-        int len = (int)(ca.length * ce.maxBytesPerChar());
-        byte[] ba = new byte[len];
-        if (len == 0)
-            return ba;
-        // UTF-8 only for now. Other ArrayDeocder only handles
-        // CodingErrorAction.REPLACE mode.
-        if (isUTF8 && ce instanceof ArrayEncoder) {
-            int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba);
-            if (blen == -1)    // malformed
-                throw new IllegalArgumentException("MALFORMED");
-            return Arrays.copyOf(ba, blen);
-        }
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        CoderResult cr = ce.encode(cb, bb, true);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        cr = ce.flush(bb);
-        if (!cr.isUnderflow())
-            throw new IllegalArgumentException(cr.toString());
-        if (bb.position() == ba.length)  // defensive copy?
-            return ba;
-        else
-            return Arrays.copyOf(ba, bb.position());
     }
 
     // assume invoked only if "this" is not utf8
     byte[] getBytesUTF8(String s) {
-        if (isUTF8)
-            return getBytes(s);
-        if (utf8 == null)
-            utf8 = new ZipCoder(StandardCharsets.UTF_8);
         return utf8.getBytes(s);
     }
 
     String toStringUTF8(byte[] ba, int len) {
-        return toStringUTF8(ba, 0, len);
+        return utf8.toString(ba, 0, len);
     }
 
     String toStringUTF8(byte[] ba, int off, int len) {
-        if (isUTF8)
-            return toString(ba, off, len);
-        if (utf8 == null)
-            utf8 = new ZipCoder(StandardCharsets.UTF_8);
         return utf8.toString(ba, off, len);
     }
 
     boolean isUTF8() {
-        return isUTF8;
+        return false;
     }
 
     private Charset cs;
     private CharsetDecoder dec;
     private CharsetEncoder enc;
-    private boolean isUTF8;
-    private ZipCoder utf8;
 
     private ZipCoder(Charset cs) {
         this.cs = cs;
-        this.isUTF8 = cs.name().equals(StandardCharsets.UTF_8.name());
     }
 
-    static ZipCoder get(Charset charset) {
-        return new ZipCoder(charset);
-    }
-
-    private CharsetDecoder decoder() {
+    protected CharsetDecoder decoder() {
         if (dec == null) {
             dec = cs.newDecoder()
               .onMalformedInput(CodingErrorAction.REPORT)
@@ -191,7 +142,7 @@
         return dec;
     }
 
-    private CharsetEncoder encoder() {
+    protected CharsetEncoder encoder() {
         if (enc == null) {
             enc = cs.newEncoder()
               .onMalformedInput(CodingErrorAction.REPORT)

--- a/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java	Wed Dec 13 07:51:57 2017 -0800
@@ -254,4 +254,23 @@
      * given class loader.
      */
     Stream<ModuleLayer> layers(ClassLoader loader);
+
+    /**
+     * Returns a new string by decoding the given range of a UTF-8 encoded byte array.
+     *
+     * @param off the index of the first byte to decode
+     * @param len the number of bytes to decode
+     * @return the newly created string
+     * @throws IllegalArgumentException for malformed or unmappable bytes.
+     */
+    String newStringUTF8NoRepl(byte[] bytes, int off, int len);
+
+    /**
+     * Encode the given string into a sequence of bytes using utf8.
+     *
+     * @param s the string to encode
+     * @return the encoded bytes in utf8
+     * @throws IllegalArgumentException for malformed surrogates
+     */
+    byte[] getBytesUTF8NoRepl(String s);
 }

--- a/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java	Wed Dec 13 07:51:57 2017 -0800
@@ -63,8 +63,8 @@
         return new Encoder(this);
     }
 
-    private static class Decoder extends CharsetDecoder
-                                 implements ArrayDecoder {
+    private static class Decoder extends CharsetDecoder {
+
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -124,23 +124,10 @@
             else
                 return decodeBufferLoop(src, dst);
         }
-
-        public int decode(byte[] src, int sp, int len, char[] dst) {
-            if (len > dst.length)
-                len = dst.length;
-            int dp = 0;
-            while (dp < len)
-                dst[dp++] = (char)(src[sp++] & 0xff);
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
 
-    private static class Encoder extends CharsetEncoder
-                                 implements ArrayEncoder {
+    private static class Encoder extends CharsetEncoder {
+
         private Encoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -271,39 +258,5 @@
             else
                 return encodeBufferLoop(src, dst);
         }
-
-        private byte repl = (byte)'?';
-        protected void implReplaceWith(byte[] newReplacement) {
-            repl = newReplacement[0];
-        }
-
-        public int encode(char[] src, int sp, int len, byte[] dst) {
-            int dp = 0;
-            int slen = Math.min(len, dst.length);
-            int sl = sp + slen;
-            while (sp < sl) {
-                int ret = encodeISOArray(src, sp, dst, dp, slen);
-                sp = sp + ret;
-                dp = dp + ret;
-                if (ret != slen) {
-                    char c = src[sp++];
-                    if (Character.isHighSurrogate(c) && sp < sl &&
-                        Character.isLowSurrogate(src[sp])) {
-                        if (len > dst.length) {
-                            sl++;
-                            len--;
-                        }
-                        sp++;
-                    }
-                    dst[dp++] = repl;
-                    slen = Math.min((sl - sp), (dst.length - dp));
-                }
-            }
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
 }

--- a/src/java.base/share/classes/sun/nio/cs/US_ASCII.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/sun/nio/cs/US_ASCII.java	Wed Dec 13 07:51:57 2017 -0800
@@ -58,8 +58,7 @@
         return new Encoder(this);
     }
 
-    private static class Decoder extends CharsetDecoder
-                                 implements ArrayDecoder {
+    private static class Decoder extends CharsetDecoder {
 
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
@@ -128,32 +127,9 @@
             else
                 return decodeBufferLoop(src, dst);
         }
-
-        private char repl = '\uFFFD';
-        protected void implReplaceWith(String newReplacement) {
-            repl = newReplacement.charAt(0);
-        }
-
-        public int decode(byte[] src, int sp, int len, char[] dst) {
-            int dp = 0;
-            len = Math.min(len, dst.length);
-            while (dp < len) {
-                byte b = src[sp++];
-                if (b >= 0)
-                    dst[dp++] = (char)b;
-                else
-                    dst[dp++] = repl;
-            }
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
 
-    private static class Encoder extends CharsetEncoder
-                                 implements ArrayEncoder {
+    private static class Encoder extends CharsetEncoder {
 
         private Encoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
@@ -237,36 +213,5 @@
                 return encodeBufferLoop(src, dst);
         }
 
-        private byte repl = (byte)'?';
-        protected void implReplaceWith(byte[] newReplacement) {
-            repl = newReplacement[0];
-        }
-
-        public int encode(char[] src, int sp, int len, byte[] dst) {
-            int dp = 0;
-            int sl = sp + Math.min(len, dst.length);
-            while (sp < sl) {
-                char c = src[sp++];
-                if (c < 0x80) {
-                    dst[dp++] = (byte)c;
-                    continue;
-                }
-                if (Character.isHighSurrogate(c) && sp < sl &&
-                    Character.isLowSurrogate(src[sp])) {
-                    if (len > dst.length) {
-                        sl++;
-                        len--;
-                    }
-                    sp++;
-                }
-                dst[dp++] = repl;
-            }
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
-
 }

--- a/src/java.base/share/classes/sun/nio/cs/UTF_8.java	Wed Dec 13 15:32:36 2017 +0000
+++ b/src/java.base/share/classes/sun/nio/cs/UTF_8.java	Wed Dec 13 07:51:57 2017 -0800
@@ -80,8 +80,8 @@
         dst.position(dp - dst.arrayOffset());
     }
 
-    private static class Decoder extends CharsetDecoder
-                                 implements ArrayDecoder {
+    private static class Decoder extends CharsetDecoder {
+
         private Decoder(Charset cs) {
             super(cs, 1.0f, 1.0f);
         }
@@ -423,142 +423,9 @@
             bb.position(sp);
             return bb;
         }
-
-        // returns -1 if there is/are malformed byte(s) and the
-        // "action" for malformed input is not REPLACE.
-        public int decode(byte[] sa, int sp, int len, char[] da) {
-            final int sl = sp + len;
-            int dp = 0;
-            int dlASCII = Math.min(len, da.length);
-            ByteBuffer bb = null;  // only necessary if malformed
-
-            // ASCII only optimized loop
-            while (dp < dlASCII && sa[sp] >= 0)
-                da[dp++] = (char) sa[sp++];
-
-            while (sp < sl) {
-                int b1 = sa[sp++];
-                if (b1 >= 0) {
-                    // 1 byte, 7 bits: 0xxxxxxx
-                    da[dp++] = (char) b1;
-                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
-                    if (sp < sl) {
-                        int b2 = sa[sp++];
-                        if (isNotContinuation(b2)) {
-                            if (malformedInputAction() != CodingErrorAction.REPLACE)
-                                return -1;
-                            da[dp++] = replacement().charAt(0);
-                            sp--;            // malformedN(bb, 2) always returns 1
-                        } else {
-                            da[dp++] = (char) (((b1 << 6) ^ b2)^
-                                           (((byte) 0xC0 << 6) ^
-                                            ((byte) 0x80 << 0)));
-                        }
-                        continue;
-                    }
-                    if (malformedInputAction() != CodingErrorAction.REPLACE)
-                        return -1;
-                    da[dp++] = replacement().charAt(0);
-                    return dp;
-                } else if ((b1 >> 4) == -2) {
-                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
-                    if (sp + 1 < sl) {
-                        int b2 = sa[sp++];
-                        int b3 = sa[sp++];
-                        if (isMalformed3(b1, b2, b3)) {
-                            if (malformedInputAction() != CodingErrorAction.REPLACE)
-                                return -1;
-                            da[dp++] = replacement().charAt(0);
-                            sp -= 3;
-                            bb = getByteBuffer(bb, sa, sp);
-                            sp += malformedN(bb, 3).length();
-                        } else {
-                            char c = (char)((b1 << 12) ^
-                                              (b2 <<  6) ^
-                                              (b3 ^
-                                              (((byte) 0xE0 << 12) ^
-                                              ((byte) 0x80 <<  6) ^
-                                              ((byte) 0x80 <<  0))));
-                            if (Character.isSurrogate(c)) {
-                                if (malformedInputAction() != CodingErrorAction.REPLACE)
-                                    return -1;
-                                da[dp++] = replacement().charAt(0);
-                            } else {
-                                da[dp++] = c;
-                            }
-                        }
-                        continue;
-                    }
-                    if (malformedInputAction() != CodingErrorAction.REPLACE)
-                        return -1;
-                    if (sp  < sl && isMalformed3_2(b1, sa[sp])) {
-                        da[dp++] = replacement().charAt(0);
-                        continue;
-
-                    }
-                    da[dp++] = replacement().charAt(0);
-                    return dp;
-                } else if ((b1 >> 3) == -2) {
-                    // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-                    if (sp + 2 < sl) {
-                        int b2 = sa[sp++];
-                        int b3 = sa[sp++];
-                        int b4 = sa[sp++];
-                        int uc = ((b1 << 18) ^
-                                  (b2 << 12) ^
-                                  (b3 <<  6) ^
-                                  (b4 ^
-                                   (((byte) 0xF0 << 18) ^
-                                   ((byte) 0x80 << 12) ^
-                                   ((byte) 0x80 <<  6) ^
-                                   ((byte) 0x80 <<  0))));
-                        if (isMalformed4(b2, b3, b4) ||
-                            // shortest form check
-                            !Character.isSupplementaryCodePoint(uc)) {
-                            if (malformedInputAction() != CodingErrorAction.REPLACE)
-                                return -1;
-                            da[dp++] = replacement().charAt(0);
-                            sp -= 4;
-                            bb = getByteBuffer(bb, sa, sp);
-                            sp += malformedN(bb, 4).length();
-                        } else {
-                            da[dp++] = Character.highSurrogate(uc);
-                            da[dp++] = Character.lowSurrogate(uc);
-                        }
-                        continue;
-                    }
-                    if (malformedInputAction() != CodingErrorAction.REPLACE)
-                        return -1;
-                    b1 &= 0xff;
-                    if (b1 > 0xf4 ||
-                        sp  < sl && isMalformed4_2(b1, sa[sp] & 0xff)) {
-                        da[dp++] = replacement().charAt(0);
-                        continue;
-                    }
-                    sp++;
-                    if (sp  < sl && isMalformed4_3(sa[sp])) {
-                        da[dp++] = replacement().charAt(0);
-                        continue;
-                    }
-                    da[dp++] = replacement().charAt(0);
-                    return dp;
-                } else {
-                    if (malformedInputAction() != CodingErrorAction.REPLACE)
-                        return -1;
-                    da[dp++] = replacement().charAt(0);
-                }
-            }
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
 
-    private static final class Encoder extends CharsetEncoder
-                                 implements ArrayEncoder {
+    private static final class Encoder extends CharsetEncoder {
 
         private Encoder(Charset cs) {
             super(cs, 1.1f, 3.0f);
@@ -699,58 +566,5 @@
                 return encodeBufferLoop(src, dst);
         }
 
-        private byte repl = (byte)'?';
-        protected void implReplaceWith(byte[] newReplacement) {
-            repl = newReplacement[0];
-        }
-
-        // returns -1 if there is malformed char(s) and the
-        // "action" for malformed input is not REPLACE.
-        public int encode(char[] sa, int sp, int len, byte[] da) {
-            int sl = sp + len;
-            int dp = 0;
-            int dlASCII = dp + Math.min(len, da.length);
-
-            // ASCII only optimized loop
-            while (dp < dlASCII && sa[sp] < '\u0080')
-                da[dp++] = (byte) sa[sp++];
-
-            while (sp < sl) {
-                char c = sa[sp++];
-                if (c < 0x80) {
-                    // Have at most seven bits
-                    da[dp++] = (byte)c;
-                } else if (c < 0x800) {
-                    // 2 bytes, 11 bits
-                    da[dp++] = (byte)(0xc0 | (c >> 6));
-                    da[dp++] = (byte)(0x80 | (c & 0x3f));
-                } else if (Character.isSurrogate(c)) {
-                    if (sgp == null)
-                        sgp = new Surrogate.Parser();
-                    int uc = sgp.parse(c, sa, sp - 1, sl);
-                    if (uc < 0) {
-                        if (malformedInputAction() != CodingErrorAction.REPLACE)
-                            return -1;
-                        da[dp++] = repl;
-                    } else {
-                        da[dp++] = (byte)(0xf0 | ((uc >> 18)));
-                        da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
-                        da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
-                        da[dp++] = (byte)(0x80 | (uc & 0x3f));
-                        sp++;  // 2 chars
-                    }
-                } else {
-                    // 3 bytes, 16 bits
-                    da[dp++] = (byte)(0xe0 | ((c >> 12)));
-                    da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
-                    da[dp++] = (byte)(0x80 | (c & 0x3f));
-                }
-            }
-            return dp;
-        }
-
-        public boolean isASCIICompatible() {
-            return true;
-        }
     }
 }

author	sherman
	Wed, 13 Dec 2017 07:51:57 -0800
changeset 48262	daf3b49f4839
parent 48261	43edfde828ab
child 48263	a559b7cd1dea

src/java.base/share/classes/java/lang/String.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/java/lang/StringCoding.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/java/lang/StringDecoderUTF8.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/java/lang/System.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/java/util/zip/ZipCoder.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/jdk/internal/misc/JavaLangAccess.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/sun/nio/cs/ISO_8859_1.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/sun/nio/cs/US_ASCII.java		file \| annotate \| diff \| comparison \| revisions
src/java.base/share/classes/sun/nio/cs/UTF_8.java		file \| annotate \| diff \| comparison \| revisions