8231717: Improve performance of charset decoding when charset is always compactable
authoraleonard
Thu, 10 Oct 2019 10:28:55 +0100
changeset 58561 3968bf3673c5
parent 58560 5a4b4544b810
child 58562 e43ac61b89ab
8231717: Improve performance of charset decoding when charset is always compactable Reviewed-by: rriggs, redestad, alanb
make/data/charsetmapping/SingleByte-X.java.template
make/jdk/src/classes/build/tools/charsetmapping/SBCS.java
src/java.base/share/classes/java/lang/StringCoding.java
src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java
src/java.base/share/classes/sun/nio/cs/SingleByte.java
--- a/make/data/charsetmapping/SingleByte-X.java.template	Fri Oct 11 06:57:33 2019 -0700
+++ b/make/data/charsetmapping/SingleByte-X.java.template	Thu Oct 10 10:28:55 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,7 @@
     }
 
     public CharsetDecoder newDecoder() {
-        return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$);
+        return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$, $LATIN1DECODABLE$);
     }
 
     public CharsetEncoder newEncoder() {
--- a/make/jdk/src/classes/build/tools/charsetmapping/SBCS.java	Fri Oct 11 06:57:33 2019 -0700
+++ b/make/jdk/src/classes/build/tools/charsetmapping/SBCS.java	Thu Oct 10 10:28:55 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,6 +46,7 @@
         String hisName = cs.hisName;
         String pkgName = cs.pkgName;
         boolean isASCII = cs.isASCII;
+        boolean isLatin1Decodable = true;
 
         StringBuilder b2cSB = new StringBuilder();
         StringBuilder b2cNRSB = new StringBuilder();
@@ -69,6 +70,9 @@
                 c2bOff += 0x100;
                 c2bIndex[e.cp>>8] = 1;
             }
+            if (e.cp > 0xFF) {
+                isLatin1Decodable = false;
+            }
         }
 
         Formatter fm = new Formatter(b2cSB);
@@ -178,6 +182,9 @@
             if (line.indexOf("$ASCIICOMPATIBLE$") != -1) {
                 line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false");
             }
+            if (line.indexOf("$LATIN1DECODABLE$") != -1) {
+                line = line.replace("$LATIN1DECODABLE$", isLatin1Decodable ? "true" : "false");
+            }
             if (line.indexOf("$B2CTABLE$") != -1) {
                 line = line.replace("$B2CTABLE$", b2c);
             }
--- a/src/java.base/share/classes/java/lang/StringCoding.java	Fri Oct 11 06:57:33 2019 -0700
+++ b/src/java.base/share/classes/java/lang/StringCoding.java	Thu Oct 10 10:28:55 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -191,6 +191,12 @@
                     return result.with(StringLatin1.inflate(ba, off, len), UTF16);
                 }
             }
+            // fastpath for always Latin1 decodable single byte
+            if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
+                byte[] dst = new byte[len];
+                ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
+                return result.with(dst, LATIN1);
+            }
             int en = scale(len, cd.maxCharsPerByte());
             char[] ca = new char[en];
             if (cd instanceof ArrayDecoder) {
@@ -278,6 +284,13 @@
             ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
             return decodeLatin1(ba, off, len);
         }
+        // fastpath for always Latin1 decodable single byte
+        if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
+            byte[] dst = new byte[len];
+            ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
+            return new Result().with(dst, LATIN1);
+        }
+
         int en = scale(len, cd.maxCharsPerByte());
         if (len == 0) {
             return new Result().with();
--- a/src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java	Fri Oct 11 06:57:33 2019 -0700
+++ b/src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java	Thu Oct 10 10:28:55 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,9 @@
 /*
  * FastPath byte[]->char[] decoder, REPLACE on malformed or
  * unmappable input.
+ *
+ * FastPath encoded byte[]-> "String Latin1 coding" byte[] decoder for use when
+ * charset is always decodable to the internal String Latin1 coding byte[], ie. all mappings <=0xff
  */
 
 public interface ArrayDecoder {
@@ -36,4 +39,14 @@
     default boolean isASCIICompatible() {
         return false;
     }
+
+    // Is always decodable to internal String Latin1 coding, ie. all mappings <= 0xff
+    default boolean isLatin1Decodable() {
+        return false;
+    }
+
+    // Decode to internal String Latin1 coding byte[] fastpath for when isLatin1Decodable == true
+    default int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
+        return 0;
+    }
 }
--- a/src/java.base/share/classes/sun/nio/cs/SingleByte.java	Fri Oct 11 06:57:33 2019 -0700
+++ b/src/java.base/share/classes/sun/nio/cs/SingleByte.java	Thu Oct 10 10:28:55 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,17 +50,27 @@
                                       implements ArrayDecoder {
         private final char[] b2c;
         private final boolean isASCIICompatible;
+        private final boolean isLatin1Decodable;
 
         public Decoder(Charset cs, char[] b2c) {
             super(cs, 1.0f, 1.0f);
             this.b2c = b2c;
             this.isASCIICompatible = false;
+            this.isLatin1Decodable = false;
         }
 
         public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible) {
             super(cs, 1.0f, 1.0f);
             this.b2c = b2c;
             this.isASCIICompatible = isASCIICompatible;
+            this.isLatin1Decodable = false;
+        }
+
+        public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible, boolean isLatin1Decodable) {
+            super(cs, 1.0f, 1.0f);
+            this.b2c = b2c;
+            this.isASCIICompatible = isASCIICompatible;
+            this.isLatin1Decodable = isLatin1Decodable;
         }
 
         private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
@@ -125,6 +135,18 @@
         }
 
         @Override
+        public int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
+            if (len > dst.length)
+                len = dst.length;
+
+            int dp = 0;
+            while (dp < len) {
+                dst[dp++] = (byte)decode(src[sp++]);
+            }
+            return dp;
+        }
+
+        @Override
         public int decode(byte[] src, int sp, int len, char[] dst) {
             if (len > dst.length)
                 len = dst.length;
@@ -143,6 +165,11 @@
         public boolean isASCIICompatible() {
             return isASCIICompatible;
         }
+
+        @Override
+        public boolean isLatin1Decodable() {
+            return isLatin1Decodable;
+        }
     }
 
     public static final class Encoder extends CharsetEncoder