# HG changeset patch # User aleonard # Date 1570699735 -3600 # Node ID 3968bf3673c50327115373159982874a82f76f56 # Parent 5a4b4544b810c18dd796d2f3853055031fd0af1a 8231717: Improve performance of charset decoding when charset is always compactable Reviewed-by: rriggs, redestad, alanb diff -r 5a4b4544b810 -r 3968bf3673c5 make/data/charsetmapping/SingleByte-X.java.template --- a/make/data/charsetmapping/SingleByte-X.java.template Fri Oct 11 06:57:33 2019 -0700 +++ b/make/data/charsetmapping/SingleByte-X.java.template Thu Oct 10 10:28:55 2019 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,7 +48,7 @@ } public CharsetDecoder newDecoder() { - return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$); + return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$, $LATIN1DECODABLE$); } public CharsetEncoder newEncoder() { diff -r 5a4b4544b810 -r 3968bf3673c5 make/jdk/src/classes/build/tools/charsetmapping/SBCS.java --- a/make/jdk/src/classes/build/tools/charsetmapping/SBCS.java Fri Oct 11 06:57:33 2019 -0700 +++ b/make/jdk/src/classes/build/tools/charsetmapping/SBCS.java Thu Oct 10 10:28:55 2019 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,6 +46,7 @@ String hisName = cs.hisName; String pkgName = cs.pkgName; boolean isASCII = cs.isASCII; + boolean isLatin1Decodable = true; StringBuilder b2cSB = new StringBuilder(); StringBuilder b2cNRSB = new StringBuilder(); @@ -69,6 +70,9 @@ c2bOff += 0x100; c2bIndex[e.cp>>8] = 1; } + if (e.cp > 0xFF) { + isLatin1Decodable = false; + } } Formatter fm = new Formatter(b2cSB); @@ -178,6 +182,9 @@ if (line.indexOf("$ASCIICOMPATIBLE$") != -1) { line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false"); } + if (line.indexOf("$LATIN1DECODABLE$") != -1) { + line = line.replace("$LATIN1DECODABLE$", isLatin1Decodable ? "true" : "false"); + } if (line.indexOf("$B2CTABLE$") != -1) { line = line.replace("$B2CTABLE$", b2c); } diff -r 5a4b4544b810 -r 3968bf3673c5 src/java.base/share/classes/java/lang/StringCoding.java --- a/src/java.base/share/classes/java/lang/StringCoding.java Fri Oct 11 06:57:33 2019 -0700 +++ b/src/java.base/share/classes/java/lang/StringCoding.java Thu Oct 10 10:28:55 2019 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -191,6 +191,12 @@ return result.with(StringLatin1.inflate(ba, off, len), UTF16); } } + // fastpath for always Latin1 decodable single byte + if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) { + byte[] dst = new byte[len]; + ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst); + return result.with(dst, LATIN1); + } int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; if (cd instanceof ArrayDecoder) { @@ -278,6 +284,13 @@ ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) { return decodeLatin1(ba, off, len); } + // fastpath for always Latin1 decodable single byte + if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) { + byte[] dst = new byte[len]; + ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst); + return new Result().with(dst, LATIN1); + } + int en = scale(len, cd.maxCharsPerByte()); if (len == 0) { return new Result().with(); diff -r 5a4b4544b810 -r 3968bf3673c5 src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java --- a/src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java Fri Oct 11 06:57:33 2019 -0700 +++ b/src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java Thu Oct 10 10:28:55 2019 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,9 @@ /* * FastPath byte[]->char[] decoder, REPLACE on malformed or * unmappable input. + * + * FastPath encoded byte[]-> "String Latin1 coding" byte[] decoder for use when + * charset is always decodable to the internal String Latin1 coding byte[], ie. all mappings <=0xff */ public interface ArrayDecoder { @@ -36,4 +39,14 @@ default boolean isASCIICompatible() { return false; } + + // Is always decodable to internal String Latin1 coding, ie. all mappings <= 0xff + default boolean isLatin1Decodable() { + return false; + } + + // Decode to internal String Latin1 coding byte[] fastpath for when isLatin1Decodable == true + default int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) { + return 0; + } } diff -r 5a4b4544b810 -r 3968bf3673c5 src/java.base/share/classes/sun/nio/cs/SingleByte.java --- a/src/java.base/share/classes/sun/nio/cs/SingleByte.java Fri Oct 11 06:57:33 2019 -0700 +++ b/src/java.base/share/classes/sun/nio/cs/SingleByte.java Thu Oct 10 10:28:55 2019 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,17 +50,27 @@ implements ArrayDecoder { private final char[] b2c; private final boolean isASCIICompatible; + private final boolean isLatin1Decodable; public Decoder(Charset cs, char[] b2c) { super(cs, 1.0f, 1.0f); this.b2c = b2c; this.isASCIICompatible = false; + this.isLatin1Decodable = false; } public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible) { super(cs, 1.0f, 1.0f); this.b2c = b2c; this.isASCIICompatible = isASCIICompatible; + this.isLatin1Decodable = false; + } + + public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible, boolean isLatin1Decodable) { + super(cs, 1.0f, 1.0f); + this.b2c = b2c; + this.isASCIICompatible = isASCIICompatible; + this.isLatin1Decodable = isLatin1Decodable; } private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { @@ -125,6 +135,18 @@ } @Override + public int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) { + if (len > dst.length) + len = dst.length; + + int dp = 0; + while (dp < len) { + dst[dp++] = (byte)decode(src[sp++]); + } + return dp; + } + + @Override public int decode(byte[] src, int sp, int len, char[] dst) { if (len > dst.length) len = dst.length; @@ -143,6 +165,11 @@ public boolean isASCIICompatible() { return isASCIICompatible; } + + @Override + public boolean isLatin1Decodable() { + return isLatin1Decodable; + } } public static final class Encoder extends CharsetEncoder