8224986: (str) optimize StringBuilder.append(CharSequence, int, int) for String arguments
author redestad
Fri, 31 May 2019 12:20:21 +0200
changeset 55124 f91999057a5a
parent 55123 7b70f269fe0d
child 55125 89ac8ad48ac6
8224986: (str) optimize StringBuilder.append(CharSequence, int, int) for String arguments
Reviewed-by: jlaskey, rriggs
src/java.base/share/classes/java/lang/AbstractStringBuilder.java
src/java.base/share/classes/java/lang/String.java
test/micro/org/openjdk/bench/java/lang/StringBuilders.java
--- a/src/java.base/share/classes/java/lang/AbstractStringBuilder.java	Fri May 31 10:13:24 2019 +0200
+++ b/src/java.base/share/classes/java/lang/AbstractStringBuilder.java	Fri May 31 12:20:21 2019 +0200
@@ -26,6 +26,7 @@
 package java.lang;
 
 import jdk.internal.math.FloatingDecimal;
+
 import java.util.Arrays;
 import java.util.Spliterator;
 import java.util.stream.IntStream;
@@ -685,10 +686,15 @@
         checkRange(start, end, s.length());
         int len = end - start;
         ensureCapacityInternal(count + len);
-        appendChars(s, start, end);
+        if (s instanceof String) {
+            appendChars((String)s, start, end);
+        } else {
+            appendChars(s, start, end);
+        }
         return this;
     }
 
+
     /**
      * Appends the string representation of the {@code char} array
      * argument to this sequence.
@@ -1743,6 +1749,35 @@
         this.count = count + end - off;
     }
 
+    private final void appendChars(String s, int off, int end) { // String-only copy of s[off, end) onto the end of this builder; the call site in this change runs checkRange and ensureCapacityInternal first
+        if (isLatin1()) { // this builder is currently Latin-1 coded
+            if (s.isLatin1()) {
+                System.arraycopy(s.value(), off, this.value, this.count, end - off); // same coder on both sides: indices are used unshifted
+            } else {
+                // The source is not Latin-1 coded, but the requested range may still hold
+                // only Latin-1 chars, so inflate only once a non-encodable char is found.
+                byte[] val = this.value;
+                for (int i = off, j = count; i < end; i++) { // i: read index in s, j: write index in this builder
+                    char c = s.charAt(i);
+                    if (StringLatin1.canEncode(c)) {
+                        val[j++] = (byte) c;
+                    } else {
+                        count = j; // record the chars stored so far before inflating (inflate() presumably reads count - confirm)
+                        inflate();
+                        System.arraycopy(s.value(), i << UTF16, this.value, j << UTF16, (end - i) << UTF16); // reads this.value rather than val, presumably because inflate() replaces the array - confirm
+                        count += end - i; // account for the remaining chars [i, end) copied above
+                        return; // count is already final here; skip the shared update at the bottom
+                    }
+                }
+            }
+        } else if (s.isLatin1()) { // builder is not Latin-1 coded, source is
+            StringUTF16.putCharsSB(this.value, this.count, s, off, end); // helper not shown in this patch; presumably stores s[off, end) from this.count onward - confirm
+        } else { // both UTF16
+            System.arraycopy(s.value(), off << UTF16, this.value, this.count << UTF16, (end - off) << UTF16); // every index and the length are shifted by UTF16 before addressing the arrays
+        }
+        count += end - off; // shared by every path that did not inflate mid-copy
+    }
+
     private final void appendChars(CharSequence s, int off, int end) {
         if (isLatin1()) {
             byte[] val = this.value;
--- a/src/java.base/share/classes/java/lang/String.java	Fri May 31 10:13:24 2019 +0200
+++ b/src/java.base/share/classes/java/lang/String.java	Fri May 31 12:20:21 2019 +0200
@@ -3386,7 +3386,7 @@
         return value;
     }
 
-    private boolean isLatin1() {
+    boolean isLatin1() { // no longer private: AbstractStringBuilder.appendChars(String, int, int) in this change calls s.isLatin1()
         return COMPACT_STRINGS && coder == LATIN1;
     }
 
--- a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java	Fri May 31 10:13:24 2019 +0200
+++ b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java	Fri May 31 12:20:21 2019 +0200
@@ -26,6 +26,7 @@
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
@@ -59,25 +60,21 @@
         sbUtf16 = new StringBuilder("UTF-\uFF11\uFF16 string");
     }
 
-    /** StringBuilder wins over StringMaker. */
     @Benchmark
     public String concat3p4p2() throws Exception {
         return new StringBuilder(String.valueOf(str3p4p2[0])).append(str3p4p2[1]).append(str3p4p2[2]).toString();
     }
 
-    /** StringBuilder wins over StringMaker. */
     @Benchmark
     public String concat16p8p7() throws Exception {
         return new StringBuilder(String.valueOf(str16p8p7[0])).append(str16p8p7[1]).append(str16p8p7[2]).toString();
     }
 
-    /** StringMaker wins over StringBuilder since the two last strings causes StringBuilder to do expand. */
     @Benchmark
     public String concat3p9p8() throws Exception {
         return new StringBuilder(String.valueOf(str3p9p8[0])).append(str3p9p8[1]).append(str3p9p8[2]).toString();
     }
 
-    /** StringMaker wins over StringBuilder. */
     @Benchmark
     public String concat22p40p31() throws Exception {
         return new StringBuilder(String.valueOf(str22p40p31[0])).append(str22p40p31[1]).append(str22p40p31[2]).toString();
@@ -280,4 +277,104 @@
     public StringBuilder fromUtf16StringBuilder() {
         return new StringBuilder(sbUtf16);
     }
+
+    @Benchmark
+    @SuppressWarnings("StringBufferReplaceableByString")
+    public String appendSubstring(Data data) { // builds 'L' + str[beginIndex, endIndex) + ';' through an intermediate substring
+        String str = data.str;
+        int beginIndex = data.beginIndex;
+        int endIndex = data.endIndex;
+        // Materialize the range as its own String first, then append that String whole.
+        String substring = str.substring(beginIndex, endIndex);
+        return new StringBuilder().append('L').append(substring).append(';').toString();
+    }
+
+    @Benchmark
+    public String appendBounds(Data data) { // builds 'L' + str[beginIndex, endIndex) + ';' without an intermediate substring
+        String str = data.str;
+        int beginIndex = data.beginIndex;
+        int endIndex = data.endIndex;
+        // Hand the whole String plus the bounds to append(CharSequence, int, int).
+        return new StringBuilder().append('L').append(str, beginIndex, endIndex).append(';').toString();
+    }
+
+    @Benchmark
+    @SuppressWarnings("StringBufferReplaceableByString")
+    public String appendSubstringUtf16(Data data) { // same shape as appendSubstring, but reads data.utf16Str
+        String str = data.utf16Str;
+        int beginIndex = data.beginIndex;
+        int endIndex = data.endIndex;
+        // Materialize the range as its own String first.
+        String substring = str.substring(beginIndex, endIndex);
+        // Then append that String whole between the 'L' and ';' markers.
+        return new StringBuilder().append('L').append(substring).append(';').toString();
+    }
+
+    @Benchmark
+    public String appendBoundsUtf16(Data data) { // same shape as appendBounds, but reads data.utf16Str
+        String str = data.utf16Str;
+        int beginIndex = data.beginIndex;
+        int endIndex = data.endIndex;
+        // Hand the whole String plus the bounds to append(CharSequence, int, int).
+        return new StringBuilder().append('L').append(str, beginIndex,
+                endIndex).append(';').toString();
+    }
+
+    @Benchmark
+    public String appendBoundsMix(Data data) { // bounded append whose argument changes from one invocation to the next
+        CharSequence str = data.next(); // next() rotates through str, utf16Str and the StringBuilder-backed cs
+        int beginIndex = data.beginIndex;
+        int endIndex = data.endIndex;
+        // Declared as CharSequence, so this call receives both String and non-String arguments over time.
+        return new StringBuilder().append('L').append(str, beginIndex,
+                endIndex).append(';').toString();
+    }
+
+    @State(Scope.Thread)
+    public static class Data { // inputs for the append* benchmarks: three sequences plus a [beginIndex, endIndex) range
+        int i = 0; // rotation cursor for next()
+
+        public CharSequence next() { // hands out str, utf16Str, cs in that order, then starts over
+            i++;
+            if (i == 1) {
+                return str;
+            } else if (i == 2) {
+                return utf16Str;
+            } else {
+                i = 0; // third call of the cycle: reset so the following call returns str again
+                return cs;
+            }
+        }
+
+        String str; // chars drawn only from [a-z0-9]
+        String utf16Str; // same as str except that one char is replaced by U+04FF
+        CharSequence cs; // StringBuilder holding the same chars as str
+
+        @Param({"10", "1000"})
+        private int length; // number of chars appended by generateData()
+
+        private int beginIndex; // length / 4
+        private int endIndex; // (length / 4) * 3, using integer division
+
+        @Setup
+        public void setup() { // builds the inputs, then derives the range from length
+            generateData();
+            beginIndex = length / 4;
+            endIndex = length / 4 * 3;
+        }
+
+        private void generateData() {
+            char[] chars = "abcdefghijklmnopqrstuvwxyz0123456789".toCharArray();
+            // Fill sb with `length` chars, cycling through the alphabet above.
+            StringBuilder sb = new StringBuilder(length);
+            for (int i = 0; i < length; i++) {
+                char c = chars[i % chars.length];
+                sb.append(c);
+            }
+            str = sb.toString();
+            sb.replace(length / 4 * 2, length / 4 * 2 + 1, "\u04FF"); // index (length / 4) * 2 falls inside [beginIndex, endIndex) for both configured lengths (4 in [2, 6) and 500 in [250, 750))
+            utf16Str = sb.toString();
+            cs = new StringBuilder(str); // a CharSequence that is not a String, with the same content as str
+        }
+    }
 }