8224986: (str) optimize StringBuilder.append(CharSequence, int, int) for String arguments
Reviewed-by: jlaskey, rriggs
--- a/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Fri May 31 10:13:24 2019 +0200
+++ b/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Fri May 31 12:20:21 2019 +0200
@@ -26,6 +26,7 @@
package java.lang;
import jdk.internal.math.FloatingDecimal;
+
import java.util.Arrays;
import java.util.Spliterator;
import java.util.stream.IntStream;
@@ -685,10 +686,15 @@
checkRange(start, end, s.length());
int len = end - start;
ensureCapacityInternal(count + len);
- appendChars(s, start, end);
+ if (s instanceof String) {
+ appendChars((String)s, start, end);
+ } else {
+ appendChars(s, start, end);
+ }
return this;
}
+
/**
* Appends the string representation of the {@code char} array
* argument to this sequence.
@@ -1743,6 +1749,35 @@
this.count = count + end - off;
}
+ private final void appendChars(String s, int off, int end) { // String-specific path: appends s[off, end) and advances count by end - off
+ if (isLatin1()) {
+ if (s.isLatin1()) {
+ System.arraycopy(s.value(), off, this.value, this.count, end - off); // both Latin-1: offsets and length are used as-is (no << UTF16 scaling)
+ } else {
+ // We might need to inflate, but do it as late as possible since
+ // the range of characters we're copying might all be latin1
+ byte[] val = this.value;
+ for (int i = off, j = count; i < end; i++) { // i walks the source range, j is the write position in this builder
+ char c = s.charAt(i);
+ if (StringLatin1.canEncode(c)) {
+ val[j++] = (byte) c; // narrow and store directly; count itself is only updated after the loop
+ } else {
+ count = j; // record the chars stored so far before inflating (presumably inflate() relies on count - confirm)
+ inflate();
+ System.arraycopy(s.value(), i << UTF16, this.value, j << UTF16, (end - i) << UTF16); // copy the rest starting at char i; reads this.value again rather than val (presumably inflate() replaced the array - confirm)
+ count += end - i;
+ return; // count is already final on this path; skip the shared count update at the bottom
+ }
+ }
+ }
+ } else if (s.isLatin1()) {
+ StringUTF16.putCharsSB(this.value, this.count, s, off, end); // builder is not Latin-1 but the source is: delegate to the helper (presumably widens each char - confirm)
+ } else { // both UTF16
+ System.arraycopy(s.value(), off << UTF16, this.value, this.count << UTF16, (end - off) << UTF16); // char offsets and length are shifted by UTF16 before indexing the arrays
+ }
+ count += end - off; // shared by every path that did not return early
+ }
+
private final void appendChars(CharSequence s, int off, int end) {
if (isLatin1()) {
byte[] val = this.value;
--- a/src/java.base/share/classes/java/lang/String.java Fri May 31 10:13:24 2019 +0200
+++ b/src/java.base/share/classes/java/lang/String.java Fri May 31 12:20:21 2019 +0200
@@ -3386,7 +3386,7 @@
return value;
}
- private boolean isLatin1() {
+ boolean isLatin1() { // no longer private: AbstractStringBuilder.appendChars(String, int, int) in the same package calls s.isLatin1()
return COMPACT_STRINGS && coder == LATIN1;
}
--- a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java Fri May 31 10:13:24 2019 +0200
+++ b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java Fri May 31 12:20:21 2019 +0200
@@ -26,6 +26,7 @@
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
@@ -59,25 +60,21 @@
sbUtf16 = new StringBuilder("UTF-\uFF11\uFF16 string");
}
- /** StringBuilder wins over StringMaker. */
@Benchmark
public String concat3p4p2() throws Exception {
return new StringBuilder(String.valueOf(str3p4p2[0])).append(str3p4p2[1]).append(str3p4p2[2]).toString();
}
- /** StringBuilder wins over StringMaker. */
@Benchmark
public String concat16p8p7() throws Exception {
return new StringBuilder(String.valueOf(str16p8p7[0])).append(str16p8p7[1]).append(str16p8p7[2]).toString();
}
- /** StringMaker wins over StringBuilder since the two last strings causes StringBuilder to do expand. */
@Benchmark
public String concat3p9p8() throws Exception {
return new StringBuilder(String.valueOf(str3p9p8[0])).append(str3p9p8[1]).append(str3p9p8[2]).toString();
}
- /** StringMaker wins over StringBuilder. */
@Benchmark
public String concat22p40p31() throws Exception {
return new StringBuilder(String.valueOf(str22p40p31[0])).append(str22p40p31[1]).append(str22p40p31[2]).toString();
@@ -280,4 +277,104 @@
public StringBuilder fromUtf16StringBuilder() {
return new StringBuilder(sbUtf16);
}
+
+ @Benchmark
+ @SuppressWarnings("StringBufferReplaceableByString")
+ public String appendSubstring(Data data) { // baseline on data.str (ASCII-only text): build an intermediate substring, then append it
+ String str = data.str;
+ int beginIndex = data.beginIndex;
+ int endIndex = data.endIndex;
+
+ String substring = str.substring(beginIndex, endIndex); // intermediate String that appendBounds does not create
+ return new StringBuilder().append('L').append(substring).append(';').toString();
+ }
+
+ @Benchmark
+ public String appendBounds(Data data) { // same output as appendSubstring, but passes the bounds straight to append(CharSequence, int, int)
+ String str = data.str;
+ int beginIndex = data.beginIndex;
+ int endIndex = data.endIndex;
+
+ return new StringBuilder().append('L').append(str, beginIndex, endIndex).append(';').toString(); // no intermediate substring is created here
+ }
+
+ @Benchmark
+ @SuppressWarnings("StringBufferReplaceableByString")
+ public String appendSubstringUtf16(Data data) { // substring-then-append baseline on data.utf16Str, which contains one U+04FF char
+ String str = data.utf16Str;
+ int beginIndex = data.beginIndex;
+ int endIndex = data.endIndex;
+
+ String substring = str.substring(beginIndex, endIndex); // intermediate String that appendBoundsUtf16 does not create
+
+ return new StringBuilder().append('L').append(substring).append(';').toString();
+ }
+
+ @Benchmark
+ public String appendBoundsUtf16(Data data) { // bounds-based append on data.utf16Str; the builder starts from the char 'L' before the range is appended
+ String str = data.utf16Str;
+ int beginIndex = data.beginIndex;
+ int endIndex = data.endIndex;
+
+ return new StringBuilder().append('L').append(str, beginIndex, // append(CharSequence, int, int) call continues on the next line
+ endIndex).append(';').toString();
+ }
+
+ @Benchmark
+ public String appendBoundsMix(Data data) { // bounds-based append where the argument type changes from call to call
+ CharSequence str = data.next(); // rotates through data.str, data.utf16Str and data.cs (a StringBuilder)
+ int beginIndex = data.beginIndex;
+ int endIndex = data.endIndex;
+
+ return new StringBuilder().append('L').append(str, beginIndex, // static type is CharSequence, so both String and non-String arguments reach this call
+ endIndex).append(';').toString();
+ }
+
+ @State(Scope.Thread)
+ public static class Data { // JMH state object holding the inputs for the append* benchmarks
+ int i = 0; // rotation cursor used by next()
+
+ public CharSequence next() { // returns str, utf16Str, cs in that order, then starts over
+ i++;
+ if (i == 1) {
+ return str;
+ } else if (i == 2) {
+ return utf16Str;
+ } else {
+ i = 0; // reset so the following call returns str again
+ return cs;
+ }
+ }
+
+ String str; // `length` chars drawn from a-z and 0-9
+ String utf16Str; // same text as str with one char replaced by U+04FF
+ CharSequence cs; // StringBuilder copy of str, i.e. a non-String CharSequence
+
+ @Param({"10", "1000"})
+ private int length; // number of chars in each generated input
+
+ private int beginIndex; // start of the appended range, set in setup()
+ private int endIndex; // end (exclusive) of the appended range, set in setup()
+
+ @Setup
+ public void setup() {
+ generateData(); // must run first: it fills str, utf16Str and cs from length
+ beginIndex = length / 4;
+ endIndex = length / 4 * 3; // evaluated left to right: (length / 4) * 3, e.g. 6 for length 10
+ }
+
+ private void generateData() { // builds the three inputs from the configured length
+ char[] chars = "abcdefghijklmnopqrstuvwxyz0123456789".toCharArray();
+
+ StringBuilder sb = new StringBuilder(length);
+ for (int i = 0; i < length; i++) {
+ char c = chars[i % chars.length]; // cycle through the 36-char alphabet
+ sb.append(c);
+ }
+ str = sb.toString();
+ sb.replace(length / 4 * 2, length / 4 * 2 + 1, "\u04FF"); // swap one char for U+04FF; for the configured lengths its index lies between beginIndex and endIndex
+ utf16Str = sb.toString();
+ cs = new StringBuilder(str); // built from str, so it does not contain the U+04FF char
+ }
+ }
}