8010316: Improve handling of char sequences containing surrogates
Summary: Fix and optimize codePointAt, codePointBefore and similar methods
Reviewed-by: sherman, okutsu, ulfzibis, kizune
--- a/jdk/src/share/classes/java/lang/AbstractStringBuilder.java Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/src/share/classes/java/lang/AbstractStringBuilder.java Tue Mar 26 13:36:51 2013 -0700
@@ -236,7 +236,7 @@
if ((index < 0) || (index >= count)) {
throw new StringIndexOutOfBoundsException(index);
}
- return Character.codePointAt(value, index);
+ return Character.codePointAtImpl(value, index, count);
}
/**
@@ -265,7 +265,7 @@
if ((i < 0) || (i >= count)) {
throw new StringIndexOutOfBoundsException(index);
}
- return Character.codePointBefore(value, index);
+ return Character.codePointBeforeImpl(value, index, 0);
}
/**
@@ -1370,32 +1370,37 @@
* @return a reference to this object.
*/
public AbstractStringBuilder reverse() {
- boolean hasSurrogate = false;
+ boolean hasSurrogates = false;
int n = count - 1;
- for (int j = (n-1) >> 1; j >= 0; --j) {
- char temp = value[j];
- char temp2 = value[n - j];
- if (!hasSurrogate) {
- hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE)
- || (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE);
+ for (int j = (n-1) >> 1; j >= 0; j--) {
+ int k = n - j;
+ char cj = value[j];
+ char ck = value[k];
+ value[j] = ck;
+ value[k] = cj;
+ if (Character.isSurrogate(cj) ||
+ Character.isSurrogate(ck)) {
+ hasSurrogates = true;
}
- value[j] = temp2;
- value[n - j] = temp;
+ }
+ if (hasSurrogates) {
+ reverseAllValidSurrogatePairs();
}
- if (hasSurrogate) {
- // Reverse back all valid surrogate pairs
- for (int i = 0; i < count - 1; i++) {
- char c2 = value[i];
- if (Character.isLowSurrogate(c2)) {
- char c1 = value[i + 1];
- if (Character.isHighSurrogate(c1)) {
- value[i++] = c1;
- value[i] = c2;
- }
+ return this;
+ }
+
+ /** Outlined helper method for reverse() */
+ private void reverseAllValidSurrogatePairs() {
+ for (int i = 0; i < count - 1; i++) {
+ char c2 = value[i];
+ if (Character.isLowSurrogate(c2)) {
+ char c1 = value[i + 1];
+ if (Character.isHighSurrogate(c1)) {
+ value[i++] = c1;
+ value[i] = c2;
}
}
}
- return this;
}
/**
--- a/jdk/src/share/classes/java/lang/Character.java Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/src/share/classes/java/lang/Character.java Tue Mar 26 13:36:51 2013 -0700
@@ -4862,13 +4862,11 @@
* @since 1.5
*/
public static int codePointAt(CharSequence seq, int index) {
- char c1 = seq.charAt(index++);
- if (isHighSurrogate(c1)) {
- if (index < seq.length()) {
- char c2 = seq.charAt(index);
- if (isLowSurrogate(c2)) {
- return toCodePoint(c1, c2);
- }
+ char c1 = seq.charAt(index);
+ if (isHighSurrogate(c1) && ++index < seq.length()) {
+ char c2 = seq.charAt(index);
+ if (isLowSurrogate(c2)) {
+ return toCodePoint(c1, c2);
}
}
return c1;
@@ -4931,15 +4929,13 @@
return codePointAtImpl(a, index, limit);
}
- // throws ArrayIndexOutofBoundsException if index out of bounds
+ // throws ArrayIndexOutOfBoundsException if index out of bounds
static int codePointAtImpl(char[] a, int index, int limit) {
- char c1 = a[index++];
- if (isHighSurrogate(c1)) {
- if (index < limit) {
- char c2 = a[index];
- if (isLowSurrogate(c2)) {
- return toCodePoint(c1, c2);
- }
+ char c1 = a[index];
+ if (isHighSurrogate(c1) && ++index < limit) {
+ char c2 = a[index];
+ if (isLowSurrogate(c2)) {
+ return toCodePoint(c1, c2);
}
}
return c1;
@@ -4968,12 +4964,10 @@
*/
public static int codePointBefore(CharSequence seq, int index) {
char c2 = seq.charAt(--index);
- if (isLowSurrogate(c2)) {
- if (index > 0) {
- char c1 = seq.charAt(--index);
- if (isHighSurrogate(c1)) {
- return toCodePoint(c1, c2);
- }
+ if (isLowSurrogate(c2) && index > 0) {
+ char c1 = seq.charAt(--index);
+ if (isHighSurrogate(c1)) {
+ return toCodePoint(c1, c2);
}
}
return c2;
@@ -5038,15 +5032,13 @@
return codePointBeforeImpl(a, index, start);
}
- // throws ArrayIndexOutofBoundsException if index-1 out of bounds
+ // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
static int codePointBeforeImpl(char[] a, int index, int start) {
char c2 = a[--index];
- if (isLowSurrogate(c2)) {
- if (index > start) {
- char c1 = a[--index];
- if (isHighSurrogate(c1)) {
- return toCodePoint(c1, c2);
- }
+ if (isLowSurrogate(c2) && index > start) {
+ char c1 = a[--index];
+ if (isHighSurrogate(c1)) {
+ return toCodePoint(c1, c2);
}
}
return c2;
--- a/jdk/test/java/lang/StringBuilder/Supplementary.java Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/test/java/lang/StringBuilder/Supplementary.java Tue Mar 26 13:36:51 2013 -0700
@@ -37,6 +37,7 @@
test4(); // Test for appendCodePoint(int codePoint)
test5(); // Test for codePointCount(int beginIndex, int endIndex)
test6(); // Test for offsetByCodePoints(int index, int offset)
+ testDontReadOutOfBoundsTrailingSurrogate();
}
/* Text strings which are used as input data.
@@ -305,6 +306,19 @@
}
}
+ static void testDontReadOutOfBoundsTrailingSurrogate() {
+ StringBuilder sb = new StringBuilder();
+ int suppl = Character.MIN_SUPPLEMENTARY_CODE_POINT;
+ sb.appendCodePoint(suppl);
+ check(sb.codePointAt(0) != (int) suppl,
+ "codePointAt(0)", sb.codePointAt(0), suppl);
+ check(sb.length() != 2, "sb.length()");
+ sb.setLength(1);
+ check(sb.length() != 1, "sb.length()");
+ check(sb.codePointAt(0) != Character.highSurrogate(suppl),
+ "codePointAt(0)",
+ sb.codePointAt(0), Character.highSurrogate(suppl));
+ }
static final boolean At = true, Before = false;