8010316: Improve handling of char sequences containing surrogates
authormartin
Tue, 26 Mar 2013 13:36:51 -0700
changeset 16714 cb235d5f8bd4
parent 16713 7824dded9821
child 16715 bedceea6e559
8010316: Improve handling of char sequences containing surrogates Summary: Fix and optimize codePointAt, codePointBefore and similar methods Reviewed-by: sherman, okutsu, ulfzibis, kizune
jdk/src/share/classes/java/lang/AbstractStringBuilder.java
jdk/src/share/classes/java/lang/Character.java
jdk/test/java/lang/StringBuilder/Supplementary.java
--- a/jdk/src/share/classes/java/lang/AbstractStringBuilder.java	Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/src/share/classes/java/lang/AbstractStringBuilder.java	Tue Mar 26 13:36:51 2013 -0700
@@ -236,7 +236,7 @@
         if ((index < 0) || (index >= count)) {
             throw new StringIndexOutOfBoundsException(index);
         }
-        return Character.codePointAt(value, index);
+        return Character.codePointAtImpl(value, index, count);
     }
 
     /**
@@ -265,7 +265,7 @@
         if ((i < 0) || (i >= count)) {
             throw new StringIndexOutOfBoundsException(index);
         }
-        return Character.codePointBefore(value, index);
+        return Character.codePointBeforeImpl(value, index, 0);
     }
 
     /**
@@ -1370,32 +1370,37 @@
      * @return  a reference to this object.
      */
     public AbstractStringBuilder reverse() {
-        boolean hasSurrogate = false;
+        boolean hasSurrogates = false;
         int n = count - 1;
-        for (int j = (n-1) >> 1; j >= 0; --j) {
-            char temp = value[j];
-            char temp2 = value[n - j];
-            if (!hasSurrogate) {
-                hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE)
-                    || (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE);
+        for (int j = (n-1) >> 1; j >= 0; j--) {
+            int k = n - j;
+            char cj = value[j];
+            char ck = value[k];
+            value[j] = ck;
+            value[k] = cj;
+            if (Character.isSurrogate(cj) ||
+                Character.isSurrogate(ck)) {
+                hasSurrogates = true;
             }
-            value[j] = temp2;
-            value[n - j] = temp;
+        }
+        if (hasSurrogates) {
+            reverseAllValidSurrogatePairs();
         }
-        if (hasSurrogate) {
-            // Reverse back all valid surrogate pairs
-            for (int i = 0; i < count - 1; i++) {
-                char c2 = value[i];
-                if (Character.isLowSurrogate(c2)) {
-                    char c1 = value[i + 1];
-                    if (Character.isHighSurrogate(c1)) {
-                        value[i++] = c1;
-                        value[i] = c2;
-                    }
+        return this;
+    }
+
+    /** Outlined helper method for reverse() */
+    private void reverseAllValidSurrogatePairs() {
+        for (int i = 0; i < count - 1; i++) {
+            char c2 = value[i];
+            if (Character.isLowSurrogate(c2)) {
+                char c1 = value[i + 1];
+                if (Character.isHighSurrogate(c1)) {
+                    value[i++] = c1;
+                    value[i] = c2;
                 }
             }
         }
-        return this;
     }
 
     /**
--- a/jdk/src/share/classes/java/lang/Character.java	Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/src/share/classes/java/lang/Character.java	Tue Mar 26 13:36:51 2013 -0700
@@ -4862,13 +4862,11 @@
      * @since  1.5
      */
     public static int codePointAt(CharSequence seq, int index) {
-        char c1 = seq.charAt(index++);
-        if (isHighSurrogate(c1)) {
-            if (index < seq.length()) {
-                char c2 = seq.charAt(index);
-                if (isLowSurrogate(c2)) {
-                    return toCodePoint(c1, c2);
-                }
+        char c1 = seq.charAt(index);
+        if (isHighSurrogate(c1) && ++index < seq.length()) {
+            char c2 = seq.charAt(index);
+            if (isLowSurrogate(c2)) {
+                return toCodePoint(c1, c2);
             }
         }
         return c1;
@@ -4931,15 +4929,13 @@
         return codePointAtImpl(a, index, limit);
     }
 
-    // throws ArrayIndexOutofBoundsException if index out of bounds
+    // throws ArrayIndexOutOfBoundsException if index out of bounds
     static int codePointAtImpl(char[] a, int index, int limit) {
-        char c1 = a[index++];
-        if (isHighSurrogate(c1)) {
-            if (index < limit) {
-                char c2 = a[index];
-                if (isLowSurrogate(c2)) {
-                    return toCodePoint(c1, c2);
-                }
+        char c1 = a[index];
+        if (isHighSurrogate(c1) && ++index < limit) {
+            char c2 = a[index];
+            if (isLowSurrogate(c2)) {
+                return toCodePoint(c1, c2);
             }
         }
         return c1;
@@ -4968,12 +4964,10 @@
      */
     public static int codePointBefore(CharSequence seq, int index) {
         char c2 = seq.charAt(--index);
-        if (isLowSurrogate(c2)) {
-            if (index > 0) {
-                char c1 = seq.charAt(--index);
-                if (isHighSurrogate(c1)) {
-                    return toCodePoint(c1, c2);
-                }
+        if (isLowSurrogate(c2) && index > 0) {
+            char c1 = seq.charAt(--index);
+            if (isHighSurrogate(c1)) {
+                return toCodePoint(c1, c2);
             }
         }
         return c2;
@@ -5038,15 +5032,13 @@
         return codePointBeforeImpl(a, index, start);
     }
 
-    // throws ArrayIndexOutofBoundsException if index-1 out of bounds
+    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
     static int codePointBeforeImpl(char[] a, int index, int start) {
         char c2 = a[--index];
-        if (isLowSurrogate(c2)) {
-            if (index > start) {
-                char c1 = a[--index];
-                if (isHighSurrogate(c1)) {
-                    return toCodePoint(c1, c2);
-                }
+        if (isLowSurrogate(c2) && index > start) {
+            char c1 = a[--index];
+            if (isHighSurrogate(c1)) {
+                return toCodePoint(c1, c2);
             }
         }
         return c2;
--- a/jdk/test/java/lang/StringBuilder/Supplementary.java	Tue Mar 26 13:34:54 2013 -0700
+++ b/jdk/test/java/lang/StringBuilder/Supplementary.java	Tue Mar 26 13:36:51 2013 -0700
@@ -37,6 +37,7 @@
         test4();        // Test for appendCodePoint(int codePoint)
         test5();        // Test for codePointCount(int beginIndex, int endIndex)
         test6();        // Test for offsetByCodePoints(int index, int offset)
+        testDontReadOutOfBoundsTrailingSurrogate();
     }
 
     /* Text strings which are used as input data.
@@ -305,6 +306,19 @@
         }
     }
 
+    static void testDontReadOutOfBoundsTrailingSurrogate() {
+        StringBuilder sb = new StringBuilder();
+        int suppl = Character.MIN_SUPPLEMENTARY_CODE_POINT;
+        sb.appendCodePoint(suppl);
+        check(sb.codePointAt(0) != (int) suppl,
+              "codePointAt(0)", sb.codePointAt(0), suppl);
+        check(sb.length() != 2, "sb.length()");
+        sb.setLength(1);
+        check(sb.length() != 1, "sb.length()");
+        check(sb.codePointAt(0) != Character.highSurrogate(suppl),
+              "codePointAt(0)",
+              sb.codePointAt(0), Character.highSurrogate(suppl));
+    }
 
     static final boolean At = true, Before = false;