8032012: String.toLowerCase/toUpperCase performance improvement
author sherman
Fri, 07 Feb 2014 09:04:17 -0800
changeset 22943 f35dda1a2873
parent 22942 3e0d28137ea8
child 22944 a9e7afb60f4f
8032012: String.toLowerCase/toUpperCase performance improvement
Summary: updated the implementation to improve the performance
Reviewed-by: psandoz, forax
jdk/src/share/classes/java/lang/String.java
jdk/test/java/lang/String/ToLowerCase.java
--- a/jdk/src/share/classes/java/lang/String.java	Thu Feb 06 16:42:25 2014 -0800
+++ b/jdk/src/share/classes/java/lang/String.java	Fri Feb 07 09:04:17 2014 -0800
@@ -2549,87 +2549,88 @@
         if (locale == null) {
             throw new NullPointerException();
         }
-
-        int firstUpper;
+        int first;
+        boolean hasSurr = false;
         final int len = value.length;
 
-        /* Now check if there are any characters that need to be changed. */
-        scan: {
-            for (firstUpper = 0 ; firstUpper < len; ) {
-                char c = value[firstUpper];
-                if ((c >= Character.MIN_HIGH_SURROGATE)
-                        && (c <= Character.MAX_HIGH_SURROGATE)) {
-                    int supplChar = codePointAt(firstUpper);
-                    if (supplChar != Character.toLowerCase(supplChar)) {
-                        break scan;
-                    }
-                    firstUpper += Character.charCount(supplChar);
-                } else {
-                    if (c != Character.toLowerCase(c)) {
-                        break scan;
-                    }
-                    firstUpper++;
-                }
+        // Now check if there are any characters that need to be changed, or are surrogate
+        for (first = 0 ; first < len; first++) {
+            int cp = (int)value[first];
+            if (Character.isSurrogate((char)cp)) {
+                hasSurr = true;
+                break;
+            }
+            if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
+                break;
             }
+        }
+        if (first == len)
             return this;
+        char[] result = new char[len];
+        System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
+                                                       // lowerCase characters.
+        String lang = locale.getLanguage();
+        if (lang == "tr" || lang == "az" || lang == "lt") {
+            return toLowerCaseEx(result, first, locale, true);
+        }
+        if (hasSurr) {
+            return toLowerCaseEx(result, first, locale, false);
         }
-
-        char[] result = new char[len];
-        int resultOffset = 0;  /* result may grow, so i+resultOffset
-                                * is the write location in result */
-
-        /* Just copy the first few lowerCase characters. */
-        System.arraycopy(value, 0, result, 0, firstUpper);
+        for (int i = first; i < len; i++) {
+            int cp = (int)value[i];
+            if (cp == '\u03A3') {                       // GREEK CAPITAL LETTER SIGMA
+                return toLowerCaseEx(result, i, locale, false);
+            }
+            cp = Character.toLowerCase(cp);
+            if (!Character.isBmpCodePoint(cp)) {
+                return toLowerCaseEx(result, i, locale, false);
+            }
+            result[i] = (char)cp;
+        }
+        return new String(result, true);
+    }
 
-        String lang = locale.getLanguage();
-        boolean localeDependent =
-                (lang == "tr" || lang == "az" || lang == "lt");
-        char[] lowerCharArray;
-        int lowerChar;
-        int srcChar;
+    private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
+        int resultOffset = first;  // next write index; result[0..first) was filled in by the caller
         int srcCount;
-        for (int i = firstUpper; i < len; i += srcCount) {
-            srcChar = (int)value[i];
-            if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
-                    && (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
+        for (int i = first; i < value.length; i += srcCount) {  // i indexes the source, resultOffset the output
+            int srcChar = (int)value[i];
+            int lowerChar;
+            char[] lowerCharArray;
+            srcCount = 1;  // one source char consumed unless a surrogate pair is decoded below
+            if (Character.isSurrogate((char)srcChar)) {
                 srcChar = codePointAt(i);
                 srcCount = Character.charCount(srcChar);
-            } else {
-                srcCount = 1;
             }
             if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
             } else {
                 lowerChar = Character.toLowerCase(srcChar);
             }
-            if ((lowerChar == Character.ERROR)
-                    || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
+            if (Character.isBmpCodePoint(lowerChar)) {    // common case; Character.ERROR is not a BMP code point
+                result[resultOffset++] = (char)lowerChar;
+            } else {
                 if (lowerChar == Character.ERROR) {
-                    lowerCharArray =
-                            ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
+                    lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
                 } else if (srcCount == 2) {
-                    resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
+                    resultOffset += Character.toChars(lowerChar, result, resultOffset);  // written in place, advance by the count toChars returns
                     continue;
                 } else {
                     lowerCharArray = Character.toChars(lowerChar);
                 }
-
                 /* Grow result if needed */
                 int mapLen = lowerCharArray.length;
                 if (mapLen > srcCount) {
                     char[] result2 = new char[result.length + mapLen - srcCount];
-                    System.arraycopy(result, 0, result2, 0, i + resultOffset);
+                    System.arraycopy(result, 0, result2, 0, resultOffset);  // keep everything written so far
                     result = result2;
                 }
                 for (int x = 0; x < mapLen; ++x) {
-                    result[i + resultOffset + x] = lowerCharArray[x];
+                    result[resultOffset++] = lowerCharArray[x];
                 }
-                resultOffset += (mapLen - srcCount);
-            } else {
-                result[i + resultOffset] = (char)lowerChar;
             }
         }
-        return new String(result, 0, len + resultOffset);
+        return new String(result, 0, resultOffset);  // exactly the resultOffset chars that were written
     }
 
     /**
@@ -2707,92 +2708,91 @@
         if (locale == null) {
             throw new NullPointerException();
         }
-
-        int firstLower;
+        int first;
+        boolean hasSurr = false;
         final int len = value.length;
 
-        /* Now check if there are any characters that need to be changed. */
-        scan: {
-            for (firstLower = 0 ; firstLower < len; ) {
-                int c = (int)value[firstLower];
-                int srcCount;
-                if ((c >= Character.MIN_HIGH_SURROGATE)
-                        && (c <= Character.MAX_HIGH_SURROGATE)) {
-                    c = codePointAt(firstLower);
-                    srcCount = Character.charCount(c);
-                } else {
-                    srcCount = 1;
-                }
-                int upperCaseChar = Character.toUpperCaseEx(c);
-                if ((upperCaseChar == Character.ERROR)
-                        || (c != upperCaseChar)) {
-                    break scan;
-                }
-                firstLower += srcCount;
+        // Now check if there are any characters that need to be changed, or are surrogate
+        for (first = 0 ; first < len; first++ ) {
+            int cp = (int)value[first];
+            if (Character.isSurrogate((char)cp)) {
+                hasSurr = true;
+                break;
             }
+            if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
+                break;
+            }
+        }
+        if (first == len) {
             return this;
         }
-
-        /* result may grow, so i+resultOffset is the write location in result */
-        int resultOffset = 0;
-        char[] result = new char[len]; /* may grow */
-
-        /* Just copy the first few upperCase characters. */
-        System.arraycopy(value, 0, result, 0, firstLower);
-
+        char[] result = new char[len];
+        System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
+                                                       // upperCase characters.
         String lang = locale.getLanguage();
-        boolean localeDependent =
-                (lang == "tr" || lang == "az" || lang == "lt");
-        char[] upperCharArray;
-        int upperChar;
-        int srcChar;
+        if (lang == "tr" || lang == "az" || lang == "lt") {
+            return toUpperCaseEx(result, first, locale, true);
+        }
+        if (hasSurr) {
+            return toUpperCaseEx(result, first, locale, false);
+        }
+        for (int i = first; i < len; i++) {
+            int cp = Character.toUpperCaseEx((int)value[i]);
+            if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
+                return toUpperCaseEx(result, i, locale, false);
+            }
+            result[i] = (char)cp;
+        }
+        return new String(result, true);
+    }
+
+    private String toUpperCaseEx(char[] result, int first, Locale locale,
+                                 boolean localeDependent) {
+        int resultOffset = first;  // next write index; result[0..first) was filled in by the caller
         int srcCount;
-        for (int i = firstLower; i < len; i += srcCount) {
-            srcChar = (int)value[i];
-            if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
-                (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
+        for (int i = first; i < value.length; i += srcCount) {  // i indexes the source, resultOffset the output
+            int srcChar = (int)value[i];
+            int upperChar;
+            char[] upperCharArray;
+            srcCount = 1;  // one source char consumed unless a surrogate pair is decoded below
+            if (Character.isSurrogate((char)srcChar)) {
                 srcChar = codePointAt(i);
                 srcCount = Character.charCount(srcChar);
-            } else {
-                srcCount = 1;
             }
             if (localeDependent) {
                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
             } else {
                 upperChar = Character.toUpperCaseEx(srcChar);
             }
-            if ((upperChar == Character.ERROR)
-                    || (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
+            if (Character.isBmpCodePoint(upperChar)) {  // common case: result is a single char
+                result[resultOffset++] = (char)upperChar;
+            } else {
                 if (upperChar == Character.ERROR) {
                     if (localeDependent) {
                         upperCharArray =
-                                ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
+                            ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
                     } else {
                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
                     }
                 } else if (srcCount == 2) {
-                    resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
+                    resultOffset += Character.toChars(upperChar, result, resultOffset);  // written in place, advance by the count toChars returns
                     continue;
                 } else {
                     upperCharArray = Character.toChars(upperChar);
                 }
-
                 /* Grow result if needed */
                 int mapLen = upperCharArray.length;
                 if (mapLen > srcCount) {
                     char[] result2 = new char[result.length + mapLen - srcCount];
-                    System.arraycopy(result, 0, result2, 0, i + resultOffset);
+                    System.arraycopy(result, 0, result2, 0, resultOffset);  // keep everything written so far
                     result = result2;
-                }
-                for (int x = 0; x < mapLen; ++x) {
-                    result[i + resultOffset + x] = upperCharArray[x];
-                }
-                resultOffset += (mapLen - srcCount);
-            } else {
-                result[i + resultOffset] = (char)upperChar;
+                 }
+                 for (int x = 0; x < mapLen; ++x) {
+                    result[resultOffset++] = upperCharArray[x];
+                 }
             }
         }
-        return new String(result, 0, len + resultOffset);
+        return new String(result, 0, resultOffset);  // exactly the resultOffset chars that were written
     }
 
     /**
--- a/jdk/test/java/lang/String/ToLowerCase.java	Thu Feb 06 16:42:25 2014 -0800
+++ b/jdk/test/java/lang/String/ToLowerCase.java	Fri Feb 07 09:04:17 2014 -0800
@@ -23,7 +23,7 @@
 
 /*
     @test
-    @bug 4217441 4533872 4900935 8020037
+    @bug 4217441 4533872 4900935 8020037 8032012
     @summary toLowerCase should lower-case Greek Sigma correctly depending
              on the context (final/non-final).  Also it should handle
              Locale specific (lt, tr, and az) lowercasings and supplementary
@@ -104,6 +104,22 @@
         // invalid code point tests:
         test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
 
+        // test bmp + supp1
+        StringBuilder src = new StringBuilder(0x20000);
+        StringBuilder exp = new StringBuilder(0x20000);
+        for (int cp = 0; cp < 0x20000; cp++) {
+            if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
+                continue;
+            }
+            int lowerCase = Character.toLowerCase(cp);
+            if (lowerCase == -1) {    //Character.ERROR
+                continue;
+            }
+            src.appendCodePoint(cp);
+            exp.appendCodePoint(lowerCase);
+        }
+        test(src.toString(), Locale.US, exp.toString());
+
     }
 
     static void test(String in, Locale locale, String expected) {