8007395: StringIndexOutofBoundsException in Match.find() when input String contains surrogate UTF-16 characters
author sherman
Fri, 26 Apr 2013 13:59:10 -0700
changeset 17183 1e21e0bc10e7
parent 17182 b786c0de868c
child 17187 692bf32a1f26
8007395: StringIndexOutofBoundsException in Match.find() when input String contains surrogate UTF-16 characters
Summary: updated GroupCurly.match0() to backtrack correctly
Reviewed-by: mchung
jdk/src/share/classes/java/util/regex/Pattern.java
jdk/test/java/util/regex/RegExTest.java
--- a/jdk/src/share/classes/java/util/regex/Pattern.java	Wed Apr 24 16:15:47 2013 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java	Fri Apr 26 13:59:10 2013 -0700
@@ -4334,7 +4334,6 @@
                 info.deterministic = detm;
             else
                 info.deterministic = false;
-
             return next.study(info);
         }
     }
@@ -4415,6 +4414,8 @@
         }
         // Aggressive group match
         boolean match0(Matcher matcher, int i, int j, CharSequence seq) {
+            // don't back off passing the starting "j"
+            int min = j;
             int[] groups = matcher.groups;
             int save0 = 0;
             int save1 = 0;
@@ -4452,7 +4453,7 @@
                         break;
                     }
                 }
-                while (j > cmin) {
+                while (j > min) {
                     if (next.match(matcher, i, seq)) {
                         if (capture) {
                             groups[groupIndex+1] = i;
@@ -4544,7 +4545,6 @@
             } else {
                 info.deterministic = false;
             }
-
             return next.study(info);
         }
     }
--- a/jdk/test/java/util/regex/RegExTest.java	Wed Apr 24 16:15:47 2013 -0700
+++ b/jdk/test/java/util/regex/RegExTest.java	Fri Apr 26 13:59:10 2013 -0700
@@ -33,7 +33,7 @@
  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
- * 7067045 7014640 7189363
+ * 7067045 7014640 7189363 8007395
  */
 
 import java.util.regex.*;
@@ -144,6 +144,7 @@
         horizontalAndVerticalWSTest();
         linebreakTest();
         branchTest();
+        groupCurlyNotFoundSuppTest();
         if (failure) {
             throw new
                 RuntimeException("RegExTest failed, 1st failure: " +
@@ -3947,4 +3948,27 @@
         report("branchTest");
     }
 
+    // Regression test for 8007395: find() must neither match nor throw when the input ends with a surrogate pair
+    private static void groupCurlyNotFoundSuppTest() throws Exception {
+        String input = "test this as \ud83d\ude0d"; // ends with a high+low surrogate pair and contains no '@'
+        for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
+                                          "test(.)*(@[a-zA-Z.]+)",
+                                          "test([^B])+(@[a-zA-Z.]+)",
+                                          "test([^B])*(@[a-zA-Z.]+)",
+                                          "test(\\P{IsControl})+(@[a-zA-Z.]+)",
+                                          "test(\\P{IsControl})*(@[a-zA-Z.]+)",
+                                        }) {
+            Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
+                               .matcher(input);
+            try {
+                if (m.find()) { // every pattern requires a literal '@', which the input lacks
+                    failCount++;
+                }
+            } catch (Exception x) { // an exception from find() also increments failCount
+                failCount++;
+            }
+        }
+        report("GroupCurly NotFoundSupp");
+    }
+
 }