8197462: Inconsistent exception messages for invalid capturing group names
author igerasim
Mon, 12 Feb 2018 21:06:06 -0800
changeset 48853 84b4ffbba8b0
parent 48852 478e198da84b
child 48854 345f41527dcc
8197462: Inconsistent exception messages for invalid capturing group names
8179608: Error in comment in Pattern.java
Reviewed-by: sherman
src/java.base/share/classes/java/util/regex/Pattern.java
test/jdk/java/util/regex/RegExTest.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java	Tue Feb 13 12:26:22 2018 +0800
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java	Mon Feb 12 21:06:06 2018 -0800
@@ -782,12 +782,9 @@
      * arguments, they can also be passed as inline modifiers.
      * For example, the following statements have the same effect.
      * <pre>
-     * RegExp r1 = RegExp.compile("abc", Pattern.I|Pattern.M);
-     * RegExp r2 = RegExp.compile("(?im)abc", 0);
+     * Pattern p1 = Pattern.compile("abc", Pattern.CASE_INSENSITIVE|Pattern.MULTILINE);
+     * Pattern p2 = Pattern.compile("(?im)abc", 0);
      * </pre>
-     *
-     * The flags are duplicated so that the familiar Perl match flag
-     * names are available.
      */
 
     /**
@@ -2527,7 +2524,7 @@
                 throw error("\\k is not followed by '<' for named capturing group");
             String name = groupname(read());
             if (!namedGroups().containsKey(name))
-                throw error("(named capturing group <"+ name+"> does not exit");
+                throw error("named capturing group <" + name + "> does not exist");
             if (create) {
                 hasGroupRef = true;
                 if (has(CASE_INSENSITIVE))
@@ -2922,13 +2919,11 @@
      */
     private String groupname(int ch) {
         StringBuilder sb = new StringBuilder();
-        sb.append(Character.toChars(ch));
-        while (ASCII.isLower(ch=read()) || ASCII.isUpper(ch) ||
-               ASCII.isDigit(ch)) {
-            sb.append(Character.toChars(ch));
-        }
-        if (sb.length() == 0)
-            throw error("named capturing group has 0 length name");
+        if (!ASCII.isAlpha(ch))
+            throw error("capturing group name does not start with a Latin letter");
+        do {
+            sb.append((char) ch);
+        } while (ASCII.isAlnum(ch=read()));
         if (ch != '>')
             throw error("named capturing group is missing trailing '>'");
         return sb.toString();
@@ -2974,7 +2969,7 @@
                 break;
             case '<':   // (?<xxx)  look behind
                 ch = read();
-                if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
+                if (ch != '=' && ch != '!') {
                     // named captured group
                     String name = groupname(ch);
                     if (namedGroups().containsKey(name))
@@ -3005,14 +3000,12 @@
                                                info.minLength) :
                                    new Behind(head, info.maxLength,
                                               info.minLength));
-                } else if (ch == '!') {
+                } else { // if (ch == '!')
                     head = tail = (hasSupplementary ?
                                    new NotBehindS(head, info.maxLength,
                                                   info.minLength) :
                                    new NotBehind(head, info.maxLength,
                                                  info.minLength));
-                } else {
-                    throw error("Unknown look-behind group");
                 }
                 // clear all top-closure-nodes inside lookbehind
                 if (saveTCNCount < topClosureNodes.size())
--- a/test/jdk/java/util/regex/RegExTest.java	Tue Feb 13 12:26:22 2018 +0800
+++ b/test/jdk/java/util/regex/RegExTest.java	Mon Feb 12 21:06:06 2018 -0800
@@ -35,7 +35,7 @@
  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667
+ * 8194667 8197462
  *
  * @library /test/lib
  * @build jdk.test.lib.RandomFactory
@@ -168,6 +168,7 @@
         embeddedFlags();
         grapheme();
         expoBacktracking();
+        invalidGroupName();
 
         if (failure) {
             throw new
@@ -4870,4 +4871,41 @@
             }
         }
     }
+
+    private static void invalidGroupName() {
+        // Invalid start of a group name
+        for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
+                "\u0060", "\u007b", "\u0416")) {
+            for (String pat : List.of("(?<" + groupName + ">)",
+                    "\\k<" + groupName + ">")) {
+                try {
+                    Pattern.compile(pat);
+                    failCount++;
+                } catch (PatternSyntaxException e) {
+                    if (!e.getMessage().startsWith(
+                            "capturing group name does not start with a"
+                            + " Latin letter")) {
+                        failCount++;
+                    }
+                }
+            }
+        }
+        // Invalid char in a group name
+        for (String groupName : List.of("a.", "b\u0040", "c\u005b",
+                "d\u0060", "e\u007b", "f\u0416")) {
+            for (String pat : List.of("(?<" + groupName + ">)",
+                    "\\k<" + groupName + ">")) {
+                try {
+                    Pattern.compile(pat);
+                    failCount++;
+                } catch (PatternSyntaxException e) {
+                    if (!e.getMessage().startsWith(
+                            "named capturing group is missing trailing '>'")) {
+                        failCount++;
+                    }
+                }
+            }
+        }
+        report("Invalid capturing group names");
+    }
 }