6878475: Better syntax for the named capture group in regex
author sherman
Wed, 21 Oct 2009 11:40:40 -0700
changeset 4161 679d00486dc6
parent 4151 db7d59c0b0e6
child 4162 425328b81201
6878475: Better syntax for the named capture group in regex
Summary: Updated the syntax of the newly added named capture group
Reviewed-by: martin, alanb
jdk/src/share/classes/java/util/regex/Matcher.java
jdk/src/share/classes/java/util/regex/Pattern.java
jdk/test/java/util/regex/RegExTest.java
--- a/jdk/src/share/classes/java/util/regex/Matcher.java	Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/src/share/classes/java/util/regex/Matcher.java	Wed Oct 21 11:40:40 2009 -0700
@@ -688,7 +688,7 @@
      *
      * <p> The replacement string may contain references to subsequences
      * captured during the previous match: Each occurrence of
-     * <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
+     * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
      * will be replaced by the result of evaluating the corresponding
      * {@link #group(String) group(name)} or {@link #group(int) group(g)</tt>}
      * respectively. For  <tt>$</tt><i>g</i><tt></tt>,
@@ -770,7 +770,7 @@
                 // more appropriate.
                 nextChar = replacement.charAt(cursor);
                 int refNum = -1;
-                if (nextChar == '<') {
+                if (nextChar == '{') {
                     cursor++;
                     StringBuilder gsb = new StringBuilder();
                     while (cursor < replacement.length()) {
@@ -787,13 +787,17 @@
                     if (gsb.length() == 0)
                         throw new IllegalArgumentException(
                             "named capturing group has 0 length name");
-                    if (nextChar != '>')
+                    if (nextChar != '}')
                         throw new IllegalArgumentException(
-                            "named capturing group is missing trailing '>'");
+                            "named capturing group is missing trailing '}'");
                     String gname = gsb.toString();
+                    if (ASCII.isDigit(gname.charAt(0)))
+                        throw new IllegalArgumentException(
+                            "capturing group name {" + gname +
+                            "} starts with digit character");
                     if (!parentPattern.namedGroups().containsKey(gname))
                         throw new IllegalArgumentException(
-                            "No group with name <" + gname + ">");
+                            "No group with name {" + gname + "}");
                     refNum = parentPattern.namedGroups().get(gname);
                     cursor++;
                 } else {
--- a/jdk/src/share/classes/java/util/regex/Pattern.java	Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java	Wed Oct 21 11:40:40 2009 -0700
@@ -484,7 +484,7 @@
  * <h5> Group name </h5>
  * <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
  * and then be back-referenced later by the "name". Group names are composed of
- * the following characters:
+ * the following characters. The first character must be a <tt>letter</tt>.
  *
  * <ul>
  *   <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
@@ -2567,7 +2567,7 @@
                 break;
             case '<':   // (?<xxx)  look behind
                 ch = read();
-                if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
+                if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                     // named captured group
                     String name = groupname(ch);
                     if (namedGroups().containsKey(name))
--- a/jdk/test/java/util/regex/RegExTest.java	Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/test/java/util/regex/RegExTest.java	Wed Oct 21 11:40:40 2009 -0700
@@ -32,7 +32,7 @@
  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425
+ * 6350801 6676425 6878475
  */
 
 import java.util.regex.*;
@@ -3389,9 +3389,9 @@
               "gname",
               "yyy");
 
-        check(Pattern.compile("x+(?<8gname>y+)z+"),
+        check(Pattern.compile("x+(?<gname8>y+)z+"),
               "xxxyyyzzz",
-              "8gname",
+              "gname8",
               "yyy");
 
         //backref
@@ -3430,81 +3430,82 @@
         //replaceFirst/All
         checkReplaceFirst("(?<gn>ab)(c*)",
                           "abccczzzabcczzzabccc",
-                          "$<gn>",
+                          "${gn}",
                           "abzzzabcczzzabccc");
 
         checkReplaceAll("(?<gn>ab)(c*)",
                         "abccczzzabcczzzabccc",
-                        "$<gn>",
+                        "${gn}",
                         "abzzzabzzzab");
 
 
         checkReplaceFirst("(?<gn>ab)(c*)",
                           "zzzabccczzzabcczzzabccczzz",
-                          "$<gn>",
+                          "${gn}",
                           "zzzabzzzabcczzzabccczzz");
 
         checkReplaceAll("(?<gn>ab)(c*)",
                         "zzzabccczzzabcczzzabccczzz",
-                        "$<gn>",
+                        "${gn}",
                         "zzzabzzzabzzzabzzz");
 
         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
                           "zzzabccczzzabcczzzabccczzz",
-                          "$<gn2>",
+                          "${gn2}",
                           "zzzccczzzabcczzzabccczzz");
 
         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
                         "zzzabccczzzabcczzzabccczzz",
-                        "$<gn2>",
+                        "${gn2}",
                         "zzzccczzzcczzzccczzz");
 
         //toSupplementaries("(ab)(c*)"));
         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
                            ")(?<gn2>" + toSupplementaries("c") + "*)",
                           toSupplementaries("abccczzzabcczzzabccc"),
-                          "$<gn1>",
+                          "${gn1}",
                           toSupplementaries("abzzzabcczzzabccc"));
 
 
         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
                         ")(?<gn2>" + toSupplementaries("c") + "*)",
                         toSupplementaries("abccczzzabcczzzabccc"),
-                        "$<gn1>",
+                        "${gn1}",
                         toSupplementaries("abzzzabzzzab"));
 
         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
                            ")(?<gn2>" + toSupplementaries("c") + "*)",
                           toSupplementaries("abccczzzabcczzzabccc"),
-                          "$<gn2>",
+                          "${gn2}",
                           toSupplementaries("ccczzzabcczzzabccc"));
 
 
         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
                         ")(?<gn2>" + toSupplementaries("c") + "*)",
                         toSupplementaries("abccczzzabcczzzabccc"),
-                        "$<gn2>",
+                        "${gn2}",
                         toSupplementaries("ccczzzcczzzccc"));
 
         checkReplaceFirst("(?<dog>Dog)AndCat",
                           "zzzDogAndCatzzzDogAndCatzzz",
-                          "$<dog>",
+                          "${dog}",
                           "zzzDogzzzDogAndCatzzz");
 
 
         checkReplaceAll("(?<dog>Dog)AndCat",
                           "zzzDogAndCatzzzDogAndCatzzz",
-                          "$<dog>",
+                          "${dog}",
                           "zzzDogzzzDogzzz");
 
         // backref in Matcher & String
-        if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "$<gn>").equals("abefij") ||
-            !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "$<gn>").equals("abcdefgh"))
+        if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
+            !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
             failCount++;
 
         // negative
         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
+        checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),