6878475: Better syntax for the named capture group in regex
Summary: Updated the syntax of the newly added named capture group
Reviewed-by: martin, alanb
--- a/jdk/src/share/classes/java/util/regex/Matcher.java Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/src/share/classes/java/util/regex/Matcher.java Wed Oct 21 11:40:40 2009 -0700
@@ -688,7 +688,7 @@
*
* <p> The replacement string may contain references to subsequences
* captured during the previous match: Each occurrence of
- * <tt>$</tt><<i>name</i>> or <tt>$</tt><i>g</i>
+ * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
* will be replaced by the result of evaluating the corresponding
* {@link #group(String) group(name)} or {@link #group(int) group(g)</tt>}
* respectively. For <tt>$</tt><i>g</i><tt></tt>,
@@ -770,7 +770,7 @@
// more appropriate.
nextChar = replacement.charAt(cursor);
int refNum = -1;
- if (nextChar == '<') {
+ if (nextChar == '{') {
cursor++;
StringBuilder gsb = new StringBuilder();
while (cursor < replacement.length()) {
@@ -787,13 +787,17 @@
if (gsb.length() == 0)
throw new IllegalArgumentException(
"named capturing group has 0 length name");
- if (nextChar != '>')
+ if (nextChar != '}')
throw new IllegalArgumentException(
- "named capturing group is missing trailing '>'");
+ "named capturing group is missing trailing '}'");
String gname = gsb.toString();
+ if (ASCII.isDigit(gname.charAt(0)))
+ throw new IllegalArgumentException(
+ "capturing group name {" + gname +
+ "} starts with digit character");
if (!parentPattern.namedGroups().containsKey(gname))
throw new IllegalArgumentException(
- "No group with name <" + gname + ">");
+ "No group with name {" + gname + "}");
refNum = parentPattern.namedGroups().get(gname);
cursor++;
} else {
--- a/jdk/src/share/classes/java/util/regex/Pattern.java Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java Wed Oct 21 11:40:40 2009 -0700
@@ -484,7 +484,7 @@
* <h5> Group name </h5>
* <p>A capturing group can also be assigned a "name", a <tt>named-capturing group</tt>,
* and then be back-referenced later by the "name". Group names are composed of
- * the following characters:
+ * the following characters. The first character must be a <tt>letter</tt>.
*
* <ul>
* <li> The uppercase letters <tt>'A'</tt> through <tt>'Z'</tt>
@@ -2567,7 +2567,7 @@
break;
case '<': // (?<xxx) look behind
ch = read();
- if (ASCII.isLower(ch) || ASCII.isUpper(ch) || ASCII.isDigit(ch)) {
+ if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
// named captured group
String name = groupname(ch);
if (namedGroups().containsKey(name))
--- a/jdk/test/java/util/regex/RegExTest.java Tue Oct 20 09:51:28 2009 -0700
+++ b/jdk/test/java/util/regex/RegExTest.java Wed Oct 21 11:40:40 2009 -0700
@@ -32,7 +32,7 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425
+ * 6350801 6676425 6878475
*/
import java.util.regex.*;
@@ -3389,9 +3389,9 @@
"gname",
"yyy");
- check(Pattern.compile("x+(?<8gname>y+)z+"),
+ check(Pattern.compile("x+(?<gname8>y+)z+"),
"xxxyyyzzz",
- "8gname",
+ "gname8",
"yyy");
//backref
@@ -3430,81 +3430,82 @@
//replaceFirst/All
checkReplaceFirst("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
- "$<gn>",
+ "${gn}",
"abzzzabcczzzabccc");
checkReplaceAll("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
- "$<gn>",
+ "${gn}",
"abzzzabzzzab");
checkReplaceFirst("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
- "$<gn>",
+ "${gn}",
"zzzabzzzabcczzzabccczzz");
checkReplaceAll("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
- "$<gn>",
+ "${gn}",
"zzzabzzzabzzzabzzz");
checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
- "$<gn2>",
+ "${gn2}",
"zzzccczzzabcczzzabccczzz");
checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
- "$<gn2>",
+ "${gn2}",
"zzzccczzzcczzzccczzz");
//toSupplementaries("(ab)(c*)"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
- "$<gn1>",
+ "${gn1}",
toSupplementaries("abzzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
- "$<gn1>",
+ "${gn1}",
toSupplementaries("abzzzabzzzab"));
checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
- "$<gn2>",
+ "${gn2}",
toSupplementaries("ccczzzabcczzzabccc"));
checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
")(?<gn2>" + toSupplementaries("c") + "*)",
toSupplementaries("abccczzzabcczzzabccc"),
- "$<gn2>",
+ "${gn2}",
toSupplementaries("ccczzzcczzzccc"));
checkReplaceFirst("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
- "$<dog>",
+ "${dog}",
"zzzDogzzzDogAndCatzzz");
checkReplaceAll("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
- "$<dog>",
+ "${dog}",
"zzzDogzzzDogzzz");
// backref in Matcher & String
- if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "$<gn>").equals("abefij") ||
- !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "$<gn>").equals("abcdefgh"))
+ if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
+ !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
failCount++;
// negative
checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
+ checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),