8039124: j.u.regex.Matcher.appendReplace/Tail() should support StringBuilder variant
Summary: to add the StringBuilder variant
Reviewed-by: alanb, sherman
Contributed-by: jeremymanson@google.com, peter.levart@gmail.com
--- a/jdk/src/share/classes/java/util/regex/Matcher.java Wed Apr 09 12:49:51 2014 +0000
+++ b/jdk/src/share/classes/java/util/regex/Matcher.java Wed Apr 09 09:36:19 2014 -0700
@@ -65,9 +65,10 @@
* new strings whose contents can, if desired, be computed from the match
* result. The {@link #appendReplacement appendReplacement} and {@link
* #appendTail appendTail} methods can be used in tandem in order to collect
- * the result into an existing string buffer, or the more convenient {@link
- * #replaceAll replaceAll} method can be used to create a string in which every
- * matching subsequence in the input sequence is replaced.
+ * the result into an existing string buffer or string builder. Alternatively,
+ * the more convenient {@link #replaceAll replaceAll} method can be used to
+ * create a string in which every matching subsequence in the input sequence
+ * is replaced.
*
* <p> The explicit state of a matcher includes the start and end indices of
* the most recent successful match. It also includes the start and end
@@ -792,15 +793,115 @@
* that does not exist in the pattern
*/
public Matcher appendReplacement(StringBuffer sb, String replacement) {
-
// If no match, return error
if (first < 0)
throw new IllegalStateException("No match available");
+ StringBuilder result = new StringBuilder();
+ appendExpandedReplacement(replacement, result);
+ // Append the intervening text
+ sb.append(text, lastAppendPosition, first);
+ // Append the match substitution
+ sb.append(result);
+ lastAppendPosition = last;
+ return this;
+ }
- // Process substitution string to replace group references with groups
+ /**
+ * Implements a non-terminal append-and-replace step.
+ *
+ * <p> This method performs the following actions: </p>
+ *
+ * <ol>
+ *
+ * <li><p> It reads characters from the input sequence, starting at the
+ * append position, and appends them to the given string builder. It
+ * stops after reading the last character preceding the previous match,
+ * that is, the character at index {@link
+ * #start()} <tt>-</tt> <tt>1</tt>. </p></li>
+ *
+ * <li><p> It appends the given replacement string to the string builder.
+ * </p></li>
+ *
+ * <li><p> It sets the append position of this matcher to the index of
+ * the last character matched, plus one, that is, to {@link #end()}.
+ * </p></li>
+ *
+ * </ol>
+ *
+ * <p> The replacement string may contain references to subsequences
+ * captured during the previous match: Each occurrence of
+ * <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of
+ * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
+ * The first number after the <tt>$</tt> is always treated as part of
+ * the group reference. Subsequent numbers are incorporated into g if
+ * they would form a legal group reference. Only the numerals '0'
+ * through '9' are considered as potential components of the group
+ * reference. If the second group matched the string <tt>"foo"</tt>, for
+ * example, then passing the replacement string <tt>"$2bar"</tt> would
+ * cause <tt>"foobar"</tt> to be appended to the string builder. A dollar
+ * sign (<tt>$</tt>) may be included as a literal in the replacement
+ * string by preceding it with a backslash (<tt>\$</tt>).
+ *
+ * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
+ * the replacement string may cause the results to be different than if it
+ * were being treated as a literal replacement string. Dollar signs may be
+ * treated as references to captured subsequences as described above, and
+ * backslashes are used to escape literal characters in the replacement
+ * string.
+ *
+ * <p> This method is intended to be used in a loop together with the
+ * {@link #appendTail appendTail} and {@link #find find} methods. The
+ * following code, for example, writes <tt>one dog two dogs in the
+ * yard</tt> to the standard-output stream: </p>
+ *
+ * <blockquote><pre>
+ * Pattern p = Pattern.compile("cat");
+ * Matcher m = p.matcher("one cat two cats in the yard");
+ * StringBuilder sb = new StringBuilder();
+ * while (m.find()) {
+ * m.appendReplacement(sb, "dog");
+ * }
+ * m.appendTail(sb);
+ * System.out.println(sb.toString());</pre></blockquote>
+ *
+ * @param sb
+ * The target string builder
+ * @param replacement
+ * The replacement string
+ * @return This matcher
+ *
+ * @throws IllegalStateException
+ * If no match has yet been attempted,
+ * or if the previous match operation failed
+ * @throws IllegalArgumentException
+ * If the replacement string refers to a named-capturing
+ * group that does not exist in the pattern
+ * @throws IndexOutOfBoundsException
+ * If the replacement string refers to a capturing group
+ * that does not exist in the pattern
+ * @since 1.9
+ */
+ public Matcher appendReplacement(StringBuilder sb, String replacement) {
+ // If no match, return error
+ if (first < 0)
+ throw new IllegalStateException("No match available");
+ StringBuilder result = new StringBuilder();
+ appendExpandedReplacement(replacement, result);
+ // Append the intervening text
+ sb.append(text, lastAppendPosition, first);
+ // Append the match substitution
+ sb.append(result);
+ lastAppendPosition = last;
+ return this;
+ }
+
+ /**
+ * Processes replacement string to replace group references with
+ * groups.
+ */
+ private StringBuilder appendExpandedReplacement(
+ String replacement, StringBuilder result) {
int cursor = 0;
- StringBuilder result = new StringBuilder();
-
while (cursor < replacement.length()) {
char nextChar = replacement.charAt(cursor);
if (nextChar == '\\') {
@@ -852,8 +953,8 @@
cursor++;
} else {
// The first number is always a group
- refNum = (int)nextChar - '0';
- if ((refNum < 0)||(refNum > 9))
+ refNum = nextChar - '0';
+ if ((refNum < 0) || (refNum > 9))
throw new IllegalArgumentException(
"Illegal group reference");
cursor++;
@@ -864,7 +965,7 @@
break;
}
int nextDigit = replacement.charAt(cursor) - '0';
- if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
+ if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
break;
}
int newRefNum = (refNum * 10) + nextDigit;
@@ -884,13 +985,7 @@
cursor++;
}
}
- // Append the intervening text
- sb.append(text, lastAppendPosition, first);
- // Append the match substitution
- sb.append(result);
-
- lastAppendPosition = last;
- return this;
+ return result;
}
/**
@@ -913,6 +1008,27 @@
}
/**
+ * Implements a terminal append-and-replace step.
+ *
+ * <p> This method reads characters from the input sequence, starting at
+ * the append position, and appends them to the given string builder. It is
+ * intended to be invoked after one or more invocations of the {@link
+ * #appendReplacement appendReplacement} method in order to copy the
+ * remainder of the input sequence. </p>
+ *
+ * @param sb
+ * The target string builder
+ *
+ * @return The target string builder
+ *
+ * @since 1.9
+ */
+ public StringBuilder appendTail(StringBuilder sb) {
+ sb.append(text, lastAppendPosition, getTextLength());
+ return sb;
+ }
+
+ /**
* Replaces every subsequence of the input sequence that matches the
* pattern with the given replacement string.
*
@@ -950,7 +1066,7 @@
reset();
boolean result = find();
if (result) {
- StringBuffer sb = new StringBuffer();
+ StringBuilder sb = new StringBuilder();
do {
appendReplacement(sb, replacement);
result = find();
@@ -1000,7 +1116,7 @@
reset();
if (!find())
return text.toString();
- StringBuffer sb = new StringBuffer();
+ StringBuilder sb = new StringBuilder();
appendReplacement(sb, replacement);
appendTail(sb);
return sb.toString();
--- a/jdk/test/java/util/regex/RegExTest.java Wed Apr 09 12:49:51 2014 +0000
+++ b/jdk/test/java/util/regex/RegExTest.java Wed Apr 09 09:36:19 2014 -0700
@@ -32,7 +32,7 @@
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
- * 8027645 8035076
+ * 8027645 8035076 8039124
*/
import java.util.regex.*;
@@ -75,7 +75,10 @@
// Substitition tests on randomly generated sequences
globalSubstitute();
stringbufferSubstitute();
+ stringbuilderSubstitute();
+
substitutionBasher();
+ substitutionBasher2();
// Canonical Equivalence
ceTest();
@@ -296,10 +299,12 @@
final Matcher m = Pattern.compile("xyz").matcher("xyz");
m.matches();
- check(new Runnable() { public void run() { m.appendTail(null);}});
+ check(new Runnable() { public void run() { m.appendTail((StringBuffer)null);}});
+ check(new Runnable() { public void run() { m.appendTail((StringBuilder)null);}});
check(new Runnable() { public void run() { m.replaceAll(null);}});
check(new Runnable() { public void run() { m.replaceFirst(null);}});
- check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
+ check(new Runnable() { public void run() { m.appendReplacement((StringBuffer)null, null);}});
+ check(new Runnable() { public void run() { m.appendReplacement((StringBuilder)null, null);}});
check(new Runnable() { public void run() { m.reset(null);}});
check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
//check(new Runnable() { public void run() { m.usePattern(null);}});
@@ -2973,6 +2978,286 @@
report("SB Substitution");
}
+ /**
+ * Tests the usage of Matcher.appendReplacement() with literal
+ * and group substitutions.
+ */
+ private static void stringbuilderSubstitute() throws Exception {
+ // SB substitution with literal
+ String blah = "zzzblahzzz";
+ Pattern p = Pattern.compile("blah");
+ Matcher m = p.matcher(blah);
+ StringBuilder result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "blech");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "blech");
+ if (!result.toString().equals("zzzblech"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzblechzzz"))
+ failCount++;
+
+ // SB substitution with groups
+ blah = "zzzabcdzzz";
+ p = Pattern.compile("(ab)(cd)*");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals("zzzab"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabzzz"))
+ failCount++;
+
+ // SB substitution with 3 groups
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "$1w$2w$3");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "$1w$2w$3");
+ if (!result.toString().equals("zzzabwcdwef"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabwcdwefzzz"))
+ failCount++;
+
+ // SB substitution with groups and three matches
+ // skipping middle match
+ blah = "zzzabcdzzzabcddzzzabcdzzz";
+ p = Pattern.compile("(ab)(cd*)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals("zzzab"))
+ failCount++;
+
+ m.find();
+ m.find();
+ m.appendReplacement(result, "$2");
+ if (!result.toString().equals("zzzabzzzabcddzzzcd"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
+ failCount++;
+
+ // Check to make sure escaped $ is ignored
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, "$1w\\$2w$3");
+ if (!result.toString().equals("zzzabw$2wef"))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals("zzzabw$2wefzzz"))
+ failCount++;
+
+ // Check to make sure a reference to nonexistent group causes error
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ try {
+ m.appendReplacement(result, "$1w$5w$3");
+ failCount++;
+ } catch (IndexOutOfBoundsException ioobe) {
+ // Correct result
+ }
+
+ // Check double digit group references
+ blah = "zzz123456789101112zzz";
+ p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, "$1w$11w$3");
+ if (!result.toString().equals("zzz1w11w3"))
+ failCount++;
+
+ // Check to make sure it backs off $15 to $1 if only three groups
+ blah = "zzzabcdcdefzzz";
+ p = Pattern.compile("(ab)(cd)*(ef)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, "$1w$15w$3");
+ if (!result.toString().equals("zzzabwab5wef"))
+ failCount++;
+
+
+ // Supplementary character test
+ // SB substitution with literal
+ blah = toSupplementaries("zzzblahzzz");
+ p = Pattern.compile(toSupplementaries("blah"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, toSupplementaries("blech"));
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, toSupplementaries("blech"));
+ if (!result.toString().equals(toSupplementaries("zzzblech")))
+ failCount++;
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
+ failCount++;
+
+ // SB substitution with groups
+ blah = toSupplementaries("zzzabcdzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals(toSupplementaries("zzzab")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabzzz")))
+ failCount++;
+
+ // SB substitution with 3 groups
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
+ failCount++;
+
+ // SB substitution with groups and three matches
+ // skipping middle match
+ blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ try {
+ m.appendReplacement(result, "$1");
+ failCount++;
+ } catch (IllegalStateException e) {
+ }
+ m.find();
+ m.appendReplacement(result, "$1");
+ if (!result.toString().equals(toSupplementaries("zzzab")))
+ failCount++;
+
+ m.find();
+ m.find();
+ m.appendReplacement(result, "$2");
+ if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
+ failCount++;
+
+ // Check to make sure escaped $ is ignored
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
+ failCount++;
+
+ m.appendTail(result);
+ if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
+ failCount++;
+
+ // Check to make sure a reference to nonexistent group causes error
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ try {
+ m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
+ failCount++;
+ } catch (IndexOutOfBoundsException ioobe) {
+ // Correct result
+ }
+ // Check double digit group references
+ blah = toSupplementaries("zzz123456789101112zzz");
+ p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
+ if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
+ failCount++;
+
+ // Check to make sure it backs off $15 to $1 if only three groups
+ blah = toSupplementaries("zzzabcdcdefzzz");
+ p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
+ m = p.matcher(blah);
+ result = new StringBuilder();
+ m.find();
+ m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
+ if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
+ failCount++;
+ // Check nothing has been appended into the output buffer if
+ // the replacement string triggers IllegalArgumentException.
+ p = Pattern.compile("(abc)");
+ m = p.matcher("abcd");
+ result = new StringBuilder();
+ m.find();
+ try {
+ m.appendReplacement(result, ("xyz$g"));
+ failCount++;
+ } catch (IllegalArgumentException iae) {
+ if (result.length() != 0)
+ failCount++;
+ }
+ report("SB Substitution 2");
+ }
+
/*
* 5 groups of characters are created to make a substitution string.
* A base string will be created including random lead chars, the
@@ -3059,6 +3344,93 @@
report("Substitution Basher");
}
+ /*
+ * 5 groups of characters are created to make a substitution string.
+ * A base string will be created including random lead chars, the
+ * substitution string, and random trailing chars.
+ * A pattern containing the 5 groups is searched for and replaced with:
+ * random group + random string + random group.
+ * The results are checked for correctness.
+ */
+ private static void substitutionBasher2() {
+ for (int runs = 0; runs<1000; runs++) {
+ // Create a base string to work in
+ int leadingChars = generator.nextInt(10);
+ StringBuilder baseBuffer = new StringBuilder(100);
+ String leadingString = getRandomAlphaString(leadingChars);
+ baseBuffer.append(leadingString);
+
+ // Create 5 groups of random number of random chars
+ // Create the string to substitute
+ // Create the pattern string to search for
+ StringBuilder bufferToSub = new StringBuilder(25);
+ StringBuilder bufferToPat = new StringBuilder(50);
+ String[] groups = new String[5];
+ for(int i=0; i<5; i++) {
+ int aGroupSize = generator.nextInt(5)+1;
+ groups[i] = getRandomAlphaString(aGroupSize);
+ bufferToSub.append(groups[i]);
+ bufferToPat.append('(');
+ bufferToPat.append(groups[i]);
+ bufferToPat.append(')');
+ }
+ String stringToSub = bufferToSub.toString();
+ String pattern = bufferToPat.toString();
+
+ // Place sub string into working string at random index
+ baseBuffer.append(stringToSub);
+
+ // Append random chars to end
+ int trailingChars = generator.nextInt(10);
+ String trailingString = getRandomAlphaString(trailingChars);
+ baseBuffer.append(trailingString);
+ String baseString = baseBuffer.toString();
+
+ // Create test pattern and matcher
+ Pattern p = Pattern.compile(pattern);
+ Matcher m = p.matcher(baseString);
+
+ // Reject candidate if pattern happens to start early
+ m.find();
+ if (m.start() < leadingChars)
+ continue;
+
+ // Reject candidate if more than one match
+ if (m.find())
+ continue;
+
+ // Construct a replacement string with :
+ // random group + random string + random group
+ StringBuilder bufferToRep = new StringBuilder();
+ int groupIndex1 = generator.nextInt(5);
+ bufferToRep.append("$" + (groupIndex1 + 1));
+ String randomMidString = getRandomAlphaString(5);
+ bufferToRep.append(randomMidString);
+ int groupIndex2 = generator.nextInt(5);
+ bufferToRep.append("$" + (groupIndex2 + 1));
+ String replacement = bufferToRep.toString();
+
+ // Do the replacement
+ String result = m.replaceAll(replacement);
+
+ // Construct expected result
+ StringBuilder bufferToRes = new StringBuilder();
+ bufferToRes.append(leadingString);
+ bufferToRes.append(groups[groupIndex1]);
+ bufferToRes.append(randomMidString);
+ bufferToRes.append(groups[groupIndex2]);
+ bufferToRes.append(trailingString);
+ String expectedResult = bufferToRes.toString();
+
+ // Check results
+ if (!result.equals(expectedResult)) {
+ failCount++;
+ }
+ }
+
+ report("Substitution Basher 2");
+ }
+
/**
* Checks the handling of some escape sequences that the Pattern
* class should process instead of the java compiler. These are