8131034: Cleanup in j.u.regex.Pattern.quote()
Reviewed-by: psandoz, sherman, martin
--- a/jdk/src/java.base/share/classes/java/util/regex/Matcher.java Fri Jul 31 16:03:48 2015 -0700
+++ b/jdk/src/java.base/share/classes/java/util/regex/Matcher.java Sun Aug 02 01:39:18 2015 +0300
@@ -63,7 +63,7 @@
*
* <p> A matcher finds matches in a subset of its input called the
* <i>region</i>. By default, the region contains all of the matcher's input.
- * The region can be modified via the{@link #region region} method and queried
+ * The region can be modified via the {@link #region region} method and queried
* via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
* methods. The way that the region boundaries interact with some pattern
* constructs can be changed. See {@link #useAnchoringBounds
@@ -1639,15 +1639,15 @@
*/
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append("java.util.regex.Matcher");
- sb.append("[pattern=" + pattern());
- sb.append(" region=");
- sb.append(regionStart() + "," + regionEnd());
- sb.append(" lastmatch=");
+ sb.append("java.util.regex.Matcher")
+ .append("[pattern=").append(pattern())
+ .append(" region=")
+ .append(regionStart()).append(',').append(regionEnd())
+ .append(" lastmatch=");
if ((first >= 0) && (group() != null)) {
sb.append(group());
}
- sb.append("]");
+ sb.append(']');
return sb.toString();
}
--- a/jdk/src/java.base/share/classes/java/util/regex/Pattern.java Fri Jul 31 16:03:48 2015 -0700
+++ b/jdk/src/java.base/share/classes/java/util/regex/Pattern.java Sun Aug 02 01:39:18 2015 +0300
@@ -565,7 +565,7 @@
* <p>
* <b><a name="usc">Scripts</a></b> are specified either with the prefix {@code Is}, as in
* {@code IsHiragana}, or by using the {@code script} keyword (or its short
- * form {@code sc})as in {@code script=Hiragana} or {@code sc=Hiragana}.
+ * form {@code sc}) as in {@code script=Hiragana} or {@code sc=Hiragana}.
* <p>
* The script names supported by <code>Pattern</code> are the valid script names
* accepted and defined by
@@ -1299,18 +1299,22 @@
if (slashEIndex == -1)
return "\\Q" + s + "\\E";
- StringBuilder sb = new StringBuilder(s.length() * 2);
+ int lenHint = s.length();
+ lenHint = (lenHint < Integer.MAX_VALUE - 8 - lenHint) ?
+ (lenHint << 1) : (Integer.MAX_VALUE - 8);
+
+ StringBuilder sb = new StringBuilder(lenHint);
sb.append("\\Q");
- slashEIndex = 0;
int current = 0;
- while ((slashEIndex = s.indexOf("\\E", current)) != -1) {
- sb.append(s.substring(current, slashEIndex));
+ do {
+ sb.append(s, current, slashEIndex)
+ .append("\\E\\\\E\\Q");
current = slashEIndex + 2;
- sb.append("\\E\\\\E\\Q");
- }
- sb.append(s.substring(current, s.length()));
- sb.append("\\E");
- return sb.toString();
+ } while ((slashEIndex = s.indexOf("\\E", current)) != -1);
+
+ return sb.append(s, current, s.length())
+ .append("\\E")
+ .toString();
}
/**
@@ -1367,14 +1371,16 @@
}
/**
- * The pattern is converted to normalizedD form and then a pure group
- * is constructed to match canonical equivalences of the characters.
+ * The pattern is converted to normalized form ({@linkplain
+ * java.text.Normalizer.Form#NFD NFD}, canonical decomposition)
+ * and then a pure group is constructed to match canonical
+ * equivalences of the characters.
*/
private void normalize() {
boolean inCharClass = false;
int lastCodePoint = -1;
- // Convert pattern into normalizedD form
+ // Convert pattern into normalized form
normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
patternLength = normalizedPattern.length();