8230829: Matcher matches a surrogate pair that crosses border of the region
Reviewed-by: naoto
--- a/src/java.base/share/classes/java/util/regex/Pattern.java Thu Sep 12 09:59:19 2019 -0700
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java Thu Sep 12 11:07:35 2019 -0700
@@ -3931,12 +3931,14 @@
boolean match(Matcher matcher, int i, CharSequence seq) {
if (i < matcher.to) {
int ch = Character.codePointAt(seq, i);
- return predicate.is(ch) &&
- next.match(matcher, i + Character.charCount(ch), seq);
- } else {
- matcher.hitEnd = true;
- return false;
+ i += Character.charCount(ch);
+ if (i <= matcher.to) {
+ return predicate.is(ch) &&
+ next.match(matcher, i, seq);
+ }
}
+ matcher.hitEnd = true;
+ return false;
}
boolean study(TreeInfo info) {
info.minLength++;
--- a/test/jdk/java/util/regex/RegExTest.java Thu Sep 12 09:59:19 2019 -0700
+++ b/test/jdk/java/util/regex/RegExTest.java Thu Sep 12 11:07:35 2019 -0700
@@ -35,7 +35,7 @@
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667 8197462 8184692 8221431 8224789 8228352
+ * 8194667 8197462 8184692 8221431 8224789 8228352 8230829
*
* @library /test/lib
* @library /lib/testlibrary/java/lang
@@ -1070,6 +1070,22 @@
matcher.useAnchoringBounds(false);
if (matcher.find())
failCount++;
+
+ // JDK-8230829
+ pattern = Pattern.compile("\\ud800\\udc61");
+ matcher = pattern.matcher("\ud800\udc61");
+ matcher.region(0, 1);
+ if (matcher.find()) {
+ failCount++;
+ System.out.println("Matched a surrogate pair" +
+ " that crosses border of region");
+ }
+ if (!matcher.hitEnd()) {
+ failCount++;
+ System.out.println("Expected to hit the end when" +
+ " matching a surrogate pair crossing region");
+ }
+
report("Regions");
}