8150488: Scanner.findAll() can return infinite stream if regex matches zero chars
Reviewed-by: sherman
--- a/jdk/src/java.base/share/classes/java/util/Scanner.java Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/src/java.base/share/classes/java/util/Scanner.java Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2846,6 +2846,7 @@
class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
final Pattern pattern;
int expectedCount = -1;
+ private boolean advance = false; // true if we need to auto-advance
FindSpliterator(Pattern pattern) {
super(Long.MAX_VALUE,
@@ -2861,12 +2862,15 @@
throw new ConcurrentModificationException();
}
} else {
+ // init: mark any prior match invalid and switch the matcher to this pattern
+ matchValid = false;
+ matcher.usePattern(pattern);
expectedCount = modCount;
}
while (true) {
// assert expectedCount == modCount
- if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
+ if (nextInBuffer()) { // doesn't increment modCount
cons.accept(matcher.toMatchResult());
if (expectedCount != modCount) {
throw new ConcurrentModificationException();
@@ -2879,6 +2883,29 @@
return false; // reached end of input
}
}
+
+ // reimplementation of findPatternInBuffer with auto-advance on zero-length matches; returns true once a match has been consumed, false if no usable match is in the buffer yet
+ private boolean nextInBuffer() {
+ if (advance) { // previous match was zero-length: step past one char so it is not found again
+ if (position + 1 > buf.limit()) { // nothing left in the buffer to step over
+ if (!sourceClosed)
+ needInput = true; // source still open: flag that more input is needed
+ return false;
+ }
+ position++;
+ advance = false;
+ }
+ matcher.region(position, buf.limit()); // restrict the search to [position, buf.limit())
+ if (matcher.find() && (!matcher.hitEnd() || sourceClosed)) {
+ // Accept only if the match did not hit the end of the region, or the source is closed so that end is the real end of input
+ position = matcher.end(); // consume the match
+ advance = matcher.start() == position; // empty match: auto-advance on the next call
+ return true;
+ }
+ if (!sourceClosed)
+ needInput = true; // no usable match yet and the source is still open
+ return false;
+ }
}
/** Small LRU cache of Patterns. */
--- a/jdk/test/java/util/Scanner/ScannerStreamTest.java Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/test/java/util/Scanner/ScannerStreamTest.java Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,7 @@
import java.util.Scanner;
import java.util.function.Consumer;
import java.util.function.Supplier;
+import java.util.regex.Matcher;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.stream.LambdaTestHelpers;
@@ -44,7 +45,7 @@
/**
* @test
- * @bug 8072722
+ * @bug 8072722 8150488
* @summary Tests of stream support in java.util.Scanner
* @library ../stream/bootlib
* @build java.base/java.util.stream.OpTestCase
@@ -56,19 +57,22 @@
static File inputFile = new File(System.getProperty("test.src", "."), "input.txt");
- @DataProvider(name = "Patterns")
- public static Object[][] makeStreamTestData() {
+ @DataProvider(name = "Tokens")
+ public static Object[][] makeTokensTestData() {
// each inner array is [String description, String input, String delimiter]
// delimiter may be null
List<Object[]> data = new ArrayList<>();
data.add(new Object[] { "default delimiter", "abc def ghi", null });
data.add(new Object[] { "fixed delimiter", "abc,def,,ghi", "," });
- data.add(new Object[] { "regexp delimiter", "###abc##def###ghi###j", "#+" });
+ data.add(new Object[] { "regex delimiter", "###abc##def###ghi###j", "#+" });
return data.toArray(new Object[0][]);
}
+ /*
+ * Creates a scanner over the input, applying a delimiter if non-null.
+ */
Scanner makeScanner(String input, String delimiter) {
Scanner sc = new Scanner(input);
if (delimiter != null) {
@@ -77,7 +81,11 @@
return sc;
}
- @Test(dataProvider = "Patterns")
+ /*
+ * Given input and a delimiter, tests that tokens() returns the same
+ * results that would be provided by a Scanner hasNext/next loop.
+ */
+ @Test(dataProvider = "Tokens")
public void tokensTest(String description, String input, String delimiter) {
// derive expected result by using conventional loop
Scanner sc = makeScanner(input, delimiter);
@@ -93,6 +101,9 @@
.exercise();
}
+ /*
+ * Creates a Scanner over the given input file.
+ */
Scanner makeFileScanner(File file) {
try {
return new Scanner(file, "UTF-8");
@@ -101,7 +112,12 @@
}
}
- public void findAllTest() {
+ /*
+ * Tests that the matches produced by findAll(pat) are the same
+ * as those returned by findWithinHorizon(pat, 0). This tests
+ * a single pattern against a single input file.
+ */
+ public void findAllFileTest() {
// derive expected result by using conventional loop
Pattern pat = Pattern.compile("[A-Z]{7,}");
List<String> expected = new ArrayList<>();
@@ -116,10 +132,66 @@
Supplier<Stream<String>> ss =
() -> makeFileScanner(inputFile).findAll(pat).map(MatchResult::group);
- withData(TestData.Factory.ofSupplier("findAllTest", ss))
+ withData(TestData.Factory.ofSupplier("findAllFileTest", ss))
.stream(LambdaTestHelpers.identity())
.expectedResult(expected)
.exercise();
}
+ @DataProvider(name = "FindAllZero")
+ public static Object[][] makeFindAllZeroTestData() {
+ // each inner array is [String input, String patternString]; every pattern here can also match the empty string
+ List<Object[]> data = new ArrayList<>();
+
+ data.add(new Object[] { "aaaaa", "a*" }); // input made only of 'a'
+ data.add(new Object[] { "aaaaab", "a*" }); // one to three trailing chars that "a*" can only match emptily
+ data.add(new Object[] { "aaaaabb", "a*" });
+ data.add(new Object[] { "aaaaabbb", "a*" });
+ data.add(new Object[] { "aaabbaaaa", "a*" }); // two runs of 'a' separated by "bb", then zero to three trailing 'b'
+ data.add(new Object[] { "aaabbaaaab", "a*" });
+ data.add(new Object[] { "aaabbaaaabb", "a*" });
+ data.add(new Object[] { "aaabbaaaabbb", "a*" });
+ data.add(new Object[] { "aaabbaaaa", "a*|b*" }); // same four inputs against an alternation of two patterns that can each match empty
+ data.add(new Object[] { "aaabbaaaab", "a*|b*" });
+ data.add(new Object[] { "aaabbaaaabb", "a*|b*" });
+ data.add(new Object[] { "aaabbaaaabbb", "a*|b*" });
+
+ return data.toArray(new Object[0][]);
+ }
+
+ /*
+ * Tests findAll() using a pattern (patternString) against an input string (input).
+ * The results from findAll() should equal the results obtained
+ * using a loop around Matcher.find().
+ *
+ * The provided regexes should allow zero-length matches.
+ * This primarily tests the auto-advance feature of findAll() that
+ * occurs if the regex match is of zero length to see if it has the
+ * same behavior as Matcher.find()'s auto-advance (JDK-8150488).
+ * Without auto-advance, findAll() would return an infinite stream
+ * of zero-length matches. Apply a limit to the stream so
+ * that an infinite stream will be truncated. The limit must be
+ * high enough that the resulting truncated stream won't be
+ * mistaken for a correct expected result.
+ */
+ @Test(dataProvider = "FindAllZero")
+ public void findAllZeroTest(String input, String patternString) {
+ Pattern pattern = Pattern.compile(patternString);
+
+ // generate expected result using Matcher.find(), including its empty matches
+ Matcher m = pattern.matcher(input);
+ List<String> expected = new ArrayList<>();
+ while (m.find()) {
+ expected.add(m.group());
+ }
+
+ Supplier<Stream<String>> ss = () -> new Scanner(input).findAll(pattern) // fresh Scanner for every supplied stream
+ .limit(100) // truncates a runaway stream; far above the match count any input here can produce
+ .map(MatchResult::group);
+
+ withData(TestData.Factory.ofSupplier("findAllZeroTest", ss))
+ .stream(LambdaTestHelpers.identity())
+ .expectedResult(expected) // must equal the Matcher.find() sequence
+ .exercise();
+ }
}