8150488: Scanner.findAll() can return infinite stream if regex matches zero chars
authorsmarks
Fri, 28 Apr 2017 12:16:30 -0700
changeset 44855 30e9f8892612
parent 44854 5a486e0acd29
child 44856 72e90db93f70
8150488: Scanner.findAll() can return infinite stream if regex matches zero chars Reviewed-by: sherman
jdk/src/java.base/share/classes/java/util/Scanner.java
jdk/test/java/util/Scanner/ScannerStreamTest.java
--- a/jdk/src/java.base/share/classes/java/util/Scanner.java	Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/src/java.base/share/classes/java/util/Scanner.java	Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2846,6 +2846,7 @@
     class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
         final Pattern pattern;
         int expectedCount = -1;
+        private boolean advance = false; // true if we need to auto-advance
 
         FindSpliterator(Pattern pattern) {
             super(Long.MAX_VALUE,
@@ -2861,12 +2862,15 @@
                     throw new ConcurrentModificationException();
                 }
             } else {
+                // init
+                matchValid = false;
+                matcher.usePattern(pattern);
                 expectedCount = modCount;
             }
 
             while (true) {
                 // assert expectedCount == modCount
-                if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
+                if (nextInBuffer()) { // doesn't increment modCount
                     cons.accept(matcher.toMatchResult());
                     if (expectedCount != modCount) {
                         throw new ConcurrentModificationException();
@@ -2879,6 +2883,29 @@
                     return false; // reached end of input
             }
         }
+
+        // reimplementation of findPatternInBuffer with auto-advance on zero-length matches
+        private boolean nextInBuffer() {
+            if (advance) {
+                if (position + 1 > buf.limit()) {
+                    if (!sourceClosed)
+                        needInput = true;
+                    return false;
+                }
+                position++;
+                advance = false;
+            }
+            matcher.region(position, buf.limit());
+            if (matcher.find() && (!matcher.hitEnd() || sourceClosed)) {
+                 // Did not hit end, or hit real end
+                 position = matcher.end();
+                 advance = matcher.start() == position;
+                 return true;
+            }
+            if (!sourceClosed)
+                needInput = true;
+            return false;
+        }
     }
 
     /** Small LRU cache of Patterns. */
--- a/jdk/test/java/util/Scanner/ScannerStreamTest.java	Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/test/java/util/Scanner/ScannerStreamTest.java	Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,7 @@
 import java.util.Scanner;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
+import java.util.regex.Matcher;
 import java.util.regex.MatchResult;
 import java.util.regex.Pattern;
 import java.util.stream.LambdaTestHelpers;
@@ -44,7 +45,7 @@
 
 /**
  * @test
- * @bug 8072722
+ * @bug 8072722 8150488
  * @summary Tests of stream support in java.util.Scanner
  * @library ../stream/bootlib
  * @build java.base/java.util.stream.OpTestCase
@@ -56,19 +57,22 @@
 
     static File inputFile = new File(System.getProperty("test.src", "."), "input.txt");
 
-    @DataProvider(name = "Patterns")
-    public static Object[][] makeStreamTestData() {
+    @DataProvider(name = "Tokens")
+    public static Object[][] makeTokensTestData() {
         // each inner array is [String description, String input, String delimiter]
         // delimiter may be null
         List<Object[]> data = new ArrayList<>();
 
         data.add(new Object[] { "default delimiter", "abc def ghi",           null });
         data.add(new Object[] { "fixed delimiter",   "abc,def,,ghi",          "," });
-        data.add(new Object[] { "regexp delimiter",  "###abc##def###ghi###j", "#+" });
+        data.add(new Object[] { "regex delimiter",   "###abc##def###ghi###j", "#+" });
 
         return data.toArray(new Object[0][]);
     }
 
+    /*
+     * Creates a scanner over the input, applying a delimiter if non-null.
+     */
     Scanner makeScanner(String input, String delimiter) {
         Scanner sc = new Scanner(input);
         if (delimiter != null) {
@@ -77,7 +81,11 @@
         return sc;
     }
 
-    @Test(dataProvider = "Patterns")
+    /*
+     * Given input and a delimiter, tests that tokens() returns the same
+     * results that would be provided by a Scanner hasNext/next loop.
+     */
+    @Test(dataProvider = "Tokens")
     public void tokensTest(String description, String input, String delimiter) {
         // derive expected result by using conventional loop
         Scanner sc = makeScanner(input, delimiter);
@@ -93,6 +101,9 @@
                 .exercise();
     }
 
+    /*
+     * Creates a Scanner over the given input file.
+     */
     Scanner makeFileScanner(File file) {
         try {
             return new Scanner(file, "UTF-8");
@@ -101,7 +112,12 @@
         }
     }
 
-    public void findAllTest() {
+    /*
+     * Tests that the matches produced by findAll(pat) are the same
+     * as what are returned by findWithinHorizon(pat, 0). This tests
+     * a single pattern against a single input file.
+     */
+    public void findAllFileTest() {
         // derive expected result by using conventional loop
         Pattern pat = Pattern.compile("[A-Z]{7,}");
         List<String> expected = new ArrayList<>();
@@ -116,10 +132,66 @@
         Supplier<Stream<String>> ss =
             () -> makeFileScanner(inputFile).findAll(pat).map(MatchResult::group);
 
-        withData(TestData.Factory.ofSupplier("findAllTest", ss))
+        withData(TestData.Factory.ofSupplier("findAllFileTest", ss))
                 .stream(LambdaTestHelpers.identity())
                 .expectedResult(expected)
                 .exercise();
     }
 
+    @DataProvider(name = "FindAllZero")
+    public static Object[][] makeFindAllZeroTestData() {
+        // each inner array is [String input, String patternString]
+        List<Object[]> data = new ArrayList<>();
+
+        data.add(new Object[] { "aaaaa",        "a*" });
+        data.add(new Object[] { "aaaaab",       "a*" });
+        data.add(new Object[] { "aaaaabb",      "a*" });
+        data.add(new Object[] { "aaaaabbb",     "a*" });
+        data.add(new Object[] { "aaabbaaaa",    "a*" });
+        data.add(new Object[] { "aaabbaaaab",   "a*" });
+        data.add(new Object[] { "aaabbaaaabb",  "a*" });
+        data.add(new Object[] { "aaabbaaaabbb", "a*" });
+        data.add(new Object[] { "aaabbaaaa",    "a*|b*" });
+        data.add(new Object[] { "aaabbaaaab",   "a*|b*" });
+        data.add(new Object[] { "aaabbaaaabb",  "a*|b*" });
+        data.add(new Object[] { "aaabbaaaabbb", "a*|b*" });
+
+        return data.toArray(new Object[0][]);
+    }
+
+    /*
+     * Tests findAll() using a pattern against an input string.
+     * The results from findAll() should equal the results obtained
+     * using a loop around Matcher.find().
+     *
+     * The provided regexes should allow zero-length matches.
+     * This primarily tests the auto-advance feature of findAll() that
+     * occurs if the regex match is of zero length to see if it has the
+     * same behavior as Matcher.find()'s auto-advance (JDK-8150488).
+     * Without auto-advance, findAll() would return an infinite stream
+     * of zero-length matches. Apply a limit to the stream so
+     * that an infinite stream will be truncated. The limit must be
+     * high enough that the resulting truncated stream won't be
+     * mistaken for a correct expected result.
+     */
+    @Test(dataProvider = "FindAllZero")
+    public void findAllZeroTest(String input, String patternString) {
+        Pattern pattern = Pattern.compile(patternString);
+
+        // generate expected result using Matcher.find()
+        Matcher m = pattern.matcher(input);
+        List<String> expected = new ArrayList<>();
+        while (m.find()) {
+            expected.add(m.group());
+        }
+
+        Supplier<Stream<String>> ss = () -> new Scanner(input).findAll(pattern)
+                                                              .limit(100)
+                                                              .map(MatchResult::group);
+
+        withData(TestData.Factory.ofSupplier("findAllZeroTest", ss))
+                .stream(LambdaTestHelpers.identity())
+                .expectedResult(expected)
+                .exercise();
+    }
 }