# HG changeset patch
# User smarks
# Date 1493406990 25200
# Node ID 30e9f88926124f480c571a92798c9bf58a0b7bd2
# Parent  5a486e0acd298c09a7a97e9b5d0c311c4d0d073a
8150488: Scanner.findAll() can return infinite stream if regex matches zero chars
Reviewed-by: sherman

diff -r 5a486e0acd29 -r 30e9f8892612 jdk/src/java.base/share/classes/java/util/Scanner.java
--- a/jdk/src/java.base/share/classes/java/util/Scanner.java	Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/src/java.base/share/classes/java/util/Scanner.java	Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2846,6 +2846,7 @@
     class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
         final Pattern pattern;
         int expectedCount = -1;
+        private boolean advance = false; // true if we need to auto-advance
 
         FindSpliterator(Pattern pattern) {
             super(Long.MAX_VALUE,
@@ -2861,12 +2862,15 @@
                     throw new ConcurrentModificationException();
                 }
             } else {
+                // init
+                matchValid = false;
+                matcher.usePattern(pattern);
                 expectedCount = modCount;
             }
 
             while (true) {
                 // assert expectedCount == modCount
-                if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
+                if (nextInBuffer()) { // doesn't increment modCount
                     cons.accept(matcher.toMatchResult());
                     if (expectedCount != modCount) {
                         throw new ConcurrentModificationException();
@@ -2879,6 +2883,29 @@
                     return false; // reached end of input
             }
         }
+
+        // reimplementation of findPatternInBuffer with auto-advance on zero-length matches
+        private boolean nextInBuffer() {
+            if (advance) {
+                if (position + 1 > buf.limit()) {
+                    if (!sourceClosed)
+                        needInput = true;
+                    return false;
+                }
+                position++;
+                advance = false;
+            }
+            matcher.region(position, buf.limit());
+            if (matcher.find() && (!matcher.hitEnd() || sourceClosed)) {
+                // Did not hit end, or hit real end
+                position = matcher.end();
+                advance = matcher.start() == position;
+                return true;
+            }
+            if (!sourceClosed)
+                needInput = true;
+            return false;
+        }
     }
 
     /** Small LRU cache of Patterns. */
diff -r 5a486e0acd29 -r 30e9f8892612 jdk/test/java/util/Scanner/ScannerStreamTest.java
--- a/jdk/test/java/util/Scanner/ScannerStreamTest.java	Fri Apr 28 14:16:33 2017 +0100
+++ b/jdk/test/java/util/Scanner/ScannerStreamTest.java	Fri Apr 28 12:16:30 2017 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,7 @@
 import java.util.Scanner;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
+import java.util.regex.Matcher;
 import java.util.regex.MatchResult;
 import java.util.regex.Pattern;
 import java.util.stream.LambdaTestHelpers;
@@ -44,7 +45,7 @@
 
 /**
  * @test
- * @bug 8072722
+ * @bug 8072722 8150488
  * @summary Tests of stream support in java.util.Scanner
  * @library ../stream/bootlib
  * @build java.base/java.util.stream.OpTestCase
@@ -56,19 +57,22 @@
 
     static File inputFile = new File(System.getProperty("test.src", "."), "input.txt");
 
-    @DataProvider(name = "Patterns")
-    public static Object[][] makeStreamTestData() {
+    @DataProvider(name = "Tokens")
+    public static Object[][] makeTokensTestData() {
         // each inner array is [String description, String input, String delimiter]
         // delimiter may be null
         List<Object[]> data = new ArrayList<>();
 
         data.add(new Object[] { "default delimiter", "abc def ghi", null });
         data.add(new Object[] { "fixed delimiter", "abc,def,,ghi", "," });
-        data.add(new Object[] { "regexp delimiter", "###abc##def###ghi###j", "#+" });
+        data.add(new Object[] { "regex delimiter", "###abc##def###ghi###j", "#+" });
 
         return data.toArray(new Object[0][]);
     }
 
+    /*
+     * Creates a scanner over the input, applying a delimiter if non-null.
+     */
     Scanner makeScanner(String input, String delimiter) {
         Scanner sc = new Scanner(input);
         if (delimiter != null) {
@@ -77,7 +81,11 @@
         return sc;
     }
 
-    @Test(dataProvider = "Patterns")
+    /*
+     * Given input and a delimiter, tests that tokens() returns the same
+     * results that would be provided by a Scanner hasNext/next loop.
+     */
+    @Test(dataProvider = "Tokens")
     public void tokensTest(String description, String input, String delimiter) {
         // derive expected result by using conventional loop
         Scanner sc = makeScanner(input, delimiter);
@@ -93,6 +101,9 @@
                 .exercise();
     }
 
+    /*
+     * Creates a Scanner over the given input file.
+     */
     Scanner makeFileScanner(File file) {
         try {
             return new Scanner(file, "UTF-8");
@@ -101,7 +112,12 @@
         }
     }
 
-    public void findAllTest() {
+    /*
+     * Tests that the matches produced by findAll(pat) are the same
+     * as what are returned by findWithinHorizon(pat, 0). This tests
+     * a single pattern against a single input file.
+     */
+    public void findAllFileTest() {
         // derive expected result by using conventional loop
         Pattern pat = Pattern.compile("[A-Z]{7,}");
         List<String> expected = new ArrayList<>();
@@ -116,10 +132,66 @@
         Supplier<Stream<String>> ss =
             () -> makeFileScanner(inputFile).findAll(pat).map(MatchResult::group);
 
-        withData(TestData.Factory.ofSupplier("findAllTest", ss))
+        withData(TestData.Factory.ofSupplier("findAllFileTest", ss))
                 .stream(LambdaTestHelpers.identity())
                 .expectedResult(expected)
                 .exercise();
     }
 
+    @DataProvider(name = "FindAllZero")
+    public static Object[][] makeFindAllZeroTestData() {
+        // each inner array is [String input, String patternString]
+        List<Object[]> data = new ArrayList<>();
+
+        data.add(new Object[] { "aaaaa",        "a*" });
+        data.add(new Object[] { "aaaaab",       "a*" });
+        data.add(new Object[] { "aaaaabb",      "a*" });
+        data.add(new Object[] { "aaaaabbb",     "a*" });
+        data.add(new Object[] { "aaabbaaaa",    "a*" });
+        data.add(new Object[] { "aaabbaaaab",   "a*" });
+        data.add(new Object[] { "aaabbaaaabb",  "a*" });
+        data.add(new Object[] { "aaabbaaaabbb", "a*" });
+        data.add(new Object[] { "aaabbaaaa",    "a*|b*" });
+        data.add(new Object[] { "aaabbaaaab",   "a*|b*" });
+        data.add(new Object[] { "aaabbaaaabb",  "a*|b*" });
+        data.add(new Object[] { "aaabbaaaabbb", "a*|b*" });
+
+        return data.toArray(new Object[0][]);
+    }
+
+    /*
+     * Tests findAll() using a pattern against an input string.
+     * The results from findAll() should equal the results obtained
+     * using a loop around Matcher.find().
+     *
+     * The provided regexes should allow zero-length matches.
+     * This primarily tests the auto-advance feature of findAll() that
+     * occurs if the regex match is of zero length to see if it has the
+     * same behavior as Matcher.find()'s auto-advance (JDK-8150488).
+     * Without auto-advance, findAll() would return an infinite stream
+     * of zero-length matches. Apply a limit to the stream so
+     * that an infinite stream will be truncated. The limit must be
+     * high enough that the resulting truncated stream won't be
+     * mistaken for a correct expected result.
+     */
+    @Test(dataProvider = "FindAllZero")
+    public void findAllZeroTest(String input, String patternString) {
+        Pattern pattern = Pattern.compile(patternString);
+
+        // generate expected result using Matcher.find()
+        Matcher m = pattern.matcher(input);
+        List<String> expected = new ArrayList<>();
+        while (m.find()) {
+            expected.add(m.group());
+        }
+
+        Supplier<Stream<String>> ss = () -> new Scanner(input).findAll(pattern)
+                                                              .limit(100)
+                                                              .map(MatchResult::group);
+
+        withData(TestData.Factory.ofSupplier("findAllZeroTest", ss))
+                .stream(LambdaTestHelpers.identity())
+                .expectedResult(expected)
+                .exercise();
+    }
 }