8024341: j.u.regex.Pattern.splitAsStream() doesn't correspond to split() method if using an example from the spec
author psandoz
Fri, 20 Sep 2013 17:11:32 -0700
changeset 20190 15c72885e3fd
parent 20189 1e618f2a82d9
child 20191 f0e23e7272d6
child 20192 1ed8a9f56dea
8024341: j.u.regex.Pattern.splitAsStream() doesn't correspond to split() method if using an example from the spec
Reviewed-by: alanb
jdk/src/share/classes/java/util/regex/Pattern.java
jdk/test/java/util/regex/PatternStreamTest.java
jdk/test/java/util/regex/PatternTest.java
--- a/jdk/src/share/classes/java/util/regex/Pattern.java	Fri Sep 20 15:12:05 2013 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java	Fri Sep 20 17:11:32 2013 -0700
@@ -5755,7 +5755,8 @@
      * input sequence that is terminated by another subsequence that matches
      * this pattern or is terminated by the end of the input sequence.  The
      * substrings in the stream are in the order in which they occur in the
-     * input.
+     * input.  Trailing empty strings will be discarded and not encountered in
+     * the stream.
      *
      * <p> If this pattern does not match any subsequence of the input then
      * the resulting stream has just one element, namely the input sequence in
@@ -5781,6 +5782,8 @@
             private int current;
             // null if the next element, if any, needs to obtained
             private String nextElement;
+            // > 0 if there are N next empty elements
+            private int emptyElementCount;
 
             MatcherIterator() {
                 this.matcher = matcher(input);
@@ -5790,26 +5793,46 @@
                 if (!hasNext())
                     throw new NoSuchElementException();
 
-                String n = nextElement;
-                nextElement = null;
-                return n;
+                if (emptyElementCount == 0) {
+                    String n = nextElement;
+                    nextElement = null;
+                    return n;
+                } else {
+                    emptyElementCount--;
+                    return "";
+                }
             }
 
             public boolean hasNext() {
-                if (nextElement != null)
+                if (nextElement != null || emptyElementCount > 0)
                     return true;
 
                 if (current == input.length())
                     return false;
 
-                if (matcher.find()) {
+                // Consume the next matching element
+                // Count sequence of matching empty elements
+                while (matcher.find()) {
                     nextElement = input.subSequence(current, matcher.start()).toString();
                     current = matcher.end();
+                    if (!nextElement.isEmpty()) {
+                        return true;
+                    } else {
+                        emptyElementCount++;
+                    }
+                }
+
+                // Consume last matching element
+                nextElement = input.subSequence(current, input.length()).toString();
+                current = input.length();
+                if (!nextElement.isEmpty()) {
+                    return true;
                 } else {
-                    nextElement = input.subSequence(current, input.length()).toString();
-                    current = input.length();
+                    // Ignore a terminal sequence of matching empty elements
+                    emptyElementCount = 0;
+                    nextElement = null;
+                    return false;
                 }
-                return true;
             }
         }
         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/regex/PatternStreamTest.java	Fri Sep 20 17:11:32 2013 -0700
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8016846 8024341
+ * @summary Unit tests for java.util.regex.Pattern.splitAsStream
+ * @library ../stream/bootlib
+ * @build java.util.stream.OpTestCase
+ * @run testng/othervm PatternStreamTest
+ */
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+import java.util.regex.Pattern;
+import java.util.stream.LambdaTestHelpers;
+import java.util.stream.OpTestCase;
+import java.util.stream.Stream;
+import java.util.stream.TestData;
+
+@Test
+public class PatternStreamTest extends OpTestCase {
+
+    @DataProvider(name = "Stream<String>")
+    public static Object[][] makeStreamTestData() {
+        List<Object[]> data = new ArrayList<>();
+
+        String description = "";
+        String input = "awgqwefg1fefw4vssv1vvv1";
+        Pattern pattern = Pattern.compile("4");
+        List<String> expected = new ArrayList<>();
+        expected.add("awgqwefg1fefw");
+        expected.add("vssv1vvv1");
+
+        // Each row must match testStrings(String, String, Pattern, List<String>);
+        // a case that is not added to data is silently never run.
+        data.add(new Object[]{description, input, pattern, expected});
+
+        input = "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh";
+        pattern = Pattern.compile("\u00a3a");
+        expected = new ArrayList<>();
+        expected.add("afbfq");
+        expected.add("bgwgb");
+        expected.add("wngnwggw");
+        expected.add("");
+        expected.add("hjrnhneerh");
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+        // Input ends with the separator: the trailing empty string is discarded
+        input = "awgqwefg1fefw4vssv1vvv1";
+        pattern = Pattern.compile("1");
+        expected = new ArrayList<>();
+        expected.add("awgqwefg");
+        expected.add("fefw4vssv");
+        expected.add("vvv");
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+
+        input = "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv";
+        pattern = Pattern.compile("1");
+        expected = new ArrayList<>();
+        expected.add("a\u4ebafg");
+        expected.add("fefw\u4eba4\u9f9cvssv\u9f9c");
+        expected.add("v\u672c\u672cvv");
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+
+        input = "1\u56da23\u56da456\u56da7890";
+        pattern = Pattern.compile("\u56da");
+        expected = new ArrayList<>();
+        expected.add("1");
+        expected.add("23");
+        expected.add("456");
+        expected.add("7890");
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+
+        input = "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890";
+        pattern = Pattern.compile("\u56da");
+        expected = new ArrayList<>();
+        expected.add("1");
+        expected.add("23\u9f9c\u672c\u672c");
+        expected.add("456");
+        expected.add("\u9f9c\u672c7890");
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+        // Empty input is expected to produce an empty stream
+        input = "";
+        pattern = Pattern.compile("\u56da");
+        expected = new ArrayList<>();
+
+        data.add(new Object[]{description, input, pattern, expected});
+
+
+        description = "Multiple separators";
+        input = "This is,testing: with\tdifferent separators.";
+        pattern = Pattern.compile("[ \t,:.]");
+        expected = new ArrayList<>();
+        expected.add("This");
+        expected.add("is");
+        expected.add("testing");
+        expected.add("");
+        expected.add("with");
+        expected.add("different");
+        expected.add("separators");
+        data.add(new Object[] {description, input, pattern, expected});
+
+        description = "Repeated separators within and at end";
+        input = "boo:and:foo";
+        pattern = Pattern.compile("o");
+        expected = new ArrayList<>();
+        expected.add("b");
+        expected.add("");
+        expected.add(":and:f");
+        data.add(new Object[] {description, input, pattern, expected});
+
+        description = "Many repeated separators within and at end";
+        input = "booooo:and:fooooo";
+        pattern = Pattern.compile("o");
+        expected = new ArrayList<>();
+        expected.add("b");
+        expected.add("");
+        expected.add("");
+        expected.add("");
+        expected.add("");
+        expected.add(":and:f");
+        data.add(new Object[] {description, input, pattern, expected});
+        description = "Many repeated separators before last match";
+        input = "fooooo:";
+        pattern = Pattern.compile("o");
+        expected = new ArrayList<>();
+        expected.add("f");
+        expected.add("");
+        expected.add("");
+        expected.add("");
+        expected.add("");
+        expected.add(":");
+
+        data.add(new Object[] {description, input, pattern, expected});
+        return data.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "Stream<String>")
+    public void testStrings(String description, String input, Pattern pattern, List<String> expected) {
+        Supplier<Stream<String>> ss = () -> pattern.splitAsStream(input);
+        withData(TestData.Factory.ofSupplier(description, ss))
+                .stream(LambdaTestHelpers.identity())
+                .expectedResult(expected)
+                .exercise();
+    }
+}
--- a/jdk/test/java/util/regex/PatternTest.java	Fri Sep 20 15:12:05 2013 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/**
- * @test
- * @summary Unit tests for wrapping classes should delegate to default methods
- * @library ../stream/bootlib
- * @build java.util.stream.OpTestCase
- * @run testng/othervm PatternTest
- */
-
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.function.Supplier;
-import java.util.regex.Pattern;
-import java.util.stream.LambdaTestHelpers;
-import java.util.stream.OpTestCase;
-import java.util.stream.Stream;
-import java.util.stream.TestData;
-
-@Test
-public class PatternTest extends OpTestCase {
-
-    @DataProvider(name = "Stream<String>")
-    public static Object[][] makeStreamTestData() {
-        List<Object[]> data = new ArrayList<>();
-
-        String description = "";
-        String input = "awgqwefg1fefw4vssv1vvv1";
-        Pattern pattern = Pattern.compile("4");
-        List<String> expected = new ArrayList<>();
-        expected.add("awgqwefg1fefw");
-        expected.add("vssv1vvv1");
-
-        // Must match the type signature of the consumer of this data, testStrings
-        // String, String, Pattern, List<String>
-        data.add(new Object[]{description, input, pattern, expected});
-
-        input = "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh";
-        pattern = Pattern.compile("\u00a3a");
-        expected = new ArrayList<>();
-        expected.add("afbfq");
-        expected.add("bgwgb");
-        expected.add("wngnwggw");
-        expected.add("");
-        expected.add("hjrnhneerh");
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        input = "awgqwefg1fefw4vssv1vvv1";
-        pattern = Pattern.compile("1");
-        expected = new ArrayList<>();
-        expected.add("awgqwefg");
-        expected.add("fefw4vssv");
-        expected.add("vvv");
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        input = "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv";
-        pattern = Pattern.compile("1");
-        expected = new ArrayList<>();
-        expected.add("a\u4ebafg");
-        expected.add("fefw\u4eba4\u9f9cvssv\u9f9c");
-        expected.add("v\u672c\u672cvv");
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        input = "1\u56da23\u56da456\u56da7890";
-        pattern = Pattern.compile("\u56da");
-        expected = new ArrayList<>();
-        expected.add("1");
-        expected.add("23");
-        expected.add("456");
-        expected.add("7890");
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        input = "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890";
-        pattern = Pattern.compile("\u56da");
-        expected = new ArrayList<>();
-        expected.add("1");
-        expected.add("23\u9f9c\u672c\u672c");
-        expected.add("456");
-        expected.add("\u9f9c\u672c7890");
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        input = "";
-        pattern = Pattern.compile("\u56da");
-        expected = new ArrayList<>();
-
-        data.add(new Object[]{description, input, pattern, expected});
-
-
-        description = "Multiple separators";
-        input = "This is,testing: with\tdifferent separators.";
-        pattern = Pattern.compile("[ \t,:.]");
-        expected = new ArrayList<>();
-        expected.add("This");
-        expected.add("is");
-        expected.add("testing");
-        expected.add("");
-        expected.add("with");
-        expected.add("different");
-        expected.add("separators");
-
-        data.add(new Object[] {description, input, pattern, expected});
-        return data.toArray(new Object[0][]);
-    }
-
-    @Test(dataProvider = "Stream<String>")
-    public void testStrings(String description, String input, Pattern pattern, List<String> expected) {
-        Supplier<Stream<String>> ss =  () -> pattern.splitAsStream(input);
-        withData(TestData.Factory.ofSupplier(description, ss))
-                .stream(LambdaTestHelpers.identity())
-                .expectedResult(expected)
-                .exercise();
-    }
-}