8200380: String::lines
author jlaskey
Tue, 22 May 2018 12:04:05 -0300
changeset 50215 2fb27c352cae
parent 50214 9bdd233f270d
child 50216 f4fd580dd7d1
8200380: String::lines Reviewed-by: sundar, sherman, rriggs, psandoz
src/java.base/share/classes/java/lang/String.java
src/java.base/share/classes/java/lang/StringLatin1.java
src/java.base/share/classes/java/lang/StringUTF16.java
test/jdk/java/lang/String/Lines.java
--- a/src/java.base/share/classes/java/lang/String.java	Tue May 22 12:26:17 2018 +0200
+++ b/src/java.base/share/classes/java/lang/String.java	Tue May 22 12:04:05 2018 -0300
@@ -41,6 +41,7 @@
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 import java.util.stream.IntStream;
+import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
@@ -2754,6 +2755,39 @@
     }
 
     /**
+     * Returns a stream of substrings extracted from this string
+     * partitioned by line terminators.
+     * <p>
+     * Line terminators recognized are line feed
+     * {@code "\n"} ({@code U+000A}),
+     * carriage return
+     * {@code "\r"} ({@code U+000D})
+     * and a carriage return followed immediately by a line feed
+     * {@code "\r\n"} ({@code U+000D U+000A}).
+     * <p>
+     * The stream returned by this method contains each line of
+     * this string that is terminated by a line terminator except that
+     * the last line can either be terminated by a line terminator or the
+     * end of the string.
+     * The lines in the stream are in the order in which
+     * they occur in this string and do not include the line terminators
+     * partitioning the lines.
+     *
+     * @implNote This method provides better performance than
+     *           {@code split("\R")} by supplying elements lazily and
+     *           by faster search of new line terminators.
+     *
+     * @return  the stream of strings extracted from this string
+     *          partitioned by line terminators
+     *
+     * @since 11
+     */
+    public Stream<String> lines() {
+        return isLatin1() ? StringLatin1.lines(value)   // scans value one byte per char
+                          : StringUTF16.lines(value);   // scans value in char units (half the byte length)
+    }
+
+    /**
      * This object (which is already a string!) is itself returned.
      *
      * @return  the string itself.
--- a/src/java.base/share/classes/java/lang/StringLatin1.java	Tue May 22 12:26:17 2018 +0200
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java	Tue May 22 12:04:05 2018 -0300
@@ -29,8 +29,11 @@
 import java.util.Locale;
 import java.util.Objects;
 import java.util.Spliterator;
+import java.util.function.Consumer;
 import java.util.function.IntConsumer;
 import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 import jdk.internal.HotSpotIntrinsicCandidate;
 
 import static java.lang.String.LATIN1;
@@ -589,6 +592,100 @@
         return (right != value.length) ? newString(value, 0, right) : null;
     }
 
+    private final static class LinesSpliterator implements Spliterator<String> { // yields the lines of a byte[] (one byte per char), split on \n, \r and \r\n
+        private byte[] value; // backing bytes; only read by this class
+        private int index;        // current index, modified on advance/split
+        private final int fence;  // one past last index
+
+        LinesSpliterator(byte[] value) { // covers the whole array
+            this(value, 0, value.length);
+        }
+
+        LinesSpliterator(byte[] value, int start, int length) { // covers [start, start + length)
+            this.value = value;
+            this.index = start;
+            this.fence = start + length;
+        }
+
+        private int indexOfLineSeparator(int start) { // index of the first \n or \r in [start, fence), or fence if none
+            for (int current = start; current < fence; current++) {
+                byte ch = value[current];
+                if (ch == '\n' || ch == '\r') {
+                    return current;
+                }
+            }
+            return fence;
+        }
+
+        private int skipLineSeparator(int start) { // index just past the terminator at start; callers pass a result of indexOfLineSeparator
+            if (start < fence) {
+                if (value[start] == '\r') {
+                    int next = start + 1;
+                    if (next < fence && value[next] == '\n') {
+                        return next + 1; // \r\n is consumed as a single terminator
+                    }
+                }
+                return start + 1; // lone \r or \n
+            }
+            return fence; // start was already at the end: nothing to skip
+        }
+
+        private String next() { // builds the line starting at index and moves index past its terminator
+            int start = index;
+            int end = indexOfLineSeparator(start);
+            index = skipLineSeparator(end);
+            return newString(value, start, end - start); // length end - start leaves the terminator out
+        }
+
+        @Override
+        public boolean tryAdvance(Consumer<? super String> action) {
+            if (action == null) {
+                throw new NullPointerException("tryAdvance action missing");
+            }
+            if (index != fence) { // at least one char left, so there is one more line
+                action.accept(next());
+                return true;
+            }
+            return false;
+        }
+
+        @Override
+        public void forEachRemaining(Consumer<? super String> action) {
+            if (action == null) {
+                throw new NullPointerException("forEachRemaining action missing");
+            }
+            while (index != fence) { // next() always advances index, so this terminates
+                action.accept(next());
+            }
+        }
+
+        @Override
+        public Spliterator<String> trySplit() {
+            int half = (fence + index) >>> 1; // midpoint of the remaining range
+            int mid = skipLineSeparator(indexOfLineSeparator(half)); // start of the first line beginning after the midpoint
+            if (mid < fence) { // split only if something remains for this spliterator after mid
+                int start = index;
+                index = mid; // this spliterator keeps [mid, fence)
+                return new LinesSpliterator(value, start, mid - start); // prefix [start, mid), which ends on a terminator
+            }
+            return null; // no line boundary before fence: cannot split
+        }
+
+        @Override
+        public long estimateSize() {
+            return fence - index + 1; // remaining char count plus one, used as the line-count estimate
+        }
+
+        @Override
+        public int characteristics() {
+            return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
+        }
+    }
+
+    static Stream<String> lines(byte[] value) { // streams the lines of value via LinesSpliterator
+        return StreamSupport.stream(new LinesSpliterator(value), false); // false = sequential stream
+    }
+
     public static void putChar(byte[] val, int index, int c) {
         //assert (canEncode(c));
         val[index] = (byte)(c);
--- a/src/java.base/share/classes/java/lang/StringUTF16.java	Tue May 22 12:26:17 2018 +0200
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java	Tue May 22 12:04:05 2018 -0300
@@ -28,7 +28,10 @@
 import java.util.Arrays;
 import java.util.Locale;
 import java.util.Spliterator;
+import java.util.function.Consumer;
 import java.util.function.IntConsumer;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import jdk.internal.vm.annotation.ForceInline;
 import jdk.internal.vm.annotation.DontInline;
@@ -911,6 +914,100 @@
         return (right != length) ? newString(value, 0, right) : null;
     }
 
+    private final static class LinesSpliterator implements Spliterator<String> { // yields the lines of a byte[] read in char units via getChar, split on \n, \r and \r\n
+        private byte[] value; // backing bytes; only read by this class
+        private int index;        // current index, modified on advance/split
+        private final int fence;  // one past last index
+
+        LinesSpliterator(byte[] value) { // covers the whole array; indexes are in chars, hence half the byte length
+            this(value, 0, value.length >>> 1);
+        }
+
+        LinesSpliterator(byte[] value, int start, int length) { // covers char indexes [start, start + length)
+            this.value = value;
+            this.index = start;
+            this.fence = start + length;
+        }
+
+        private int indexOfLineSeparator(int start) { // index of the first \n or \r in [start, fence), or fence if none
+            for (int current = start; current < fence; current++) {
+                char ch = getChar(value, current);
+                if (ch == '\n' || ch == '\r') {
+                    return current;
+                }
+            }
+            return fence;
+        }
+
+        private int skipLineSeparator(int start) { // index just past the terminator at start; callers pass a result of indexOfLineSeparator
+            if (start < fence) {
+                if (getChar(value, start) == '\r') {
+                    int next = start + 1;
+                    if (next < fence && getChar(value, next) == '\n') {
+                        return next + 1; // \r\n is consumed as a single terminator
+                    }
+                }
+                return start + 1; // lone \r or \n
+            }
+            return fence; // start was already at the end: nothing to skip
+        }
+
+        private String next() { // builds the line starting at index and moves index past its terminator
+            int start = index;
+            int end = indexOfLineSeparator(start);
+            index = skipLineSeparator(end);
+            return newString(value, start, end - start); // length end - start leaves the terminator out
+        }
+
+        @Override
+        public boolean tryAdvance(Consumer<? super String> action) {
+            if (action == null) {
+                throw new NullPointerException("tryAdvance action missing");
+            }
+            if (index != fence) { // at least one char left, so there is one more line
+                action.accept(next());
+                return true;
+            }
+            return false;
+        }
+
+        @Override
+        public void forEachRemaining(Consumer<? super String> action) {
+            if (action == null) {
+                throw new NullPointerException("forEachRemaining action missing");
+            }
+            while (index != fence) { // next() always advances index, so this terminates
+                action.accept(next());
+            }
+        }
+
+        @Override
+        public Spliterator<String> trySplit() {
+            int half = (fence + index) >>> 1; // midpoint of the remaining range
+            int mid = skipLineSeparator(indexOfLineSeparator(half)); // start of the first line beginning after the midpoint
+            if (mid < fence) { // split only if something remains for this spliterator after mid
+                int start = index;
+                index = mid; // this spliterator keeps [mid, fence)
+                return new LinesSpliterator(value, start, mid - start); // prefix [start, mid), which ends on a terminator
+            }
+            return null; // no line boundary before fence: cannot split
+        }
+
+        @Override
+        public long estimateSize() {
+            return fence - index + 1; // remaining char count plus one, used as the line-count estimate
+        }
+
+        @Override
+        public int characteristics() {
+            return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
+        }
+    }
+
+    static Stream<String> lines(byte[] value) { // streams the lines of value via LinesSpliterator
+        return StreamSupport.stream(new LinesSpliterator(value), false); // false = sequential stream
+    }
+
     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
         while (off < end) {
             putChar(val, index++, str[off++]);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/String/Lines.java	Tue May 22 12:04:05 2018 -0300
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @summary Basic lines functionality
+ * @bug 8200380
+ * @run main/othervm Lines
+ */
+
+import java.util.Iterator;
+import java.util.stream.Stream;
+import java.io.BufferedReader;
+import java.io.StringReader;
+
+public class Lines { // checks that String::lines yields the same lines as BufferedReader::lines
+    public static void main(String... arg) {
+        testLines();
+    }
+
+    /*
+     * Test with Latin-1 strings, then with strings containing U+2022 (not representable in Latin-1)
+     */
+    static void testLines() {
+        testString("");
+        testString(" ");
+        testString("\n");
+        testString("\n\n\n");
+        testString("\r\r\r");
+        testString("\r\n\r\n\r\n");
+        testString("\n\r\r\n");
+        testString("abc\ndef\nghi\n");
+        testString("abc\ndef\nghi");
+        testString("abc\rdef\rghi\r");
+        testString("abc\rdef\rghi");
+        testString("abc\r\ndef\r\nghi\r\n");
+        testString("abc\r\ndef\r\nghi");
+
+        testString("\u2022"); // must be the unicode escape: the octal form "\2022" is U+0082 followed by '2', still Latin-1
+        testString("\u2022\n");
+        testString("\u2022\n\u2022\n\u2022\n");
+        testString("\u2022\r\u2022\r\u2022\r");
+        testString("\u2022\r\n\u2022\r\n\u2022\r\n");
+        testString("\u2022\n\u2022\r\u2022\r\n");
+        testString("abc\u2022\ndef\u2022\nghi\u2022\n");
+        testString("abc\u2022\ndef\u2022\nghi\u2022");
+        testString("abc\u2022\rdef\u2022\rghi\u2022\r");
+        testString("abc\u2022\rdef\u2022\rghi\u2022");
+        testString("abc\u2022\r\ndef\u2022\r\nghi\u2022\r\n");
+        testString("abc\u2022\r\ndef\u2022\r\nghi\u2022");
+        testString("\u2022\n\n\n");
+    }
+
+    static void testString(String string) { // throws RuntimeException if the two line sequences differ in content or length
+        Stream<String> lines = string.lines();
+        Stream<String> brLines = new BufferedReader(new StringReader(string)).lines(); // reference implementation
+
+        Iterator<String> iterator = lines.iterator();
+        Iterator<String> brIterator = brLines.iterator();
+        int count = 0; // number of line pairs compared so far, used in diagnostics
+
+        while (iterator.hasNext() && brIterator.hasNext()) {
+            count++;
+            String line = iterator.next();
+            String brLine = brIterator.next();
+
+            if (!line.equals(brLine)) {
+                String replace = string.replace("\n", "\\n").replace("\r", "\\r"); // literal replace: replaceAll would read "\\n" as an escaped 'n'
+                System.err.format("Mismatch at line %d of \"%s\"%n", count, replace);
+                throw new RuntimeException();
+            }
+        }
+
+        if (iterator.hasNext() || brIterator.hasNext()) { // one side produced more lines than the other
+            System.err.format("Mismatch after line %d of \"%s\"%n", count, string);
+            throw new RuntimeException();
+        }
+    }
+}