8071477: Better Spliterator implementations for String.chars() and String.codePoints()
authorpsandoz
Mon, 26 Jan 2015 17:26:49 +0000
changeset 28667 2245cc40bf5d
parent 28666 49cdfa0ea390
child 28668 a5b9168af1bb
child 28758 343c4ddd1520
8071477: Better Spliterator implementations for String.chars() and String.codePoints() Reviewed-by: sherman
jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java
jdk/src/java.base/share/classes/java/lang/String.java
jdk/test/TEST.groups
jdk/test/java/lang/CharSequence/DefaultTest.java
jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java
jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java
--- a/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java	Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java	Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,9 @@
 
 import sun.misc.FloatingDecimal;
 import java.util.Arrays;
+import java.util.Spliterator;
+import java.util.stream.IntStream;
+import java.util.stream.StreamSupport;
 
 /**
  * A mutable sequence of characters.
@@ -292,7 +295,7 @@
         if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) {
             throw new IndexOutOfBoundsException();
         }
-        return Character.codePointCountImpl(value, beginIndex, endIndex-beginIndex);
+        return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
     }
 
     /**
@@ -1432,6 +1435,34 @@
     public abstract String toString();
 
     /**
+     * {@inheritDoc}
+     * @since 1.9
+     */
+    @Override
+    public IntStream chars() {
+        // Reuse String-based spliterator. This requires a supplier to
+        // capture the value and count when the terminal operation is executed
+        return StreamSupport.intStream(
+                () -> new String.IntCharArraySpliterator(value, 0, count, 0),
+                Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED,
+                false);
+    }
+
+    /**
+     * {@inheritDoc}
+     * @since 1.9
+     */
+    @Override
+    public IntStream codePoints() {
+        // Reuse String-based spliterator. This requires a supplier to
+        // capture the value and count when the terminal operation is executed
+        return StreamSupport.intStream(
+                () -> new String.CodePointsSpliterator(value, 0, count, 0),
+                Spliterator.ORDERED,
+                false);
+    }
+
+    /**
      * Needed by {@code String} for the contentEquals method.
      */
     final char[] getValue() {
--- a/jdk/src/java.base/share/classes/java/lang/String.java	Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/src/java.base/share/classes/java/lang/String.java	Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1994, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,10 +34,14 @@
 import java.util.Formatter;
 import java.util.Locale;
 import java.util.Objects;
+import java.util.Spliterator;
 import java.util.StringJoiner;
+import java.util.function.IntConsumer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
+import java.util.stream.IntStream;
+import java.util.stream.StreamSupport;
 
 /**
  * The {@code String} class represents character strings. All
@@ -2894,6 +2898,180 @@
         return this;
     }
 
+    static class IntCharArraySpliterator implements Spliterator.OfInt {
+        private final char[] array;
+        private int index;        // current index, modified on advance/split
+        private final int fence;  // one past last index
+        private final int cs;
+
+        IntCharArraySpliterator(char[] array, int acs) {
+            this(array, 0, array.length, acs);
+        }
+
+        IntCharArraySpliterator(char[] array, int origin, int fence, int acs) {
+            this.array = array;
+            this.index = origin;
+            this.fence = fence;
+            this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
+                      | Spliterator.SUBSIZED;
+        }
+
+        @Override
+        public OfInt trySplit() {
+            int lo = index, mid = (lo + fence) >>> 1;
+            return (lo >= mid)
+                   ? null
+                   : new IntCharArraySpliterator(array, lo, index = mid, cs);
+        }
+
+        @Override
+        public void forEachRemaining(IntConsumer action) {
+            char[] a; int i, hi; // hoist accesses and checks from loop
+            if (action == null)
+                throw new NullPointerException();
+            if ((a = array).length >= (hi = fence) &&
+                (i = index) >= 0 && i < (index = hi)) {
+                do { action.accept(a[i]); } while (++i < hi);
+            }
+        }
+
+        @Override
+        public boolean tryAdvance(IntConsumer action) {
+            if (action == null)
+                throw new NullPointerException();
+            if (index >= 0 && index < fence) {
+                action.accept(array[index++]);
+                return true;
+            }
+            return false;
+        }
+
+        @Override
+        public long estimateSize() { return (long)(fence - index); }
+
+        @Override
+        public int characteristics() {
+            return cs;
+        }
+    }
+
+    /**
+     * Returns a stream of {@code int} zero-extending the {@code char} values
+     * from this sequence.  Any char which maps to a <a
+     * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
+     * point</a> is passed through uninterpreted.
+     *
+     * @return an IntStream of char values from this sequence
+     * @since 1.9
+     */
+    @Override
+    public IntStream chars() {
+        return StreamSupport.intStream(
+                new IntCharArraySpliterator(value, Spliterator.IMMUTABLE), false);
+    }
+
+    static class CodePointsSpliterator implements Spliterator.OfInt {
+        private final char[] array;
+        private int index;        // current index, modified on advance/split
+        private final int fence;  // one past last index
+        private final int cs;
+
+        CodePointsSpliterator(char[] array, int acs) {
+            this(array, 0, array.length, acs);
+        }
+
+        CodePointsSpliterator(char[] array, int origin, int fence, int acs) {
+            this.array = array;
+            this.index = origin;
+            this.fence = fence;
+            this.cs = acs | Spliterator.ORDERED;
+        }
+
+        @Override
+        public OfInt trySplit() {
+            int lo = index, mid = (lo + fence) >>> 1;
+            if (lo >= mid)
+                return null;
+
+            int midOneLess;
+            // If the mid-point intersects a surrogate pair
+            if (Character.isLowSurrogate(array[mid]) &&
+                Character.isHighSurrogate(array[midOneLess = (mid -1)])) {
+                // If there is only one pair it cannot be split
+                if (lo >= midOneLess)
+                    return null;
+                // Shift the mid-point to align with the surrogate pair
+                return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
+            }
+            return new CodePointsSpliterator(array, lo, index = mid, cs);
+        }
+
+        @Override
+        public void forEachRemaining(IntConsumer action) {
+            char[] a; int i, hi; // hoist accesses and checks from loop
+            if (action == null)
+                throw new NullPointerException();
+            if ((a = array).length >= (hi = fence) &&
+                (i = index) >= 0 && i < (index = hi)) {
+                do {
+                    i = advance(a, i, hi, action);
+                } while (i < hi);
+            }
+        }
+
+        @Override
+        public boolean tryAdvance(IntConsumer action) {
+            if (action == null)
+                throw new NullPointerException();
+            if (index >= 0 && index < fence) {
+                index = advance(array, index, fence, action);
+                return true;
+            }
+            return false;
+        }
+
+        // Advance one code point from the index, i, and return the next
+        // index to advance from
+        private static int advance(char[] a, int i, int hi, IntConsumer action) {
+            char c1 = a[i++];
+            int cp = c1;
+            if (Character.isHighSurrogate(c1) && i < hi) {
+                char c2 = a[i];
+                if (Character.isLowSurrogate(c2)) {
+                    i++;
+                    cp = Character.toCodePoint(c1, c2);
+                }
+            }
+            action.accept(cp);
+            return i;
+        }
+
+        @Override
+        public long estimateSize() { return (long)(fence - index); }
+
+        @Override
+        public int characteristics() {
+            return cs;
+        }
+    }
+
+    /**
+     * Returns a stream of code point values from this sequence.  Any surrogate
+     * pairs encountered in the sequence are combined as if by {@linkplain
+     * Character#toCodePoint Character.toCodePoint} and the result is passed
+     * to the stream. Any other code units, including ordinary BMP characters,
+     * unpaired surrogates, and undefined code units, are zero-extended to
+     * {@code int} values which are then passed to the stream.
+     *
+     * @return an IntStream of Unicode code points from this sequence
+     * @since 1.9
+     */
+    @Override
+    public IntStream codePoints() {
+        return StreamSupport.intStream(
+                new CodePointsSpliterator(value, Spliterator.IMMUTABLE), false);
+    }
+
     /**
      * Converts this string to a new character array.
      *
--- a/jdk/test/TEST.groups	Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/TEST.groups	Mon Jan 26 17:26:49 2015 +0000
@@ -628,7 +628,6 @@
   sun/net/www/protocol/http \
   java/io/BufferedReader/Lines.java  \
   java/lang/reflect/DefaultStaticTest/DefaultStaticInvokeTest.java \
-  java/lang/CharSequence/DefaultTest.java  \
   java/lang/IntegralPrimitiveToString.java  \
   java/lang/PrimitiveSumMinMaxTest.java  \
   java/lang/String/StringJoinTest.java  \
--- a/jdk/test/java/lang/CharSequence/DefaultTest.java	Mon Jan 26 17:06:00 2015 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator;
-import java.util.Spliterator;
-import java.util.stream.Collectors;
-
-import org.testng.annotations.Test;
-
-import static org.testng.Assert.*;
-
-/*
- * @test
- * @summary Unit test for CharSequence default methods
- * @bug 8012665 8025002
- * @run testng DefaultTest
- */
-
-@Test(groups = "lib")
-public class DefaultTest {
-
-    @Test(expectedExceptions = NoSuchElementException.class)
-    public void testEmptyChars() {
-        PrimitiveIterator.OfInt s = "".chars().iterator();
-        assertFalse(s.hasNext());
-        int ch = s.nextInt();
-    }
-
-    public void testSimpleChars() {
-        List<Integer> list = "abc".chars().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList((int) 'a', (int) 'b', (int) 'c'));
-    }
-
-    public void testCodePointsCharacteristics() {
-        Spliterator.OfInt s = "".codePoints().spliterator();
-        assertFalse(s.hasCharacteristics(Spliterator.SIZED | Spliterator.SUBSIZED));
-        assertTrue(s.hasCharacteristics(Spliterator.ORDERED));
-    }
-
-    @Test(expectedExceptions = NoSuchElementException.class)
-    public void testEmptyCodePoints() {
-        PrimitiveIterator.OfInt s = "".codePoints().iterator();
-        assertFalse(s.hasNext());
-        int cp = s.nextInt();
-    }
-
-    public void testSimpleCodePoints() {
-        List<Integer> list = "abc".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList((int)'a', (int)'b', (int)'c'));
-    }
-
-    public void testUndefCodePoints() {
-        List<Integer> list = "X\ufffeY".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList((int)'X', 0xFFFE, (int)'Y'));
-    }
-
-    public void testSurrogatePairing() {
-        // U+1D11E = MUSICAL SYMBOL G CLEF
-        // equivalent to surrogate pair U+D834 U+DD1E
-        List<Integer> list;
-        final int GCLEF = 0x1d11e;
-
-        list = "\ud834\udd1e".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList(GCLEF));
-        list = "A\ud834\udd1e".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList((int)'A', GCLEF));
-        list = "\ud834\udd1eB".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList(GCLEF, (int)'B'));
-        list = "X\ud834\udd1eY".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList((int)'X', GCLEF, (int)'Y'));
-    }
-
-    public void testUndefUnpaired() {
-        List<Integer> list = "W\udd1eX\ud834Y\ufffeZ".codePoints().boxed().collect(Collectors.toList());
-        assertEquals(list, Arrays.asList(
-            (int)'W', 0xdd1e, (int)'X', 0xd834, (int)'Y', 0xfffe, (int)'Z'));
-    }
-}
--- a/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java	Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java	Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /**
  * @test
- * @bug 8020156 8020009 8022326 8012913 8024405 8024408
+ * @bug 8020156 8020009 8022326 8012913 8024405 8024408 8071477
  * @run testng SpliteratorCharacteristics
  */
 
@@ -59,6 +59,57 @@
 @Test
 public class SpliteratorCharacteristics {
 
+    public void testSpliteratorFromCharSequence() {
+        class CharSequenceImpl implements CharSequence {
+            final String s;
+
+            public CharSequenceImpl(String s) {
+                this.s = s;
+            }
+
+            @Override
+            public int length() {
+                return s.length();
+            }
+
+            @Override
+            public char charAt(int index) {
+                return s.charAt(index);
+            }
+
+            @Override
+            public CharSequence subSequence(int start, int end) {
+                return s.subSequence(start, end);
+            }
+
+            @Override
+            public String toString() {
+                return s;
+            }
+        }
+
+        CharSequence cs = "A";
+        Spliterator.OfInt s = cs.chars().spliterator();
+        assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED |
+                                 Spliterator.SIZED | Spliterator.SUBSIZED);
+        assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+        s = cs.codePoints().spliterator();
+        assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED);
+        assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+
+        for (CharSequence c : Arrays.asList(new CharSequenceImpl("A"),
+                                             new StringBuilder("A"),
+                                             new StringBuffer("A"))) {
+            s = cs.chars().spliterator();
+            assertCharacteristics(s, Spliterator.ORDERED |
+                                     Spliterator.SIZED | Spliterator.SUBSIZED);
+            assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+            s = cs.codePoints().spliterator();
+            assertCharacteristics(s, Spliterator.ORDERED);
+            assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+        }
+    }
+
     public void testSpliteratorFromCollection() {
         List<Integer> l = Arrays.asList(1, 2, 3, 4);
 
--- a/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java	Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java	Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @summary Spliterator traversing and splitting tests
  * @run testng SpliteratorTraversingAndSplittingTest
- * @bug 8020016
+ * @bug 8020016 8071477
  */
 
 import org.testng.annotations.DataProvider;
@@ -85,7 +85,38 @@
 @Test
 public class SpliteratorTraversingAndSplittingTest {
 
-    private static List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000);
+    private static final List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000);
+
+    private static final String LOW = new String(new char[] {Character.MIN_LOW_SURROGATE});
+    private static final String HIGH = new String(new char[] {Character.MIN_HIGH_SURROGATE});
+    private static final String HIGH_LOW = HIGH + LOW;
+    private static final String CHAR_HIGH_LOW = "A" + HIGH_LOW;
+    private static final String HIGH_LOW_CHAR = HIGH_LOW + "A";
+    private static final String CHAR_HIGH_LOW_CHAR = "A" + HIGH_LOW + "A";
+
+    private static final List<String> STRINGS = generateTestStrings();
+
+    private static List<String> generateTestStrings() {
+        List<String> strings = new ArrayList<>();
+        for (int n : Arrays.asList(1, 2, 3, 16, 17)) {
+            strings.add(generate("A", n));
+            strings.add(generate(LOW, n));
+            strings.add(generate(HIGH, n));
+            strings.add(generate(HIGH_LOW, n));
+            strings.add(generate(CHAR_HIGH_LOW, n));
+            strings.add(generate(HIGH_LOW_CHAR, n));
+            strings.add(generate(CHAR_HIGH_LOW_CHAR, n));
+        }
+        return strings;
+    }
+
+    private static String generate(String s, int n) {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < n; i++) {
+            sb.append(s);
+        }
+        return sb.toString();
+    }
 
     private static class SpliteratorDataBuilder<T> {
         List<Object[]> data;
@@ -564,6 +595,60 @@
         }
     }
 
+    private static class SpliteratorOfIntCharDataBuilder {
+        List<Object[]> data;
+
+        String s;
+
+        List<Integer> expChars;
+
+        List<Integer> expCodePoints;
+
+        SpliteratorOfIntCharDataBuilder(List<Object[]> data, String s) {
+            this.data = data;
+            this.s = s;
+            this.expChars = transform(s, false);
+            this.expCodePoints = transform(s, true);
+        }
+
+        static List<Integer> transform(String s, boolean toCodePoints) {
+            List<Integer> l = new ArrayList<>();
+
+            if (!toCodePoints) {
+                for (int i = 0; i < s.length(); i++) {
+                    l.add((int) s.charAt(i));
+                }
+            }
+            else {
+                for (int i = 0; i < s.length();) {
+                    char c1 = s.charAt(i++);
+                    int cp = c1;
+                    if (Character.isHighSurrogate(c1) && i < s.length()) {
+                        char c2 = s.charAt(i);
+                        if (Character.isLowSurrogate(c2)) {
+                            i++;
+                            cp = Character.toCodePoint(c1, c2);
+                        }
+                    }
+                    l.add(cp);
+                }
+            }
+            return l;
+        }
+
+        void add(String description, Function<String, CharSequence> f) {
+            description = description.replace("%s", s);
+            {
+                Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).chars().spliterator();
+                data.add(new Object[]{description + ".chars().spliterator()", expChars, supplier});
+            }
+            {
+                Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).codePoints().spliterator();
+                data.add(new Object[]{description + ".codePoints().spliterator()", expCodePoints, supplier});
+            }
+        }
+    }
+
     static Object[][] spliteratorOfIntDataProvider;
 
     @DataProvider(name = "Spliterator.OfInt")
@@ -615,6 +700,43 @@
                    () -> new IntSpliteratorFromArray(exp));
         }
 
+        // Class for testing default methods
+        class CharSequenceImpl implements CharSequence {
+            final String s;
+
+            public CharSequenceImpl(String s) {
+                this.s = s;
+            }
+
+            @Override
+            public int length() {
+                return s.length();
+            }
+
+            @Override
+            public char charAt(int index) {
+                return s.charAt(index);
+            }
+
+            @Override
+            public CharSequence subSequence(int start, int end) {
+                return s.subSequence(start, end);
+            }
+
+            @Override
+            public String toString() {
+                return s;
+            }
+        }
+
+        for (String string : STRINGS) {
+            SpliteratorOfIntCharDataBuilder cdb = new SpliteratorOfIntCharDataBuilder(data, string);
+            cdb.add("\"%s\"", s -> s);
+            cdb.add("new CharSequenceImpl(\"%s\")", CharSequenceImpl::new);
+            cdb.add("new StringBuilder(\"%s\")", StringBuilder::new);
+            cdb.add("new StringBuffer(\"%s\")", StringBuffer::new);
+        }
+
         return spliteratorOfIntDataProvider = data.toArray(new Object[0][]);
     }