8071477: Better Spliterator implementations for String.chars() and String.codePoints()
Reviewed-by: sherman
--- a/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,9 @@
import sun.misc.FloatingDecimal;
import java.util.Arrays;
+import java.util.Spliterator;
+import java.util.stream.IntStream;
+import java.util.stream.StreamSupport;
/**
* A mutable sequence of characters.
@@ -292,7 +295,7 @@
if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) {
throw new IndexOutOfBoundsException();
}
- return Character.codePointCountImpl(value, beginIndex, endIndex-beginIndex);
+ return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
}
/**
@@ -1432,6 +1435,34 @@
public abstract String toString();
/**
+ * {@inheritDoc}
+ * @since 1.9
+ */
+ @Override
+ public IntStream chars() {
+ // Reuse the String-based spliterator. It is created through a supplier so
+ // that value and count are captured when the terminal operation is executed
+ return StreamSupport.intStream(
+ () -> new String.IntCharArraySpliterator(value, 0, count, 0), // range [0, count), no extra characteristics
+ Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED,
+ false); // false = sequential stream
+ }
+
+ /**
+ * {@inheritDoc}
+ * @since 1.9
+ */
+ @Override
+ public IntStream codePoints() {
+ // Reuse the String-based spliterator. It is created through a supplier so
+ // that value and count are captured when the terminal operation is executed
+ return StreamSupport.intStream(
+ () -> new String.CodePointsSpliterator(value, 0, count, 0), // range [0, count), no extra characteristics
+ Spliterator.ORDERED, // not SIZED: a surrogate pair yields a single element
+ false); // false = sequential stream
+ }
+
+ /**
* Needed by {@code String} for the contentEquals method.
*/
final char[] getValue() {
--- a/jdk/src/java.base/share/classes/java/lang/String.java Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/src/java.base/share/classes/java/lang/String.java Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1994, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,10 +34,14 @@
import java.util.Formatter;
import java.util.Locale;
import java.util.Objects;
+import java.util.Spliterator;
import java.util.StringJoiner;
+import java.util.function.IntConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import java.util.stream.IntStream;
+import java.util.stream.StreamSupport;
/**
* The {@code String} class represents character strings. All
@@ -2894,6 +2898,180 @@
return this;
}
+ static class IntCharArraySpliterator implements Spliterator.OfInt { // delivers each char of a char[] range as an int
+ private final char[] array;
+ private int index; // current index, modified on advance/split
+ private final int fence; // one past last index
+ private final int cs; // value returned by characteristics()
+
+ IntCharArraySpliterator(char[] array, int acs) { // covers the whole array
+ this(array, 0, array.length, acs);
+ }
+
+ IntCharArraySpliterator(char[] array, int origin, int fence, int acs) {
+ this.array = array;
+ this.index = origin;
+ this.fence = fence;
+ this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED // these three are always added to acs
+ | Spliterator.SUBSIZED;
+ }
+
+ @Override
+ public OfInt trySplit() {
+ int lo = index, mid = (lo + fence) >>> 1; // unsigned shift: midpoint stays correct even if lo + fence overflows
+ return (lo >= mid)
+ ? null // fewer than two elements left
+ : new IntCharArraySpliterator(array, lo, index = mid, cs); // prefix [lo, mid) is split off; this keeps [mid, fence)
+ }
+
+ @Override
+ public void forEachRemaining(IntConsumer action) {
+ char[] a; int i, hi; // hoist accesses and checks from loop
+ if (action == null)
+ throw new NullPointerException();
+ if ((a = array).length >= (hi = fence) &&
+ (i = index) >= 0 && i < (index = hi)) { // once the bounds checks pass, index is set to fence before traversing
+ do { action.accept(a[i]); } while (++i < hi);
+ }
+ }
+
+ @Override
+ public boolean tryAdvance(IntConsumer action) {
+ if (action == null)
+ throw new NullPointerException();
+ if (index >= 0 && index < fence) {
+ action.accept(array[index++]); // char is widened to int for the consumer
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public long estimateSize() { return (long)(fence - index); } // number of chars not yet traversed
+
+ @Override
+ public int characteristics() {
+ return cs;
+ }
+ }
+
+ /**
+ * Returns a stream of {@code int} zero-extending the {@code char} values
+ * from this sequence. Any char which maps to a <a
+ * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
+ * point</a> is passed through uninterpreted.
+ *
+ * @return an IntStream of char values from this sequence
+ * @since 1.9
+ */
+ @Override
+ public IntStream chars() {
+ return StreamSupport.intStream(
+ new IntCharArraySpliterator(value, Spliterator.IMMUTABLE), false); // whole value array; false = sequential stream
+ }
+
+ static class CodePointsSpliterator implements Spliterator.OfInt { // delivers the code points of a char[] range
+ private final char[] array;
+ private int index; // current index, modified on advance/split
+ private final int fence; // one past last index
+ private final int cs; // value returned by characteristics()
+
+ CodePointsSpliterator(char[] array, int acs) { // covers the whole array
+ this(array, 0, array.length, acs);
+ }
+
+ CodePointsSpliterator(char[] array, int origin, int fence, int acs) {
+ this.array = array;
+ this.index = origin;
+ this.fence = fence;
+ this.cs = acs | Spliterator.ORDERED; // ORDERED is always added; SIZED is not
+ }
+
+ @Override
+ public OfInt trySplit() {
+ int lo = index, mid = (lo + fence) >>> 1;
+ if (lo >= mid)
+ return null;
+
+ int midOneLess;
+ // If the mid-point falls between the high and low halves of a surrogate pair
+ if (Character.isLowSurrogate(array[mid]) &&
+ Character.isHighSurrogate(array[midOneLess = (mid -1)])) {
+ // The pair starts at lo, so no prefix can be split off in front of it
+ if (lo >= midOneLess)
+ return null;
+ // Move the split point back to the start of the pair so the pair stays whole
+ return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
+ }
+ return new CodePointsSpliterator(array, lo, index = mid, cs);
+ }
+
+ @Override
+ public void forEachRemaining(IntConsumer action) {
+ char[] a; int i, hi; // hoist accesses and checks from loop
+ if (action == null)
+ throw new NullPointerException();
+ if ((a = array).length >= (hi = fence) &&
+ (i = index) >= 0 && i < (index = hi)) { // once the bounds checks pass, index is set to fence before traversing
+ do {
+ i = advance(a, i, hi, action); // moves forward by one or two chars
+ } while (i < hi);
+ }
+ }
+
+ @Override
+ public boolean tryAdvance(IntConsumer action) {
+ if (action == null)
+ throw new NullPointerException();
+ if (index >= 0 && index < fence) {
+ index = advance(array, index, fence, action);
+ return true;
+ }
+ return false;
+ }
+
+ // Passes the code point starting at index i to the action and returns the
+ // index just past it; a pair is combined only if its low half lies before hi
+ private static int advance(char[] a, int i, int hi, IntConsumer action) {
+ char c1 = a[i++];
+ int cp = c1; // default: the char itself, zero-extended
+ if (Character.isHighSurrogate(c1) && i < hi) {
+ char c2 = a[i];
+ if (Character.isLowSurrogate(c2)) {
+ i++;
+ cp = Character.toCodePoint(c1, c2);
+ }
+ }
+ action.accept(cp);
+ return i;
+ }
+
+ @Override
+ public long estimateSize() { return (long)(fence - index); } // counts chars, so an upper bound on remaining code points
+
+ @Override
+ public int characteristics() {
+ return cs;
+ }
+ }
+
+ /**
+ * Returns a stream of code point values from this sequence. Any surrogate
+ * pairs encountered in the sequence are combined as if by {@linkplain
+ * Character#toCodePoint Character.toCodePoint} and the result is passed
+ * to the stream. Any other code units, including ordinary BMP characters,
+ * unpaired surrogates, and undefined code units, are zero-extended to
+ * {@code int} values which are then passed to the stream.
+ *
+ * @return an IntStream of Unicode code points from this sequence
+ * @since 1.9
+ */
+ @Override
+ public IntStream codePoints() {
+ return StreamSupport.intStream(
+ new CodePointsSpliterator(value, Spliterator.IMMUTABLE), false); // whole value array; false = sequential stream
+ }
+
/**
* Converts this string to a new character array.
*
--- a/jdk/test/TEST.groups Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/TEST.groups Mon Jan 26 17:26:49 2015 +0000
@@ -628,7 +628,6 @@
sun/net/www/protocol/http \
java/io/BufferedReader/Lines.java \
java/lang/reflect/DefaultStaticTest/DefaultStaticInvokeTest.java \
- java/lang/CharSequence/DefaultTest.java \
java/lang/IntegralPrimitiveToString.java \
java/lang/PrimitiveSumMinMaxTest.java \
java/lang/String/StringJoinTest.java \
--- a/jdk/test/java/lang/CharSequence/DefaultTest.java Mon Jan 26 17:06:00 2015 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.PrimitiveIterator;
-import java.util.Spliterator;
-import java.util.stream.Collectors;
-
-import org.testng.annotations.Test;
-
-import static org.testng.Assert.*;
-
-/*
- * @test
- * @summary Unit test for CharSequence default methods
- * @bug 8012665 8025002
- * @run testng DefaultTest
- */
-
-@Test(groups = "lib")
-public class DefaultTest {
-
- @Test(expectedExceptions = NoSuchElementException.class)
- public void testEmptyChars() {
- PrimitiveIterator.OfInt s = "".chars().iterator();
- assertFalse(s.hasNext());
- int ch = s.nextInt();
- }
-
- public void testSimpleChars() {
- List<Integer> list = "abc".chars().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList((int) 'a', (int) 'b', (int) 'c'));
- }
-
- public void testCodePointsCharacteristics() {
- Spliterator.OfInt s = "".codePoints().spliterator();
- assertFalse(s.hasCharacteristics(Spliterator.SIZED | Spliterator.SUBSIZED));
- assertTrue(s.hasCharacteristics(Spliterator.ORDERED));
- }
-
- @Test(expectedExceptions = NoSuchElementException.class)
- public void testEmptyCodePoints() {
- PrimitiveIterator.OfInt s = "".codePoints().iterator();
- assertFalse(s.hasNext());
- int cp = s.nextInt();
- }
-
- public void testSimpleCodePoints() {
- List<Integer> list = "abc".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList((int)'a', (int)'b', (int)'c'));
- }
-
- public void testUndefCodePoints() {
- List<Integer> list = "X\ufffeY".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList((int)'X', 0xFFFE, (int)'Y'));
- }
-
- public void testSurrogatePairing() {
- // U+1D11E = MUSICAL SYMBOL G CLEF
- // equivalent to surrogate pair U+D834 U+DD1E
- List<Integer> list;
- final int GCLEF = 0x1d11e;
-
- list = "\ud834\udd1e".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList(GCLEF));
- list = "A\ud834\udd1e".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList((int)'A', GCLEF));
- list = "\ud834\udd1eB".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList(GCLEF, (int)'B'));
- list = "X\ud834\udd1eY".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList((int)'X', GCLEF, (int)'Y'));
- }
-
- public void testUndefUnpaired() {
- List<Integer> list = "W\udd1eX\ud834Y\ufffeZ".codePoints().boxed().collect(Collectors.toList());
- assertEquals(list, Arrays.asList(
- (int)'W', 0xdd1e, (int)'X', 0xd834, (int)'Y', 0xfffe, (int)'Z'));
- }
-}
--- a/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
/**
* @test
- * @bug 8020156 8020009 8022326 8012913 8024405 8024408
+ * @bug 8020156 8020009 8022326 8012913 8024405 8024408 8071477
* @run testng SpliteratorCharacteristics
*/
@@ -59,6 +59,57 @@
@Test
public class SpliteratorCharacteristics {
+    public void testSpliteratorFromCharSequence() { // characteristics of chars()/codePoints() spliterators
+        class CharSequenceImpl implements CharSequence { // overrides no default method, so the defaults are used
+            final String s;
+
+            public CharSequenceImpl(String s) {
+                this.s = s;
+            }
+
+            @Override
+            public int length() {
+                return s.length();
+            }
+
+            @Override
+            public char charAt(int index) {
+                return s.charAt(index);
+            }
+
+            @Override
+            public CharSequence subSequence(int start, int end) {
+                return s.subSequence(start, end);
+            }
+
+            @Override
+            public String toString() {
+                return s;
+            }
+        }
+        // A String source is expected to report IMMUTABLE in addition to the common flags
+        CharSequence cs = "A";
+        Spliterator.OfInt s = cs.chars().spliterator();
+        assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED |
+                                 Spliterator.SIZED | Spliterator.SUBSIZED);
+        assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+        s = cs.codePoints().spliterator();
+        assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED);
+        assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+        // The other sources must be queried through the loop variable c, not the String cs
+        for (CharSequence c : Arrays.asList(new CharSequenceImpl("A"),
+                                            new StringBuilder("A"),
+                                            new StringBuffer("A"))) {
+            s = c.chars().spliterator();
+            assertCharacteristics(s, Spliterator.ORDERED |
+                                     Spliterator.SIZED | Spliterator.SUBSIZED);
+            assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+            s = c.codePoints().spliterator();
+            assertCharacteristics(s, Spliterator.ORDERED);
+            assertHasNotCharacteristics(s, Spliterator.CONCURRENT);
+        }
+    }
+
public void testSpliteratorFromCollection() {
List<Integer> l = Arrays.asList(1, 2, 3, 4);
--- a/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Mon Jan 26 17:06:00 2015 +0000
+++ b/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Mon Jan 26 17:26:49 2015 +0000
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
* @test
* @summary Spliterator traversing and splitting tests
* @run testng SpliteratorTraversingAndSplittingTest
- * @bug 8020016
+ * @bug 8020016 8071477
*/
import org.testng.annotations.DataProvider;
@@ -85,7 +85,38 @@
@Test
public class SpliteratorTraversingAndSplittingTest {
- private static List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000);
+ private static final List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000);
+
+ private static final String LOW = new String(new char[] {Character.MIN_LOW_SURROGATE}); // lone low surrogate
+ private static final String HIGH = new String(new char[] {Character.MIN_HIGH_SURROGATE}); // lone high surrogate
+ private static final String HIGH_LOW = HIGH + LOW; // one surrogate pair
+ private static final String CHAR_HIGH_LOW = "A" + HIGH_LOW; // BMP char followed by a pair
+ private static final String HIGH_LOW_CHAR = HIGH_LOW + "A"; // pair followed by a BMP char
+ private static final String CHAR_HIGH_LOW_CHAR = "A" + HIGH_LOW + "A"; // pair between two BMP chars
+
+ private static final List<String> STRINGS = generateTestStrings();
+
+ private static List<String> generateTestStrings() { // every fixture pattern repeated 1, 2, 3, 16 and 17 times
+ List<String> strings = new ArrayList<>();
+ for (int n : Arrays.asList(1, 2, 3, 16, 17)) {
+ strings.add(generate("A", n)); // BMP chars only
+ strings.add(generate(LOW, n)); // unpaired low surrogates
+ strings.add(generate(HIGH, n)); // unpaired high surrogates
+ strings.add(generate(HIGH_LOW, n)); // surrogate pairs only
+ strings.add(generate(CHAR_HIGH_LOW, n));
+ strings.add(generate(HIGH_LOW_CHAR, n));
+ strings.add(generate(CHAR_HIGH_LOW_CHAR, n));
+ }
+ return strings;
+ }
+
+ private static String generate(String s, int n) { // returns s concatenated n times ("" when n <= 0)
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < n; i++) {
+ sb.append(s);
+ }
+ return sb.toString();
+ }
private static class SpliteratorDataBuilder<T> {
List<Object[]> data;
@@ -564,6 +595,60 @@
}
}
+ private static class SpliteratorOfIntCharDataBuilder { // adds chars()/codePoints() rows for one test string
+ List<Object[]> data; // row list that add() appends to
+
+ String s; // the test string
+
+ List<Integer> expChars; // expected elements for chars()
+
+ List<Integer> expCodePoints; // expected elements for codePoints()
+
+ SpliteratorOfIntCharDataBuilder(List<Object[]> data, String s) {
+ this.data = data;
+ this.s = s;
+ this.expChars = transform(s, false);
+ this.expCodePoints = transform(s, true);
+ }
+
+ static List<Integer> transform(String s, boolean toCodePoints) { // s as a list of chars, or of code points
+ List<Integer> l = new ArrayList<>();
+
+ if (!toCodePoints) {
+ for (int i = 0; i < s.length(); i++) {
+ l.add((int) s.charAt(i));
+ }
+ }
+ else {
+ for (int i = 0; i < s.length();) { // i is advanced inside the body by one or two chars
+ char c1 = s.charAt(i++);
+ int cp = c1;
+ if (Character.isHighSurrogate(c1) && i < s.length()) {
+ char c2 = s.charAt(i);
+ if (Character.isLowSurrogate(c2)) { // only a high surrogate directly followed by a low one is combined
+ i++;
+ cp = Character.toCodePoint(c1, c2);
+ }
+ }
+ l.add(cp);
+ }
+ }
+ return l;
+ }
+
+ void add(String description, Function<String, CharSequence> f) { // f builds the CharSequence under test from s
+ description = description.replace("%s", s); // put the test string into the row label
+ {
+ Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).chars().spliterator(); // fresh source on every get()
+ data.add(new Object[]{description + ".chars().spliterator()", expChars, supplier});
+ }
+ {
+ Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).codePoints().spliterator(); // fresh source on every get()
+ data.add(new Object[]{description + ".codePoints().spliterator()", expCodePoints, supplier});
+ }
+ }
+ }
+
static Object[][] spliteratorOfIntDataProvider;
@DataProvider(name = "Spliterator.OfInt")
@@ -615,6 +700,43 @@
() -> new IntSpliteratorFromArray(exp));
}
+ // String-backed CharSequence that overrides no default method, so chars() and codePoints() are the CharSequence defaults
+ class CharSequenceImpl implements CharSequence {
+ final String s;
+
+ public CharSequenceImpl(String s) {
+ this.s = s;
+ }
+
+ @Override
+ public int length() {
+ return s.length();
+ }
+
+ @Override
+ public char charAt(int index) {
+ return s.charAt(index);
+ }
+
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ return s.subSequence(start, end);
+ }
+
+ @Override
+ public String toString() {
+ return s;
+ }
+ }
+
+ for (String string : STRINGS) { // each add() appends one chars() row and one codePoints() row
+ SpliteratorOfIntCharDataBuilder cdb = new SpliteratorOfIntCharDataBuilder(data, string);
+ cdb.add("\"%s\"", s -> s); // the String itself
+ cdb.add("new CharSequenceImpl(\"%s\")", CharSequenceImpl::new);
+ cdb.add("new StringBuilder(\"%s\")", StringBuilder::new);
+ cdb.add("new StringBuffer(\"%s\")", StringBuffer::new);
+ }
+
return spliteratorOfIntDataProvider = data.toArray(new Object[0][]);
}