# HG changeset patch # User psandoz # Date 1422293209 0 # Node ID 2245cc40bf5dccee3d968dfee84766d2aab416e9 # Parent 49cdfa0ea390cb0dbc33bb4f5a0785f182893afb 8071477: Better Spliterator implementations for String.chars() and String.codePoints() Reviewed-by: sherman diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java --- a/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Mon Jan 26 17:06:00 2015 +0000 +++ b/jdk/src/java.base/share/classes/java/lang/AbstractStringBuilder.java Mon Jan 26 17:26:49 2015 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,9 @@ import sun.misc.FloatingDecimal; import java.util.Arrays; +import java.util.Spliterator; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; /** * A mutable sequence of characters. @@ -292,7 +295,7 @@ if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) { throw new IndexOutOfBoundsException(); } - return Character.codePointCountImpl(value, beginIndex, endIndex-beginIndex); + return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex); } /** @@ -1432,6 +1435,34 @@ public abstract String toString(); /** + * {@inheritDoc} + * @since 1.9 + */ + @Override + public IntStream chars() { + // Reuse String-based spliterator.
This requires a supplier to + // capture the value and count when the terminal operation is executed + return StreamSupport.intStream( + () -> new String.IntCharArraySpliterator(value, 0, count, 0), /* origin 0, fence count, no extra characteristics */ + Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED, /* characteristics are passed separately when the spliterator comes from a supplier */ + false); /* parallel = false */ + } + + /** + * {@inheritDoc} + * @since 1.9 + */ + @Override + public IntStream codePoints() { + // Reuse String-based spliterator. This requires a supplier to + // capture the value and count when the terminal operation is executed + return StreamSupport.intStream( + () -> new String.CodePointsSpliterator(value, 0, count, 0), /* origin 0, fence count, no extra characteristics */ + Spliterator.ORDERED, /* no SIZED/SUBSIZED: a surrogate pair in value yields a single code point */ + false); /* parallel = false */ + } + + /** * Needed by {@code String} for the contentEquals method. */ final char[] getValue() { diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/src/java.base/share/classes/java/lang/String.java --- a/jdk/src/java.base/share/classes/java/lang/String.java Mon Jan 26 17:06:00 2015 +0000 +++ b/jdk/src/java.base/share/classes/java/lang/String.java Mon Jan 26 17:26:49 2015 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,10 +34,14 @@ import java.util.Formatter; import java.util.Locale; import java.util.Objects; +import java.util.Spliterator; import java.util.StringJoiner; +import java.util.function.IntConsumer; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; /** * The {@code String} class represents character strings.
All @@ -2894,6 +2898,180 @@ return this; } + static class IntCharArraySpliterator implements Spliterator.OfInt { + private final char[] array; + private int index; // current index, modified on advance/split + private final int fence; // one past last index + private final int cs; /* characteristic bits returned by characteristics() */ + + IntCharArraySpliterator(char[] array, int acs) { + this(array, 0, array.length, acs); + } + + IntCharArraySpliterator(char[] array, int origin, int fence, int acs) { + this.array = array; + this.index = origin; + this.fence = fence; + this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED + | Spliterator.SUBSIZED; + } + + @Override + public OfInt trySplit() { + int lo = index, mid = (lo + fence) >>> 1; + return (lo >= mid) + ? null + : new IntCharArraySpliterator(array, lo, index = mid, cs); /* returned spliterator covers [lo, mid); this one keeps [mid, fence) */ + } + + @Override + public void forEachRemaining(IntConsumer action) { + char[] a; int i, hi; // hoist accesses and checks from loop + if (action == null) + throw new NullPointerException(); + if ((a = array).length >= (hi = fence) && + (i = index) >= 0 && i < (index = hi)) { /* index = hi marks this spliterator as exhausted before the loop runs */ + do { action.accept(a[i]); } while (++i < hi); + } + } + + @Override + public boolean tryAdvance(IntConsumer action) { + if (action == null) + throw new NullPointerException(); + if (index >= 0 && index < fence) { + action.accept(array[index++]); + return true; + } + return false; + } + + @Override + public long estimateSize() { return (long)(fence - index); } + + @Override + public int characteristics() { + return cs; + } + } + + /** + * Returns a stream of {@code int} zero-extending the {@code char} values + * from this sequence. Any char which maps to a surrogate code + * point is passed through uninterpreted.
+ * + * @return an IntStream of char values from this sequence + * @since 1.9 + */ + @Override + public IntStream chars() { + return StreamSupport.intStream( + new IntCharArraySpliterator(value, Spliterator.IMMUTABLE), false); + } + + static class CodePointsSpliterator implements Spliterator.OfInt { + private final char[] array; + private int index; // current index, modified on advance/split + private final int fence; // one past last index + private final int cs; /* characteristic bits returned by characteristics() */ + + CodePointsSpliterator(char[] array, int acs) { + this(array, 0, array.length, acs); + } + + CodePointsSpliterator(char[] array, int origin, int fence, int acs) { + this.array = array; + this.index = origin; + this.fence = fence; + this.cs = acs | Spliterator.ORDERED; /* SIZED and SUBSIZED are not added here, unlike IntCharArraySpliterator */ + } + + @Override + public OfInt trySplit() { + int lo = index, mid = (lo + fence) >>> 1; + if (lo >= mid) + return null; + + int midOneLess; + // If the mid-point intersects a surrogate pair + if (Character.isLowSurrogate(array[mid]) && + Character.isHighSurrogate(array[midOneLess = (mid -1)])) { + // If there is only one pair it cannot be split + if (lo >= midOneLess) + return null; + // Shift the mid-point to align with the surrogate pair + return new CodePointsSpliterator(array, lo, index = midOneLess, cs); + } + return new CodePointsSpliterator(array, lo, index = mid, cs); + } + + @Override + public void forEachRemaining(IntConsumer action) { + char[] a; int i, hi; // hoist accesses and checks from loop + if (action == null) + throw new NullPointerException(); + if ((a = array).length >= (hi = fence) && + (i = index) >= 0 && i < (index = hi)) { + do { + i = advance(a, i, hi, action); + } while (i < hi); + } + } + + @Override + public boolean tryAdvance(IntConsumer action) { + if (action == null) + throw new NullPointerException(); + if (index >= 0 && index < fence) { + index = advance(array, index, fence, action); + return true; + } + return false; + } + + // Advance one code point from the index, i, and return the next + // index
to advance from + private static int advance(char[] a, int i, int hi, IntConsumer action) { + char c1 = a[i++]; + int cp = c1; + if (Character.isHighSurrogate(c1) && i < hi) { /* i < hi: a high surrogate at the last position has no partner and is emitted as-is */ + char c2 = a[i]; + if (Character.isLowSurrogate(c2)) { + i++; + cp = Character.toCodePoint(c1, c2); + } + } + action.accept(cp); + return i; + } + + @Override + public long estimateSize() { return (long)(fence - index); } /* remaining chars: an upper bound, since a surrogate pair yields one code point */ + + @Override + public int characteristics() { + return cs; + } + } + + /** + * Returns a stream of code point values from this sequence. Any surrogate + * pairs encountered in the sequence are combined as if by {@linkplain + * Character#toCodePoint Character.toCodePoint} and the result is passed + * to the stream. Any other code units, including ordinary BMP characters, + * unpaired surrogates, and undefined code units, are zero-extended to + * {@code int} values which are then passed to the stream. + * + * @return an IntStream of Unicode code points from this sequence + * @since 1.9 + */ + @Override + public IntStream codePoints() { + return StreamSupport.intStream( + new CodePointsSpliterator(value, Spliterator.IMMUTABLE), false); + } + /** * Converts this string to a new character array. * diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/test/TEST.groups --- a/jdk/test/TEST.groups Mon Jan 26 17:06:00 2015 +0000 +++ b/jdk/test/TEST.groups Mon Jan 26 17:26:49 2015 +0000 @@ -628,7 +628,6 @@ sun/net/www/protocol/http \ java/io/BufferedReader/Lines.java \ java/lang/reflect/DefaultStaticTest/DefaultStaticInvokeTest.java \ - java/lang/CharSequence/DefaultTest.java \ java/lang/IntegralPrimitiveToString.java \ java/lang/PrimitiveSumMinMaxTest.java \ java/lang/String/StringJoinTest.java \ diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/test/java/lang/CharSequence/DefaultTest.java --- a/jdk/test/java/lang/CharSequence/DefaultTest.java Mon Jan 26 17:06:00 2015 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2012, 2013, Oracle and/or its affiliates.
All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -import java.util.Arrays; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.PrimitiveIterator; -import java.util.Spliterator; -import java.util.stream.Collectors; - -import org.testng.annotations.Test; - -import static org.testng.Assert.*; - -/* - * @test - * @summary Unit test for CharSequence default methods - * @bug 8012665 8025002 - * @run testng DefaultTest - */ - -@Test(groups = "lib") -public class DefaultTest { - - @Test(expectedExceptions = NoSuchElementException.class) - public void testEmptyChars() { - PrimitiveIterator.OfInt s = "".chars().iterator(); - assertFalse(s.hasNext()); - int ch = s.nextInt(); - } - - public void testSimpleChars() { - List<Integer> list = "abc".chars().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList((int) 'a', (int) 'b', (int) 'c')); - } - - public void testCodePointsCharacteristics() { - Spliterator.OfInt s = "".codePoints().spliterator(); - assertFalse(s.hasCharacteristics(Spliterator.SIZED | Spliterator.SUBSIZED)); - assertTrue(s.hasCharacteristics(Spliterator.ORDERED)); - } - - @Test(expectedExceptions = NoSuchElementException.class) - public void testEmptyCodePoints() { - PrimitiveIterator.OfInt s = "".codePoints().iterator(); - assertFalse(s.hasNext()); - int cp = s.nextInt(); - } - - public void testSimpleCodePoints() { - List<Integer> list = "abc".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList((int)'a', (int)'b', (int)'c')); - } - - public void testUndefCodePoints() { - List<Integer> list = "X\ufffeY".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList((int)'X', 0xFFFE, (int)'Y')); - } - - public void testSurrogatePairing() { - // U+1D11E = MUSICAL SYMBOL G CLEF - // equivalent to surrogate pair U+D834 U+DD1E - List<Integer> list; - final int GCLEF = 0x1d11e; - - list = "\ud834\udd1e".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList(GCLEF)); - list =
"A\ud834\udd1e".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList((int)'A', GCLEF)); - list = "\ud834\udd1eB".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList(GCLEF, (int)'B')); - list = "X\ud834\udd1eY".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList((int)'X', GCLEF, (int)'Y')); - } - - public void testUndefUnpaired() { - List<Integer> list = "W\udd1eX\ud834Y\ufffeZ".codePoints().boxed().collect(Collectors.toList()); - assertEquals(list, Arrays.asList( - (int)'W', 0xdd1e, (int)'X', 0xd834, (int)'Y', 0xfffe, (int)'Z')); - } -} diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java --- a/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java Mon Jan 26 17:06:00 2015 +0000 +++ b/jdk/test/java/util/Spliterator/SpliteratorCharacteristics.java Mon Jan 26 17:26:49 2015 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -23,7 +23,7 @@ /** * @test - * @bug 8020156 8020009 8022326 8012913 8024405 8024408 + * @bug 8020156 8020009 8022326 8012913 8024405 8024408 8071477 * @run testng SpliteratorCharacteristics */ @@ -59,6 +59,57 @@ @Test public class SpliteratorCharacteristics { + public void testSpliteratorFromCharSequence() { /* checks characteristics of chars()/codePoints() spliterators for String and for other CharSequence implementations */ + class CharSequenceImpl implements CharSequence { /* minimal CharSequence that does not override chars()/codePoints() */ + final String s; + + public CharSequenceImpl(String s) { + this.s = s; + } + + @Override + public int length() { + return s.length(); + } + + @Override + public char charAt(int index) { + return s.charAt(index); + } + + @Override + public CharSequence subSequence(int start, int end) { + return s.subSequence(start, end); + } + + @Override + public String toString() { + return s; + } + } + + CharSequence cs = "A"; /* String case: additionally expected to report IMMUTABLE */ + Spliterator.OfInt s = cs.chars().spliterator(); + assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED | + Spliterator.SIZED | Spliterator.SUBSIZED); + assertHasNotCharacteristics(s, Spliterator.CONCURRENT); + s = cs.codePoints().spliterator(); + assertCharacteristics(s, Spliterator.IMMUTABLE | Spliterator.ORDERED); + assertHasNotCharacteristics(s, Spliterator.CONCURRENT); + + for (CharSequence c : Arrays.asList(new CharSequenceImpl("A"), + new StringBuilder("A"), + new StringBuffer("A"))) { + s = c.chars().spliterator(); /* use the loop variable c (not the String cs above) so each implementation is actually exercised */ + assertCharacteristics(s, Spliterator.ORDERED | + Spliterator.SIZED | Spliterator.SUBSIZED); + assertHasNotCharacteristics(s, Spliterator.CONCURRENT); + s = c.codePoints().spliterator(); /* likewise c, not cs */ + assertCharacteristics(s, Spliterator.ORDERED); + assertHasNotCharacteristics(s, Spliterator.CONCURRENT); + } + } + public void testSpliteratorFromCollection() { List<Integer> l = Arrays.asList(1, 2, 3, 4); diff -r 49cdfa0ea390 -r 2245cc40bf5d jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java --- a/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Mon Jan 26 17:06:00
2015 +0000 +++ b/jdk/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Mon Jan 26 17:26:49 2015 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @summary Spliterator traversing and splitting tests * @run testng SpliteratorTraversingAndSplittingTest - * @bug 8020016 + * @bug 8020016 8071477 */ import org.testng.annotations.DataProvider; @@ -85,7 +85,38 @@ @Test public class SpliteratorTraversingAndSplittingTest { - private static List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000); + private static final List<Integer> SIZES = Arrays.asList(0, 1, 10, 100, 1000); + + private static final String LOW = new String(new char[] {Character.MIN_LOW_SURROGATE}); + private static final String HIGH = new String(new char[] {Character.MIN_HIGH_SURROGATE}); + private static final String HIGH_LOW = HIGH + LOW; + private static final String CHAR_HIGH_LOW = "A" + HIGH_LOW; + private static final String HIGH_LOW_CHAR = HIGH_LOW + "A"; + private static final String CHAR_HIGH_LOW_CHAR = "A" + HIGH_LOW + "A"; + + private static final List<String> STRINGS = generateTestStrings(); /* repeated patterns mixing plain chars with paired and unpaired surrogates */ + + private static List<String> generateTestStrings() { + List<String> strings = new ArrayList<>(); + for (int n : Arrays.asList(1, 2, 3, 16, 17)) { + strings.add(generate("A", n)); + strings.add(generate(LOW, n)); + strings.add(generate(HIGH, n)); + strings.add(generate(HIGH_LOW, n)); + strings.add(generate(CHAR_HIGH_LOW, n)); + strings.add(generate(HIGH_LOW_CHAR, n)); + strings.add(generate(CHAR_HIGH_LOW_CHAR, n)); + } + return strings; + } + + private static String generate(String s, int n) { /* returns s repeated n times */ + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) { + sb.append(s); + } + return sb.toString(); + } private static class
SpliteratorDataBuilder<T> { List<Object[]> data; @@ -564,6 +595,60 @@ } } + private static class SpliteratorOfIntCharDataBuilder { + List<Object[]> data; + + String s; + + List<Integer> expChars; + + List<Integer> expCodePoints; + + SpliteratorOfIntCharDataBuilder(List<Object[]> data, String s) { + this.data = data; + this.s = s; + this.expChars = transform(s, false); + this.expCodePoints = transform(s, true); + } + + static List<Integer> transform(String s, boolean toCodePoints) { /* expected elements: each char as an int, or code points with valid surrogate pairs combined */ + List<Integer> l = new ArrayList<>(); + + if (!toCodePoints) { + for (int i = 0; i < s.length(); i++) { + l.add((int) s.charAt(i)); + } + } + else { + for (int i = 0; i < s.length();) { + char c1 = s.charAt(i++); + int cp = c1; + if (Character.isHighSurrogate(c1) && i < s.length()) { + char c2 = s.charAt(i); + if (Character.isLowSurrogate(c2)) { + i++; + cp = Character.toCodePoint(c1, c2); + } + } + l.add(cp); + } + } + return l; + } + + void add(String description, Function<String, CharSequence> f) { /* registers one chars() case and one codePoints() case for f.apply(s) */ + description = description.replace("%s", s); + { + Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).chars().spliterator(); + data.add(new Object[]{description + ".chars().spliterator()", expChars, supplier}); + } + { + Supplier<Spliterator.OfInt> supplier = () -> f.apply(s).codePoints().spliterator(); + data.add(new Object[]{description + ".codePoints().spliterator()", expCodePoints, supplier}); + } + } + } + static Object[][] spliteratorOfIntDataProvider; @DataProvider(name = "Spliterator.OfInt") @@ -615,6 +700,43 @@ () -> new IntSpliteratorFromArray(exp)); } + // Class for testing default methods + class CharSequenceImpl implements CharSequence { + final String s; + + public CharSequenceImpl(String s) { + this.s = s; + } + + @Override + public int length() { + return s.length(); + } + + @Override + public char charAt(int index) { + return s.charAt(index); + } + + @Override + public CharSequence subSequence(int start, int end) { + return s.subSequence(start, end); + } + + @Override + public String toString() { + return s; + } + } + + for (String string : STRINGS) { + SpliteratorOfIntCharDataBuilder cdb =
new SpliteratorOfIntCharDataBuilder(data, string); + cdb.add("\"%s\"", s -> s); + cdb.add("new CharSequenceImpl(\"%s\")", CharSequenceImpl::new); + cdb.add("new StringBuilder(\"%s\")", StringBuilder::new); + cdb.add("new StringBuffer(\"%s\")", StringBuffer::new); + } + return spliteratorOfIntDataProvider = data.toArray(new Object[0][]); }