# HG changeset patch # User jlaskey # Date 1526301648 10800 # Node ID 92560438d306e2a3084f1dce917d0f9af5796aa9 # Parent ed8a43d83fcce09e7f9820bcab17a732c27d5ea4 8200377: String::strip, String::stripLeading, String::stripTrailing Reviewed-by: sundar, rriggs diff -r ed8a43d83fcc -r 92560438d306 src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java Mon May 14 11:47:03 2018 +0200 +++ b/src/java.base/share/classes/java/lang/String.java Mon May 14 09:40:48 2018 -0300 @@ -2602,7 +2602,7 @@ * Returns a string whose value is this string, with all leading * and trailing space removed, where space is defined * as any character whose codepoint is less than or equal to - * {@code '\u005Cu0020'} (the space character). + * {@code 'U+0020'} (the space character). *

* If this {@code String} object represents an empty character * sequence, or the first and last characters of character sequence @@ -2637,6 +2637,98 @@ } /** + * Returns a string whose value is this string, with all leading + * and trailing {@link Character#isWhitespace(int) white space} + * removed. + *

+ * If this {@code String} object represents an empty string, + * or if all code points in this string are + * {@link Character#isWhitespace(int) white space}, then an empty string + * is returned. + *

+ * Otherwise, returns a substring of this string beginning with the first + * code point that is not a {@link Character#isWhitespace(int) white space} + * up to and including the last code point that is not a + * {@link Character#isWhitespace(int) white space}. + *

+ * This method may be used to strip + * {@link Character#isWhitespace(int) white space} from + * the beginning and end of a string. + * + * @return a string whose value is this string, with all leading + * and trailing white space removed + * + * @see Character#isWhitespace(int) + * + * @since 11 + */ + public String strip() { + String ret = isLatin1() ? StringLatin1.strip(value) + : StringUTF16.strip(value); + return ret == null ? this : ret; + } + + /** + * Returns a string whose value is this string, with all leading + * {@link Character#isWhitespace(int) white space} removed. + *

+ * If this {@code String} object represents an empty string, + * or if all code points in this string are + * {@link Character#isWhitespace(int) white space}, then an empty string + * is returned. + *

+ * Otherwise, returns a substring of this string beginning with the first + * code point that is not a {@link Character#isWhitespace(int) white space} + * up to and including the last code point of this string. + *

+ * This method may be used to trim + * {@link Character#isWhitespace(int) white space} from + * the beginning of a string. + * + * @return a string whose value is this string, with all leading white + * space removed + * + * @see Character#isWhitespace(int) + * + * @since 11 + */ + public String stripLeading() { + String ret = isLatin1() ? StringLatin1.stripLeading(value) + : StringUTF16.stripLeading(value); + return ret == null ? this : ret; + } + + /** + * Returns a string whose value is this string, with all trailing + * {@link Character#isWhitespace(int) white space} removed. + *

+ * If this {@code String} object represents an empty string, + * or if all code points in this string are + * {@link Character#isWhitespace(int) white space}, then an empty string + * is returned. + *

+ * Otherwise, returns a substring of this string beginning with the first + * code point of this string up to and including the last code point + * that is not a {@link Character#isWhitespace(int) white space}. + *

+ * This method may be used to trim + * {@link Character#isWhitespace(int) white space} from + * the end of a string. + * + * @return a string whose value is this string, with all trailing white + * space removed + * + * @see Character#isWhitespace(int) + * + * @since 11 + */ + public String stripTrailing() { + String ret = isLatin1() ? StringLatin1.stripTrailing(value) + : StringUTF16.stripTrailing(value); + return ret == null ? this : ret; + } + + /** * This object (which is already a string!) is itself returned. * * @return the string itself. diff -r ed8a43d83fcc -r 92560438d306 src/java.base/share/classes/java/lang/StringLatin1.java --- a/src/java.base/share/classes/java/lang/StringLatin1.java Mon May 14 11:47:03 2018 +0200 +++ b/src/java.base/share/classes/java/lang/StringLatin1.java Mon May 14 09:40:48 2018 -0300 @@ -538,6 +538,57 @@ newString(value, st, len - st) : null; } + public static int indexOfNonWhitespace(byte[] value) { + int length = value.length; + int left = 0; + while (left < length) { + char ch = (char)(value[left] & 0xff); + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + break; + } + left++; + } + return left; + } + + public static int lastIndexOfNonWhitespace(byte[] value) { + int length = value.length; + int right = length; + while (0 < right) { + char ch = (char)(value[right - 1] & 0xff); + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + break; + } + right--; + } + return right; + } + + public static String strip(byte[] value) { + int left = indexOfNonWhitespace(value); + if (left == value.length) { + return ""; + } + int right = lastIndexOfNonWhitespace(value); + return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null; + } + + public static String stripLeading(byte[] value) { + int left = indexOfNonWhitespace(value); + if (left == value.length) { + return ""; + } + return (left != 0) ? 
newString(value, left, value.length - left) : null; + } + + public static String stripTrailing(byte[] value) { + int right = lastIndexOfNonWhitespace(value); + if (right == 0) { + return ""; + } + return (right != value.length) ? newString(value, 0, right) : null; + } + public static void putChar(byte[] val, int index, int c) { //assert (canEncode(c)); val[index] = (byte)(c); diff -r ed8a43d83fcc -r 92560438d306 src/java.base/share/classes/java/lang/StringUTF16.java --- a/src/java.base/share/classes/java/lang/StringUTF16.java Mon May 14 11:47:03 2018 +0200 +++ b/src/java.base/share/classes/java/lang/StringUTF16.java Mon May 14 09:40:48 2018 -0300 @@ -856,6 +856,61 @@ null; } + + public static int indexOfNonWhitespace(byte[] value) { + int length = value.length >> 1; + int left = 0; + while (left < length) { + int codepoint = codePointAt(value, left, length); + if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) { + break; + } + left += Character.charCount(codepoint); + } + return left; + } + + public static int lastIndexOfNonWhitespace(byte[] value) { + int length = value.length >> 1; + int right = length; + while (0 < right) { + int codepoint = codePointBefore(value, right); + if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) { + break; + } + right -= Character.charCount(codepoint); + } + return right; + } + + public static String strip(byte[] value) { + int length = value.length >> 1; + int left = indexOfNonWhitespace(value); + if (left == length) { + return ""; + } + int right = lastIndexOfNonWhitespace(value); + return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null; + } + + public static String stripLeading(byte[] value) { + int length = value.length >> 1; + int left = indexOfNonWhitespace(value); + if (left == length) { + return ""; + } + return (left != 0) ? 
newString(value, left, length - left) : null; + } + + public static String stripTrailing(byte[] value) { + int length = value.length >> 1; + int right = lastIndexOfNonWhitespace(value); + if (right == 0) { + return ""; + } + return (right != length) ? newString(value, 0, right) : null; + } + private static void putChars(byte[] val, int index, char[] str, int off, int end) { while (off < end) { putChar(val, index++, str[off++]); diff -r ed8a43d83fcc -r 92560438d306 test/jdk/java/lang/String/Strip.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/jdk/java/lang/String/Strip.java Mon May 14 09:40:48 2018 -0300 @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * @test + * @summary Basic strip, stripLeading, stripTrailing functionality + * @bug 8200377 + * @run main/othervm Strip + */ + +public class Strip { + public static void main(String... arg) { + testStrip(); + testWhitespace(); + } + + /* + * Test basic stripping routines + */ + static void testStrip() { + equal(" abc ".strip(), "abc"); + equal(" abc ".stripLeading(), "abc "); + equal(" abc ".stripTrailing(), " abc"); + equal(" abc\u2022 ".strip(), "abc\u2022"); + equal(" abc\u2022 ".stripLeading(), "abc\u2022 "); + equal(" abc\u2022 ".stripTrailing(), " abc\u2022"); + equal("".strip(), ""); + equal("".stripLeading(), ""); + equal("".stripTrailing(), ""); + equal("\b".strip(), "\b"); + equal("\b".stripLeading(), "\b"); + equal("\b".stripTrailing(), "\b"); + } + + /* + * Test full whitespace range + */ + static void testWhitespace() { + StringBuilder sb = new StringBuilder(64); + IntStream.range(1, 0xFFFF).filter(c -> Character.isWhitespace(c)) + .forEach(c -> sb.append((char)c)); + String whiteSpace = sb.toString(); + + String testString = whiteSpace + "abc" + whiteSpace; + equal(testString.strip(), "abc"); + equal(testString.stripLeading(), "abc" + whiteSpace); + equal(testString.stripTrailing(), whiteSpace + "abc"); + } + + /* + * Report difference in result. + */ + static void report(String message, String inputTag, String input, + String outputTag, String output) { + System.err.println(message); + System.err.println(); + System.err.println(inputTag); + System.err.println(input.codePoints() + .mapToObj(c -> (Integer)c) + .collect(Collectors.toList())); + System.err.println(); + System.err.println(outputTag); + System.err.println(output.codePoints() + .mapToObj(c -> (Integer)c) + .collect(Collectors.toList())); + throw new RuntimeException(); + } + + /* + * Raise an exception if the two inputs are not equivalent. 
+ */ + static void equal(String input, String expected) { + if (input == null || expected == null || !expected.equals(input)) { + report("Failed equal", "Input:", input, "Expected:", expected); + } + } +}