8200377: String::strip, String::stripLeading, String::stripTrailing
author jlaskey
Mon, 14 May 2018 09:40:48 -0300
changeset 50098 92560438d306
parent 50097 ed8a43d83fcc
child 50099 b3e97e932e05
8200377: String::strip, String::stripLeading, String::stripTrailing Reviewed-by: sundar, rriggs
src/java.base/share/classes/java/lang/String.java
src/java.base/share/classes/java/lang/StringLatin1.java
src/java.base/share/classes/java/lang/StringUTF16.java
test/jdk/java/lang/String/Strip.java
--- a/src/java.base/share/classes/java/lang/String.java	Mon May 14 11:47:03 2018 +0200
+++ b/src/java.base/share/classes/java/lang/String.java	Mon May 14 09:40:48 2018 -0300
@@ -2602,7 +2602,7 @@
      * Returns a string whose value is this string, with all leading
      * and trailing space removed, where space is defined
      * as any character whose codepoint is less than or equal to
-     * {@code '\u005Cu0020'} (the space character).
+     * {@code 'U+0020'} (the space character).
      * <p>
      * If this {@code String} object represents an empty character
      * sequence, or the first and last characters of character sequence
@@ -2637,6 +2637,98 @@
     }
 
     /**
+     * Returns a string whose value is this string, with all leading
+     * and trailing {@link Character#isWhitespace(int) white space}
+     * removed.
+     * <p>
+     * If this {@code String} object represents an empty string,
+     * or if all code points in this string are
+     * {@link Character#isWhitespace(int) white space}, then an empty string
+     * is returned.
+     * <p>
+     * Otherwise, returns a substring of this string beginning with the first
+     * code point that is not a {@link Character#isWhitespace(int) white space}
+     * up to and including the last code point that is not a
+     * {@link Character#isWhitespace(int) white space}.
+     * <p>
+     * This method may be used to strip
+     * {@link Character#isWhitespace(int) white space} from
+     * the beginning and end of a string.
+     *
+     * @return  a string whose value is this string, with all leading
+     *          and trailing white space removed
+     *
+     * @see Character#isWhitespace(int)
+     *
+     * @since 11
+     */
+    public String strip() {
+        String ret = isLatin1() ? StringLatin1.strip(value)
+                                : StringUTF16.strip(value);
+        return ret == null ? this : ret;
+    }
+
+    /**
+     * Returns a string whose value is this string, with all leading
+     * {@link Character#isWhitespace(int) white space} removed.
+     * <p>
+     * If this {@code String} object represents an empty string,
+     * or if all code points in this string are
+     * {@link Character#isWhitespace(int) white space}, then an empty string
+     * is returned.
+     * <p>
+     * Otherwise, returns a substring of this string beginning with the first
+     * code point that is not a {@link Character#isWhitespace(int) white space}
+     * up to and including the last code point of this string.
+     * <p>
+     * This method may be used to trim
+     * {@link Character#isWhitespace(int) white space} from
+     * the beginning of a string.
+     *
+     * @return  a string whose value is this string, with all leading white
+     *          space removed
+     *
+     * @see Character#isWhitespace(int)
+     *
+     * @since 11
+     */
+    public String stripLeading() {
+        String ret = isLatin1() ? StringLatin1.stripLeading(value)
+                                : StringUTF16.stripLeading(value);
+        return ret == null ? this : ret;
+    }
+
+    /**
+     * Returns a string whose value is this string, with all trailing
+     * {@link Character#isWhitespace(int) white space} removed.
+     * <p>
+     * If this {@code String} object represents an empty string,
+     * or if all code points in this string are
+     * {@link Character#isWhitespace(int) white space}, then an empty string
+     * is returned.
+     * <p>
+     * Otherwise, returns a substring of this string beginning with the first
+     * code point of this string up to and including the last code point
+     * that is not a {@link Character#isWhitespace(int) white space}.
+     * <p>
+     * This method may be used to trim
+     * {@link Character#isWhitespace(int) white space} from
+     * the end of a string.
+     *
+     * @return  a string whose value is this string, with all trailing white
+     *          space removed
+     *
+     * @see Character#isWhitespace(int)
+     *
+     * @since 11
+     */
+    public String stripTrailing() {
+        String ret = isLatin1() ? StringLatin1.stripTrailing(value)
+                                : StringUTF16.stripTrailing(value);
+        return ret == null ? this : ret;
+    }
+
+    /**
      * This object (which is already a string!) is itself returned.
      *
      * @return  the string itself.
--- a/src/java.base/share/classes/java/lang/StringLatin1.java	Mon May 14 11:47:03 2018 +0200
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java	Mon May 14 09:40:48 2018 -0300
@@ -538,6 +538,57 @@
             newString(value, st, len - st) : null;
     }
 
+    public static int indexOfNonWhitespace(byte[] value) {
+        int length = value.length;
+        int left = 0;
+        while (left < length) {
+            char ch = (char)(value[left] & 0xff);
+            if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                break;
+            }
+            left++;
+        }
+        return left;
+    }
+
+    public static int lastIndexOfNonWhitespace(byte[] value) {
+        int length = value.length;
+        int right = length;
+        while (0 < right) {
+            char ch = (char)(value[right - 1] & 0xff);
+            if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                break;
+            }
+            right--;
+        }
+        return right;
+    }
+
+    public static String strip(byte[] value) {
+        int left = indexOfNonWhitespace(value);
+        if (left == value.length) {
+            return "";
+        }
+        int right = lastIndexOfNonWhitespace(value);
+        return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null;
+    }
+
+    public static String stripLeading(byte[] value) {
+        int left = indexOfNonWhitespace(value);
+        if (left == value.length) {
+            return "";
+        }
+        return (left != 0) ? newString(value, left, value.length - left) : null;
+    }
+
+    public static String stripTrailing(byte[] value) {
+        int right = lastIndexOfNonWhitespace(value);
+        if (right == 0) {
+            return "";
+        }
+        return (right != value.length) ? newString(value, 0, right) : null;
+    }
+
     public static void putChar(byte[] val, int index, int c) {
         //assert (canEncode(c));
         val[index] = (byte)(c);
--- a/src/java.base/share/classes/java/lang/StringUTF16.java	Mon May 14 11:47:03 2018 +0200
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java	Mon May 14 09:40:48 2018 -0300
@@ -856,6 +856,61 @@
             null;
     }
 
+
+    public static int indexOfNonWhitespace(byte[] value) {
+        int length = value.length >> 1;
+        int left = 0;
+        while (left < length) {
+            int codepoint = codePointAt(value, left, length);
+            if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) {
+                break;
+            }
+            left += Character.charCount(codepoint);
+        }
+        return left;
+    }
+
+    public static int lastIndexOfNonWhitespace(byte[] value) {
+        int length = value.length >> 1;
+        int right = length;
+        while (0 < right) {
+            int codepoint = codePointBefore(value, right);
+            if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) {
+                break;
+            }
+            right -= Character.charCount(codepoint);
+        }
+        return right;
+    }
+
+    public static String strip(byte[] value) {
+        int length = value.length >> 1;
+        int left = indexOfNonWhitespace(value);
+        if (left == length) {
+            return "";
+        }
+        int right = lastIndexOfNonWhitespace(value);
+        return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null;
+    }
+
+    public static String stripLeading(byte[] value) {
+        int length = value.length >> 1;
+        int left = indexOfNonWhitespace(value);
+        if (left == length) {
+            return "";
+        }
+        return (left != 0) ? newString(value, left, length - left) : null;
+    }
+
+    public static String stripTrailing(byte[] value) {
+        int length = value.length >> 1;
+        int right = lastIndexOfNonWhitespace(value);
+        if (right == 0) {
+            return "";
+        }
+        return (right != length) ? newString(value, 0, right) : null;
+    }
+
     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
         while (off < end) {
             putChar(val, index++, str[off++]);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/String/Strip.java	Mon May 14 09:40:48 2018 -0300
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * @test
+ * @summary Basic strip, stripLeading, stripTrailing functionality
+ * @bug 8200377
+ * @run main/othervm Strip
+ */
+
+public class Strip {
+   public static void main(String... arg) {
+        testStrip();
+        testWhitespace();
+    }
+
+    /*
+     * Test basic stripping routines
+     */
+    static void testStrip() {
+        equal("   abc   ".strip(), "abc");
+        equal("   abc   ".stripLeading(), "abc   ");
+        equal("   abc   ".stripTrailing(), "   abc");
+        equal("   abc\u2022   ".strip(), "abc\u2022");
+        equal("   abc\u2022   ".stripLeading(), "abc\u2022   ");
+        equal("   abc\u2022   ".stripTrailing(), "   abc\u2022");
+        equal("".strip(), "");
+        equal("".stripLeading(), "");
+        equal("".stripTrailing(), "");
+        equal("\b".strip(), "\b");
+        equal("\b".stripLeading(), "\b");
+        equal("\b".stripTrailing(), "\b");
+    }
+
+    /*
+     * Test full whitespace range
+     */
+    static void testWhitespace() {
+        StringBuilder sb = new StringBuilder(64);
+        IntStream.range(1, 0xFFFF).filter(c -> Character.isWhitespace(c))
+                .forEach(c -> sb.append((char)c));
+        String whiteSpace = sb.toString();
+
+        String testString = whiteSpace + "abc" + whiteSpace;
+        equal(testString.strip(), "abc");
+        equal(testString.stripLeading(), "abc"  + whiteSpace);
+        equal(testString.stripTrailing(), whiteSpace + "abc");
+    }
+
+    /*
+     * Report difference in result.
+     */
+    static void report(String message, String inputTag, String input,
+                       String outputTag, String output) {
+        System.err.println(message);
+        System.err.println();
+        System.err.println(inputTag);
+        System.err.println(input.codePoints()
+                .mapToObj(c -> (Integer)c)
+                .collect(Collectors.toList()));
+        System.err.println();
+        System.err.println(outputTag);
+        System.err.println(output.codePoints()
+                .mapToObj(c -> (Integer)c)
+                .collect(Collectors.toList()));
+        throw new RuntimeException();
+    }
+
+    /*
+     * Raise an exception if the two inputs are not equivalent.
+     */
+    static void equal(String input, String expected) {
+        if (input == null || expected == null || !expected.equals(input)) {
+            report("Failed equal", "Input:", input, "Expected:", expected);
+        }
+    }
+}