8223775: String::stripIndent (Preview)
authorjlaskey
Thu, 06 Jun 2019 12:24:44 -0300
changeset 55260 cc0f117f4405
parent 55259 61fff1345ee6
child 55261 dff30b1557ee
8223775: String::stripIndent (Preview) Reviewed-by: abuckley, vromero, jlahoda, bchristi, rriggs, smarks
src/java.base/share/classes/java/lang/String.java
src/java.base/share/classes/java/lang/StringLatin1.java
src/java.base/share/classes/java/lang/StringUTF16.java
test/jdk/java/lang/String/StripIndent.java
--- a/src/java.base/share/classes/java/lang/String.java	Thu Jun 06 08:11:25 2019 -0700
+++ b/src/java.base/share/classes/java/lang/String.java	Thu Jun 06 12:24:44 2019 -0300
@@ -36,6 +36,7 @@
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Formatter;
+import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
@@ -2797,11 +2798,6 @@
         return indexOfNonWhitespace() == length();
     }
 
-    private Stream<String> lines(int maxLeading, int maxTrailing) {
-        return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing)
-                          : StringUTF16.lines(value, maxLeading, maxTrailing);
-    }
-
     /**
      * Returns a stream of lines extracted from this string,
      * separated by line terminators.
@@ -2833,7 +2829,7 @@
      * @since 11
      */
     public Stream<String> lines() {
-        return lines(0, 0);
+        return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
     }
 
     /**
@@ -2873,12 +2869,10 @@
      * @since 12
      */
     public String indent(int n) {
-        return isEmpty() ? "" :  indent(n, false);
-    }
-
-    private String indent(int n, boolean removeBlanks) {
-        Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
-                                             : lines();
+        if (isEmpty()) {
+            return "";
+        }
+        Stream<String> stream = lines();
         if (n > 0) {
             final String spaces = " ".repeat(n);
             stream = stream.map(s -> spaces + s);
@@ -2901,6 +2895,123 @@
     }
 
     /**
+     * Returns a string whose value is this string, with incidental
+     * {@linkplain Character#isWhitespace(int) white space} removed from
+     * the beginning and end of every line.
+     * <p>
+     * Incidental {@linkplain Character#isWhitespace(int) white space}
+     * is often present in a text block to align the content with the opening
+     * delimiter. For example, in the following code, dots represent incidental
+     * {@linkplain Character#isWhitespace(int) white space}:
+     * <blockquote><pre>
+     * String html = """
+     * ..............&lt;html&gt;
+     * ..............    &lt;body&gt;
+     * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
+     * ..............    &lt;/body&gt;
+     * ..............&lt;/html&gt;
+     * ..............""";
+     * </pre></blockquote>
+     * This method treats the incidental
+     * {@linkplain Character#isWhitespace(int) white space} as indentation to be
+     * stripped, producing a string that preserves the relative indentation of
+     * the content. Using | to visualize the start of each line of the string:
+     * <blockquote><pre>
+     * |&lt;html&gt;
+     * |    &lt;body&gt;
+     * |        &lt;p&gt;Hello, world&lt;/p&gt;
+     * |    &lt;/body&gt;
+     * |&lt;/html&gt;
+     * </pre></blockquote>
+     * First, the individual lines of this string are extracted as if by using
+     * {@link String#lines()}.
+     * <p>
+     * Then, the <i>minimum indentation</i> (min) is determined as follows.
+     * For each non-blank line (as defined by {@link String#isBlank()}), the
+     * leading {@linkplain Character#isWhitespace(int) white space} characters are
+     * counted. The leading {@linkplain Character#isWhitespace(int) white space}
+     * characters on the last line are also counted even if
+     * {@linkplain String#isBlank() blank}. The <i>min</i> value is the smallest
+     * of these counts.
+     * <p>
+     * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading
+     * {@linkplain Character#isWhitespace(int) white space} characters are removed,
+     * and any trailing {@linkplain Character#isWhitespace(int) white space}
+     * characters are removed. {@linkplain String#isBlank() Blank} lines are
+     * replaced with the empty string.
+     *
+     * <p>
+     * Finally, the lines are joined into a new string, using the LF character
+     * {@code "\n"} (U+000A) to separate lines.
+     *
+     * @apiNote
+     * This method's primary purpose is to shift a block of lines as far as
+     * possible to the left, while preserving relative indentation. Lines
+     * that were indented the least will thus have no leading
+     * {@linkplain Character#isWhitespace(int) white space}.
+     * The line count of the result will be the same as line count of this
+     * string.
+     * If this string ends with a line terminator then the result will end
+     * with a line terminator.
+     *
+     * @implNote
+     * This method treats all {@linkplain Character#isWhitespace(int) white space}
+     * characters as having equal width. As long as the indentation on every
+     * line is consistently composed of the same character sequences, then the
+     * result will be as described above.
+     *
+     * @return string with incidental indentation removed and line
+     *         terminators normalized
+     *
+     * @see String#lines()
+     * @see String#isBlank()
+     * @see String#indent(int)
+     * @see Character#isWhitespace(int)
+     *
+     * @since 13
+     *
+     * @deprecated  This method is associated with text blocks, a preview language feature.
+     *              Text blocks and/or this method may be changed or removed in a future release.
+     */
+    @Deprecated(forRemoval=true, since="13")
+    public String stripIndent() {
+        int length = length();
+        if (length == 0) {
+            return "";
+        }
+        char lastChar = charAt(length - 1);
+        boolean optOut = lastChar == '\n' || lastChar == '\r';
+        List<String> lines = lines().collect(Collectors.toList());
+        final int outdent = optOut ? 0 : outdent(lines);
+        return lines.stream()
+            .map(line -> {
+                int firstNonWhitespace = line.indexOfNonWhitespace();
+                int lastNonWhitespace = line.lastIndexOfNonWhitespace();
+                int incidentalWhitespace = Math.min(outdent, firstNonWhitespace);
+                return firstNonWhitespace > lastNonWhitespace
+                    ? "" : line.substring(incidentalWhitespace, lastNonWhitespace);
+            })
+            .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
+    }
+
+    private static int outdent(List<String> lines) {
+        // Note: outdent is guaranteed to be zero or positive number.
+        // If there isn't a non-blank line then the last must be blank
+        int outdent = Integer.MAX_VALUE;
+        for (String line : lines) {
+            int leadingWhitespace = line.indexOfNonWhitespace();
+            if (leadingWhitespace != line.length()) {
+                outdent = Integer.min(outdent, leadingWhitespace);
+            }
+        }
+        String lastLine = lines.get(lines.size() - 1);
+        if (lastLine.isBlank()) {
+            outdent = Integer.min(outdent, lastLine.length());
+        }
+        return outdent;
+    }
+
+    /**
      * This method allows the application of a function to {@code this}
      * string. The function should expect a single String argument
      * and produce an {@code R} result.
--- a/src/java.base/share/classes/java/lang/StringLatin1.java	Thu Jun 06 08:11:25 2019 -0700
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java	Thu Jun 06 12:24:44 2019 -0300
@@ -732,76 +732,10 @@
         static LinesSpliterator spliterator(byte[] value) {
             return new LinesSpliterator(value, 0, value.length);
         }
-
-        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
-            int length = value.length;
-            int left = 0;
-            int index;
-            for (int l = 0; l < leading; l++) {
-                index = skipBlankForward(value, left, length);
-                if (index == left) {
-                    break;
-                }
-                left = index;
-            }
-            int right = length;
-            for (int t = 0; t < trailing; t++) {
-                index = skipBlankBackward(value, left, right);
-                if (index == right) {
-                    break;
-                }
-                right = index;
-            }
-            return new LinesSpliterator(value, left, right - left);
-        }
-
-        private static int skipBlankForward(byte[] value, int start, int length) {
-            int index = start;
-            while (index < length) {
-                char ch = getChar(value, index++);
-                if (ch == '\n') {
-                    return index;
-                }
-                if (ch == '\r') {
-                    if (index < length && getChar(value, index) == '\n') {
-                        return index + 1;
-                    }
-                    return index;
-                }
-                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
-                    return start;
-                }
-            }
-            return length;
-        }
-
-        private static int skipBlankBackward(byte[] value, int start, int fence) {
-            int index = fence;
-            if (start < index && getChar(value, index - 1) == '\n') {
-                index--;
-            }
-            if (start < index && getChar(value, index - 1) == '\r') {
-                index--;
-            }
-            while (start < index) {
-                char ch = getChar(value, --index);
-                if (ch == '\r' || ch == '\n') {
-                    return index + 1;
-                }
-                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
-                    return fence;
-                }
-            }
-            return start;
-        }
     }
 
-    static Stream<String> lines(byte[] value, int leading, int trailing) {
-        if (leading == 0 && trailing == 0) {
-            return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
-        } else {
-            return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
-        }
+    static Stream<String> lines(byte[] value) {
+        return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
     }
 
     public static void putChar(byte[] val, int index, int c) {
--- a/src/java.base/share/classes/java/lang/StringUTF16.java	Thu Jun 06 08:11:25 2019 -0700
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java	Thu Jun 06 12:24:44 2019 -0300
@@ -1119,76 +1119,10 @@
         static LinesSpliterator spliterator(byte[] value) {
             return new LinesSpliterator(value, 0, value.length >>> 1);
         }
-
-        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
-            int length = value.length >>> 1;
-            int left = 0;
-            int index;
-            for (int l = 0; l < leading; l++) {
-                index = skipBlankForward(value, left, length);
-                if (index == left) {
-                    break;
-                }
-                left = index;
-            }
-            int right = length;
-            for (int t = 0; t < trailing; t++) {
-                index = skipBlankBackward(value, left, right);
-                if (index == right) {
-                    break;
-                }
-                right = index;
-            }
-            return new LinesSpliterator(value, left, right - left);
-        }
-
-        private static int skipBlankForward(byte[] value, int start, int length) {
-            int index = start;
-            while (index < length) {
-                char ch = getChar(value, index++);
-                if (ch == '\n') {
-                    return index;
-                }
-                if (ch == '\r') {
-                    if (index < length && getChar(value, index) == '\n') {
-                        return index + 1;
-                    }
-                    return index;
-                }
-                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
-                    return start;
-                }
-            }
-            return length;
-        }
-
-        private static int skipBlankBackward(byte[] value, int start, int fence) {
-            int index = fence;
-            if (start < index && getChar(value, index - 1) == '\n') {
-                index--;
-            }
-            if (start < index && getChar(value, index - 1) == '\r') {
-                index--;
-            }
-            while (start < index) {
-                char ch = getChar(value, --index);
-                if (ch == '\r' || ch == '\n') {
-                    return index + 1;
-                }
-                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
-                    return fence;
-                }
-            }
-            return start;
-        }
     }
 
-    static Stream<String> lines(byte[] value, int leading, int trailing) {
-        if (leading == 0 && trailing == 0) {
-            return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
-        } else {
-            return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
-        }
+    static Stream<String> lines(byte[] value) {
+        return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
     }
 
     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/String/StripIndent.java	Thu Jun 06 12:24:44 2019 -0300
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8223775
+ * @summary This exercises String#stripIndent patterns and limits.
+ * @run main StripIndent
+ */
+
+public class StripIndent {
+    public static void main(String... arg) {
+        test1();
+    }
+
+    /*
+     * Case combinations.
+     */
+    static void test1() {
+        verify("", "");
+        verify("abc", "abc");
+        verify("   abc", "abc");
+        verify("abc   ", "abc");
+        verify("   abc\n   def\n   ", "abc\ndef\n");
+        verify("   abc\n   def\n", "   abc\n   def\n");
+        verify("   abc\n   def", "abc\ndef");
+        verify("   abc\n      def\n   ", "abc\n   def\n");
+    }
+
+    static void verify(String a, String b) {
+        if (!a.stripIndent().equals(b)) {
+            System.err.format("\"%s\" not equal \"%s\"%n", a, b);
+            throw new RuntimeException();
+        }
+    }
+}
+
+