8200434: String::align, String::indent
authorjlaskey
Wed, 12 Sep 2018 14:19:36 -0300
changeset 51714 975d3636a2f9
parent 51713 d424675a9743
child 51715 13a63d4a3f8d
8200434: String::align, String::indent Reviewed-by: abuckley, smarks, sherman, rriggs, jrose, sundar, igerasim, briangoetz, darcy, jjg
src/java.base/share/classes/java/lang/String.java
src/java.base/share/classes/java/lang/StringLatin1.java
src/java.base/share/classes/java/lang/StringUTF16.java
test/jdk/java/lang/String/AlignIndent.java
--- a/src/java.base/share/classes/java/lang/String.java	Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/String.java	Wed Sep 12 14:19:36 2018 -0300
@@ -40,12 +40,15 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import jdk.internal.HotSpotIntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
 
+import static java.util.function.Predicate.not;
+
 /**
  * The {@code String} class represents character strings. All
  * string literals in Java programs, such as {@code "abc"}, are
@@ -2755,12 +2758,9 @@
         return indexOfNonWhitespace() == length();
     }
 
-    private int indexOfNonWhitespace() {
-        if (isLatin1()) {
-            return StringLatin1.indexOfNonWhitespace(value);
-        } else {
-            return StringUTF16.indexOfNonWhitespace(value);
-        }
+    private Stream<String> lines(int maxLeading, int maxTrailing) {
+        return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing)
+                          : StringUTF16.lines(value, maxLeading, maxTrailing);
     }
 
     /**
@@ -2794,8 +2794,181 @@
      * @since 11
      */
     public Stream<String> lines() {
-        return isLatin1() ? StringLatin1.lines(value)
-                          : StringUTF16.lines(value);
+        return lines(0, 0);
+    }
+
+    /**
+     * Adjusts the indentation of each line of this string based on the value of
+     * {@code n}, and normalizes line termination characters.
+     * <p>
+     * This string is conceptually separated into lines using
+     * {@link String#lines()}. Each line is then adjusted as described below
+     * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
+     * lines are then concatenated and returned.
+     * <p>
+     * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
+     * beginning of each line. {@link String#isBlank() Blank lines} are
+     * unaffected.
+     * <p>
+     * If {@code n < 0} then up to {@code n}
+     * {@link Character#isWhitespace(int) white space characters} are removed
+     * from the beginning of each line. If a given line does not contain
+     * sufficient white space then all leading
+     * {@link Character#isWhitespace(int) white space characters} are removed.
+     * Each white space character is treated as a single character. In
+     * particular, the tab character {@code "\t"} (U+0009) is considered a
+     * single character; it is not expanded.
+     * <p>
+     * If {@code n == 0} then the line remains unchanged. However, line
+     * terminators are still normalized.
+     * <p>
+     *
+     * @param n  number of leading
+     *           {@link Character#isWhitespace(int) white space characters}
+     *           to add or remove
+     *
+     * @return string with indentation adjusted and line endings normalized
+     *
+     * @see String#lines()
+     * @see String#isBlank()
+     * @see Character#isWhitespace(int)
+     *
+     * @since 12
+     */
+    public String indent(int n) {
+        return isEmpty() ? "" :  indent(n, false);
+    }
+
+    private String indent(int n, boolean removeBlanks) {
+        Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
+                                             : lines();
+        if (n > 0) {
+            final String spaces = " ".repeat(n);
+            stream = stream.map(s -> s.isBlank() ? s : spaces + s);
+        } else if (n == Integer.MIN_VALUE) {
+            stream = stream.map(s -> s.stripLeading());
+        } else if (n < 0) {
+            stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
+        }
+        return stream.collect(Collectors.joining("\n", "", "\n"));
+    }
+
+    private int indexOfNonWhitespace() {
+        return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
+                          : StringUTF16.indexOfNonWhitespace(value);
+    }
+
+    private int lastIndexOfNonWhitespace() {
+        return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
+                          : StringUTF16.lastIndexOfNonWhitespace(value);
+    }
+
+    /**
+     * Removes vertical and horizontal white space margins from around the
+     * essential body of a multi-line string, while preserving relative
+     * indentation.
+     * <p>
+     * This string is first conceptually separated into lines as if by
+     * {@link String#lines()}.
+     * <p>
+     * Then, the <i>minimum indentation</i> (min) is determined as follows. For
+     * each non-blank line (as defined by {@link String#isBlank()}), the
+     * leading {@link Character#isWhitespace(int) white space} characters are
+     * counted. The <i>min</i> value is the smallest of these counts.
+     * <p>
+     * For each non-blank line, <i>min</i> leading white space characters are
+     * removed. Each white space character is treated as a single character. In
+     * particular, the tab character {@code "\t"} (U+0009) is considered a
+     * single character; it is not expanded.
+     * <p>
+     * Leading and trailing blank lines, if any, are removed. Trailing spaces are
+     * preserved.
+     * <p>
+     * Each line is suffixed with a line feed character {@code "\n"} (U+000A).
+     * <p>
+     * Finally, the lines are concatenated into a single string and returned.
+     *
+     * @apiNote
+     * This method's primary purpose is to shift a block of lines as far as
+     * possible to the left, while preserving relative indentation. Lines
+     * that were indented the least will thus have no leading white space.
+     *
+     * Example:
+     * <blockquote><pre>
+     * `
+     *      This is the first line
+     *          This is the second line
+     * `.align();
+     *
+     * returns
+     * This is the first line
+     *     This is the second line
+     * </pre></blockquote>
+     *
+     * @return string with margins removed and line terminators normalized
+     *
+     * @see String#lines()
+     * @see String#isBlank()
+     * @see String#indent(int)
+     * @see Character#isWhitespace(int)
+     *
+     * @since 12
+     */
+    public String align() {
+        return align(0);
+    }
+
+    /**
+     * Removes vertical and horizontal white space margins from around the
+     * essential body of a multi-line string, while preserving relative
+     * indentation and with optional indentation adjustment.
+     * <p>
+     * Invoking this method is equivalent to:
+     * <blockquote>
+     *  {@code this.align().indent(n)}
+     * </blockquote>
+     *
+     * @apiNote
+     * Examples:
+     * <blockquote><pre>
+     * `
+     *      This is the first line
+     *          This is the second line
+     * `.align(0);
+     *
+     * returns
+     * This is the first line
+     *     This is the second line
+     *
+     *
+     * `
+     *    This is the first line
+     *       This is the second line
+     * `.align(4);
+     * returns
+     *     This is the first line
+     *         This is the second line
+     * </pre></blockquote>
+     *
+     * @param n  number of leading white space characters
+     *           to add or remove
+     *
+     * @return string with margins removed, indentation adjusted and
+     *         line terminators normalized
+     *
+     * @see String#align()
+     *
+     * @since 12
+     */
+    public String align(int n) {
+        if (isEmpty()) {
+            return "";
+        }
+        int outdent = lines().filter(not(String::isBlank))
+                             .mapToInt(String::indexOfNonWhitespace)
+                             .min()
+                             .orElse(0);
+        return indent(n - outdent, true);
     }
 
     /**
--- a/src/java.base/share/classes/java/lang/StringLatin1.java	Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java	Wed Sep 12 14:19:36 2018 -0300
@@ -545,7 +545,7 @@
         int length = value.length;
         int left = 0;
         while (left < length) {
-            char ch = (char)(value[left] & 0xff);
+            char ch = getChar(value, left);
             if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
                 break;
             }
@@ -558,7 +558,7 @@
         int length = value.length;
         int right = length;
         while (0 < right) {
-            char ch = (char)(value[right - 1] & 0xff);
+            char ch = getChar(value, right - 1);
             if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
                 break;
             }
@@ -573,7 +573,8 @@
             return "";
         }
         int right = lastIndexOfNonWhitespace(value);
-        return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null;
+        boolean ifChanged = (left > 0) || (right < value.length);
+        return ifChanged ? newString(value, left, right - left) : null;
     }
 
     public static String stripLeading(byte[] value) {
@@ -597,11 +598,7 @@
         private int index;        // current index, modified on advance/split
         private final int fence;  // one past last index
 
-        LinesSpliterator(byte[] value) {
-            this(value, 0, value.length);
-        }
-
-        LinesSpliterator(byte[] value, int start, int length) {
+        private LinesSpliterator(byte[] value, int start, int length) {
             this.value = value;
             this.index = start;
             this.fence = start + length;
@@ -609,7 +606,7 @@
 
         private int indexOfLineSeparator(int start) {
             for (int current = start; current < fence; current++) {
-                byte ch = value[current];
+                char ch = getChar(value, current);
                 if (ch == '\n' || ch == '\r') {
                     return current;
                 }
@@ -619,9 +616,9 @@
 
         private int skipLineSeparator(int start) {
             if (start < fence) {
-                if (value[start] == '\r') {
+                if (getChar(value, start) == '\r') {
                     int next = start + 1;
-                    if (next < fence && value[next] == '\n') {
+                    if (next < fence && getChar(value, next) == '\n') {
                         return next + 1;
                     }
                 }
@@ -680,10 +677,80 @@
         public int characteristics() {
             return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
         }
+
+        static LinesSpliterator spliterator(byte[] value) {
+            return new LinesSpliterator(value, 0, value.length);
+        }
+
+        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
+            int length = value.length;
+            int left = 0;
+            int index;
+            for (int l = 0; l < leading; l++) {
+                index = skipBlankForward(value, left, length);
+                if (index == left) {
+                    break;
+                }
+                left = index;
+            }
+            int right = length;
+            for (int t = 0; t < trailing; t++) {
+                index = skipBlankBackward(value, left, right);
+                if (index == right) {
+                    break;
+                }
+                right = index;
+            }
+            return new LinesSpliterator(value, left, right - left);
+        }
+
+        private static int skipBlankForward(byte[] value, int start, int length) {
+            int index = start;
+            while (index < length) {
+                char ch = getChar(value, index++);
+                if (ch == '\n') {
+                    return index;
+                }
+                if (ch == '\r') {
+                    if (index < length && getChar(value, index) == '\n') {
+                        return index + 1;
+                    }
+                    return index;
+                }
+                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                    return start;
+                }
+            }
+            return length;
+        }
+
+        private static int skipBlankBackward(byte[] value, int start, int fence) {
+            int index = fence;
+            if (start < index && getChar(value, index - 1) == '\n') {
+                index--;
+            }
+            if (start < index && getChar(value, index - 1) == '\r') {
+                index--;
+            }
+            while (start < index) {
+                char ch = getChar(value, --index);
+                if (ch == '\r' || ch == '\n') {
+                    return index + 1;
+                }
+                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                    return fence;
+                }
+            }
+            return start;
+        }
     }
 
-    static Stream<String> lines(byte[] value) {
-        return StreamSupport.stream(new LinesSpliterator(value), false);
+    static Stream<String> lines(byte[] value, int leading, int trailing) {
+        if (leading == 0 && trailing == 0) {
+            return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
+        } else {
+            return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
+        }
     }
 
     public static void putChar(byte[] val, int index, int c) {
--- a/src/java.base/share/classes/java/lang/StringUTF16.java	Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java	Wed Sep 12 14:19:36 2018 -0300
@@ -859,7 +859,6 @@
             null;
     }
 
-
     public static int indexOfNonWhitespace(byte[] value) {
         int length = value.length >> 1;
         int left = 0;
@@ -874,7 +873,7 @@
     }
 
     public static int lastIndexOfNonWhitespace(byte[] value) {
-        int length = value.length >> 1;
+        int length = value.length >>> 1;
         int right = length;
         while (0 < right) {
             int codepoint = codePointBefore(value, right);
@@ -887,17 +886,18 @@
     }
 
     public static String strip(byte[] value) {
-        int length = value.length >> 1;
+        int length = value.length >>> 1;
         int left = indexOfNonWhitespace(value);
         if (left == length) {
             return "";
         }
         int right = lastIndexOfNonWhitespace(value);
-        return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null;
+        boolean ifChanged = (left > 0) || (right < length);
+        return ifChanged ? newString(value, left, right - left) : null;
     }
 
     public static String stripLeading(byte[] value) {
-        int length = value.length >> 1;
+        int length = value.length >>> 1;
         int left = indexOfNonWhitespace(value);
         if (left == length) {
             return "";
@@ -906,7 +906,7 @@
     }
 
     public static String stripTrailing(byte[] value) {
-        int length = value.length >> 1;
+        int length = value.length >>> 1;
         int right = lastIndexOfNonWhitespace(value);
         if (right == 0) {
             return "";
@@ -919,11 +919,7 @@
         private int index;        // current index, modified on advance/split
         private final int fence;  // one past last index
 
-        LinesSpliterator(byte[] value) {
-            this(value, 0, value.length >>> 1);
-        }
-
-        LinesSpliterator(byte[] value, int start, int length) {
+        private LinesSpliterator(byte[] value, int start, int length) {
             this.value = value;
             this.index = start;
             this.fence = start + length;
@@ -1002,10 +998,80 @@
         public int characteristics() {
             return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
         }
+
+        static LinesSpliterator spliterator(byte[] value) {
+            return new LinesSpliterator(value, 0, value.length >>> 1);
+        }
+
+        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
+            int length = value.length >>> 1;
+            int left = 0;
+            int index;
+            for (int l = 0; l < leading; l++) {
+                index = skipBlankForward(value, left, length);
+                if (index == left) {
+                    break;
+                }
+                left = index;
+            }
+            int right = length;
+            for (int t = 0; t < trailing; t++) {
+                index = skipBlankBackward(value, left, right);
+                if (index == right) {
+                    break;
+                }
+                right = index;
+            }
+            return new LinesSpliterator(value, left, right - left);
+        }
+
+        private static int skipBlankForward(byte[] value, int start, int length) {
+            int index = start;
+            while (index < length) {
+                char ch = getChar(value, index++);
+                if (ch == '\n') {
+                    return index;
+                }
+                if (ch == '\r') {
+                    if (index < length && getChar(value, index) == '\n') {
+                        return index + 1;
+                    }
+                    return index;
+                }
+                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                    return start;
+                }
+            }
+            return length;
+        }
+
+        private static int skipBlankBackward(byte[] value, int start, int fence) {
+            int index = fence;
+            if (start < index && getChar(value, index - 1) == '\n') {
+                index--;
+            }
+            if (start < index && getChar(value, index - 1) == '\r') {
+                index--;
+            }
+            while (start < index) {
+                char ch = getChar(value, --index);
+                if (ch == '\r' || ch == '\n') {
+                    return index + 1;
+                }
+                if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                    return fence;
+                }
+            }
+            return start;
+        }
     }
 
-    static Stream<String> lines(byte[] value) {
-        return StreamSupport.stream(new LinesSpliterator(value), false);
+    static Stream<String> lines(byte[] value, int leading, int trailing) {
+        if (leading == 0 && trailing == 0) {
+            return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
+        } else {
+            return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
+        }
     }
 
     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/String/AlignIndent.java	Wed Sep 12 14:19:36 2018 -0300
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Unit tests for String#align and String#indent
+ * @run main AlignIndent
+ */
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public class AlignIndent {
+    static final List<String> ENDS = List.of("", "\n", "   \n", "\n\n", "\n\n\n");
+    static final List<String> MIDDLES = List.of(
+            "",
+            "xyz",
+            "   xyz",
+            "      xyz",
+            "xyz   ",
+            "   xyz   ",
+            "      xyz   ",
+            "xyz\u2022",
+            "   xyz\u2022",
+            "xyz\u2022   ",
+            "   xyz\u2022   ",
+            "   // comment"
+    );
+
+    public static void main(String[] args) {
+        test1();
+        test2();
+        test3();
+    }
+
+    /*
+     * Test String#align() functionality.
+     */
+    static void test1() {
+        for (String prefix : ENDS) {
+            for (String suffix : ENDS) {
+                for (String middle : MIDDLES) {
+                    {
+                        String input = prefix + "   abc   \n" + middle + "\n   def   \n" + suffix;
+                        String output = input.align();
+
+                        String[] inLines = input.split("\\R");
+                        String[] outLines = output.split("\\R");
+
+                        String[] inLinesBody = getBody(inLines);
+
+                        if (inLinesBody.length < outLines.length) {
+                            report("String::align()", "Result has more lines than expected", input, output);
+                        } else if (inLinesBody.length > outLines.length) {
+                            report("String::align()", "Result has fewer lines than expected", input, output);
+                        }
+
+                        int indent = -1;
+                        for (int i = 0; i < inLinesBody.length; i++) {
+                            String in = inLinesBody[i];
+                            String out = outLines[i];
+                            if (!out.isBlank()) {
+                                int offset = in.indexOf(out);
+                                if (offset == -1) {
+                                    report("String::align()", "Portions of line are missing", input, output);
+                                }
+                                if (indent == -1) {
+                                    indent = offset;
+                                } else if (offset != indent) {
+                                    report("String::align()",
+                                            "Inconsistent indentation in result", input, output);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Test String#align(int n) functionality.
+     */
+    static void test2() {
+        for (int adjust : new int[] {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8}) {
+            for (String prefix : ENDS) {
+                for (String suffix : ENDS) {
+                    for (String middle : MIDDLES) {
+                        {
+                            String input = prefix + "   abc   \n" + middle + "\n   def   \n" + suffix;
+                            String output = input.align(adjust);
+                            String expected = input.align().indent(adjust);
+
+                            if (!output.equals(expected)) {
+                                report("String::align(int n)",
+                                        "Result inconsistent with align().indent(n)", expected, output);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Test String#indent(int n) functionality.
+     */
+    static void test3() {
+        for (int adjust : new int[] {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8}) {
+            for (String prefix : ENDS) {
+                for (String suffix : ENDS) {
+                    for (String middle : MIDDLES) {
+                        String input = prefix + "   abc   \n" + middle + "\n   def   \n" + suffix;
+                        String output = input.indent(adjust);
+
+                        Stream<String> stream = input.lines();
+                        if (adjust > 0) {
+                            final String spaces = " ".repeat(adjust);
+                            stream = stream.map(s -> s.isBlank() ? s : spaces + s);
+                        } else if (adjust < 0) {
+                            stream = stream.map(s -> s.substring(Math.min(-adjust, indexOfNonWhitespace(s))));
+                        }
+                        String expected = stream.collect(Collectors.joining("\n", "", "\n"));
+
+                        if (!output.equals(expected)) {
+                            report("String::indent(int n)",
+                                    "Result indentation not as expected", expected, output);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    public static int indexOfNonWhitespace(String s) {
+        int left = 0;
+        while (left < s.length()) {
+            char ch = s.charAt(left);
+            if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                break;
+            }
+            left++;
+        }
+        return left;
+    }
+
+
+    private static String[] getBody(String[] inLines) {
+        int from = -1, to = -1;
+        for (int i = 0; i < inLines.length; i++) {
+            String line = inLines[i];
+            if (!line.isBlank()) {
+                if (from == -1) {
+                    from = i;
+                }
+                to = i + 1;
+            }
+        }
+        return Arrays.copyOfRange(inLines, from, to);
+    }
+
+    /*
+     * Report difference in result.
+     */
+    static void report(String test, String message, String input, String output) {
+        System.err.println("Testing " + test + ": " + message);
+        System.err.println();
+        System.err.println("Input: length = " + input.length());
+        System.err.println("_".repeat(40));
+        System.err.print(input.replaceAll(" ", "."));
+        System.err.println("_".repeat(40));
+        System.err.println();
+        System.err.println("Output: length = " + output.length());
+        System.err.println("_".repeat(40));
+        System.err.print(output.replaceAll(" ", "."));
+        System.err.println("_".repeat(40));
+        throw new RuntimeException();
+    }
+}