# HG changeset patch # User jlaskey # Date 1536772776 10800 # Node ID 975d3636a2f994baf9937f35db4e5d7b4b956c95 # Parent d424675a97433c426bf1aee97192989a3359d10f 8200434: String::align, String::indent Reviewed-by: abuckley, smarks, sherman, rriggs, jrose, sundar, igerasim, briangoetz, darcy, jjg diff -r d424675a9743 -r 975d3636a2f9 src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java Wed Sep 12 14:19:36 2018 -0300 +++ b/src/java.base/share/classes/java/lang/String.java Wed Sep 12 14:19:36 2018 -0300 @@ -40,12 +40,15 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.stream.StreamSupport; import jdk.internal.HotSpotIntrinsicCandidate; import jdk.internal.vm.annotation.Stable; +import static java.util.function.Predicate.not; + /** * The {@code String} class represents character strings. All * string literals in Java programs, such as {@code "abc"}, are @@ -2755,12 +2758,9 @@ return indexOfNonWhitespace() == length(); } - private int indexOfNonWhitespace() { - if (isLatin1()) { - return StringLatin1.indexOfNonWhitespace(value); - } else { - return StringUTF16.indexOfNonWhitespace(value); - } + private Stream lines(int maxLeading, int maxTrailing) { + return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing) + : StringUTF16.lines(value, maxLeading, maxTrailing); } /** @@ -2794,8 +2794,181 @@ * @since 11 */ public Stream lines() { - return isLatin1() ? StringLatin1.lines(value) - : StringUTF16.lines(value); + return lines(0, 0); + } + + /** + * Adjusts the indentation of each line of this string based on the value of + * {@code n}, and normalizes line termination characters. + *

+ * This string is conceptually separated into lines using + * {@link String#lines()}. Each line is then adjusted as described below + * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting + * lines are then concatenated and returned. + *

+ * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the + * beginning of each line. {@link String#isBlank() Blank lines} are + * unaffected. + *

+ * If {@code n < 0} then up to {@code -n} + * {@link Character#isWhitespace(int) white space characters} are removed + * from the beginning of each line. If a given line does not contain + * sufficient white space then all leading + * {@link Character#isWhitespace(int) white space characters} are removed. + * Each white space character is treated as a single character. In + * particular, the tab character {@code "\t"} (U+0009) is considered a + * single character; it is not expanded. + *

+ * If {@code n == 0} then the line remains unchanged. However, line + * terminators are still normalized. + *

+ * + * @param n number of leading + * {@link Character#isWhitespace(int) white space characters} + * to add or remove + * + * @return string with indentation adjusted and line endings normalized + * + * @see String#lines() + * @see String#isBlank() + * @see Character#isWhitespace(int) + * + * @since 12 + */ + public String indent(int n) { + return isEmpty() ? "" : indent(n, false); + } + + private String indent(int n, boolean removeBlanks) { + Stream stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE) + : lines(); + if (n > 0) { + final String spaces = " ".repeat(n); + stream = stream.map(s -> s.isBlank() ? s : spaces + s); + } else if (n == Integer.MIN_VALUE) { + stream = stream.map(s -> s.stripLeading()); + } else if (n < 0) { + stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace()))); + } + return stream.collect(Collectors.joining("\n", "", "\n")); + } + + private int indexOfNonWhitespace() { + return isLatin1() ? StringLatin1.indexOfNonWhitespace(value) + : StringUTF16.indexOfNonWhitespace(value); + } + + private int lastIndexOfNonWhitespace() { + return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value) + : StringUTF16.lastIndexOfNonWhitespace(value); + } + + /** + * Removes vertical and horizontal white space margins from around the + * essential body of a multi-line string, while preserving relative + * indentation. + *

+ * This string is first conceptually separated into lines as if by + * {@link String#lines()}. + *

+ * Then, the minimum indentation (min) is determined as follows. For + * each non-blank line (as defined by {@link String#isBlank()}), the + * leading {@link Character#isWhitespace(int) white space} characters are + * counted. The min value is the smallest of these counts. + *

+ * For each non-blank line, min leading white space characters are + * removed. Each white space character is treated as a single character. In + * particular, the tab character {@code "\t"} (U+0009) is considered a + * single character; it is not expanded. + *

+ * Leading and trailing blank lines, if any, are removed. Trailing white space + * on the remaining lines is preserved. + *

+ * Each line is suffixed with a line feed character {@code "\n"} (U+000A). + *

+ * Finally, the lines are concatenated into a single string and returned. + * + * @apiNote + * This method's primary purpose is to shift a block of lines as far as + * possible to the left, while preserving relative indentation. Lines + * that were indented the least will thus have no leading white space. + * + * Example: + *

+     * `
+     *      This is the first line
+     *          This is the second line
+     * `.align();
+     *
+     * returns
+     * This is the first line
+     *     This is the second line
+     * 
+ * + * @return string with margins removed and line terminators normalized + * + * @see String#lines() + * @see String#isBlank() + * @see String#indent(int) + * @see Character#isWhitespace(int) + * + * @since 12 + */ + public String align() { + return align(0); + } + + /** + * Removes vertical and horizontal white space margins from around the + * essential body of a multi-line string, while preserving relative + * indentation and with optional indentation adjustment. + *

+ * Invoking this method is equivalent to: + *

+ * {@code this.align().indent(n)} + *
+ * + * @apiNote + * Examples: + *
+     * `
+     *      This is the first line
+     *          This is the second line
+     * `.align(0);
+     *
+     * returns
+     * This is the first line
+     *     This is the second line
+     *
+     *
+     * `
+     *    This is the first line
+     *       This is the second line
+     * `.align(4);
+     * returns
+     *     This is the first line
+     *         This is the second line
+     * 
+ * + * @param n number of leading white space characters + * to add or remove + * + * @return string with margins removed, indentation adjusted and + * line terminators normalized + * + * @see String#align() + * + * @since 12 + */ + public String align(int n) { + if (isEmpty()) { + return ""; + } + int outdent = lines().filter(not(String::isBlank)) + .mapToInt(String::indexOfNonWhitespace) + .min() + .orElse(0); + return indent(n - outdent, true); } /** diff -r d424675a9743 -r 975d3636a2f9 src/java.base/share/classes/java/lang/StringLatin1.java --- a/src/java.base/share/classes/java/lang/StringLatin1.java Wed Sep 12 14:19:36 2018 -0300 +++ b/src/java.base/share/classes/java/lang/StringLatin1.java Wed Sep 12 14:19:36 2018 -0300 @@ -545,7 +545,7 @@ int length = value.length; int left = 0; while (left < length) { - char ch = (char)(value[left] & 0xff); + char ch = getChar(value, left); if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { break; } @@ -558,7 +558,7 @@ int length = value.length; int right = length; while (0 < right) { - char ch = (char)(value[right - 1] & 0xff); + char ch = getChar(value, right - 1); if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { break; } @@ -573,7 +573,8 @@ return ""; } int right = lastIndexOfNonWhitespace(value); - return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null; + boolean ifChanged = (left > 0) || (right < value.length); + return ifChanged ? 
newString(value, left, right - left) : null; } public static String stripLeading(byte[] value) { @@ -597,11 +598,7 @@ private int index; // current index, modified on advance/split private final int fence; // one past last index - LinesSpliterator(byte[] value) { - this(value, 0, value.length); - } - - LinesSpliterator(byte[] value, int start, int length) { + private LinesSpliterator(byte[] value, int start, int length) { this.value = value; this.index = start; this.fence = start + length; @@ -609,7 +606,7 @@ private int indexOfLineSeparator(int start) { for (int current = start; current < fence; current++) { - byte ch = value[current]; + char ch = getChar(value, current); if (ch == '\n' || ch == '\r') { return current; } @@ -619,9 +616,9 @@ private int skipLineSeparator(int start) { if (start < fence) { - if (value[start] == '\r') { + if (getChar(value, start) == '\r') { int next = start + 1; - if (next < fence && value[next] == '\n') { + if (next < fence && getChar(value, next) == '\n') { return next + 1; } } @@ -680,10 +677,80 @@ public int characteristics() { return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL; } + + static LinesSpliterator spliterator(byte[] value) { + return new LinesSpliterator(value, 0, value.length); + } + + static LinesSpliterator spliterator(byte[] value, int leading, int trailing) { + int length = value.length; + int left = 0; + int index; + for (int l = 0; l < leading; l++) { + index = skipBlankForward(value, left, length); + if (index == left) { + break; + } + left = index; + } + int right = length; + for (int t = 0; t < trailing; t++) { + index = skipBlankBackward(value, left, right); + if (index == right) { + break; + } + right = index; + } + return new LinesSpliterator(value, left, right - left); + } + + private static int skipBlankForward(byte[] value, int start, int length) { + int index = start; + while (index < length) { + char ch = getChar(value, index++); + if (ch == '\n') { + return index; + } + if (ch 
== '\r') { + if (index < length && getChar(value, index) == '\n') { + return index + 1; + } + return index; + } + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + return start; + } + } + return length; + } + + private static int skipBlankBackward(byte[] value, int start, int fence) { + int index = fence; + if (start < index && getChar(value, index - 1) == '\n') { + index--; + } + if (start < index && getChar(value, index - 1) == '\r') { + index--; + } + while (start < index) { + char ch = getChar(value, --index); + if (ch == '\r' || ch == '\n') { + return index + 1; + } + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + return fence; + } + } + return start; + } } - static Stream lines(byte[] value) { - return StreamSupport.stream(new LinesSpliterator(value), false); + static Stream lines(byte[] value, int leading, int trailing) { + if (leading == 0 && trailing == 0) { + return StreamSupport.stream(LinesSpliterator.spliterator(value), false); + } else { + return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false); + } } public static void putChar(byte[] val, int index, int c) { diff -r d424675a9743 -r 975d3636a2f9 src/java.base/share/classes/java/lang/StringUTF16.java --- a/src/java.base/share/classes/java/lang/StringUTF16.java Wed Sep 12 14:19:36 2018 -0300 +++ b/src/java.base/share/classes/java/lang/StringUTF16.java Wed Sep 12 14:19:36 2018 -0300 @@ -859,7 +859,6 @@ null; } - public static int indexOfNonWhitespace(byte[] value) { int length = value.length >> 1; int left = 0; @@ -874,7 +873,7 @@ } public static int lastIndexOfNonWhitespace(byte[] value) { - int length = value.length >> 1; + int length = value.length >>> 1; int right = length; while (0 < right) { int codepoint = codePointBefore(value, right); @@ -887,17 +886,18 @@ } public static String strip(byte[] value) { - int length = value.length >> 1; + int length = value.length >>> 1; int left = indexOfNonWhitespace(value); if (left == length) { 
return ""; } int right = lastIndexOfNonWhitespace(value); - return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null; + boolean ifChanged = (left > 0) || (right < length); + return ifChanged ? newString(value, left, right - left) : null; } public static String stripLeading(byte[] value) { - int length = value.length >> 1; + int length = value.length >>> 1; int left = indexOfNonWhitespace(value); if (left == length) { return ""; @@ -906,7 +906,7 @@ } public static String stripTrailing(byte[] value) { - int length = value.length >> 1; + int length = value.length >>> 1; int right = lastIndexOfNonWhitespace(value); if (right == 0) { return ""; @@ -919,11 +919,7 @@ private int index; // current index, modified on advance/split private final int fence; // one past last index - LinesSpliterator(byte[] value) { - this(value, 0, value.length >>> 1); - } - - LinesSpliterator(byte[] value, int start, int length) { + private LinesSpliterator(byte[] value, int start, int length) { this.value = value; this.index = start; this.fence = start + length; @@ -1002,10 +998,80 @@ public int characteristics() { return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL; } + + static LinesSpliterator spliterator(byte[] value) { + return new LinesSpliterator(value, 0, value.length >>> 1); + } + + static LinesSpliterator spliterator(byte[] value, int leading, int trailing) { + int length = value.length >>> 1; + int left = 0; + int index; + for (int l = 0; l < leading; l++) { + index = skipBlankForward(value, left, length); + if (index == left) { + break; + } + left = index; + } + int right = length; + for (int t = 0; t < trailing; t++) { + index = skipBlankBackward(value, left, right); + if (index == right) { + break; + } + right = index; + } + return new LinesSpliterator(value, left, right - left); + } + + private static int skipBlankForward(byte[] value, int start, int length) { + int index = start; + while (index < length) { + char ch = 
getChar(value, index++); + if (ch == '\n') { + return index; + } + if (ch == '\r') { + if (index < length && getChar(value, index) == '\n') { + return index + 1; + } + return index; + } + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + return start; + } + } + return length; + } + + private static int skipBlankBackward(byte[] value, int start, int fence) { + int index = fence; + if (start < index && getChar(value, index - 1) == '\n') { + index--; + } + if (start < index && getChar(value, index - 1) == '\r') { + index--; + } + while (start < index) { + char ch = getChar(value, --index); + if (ch == '\r' || ch == '\n') { + return index + 1; + } + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + return fence; + } + } + return start; + } } - static Stream lines(byte[] value) { - return StreamSupport.stream(new LinesSpliterator(value), false); + static Stream lines(byte[] value, int leading, int trailing) { + if (leading == 0 && trailing == 0) { + return StreamSupport.stream(LinesSpliterator.spliterator(value), false); + } else { + return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false); + } } private static void putChars(byte[] val, int index, char[] str, int off, int end) { diff -r d424675a9743 -r 975d3636a2f9 test/jdk/java/lang/String/AlignIndent.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/jdk/java/lang/String/AlignIndent.java Wed Sep 12 14:19:36 2018 -0300 @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary Unit tests for String#align and String#indent + * @run main AlignIndent + */ + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class AlignIndent { + static final List ENDS = List.of("", "\n", " \n", "\n\n", "\n\n\n"); + static final List MIDDLES = List.of( + "", + "xyz", + " xyz", + " xyz", + "xyz ", + " xyz ", + " xyz ", + "xyz\u2022", + " xyz\u2022", + "xyz\u2022 ", + " xyz\u2022 ", + " // comment" + ); + + public static void main(String[] args) { + test1(); + test2(); + test3(); + } + + /* + * Test String#align() functionality. 
+ */ + static void test1() { + for (String prefix : ENDS) { + for (String suffix : ENDS) { + for (String middle : MIDDLES) { + { + String input = prefix + " abc \n" + middle + "\n def \n" + suffix; + String output = input.align(); + + String[] inLines = input.split("\\R"); + String[] outLines = output.split("\\R"); + + String[] inLinesBody = getBody(inLines); + + if (inLinesBody.length < outLines.length) { + report("String::align()", "Result has more lines than expected", input, output); + } else if (inLinesBody.length > outLines.length) { + report("String::align()", "Result has fewer lines than expected", input, output); + } + + int indent = -1; + for (int i = 0; i < inLinesBody.length; i++) { + String in = inLinesBody[i]; + String out = outLines[i]; + if (!out.isBlank()) { + int offset = in.indexOf(out); + if (offset == -1) { + report("String::align()", "Portions of line are missing", input, output); + } + if (indent == -1) { + indent = offset; + } else if (offset != indent) { + report("String::align()", + "Inconsistent indentation in result", input, output); + } + } + } + } + } + } + } + } + + /* + * Test String#align(int n) functionality. + */ + static void test2() { + for (int adjust : new int[] {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8}) { + for (String prefix : ENDS) { + for (String suffix : ENDS) { + for (String middle : MIDDLES) { + { + String input = prefix + " abc \n" + middle + "\n def \n" + suffix; + String output = input.align(adjust); + String expected = input.align().indent(adjust); + + if (!output.equals(expected)) { + report("String::align(int n)", + "Result inconsistent with align().indent(n)", expected, output); + } + } + } + } + } + } + } + + /* + * Test String#indent(int n) functionality. 
+ */ + static void test3() { + for (int adjust : new int[] {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8}) { + for (String prefix : ENDS) { + for (String suffix : ENDS) { + for (String middle : MIDDLES) { + String input = prefix + " abc \n" + middle + "\n def \n" + suffix; + String output = input.indent(adjust); + + Stream stream = input.lines(); + if (adjust > 0) { + final String spaces = " ".repeat(adjust); + stream = stream.map(s -> s.isBlank() ? s : spaces + s); + } else if (adjust < 0) { + stream = stream.map(s -> s.substring(Math.min(-adjust, indexOfNonWhitespace(s)))); + } + String expected = stream.collect(Collectors.joining("\n", "", "\n")); + + if (!output.equals(expected)) { + report("String::indent(int n)", + "Result indentation not as expected", expected, output); + } + } + } + } + } + } + + public static int indexOfNonWhitespace(String s) { + int left = 0; + while (left < s.length()) { + char ch = s.charAt(left); + if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { + break; + } + left++; + } + return left; + } + + + private static String[] getBody(String[] inLines) { + int from = -1, to = -1; + for (int i = 0; i < inLines.length; i++) { + String line = inLines[i]; + if (!line.isBlank()) { + if (from == -1) { + from = i; + } + to = i + 1; + } + } + return Arrays.copyOfRange(inLines, from, to); + } + + /* + * Report difference in result. + */ + static void report(String test, String message, String input, String output) { + System.err.println("Testing " + test + ": " + message); + System.err.println(); + System.err.println("Input: length = " + input.length()); + System.err.println("_".repeat(40)); + System.err.print(input.replaceAll(" ", ".")); + System.err.println("_".repeat(40)); + System.err.println(); + System.err.println("Output: length = " + output.length()); + System.err.println("_".repeat(40)); + System.err.print(output.replaceAll(" ", ".")); + System.err.println("_".repeat(40)); + throw new RuntimeException(); + } +}