8200434: String::align, String::indent
Reviewed-by: abuckley, smarks, sherman, rriggs, jrose, sundar, igerasim, briangoetz, darcy, jjg
--- a/src/java.base/share/classes/java/lang/String.java Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/String.java Wed Sep 12 14:19:36 2018 -0300
@@ -40,12 +40,15 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import jdk.internal.HotSpotIntrinsicCandidate;
import jdk.internal.vm.annotation.Stable;
+import static java.util.function.Predicate.not;
+
/**
* The {@code String} class represents character strings. All
* string literals in Java programs, such as {@code "abc"}, are
@@ -2755,12 +2758,9 @@
return indexOfNonWhitespace() == length();
}
- private int indexOfNonWhitespace() {
- if (isLatin1()) {
- return StringLatin1.indexOfNonWhitespace(value);
- } else {
- return StringUTF16.indexOfNonWhitespace(value);
- }
+    /**
+     * Splits this string into lines, first skipping at most
+     * {@code maxLeading} blank lines at the start and at most
+     * {@code maxTrailing} blank lines at the end. The work is delegated to
+     * the coder-specific implementation.
+     */
+    private Stream<String> lines(int maxLeading, int maxTrailing) {
+        if (isLatin1()) {
+            return StringLatin1.lines(value, maxLeading, maxTrailing);
+        }
+        return StringUTF16.lines(value, maxLeading, maxTrailing);
}
/**
@@ -2794,8 +2794,181 @@
* @since 11
*/
public Stream<String> lines() {
- return isLatin1() ? StringLatin1.lines(value)
- : StringUTF16.lines(value);
+ return lines(0, 0); // zero limits: no leading or trailing blank lines are skipped
+ }
+
+ /**
+ * Adjusts the indentation of each line of this string based on the value of
+ * {@code n}, and normalizes line termination characters.
+ * <p>
+ * This string is conceptually separated into lines using
+ * {@link String#lines()}. Each line is then adjusted as described below
+ * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
+ * lines are then concatenated and returned.
+ * <p>
+ * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
+ * beginning of each line. {@link String#isBlank() Blank lines} are
+ * unaffected.
+ * <p>
+ * If {@code n < 0} then up to {@code -n}
+ * {@link Character#isWhitespace(int) white space characters} are removed
+ * from the beginning of each line. If a given line does not contain
+ * sufficient white space then all leading
+ * {@link Character#isWhitespace(int) white space characters} are removed.
+ * Each white space character is treated as a single character. In
+ * particular, the tab character {@code "\t"} (U+0009) is considered a
+ * single character; it is not expanded.
+ * <p>
+ * If {@code n == 0} then the line remains unchanged. However, line
+ * terminators are still normalized.
+ * <p>
+ * If this string is empty, the empty string is returned; no line feed is
+ * appended.
+ *
+ * @param n number of leading
+ * {@link Character#isWhitespace(int) white space characters}
+ * to add or remove
+ *
+ * @return string with indentation adjusted and line endings normalized
+ *
+ * @see String#lines()
+ * @see String#isBlank()
+ * @see Character#isWhitespace(int)
+ *
+ * @since 12
+ */
+ public String indent(int n) {
+ return isEmpty() ? "" : indent(n, false);
+ }
+
+    /**
+     * Shared implementation of {@code indent} and {@code align}.
+     *
+     * @param n            positive to prepend {@code n} spaces to every
+     *                     non-blank line, negative to remove up to {@code -n}
+     *                     leading white space characters from every line,
+     *                     zero to leave line content untouched
+     * @param removeBlanks when {@code true}, leading and trailing blank
+     *                     lines are dropped before the lines are adjusted
+     * @return the adjusted lines, each terminated by {@code "\n"}
+     */
+    private String indent(int n, boolean removeBlanks) {
+        // Integer.MAX_VALUE as the limit means "skip every blank line found
+        // at that end of the string".
+        final Stream<String> source = removeBlanks
+                ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
+                : lines();
+        final Stream<String> adjusted;
+        if (n == 0) {
+            adjusted = source;
+        } else if (n > 0) {
+            final String padding = " ".repeat(n);
+            adjusted = source.map(line -> line.isBlank() ? line : padding + line);
+        } else if (n == Integer.MIN_VALUE) {
+            // -n is not representable here, so strip all leading white space.
+            adjusted = source.map(String::stripLeading);
+        } else {
+            final int limit = -n;
+            adjusted = source.map(
+                    line -> line.substring(Math.min(limit, line.indexOfNonWhitespace())));
+        }
+        return adjusted.collect(Collectors.joining("\n", "", "\n"));
+    }
+
+    /**
+     * Delegates to the coder-specific scan for the first character that is
+     * not white space. {@code isBlank()} treats a result equal to
+     * {@code length()} as "no such character".
+     */
+    private int indexOfNonWhitespace() {
+        if (isLatin1()) {
+            return StringLatin1.indexOfNonWhitespace(value);
+        }
+        return StringUTF16.indexOfNonWhitespace(value);
+    }
+
+    /**
+     * Delegates to the coder-specific backward scan over trailing white
+     * space. The strip implementations use the result as the exclusive end
+     * of the retained content.
+     */
+    private int lastIndexOfNonWhitespace() {
+        if (isLatin1()) {
+            return StringLatin1.lastIndexOfNonWhitespace(value);
+        }
+        return StringUTF16.lastIndexOfNonWhitespace(value);
+    }
+
+ /**
+ * Removes vertical and horizontal white space margins from around the
+ * essential body of a multi-line string, while preserving relative
+ * indentation.
+ * <p>
+ * This string is first conceptually separated into lines as if by
+ * {@link String#lines()}.
+ * <p>
+ * Then, the <i>minimum indentation</i> (min) is determined as follows. For
+ * each non-blank line (as defined by {@link String#isBlank()}), the
+ * leading {@link Character#isWhitespace(int) white space} characters are
+ * counted. The <i>min</i> value is the smallest of these counts.
+ * <p>
+ * For each non-blank line, <i>min</i> leading white space characters are
+ * removed. Each white space character is treated as a single character. In
+ * particular, the tab character {@code "\t"} (U+0009) is considered a
+ * single character; it is not expanded. Blank lines that remain inside the
+ * body lose at most <i>min</i> leading white space characters.
+ * <p>
+ * Leading and trailing blank lines, if any, are removed. Trailing spaces are
+ * preserved.
+ * <p>
+ * Each line is suffixed with a line feed character {@code "\n"} (U+000A).
+ * <p>
+ * Finally, the lines are concatenated into a single string and returned.
+ * If this string is empty, the empty string is returned.
+ *
+ * @apiNote
+ * This method's primary purpose is to shift a block of lines as far as
+ * possible to the left, while preserving relative indentation. Lines
+ * that were indented the least will thus have no leading white space.
+ *
+ * Example:
+ * <blockquote><pre>
+ * `
+ * This is the first line
+ * This is the second line
+ * `.align();
+ *
+ * returns
+ * This is the first line
+ * This is the second line
+ * </pre></blockquote>
+ *
+ * @return string with margins removed and line terminators normalized
+ *
+ * @see String#lines()
+ * @see String#isBlank()
+ * @see String#indent(int)
+ * @see Character#isWhitespace(int)
+ *
+ * @since 12
+ */
+ public String align() {
+ return align(0);
+ }
+
+    /**
+     * Removes vertical and horizontal white space margins from around the
+     * essential body of a multi-line string, while preserving relative
+     * indentation and with optional indentation adjustment.
+     * <p>
+     * Invoking this method is equivalent to:
+     * <blockquote>
+     * {@code this.align().indent(n)}
+     * </blockquote>
+     * <p>
+     * If this string is empty, the empty string is returned.
+     *
+     * @apiNote
+     * Examples:
+     * <blockquote><pre>
+     * `
+     *      This is the first line
+     *          This is the second line
+     * `.align(0);
+     *
+     * returns
+     * This is the first line
+     *     This is the second line
+     *
+     *
+     * `
+     *   This is the first line
+     *       This is the second line
+     * `.align(4);
+     * returns
+     *     This is the first line
+     *         This is the second line
+     * </pre></blockquote>
+     *
+     * @param n  number of leading white space characters
+     *           to add or remove
+     *
+     * @return string with margins removed, indentation adjusted and
+     *         line terminators normalized
+     *
+     * @see String#align()
+     *
+     * @since 12
+     */
+    public String align(int n) {
+        if (isEmpty()) {
+            return "";
+        }
+        // Smallest indentation among the non-blank lines; 0 when every line
+        // is blank.
+        int outdent = lines().filter(not(String::isBlank))
+                             .mapToInt(String::indexOfNonWhitespace)
+                             .min()
+                             .orElse(0);
+        // outdent is never negative, so n - outdent can only underflow. In
+        // int arithmetic that wraps to a huge positive value and would ask
+        // indent to prepend billions of spaces. Compute in long and clamp to
+        // Integer.MIN_VALUE, which indent treats as "strip all leading
+        // white space".
+        long adjust = (long) n - outdent;
+        // NOTE(review): folding both steps into one indent call differs from
+        // align().indent(n) for whitespace-only lines inside the body when
+        // n > outdent (they are left untouched here) -- confirm intended.
+        return indent((int) Math.max(adjust, Integer.MIN_VALUE), true);
}
/**
--- a/src/java.base/share/classes/java/lang/StringLatin1.java Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java Wed Sep 12 14:19:36 2018 -0300
@@ -545,7 +545,7 @@
int length = value.length;
int left = 0;
while (left < length) {
- char ch = (char)(value[left] & 0xff);
+ char ch = getChar(value, left);
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
break;
}
@@ -558,7 +558,7 @@
int length = value.length;
int right = length;
while (0 < right) {
- char ch = (char)(value[right - 1] & 0xff);
+ char ch = getChar(value, right - 1);
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
break;
}
@@ -573,7 +573,8 @@
return "";
}
int right = lastIndexOfNonWhitespace(value);
- return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null;
+ boolean ifChanged = (left > 0) || (right < value.length);
+ return ifChanged ? newString(value, left, right - left) : null;
}
public static String stripLeading(byte[] value) {
@@ -597,11 +598,7 @@
private int index; // current index, modified on advance/split
private final int fence; // one past last index
- LinesSpliterator(byte[] value) {
- this(value, 0, value.length);
- }
-
- LinesSpliterator(byte[] value, int start, int length) {
+ private LinesSpliterator(byte[] value, int start, int length) {
this.value = value;
this.index = start;
this.fence = start + length;
@@ -609,7 +606,7 @@
private int indexOfLineSeparator(int start) {
for (int current = start; current < fence; current++) {
- byte ch = value[current];
+ char ch = getChar(value, current);
if (ch == '\n' || ch == '\r') {
return current;
}
@@ -619,9 +616,9 @@
private int skipLineSeparator(int start) {
if (start < fence) {
- if (value[start] == '\r') {
+ if (getChar(value, start) == '\r') {
int next = start + 1;
- if (next < fence && value[next] == '\n') {
+ if (next < fence && getChar(value, next) == '\n') {
return next + 1;
}
}
@@ -680,10 +677,80 @@
public int characteristics() {
return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
}
+
+        /**
+         * Creates a spliterator over every line of {@code value}, from index 0
+         * through {@code value.length}.
+         */
+        static LinesSpliterator spliterator(byte[] value) {
+            final int start = 0;
+            final int length = value.length;
+            return new LinesSpliterator(value, start, length);
+        }
+
+        /**
+         * Creates a spliterator over the lines of {@code value} after skipping
+         * blank lines at both ends.
+         *
+         * @param leading   maximum number of blank lines to skip at the start
+         * @param trailing  maximum number of blank lines to skip at the end
+         */
+        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
+            final int end = value.length;
+
+            // Advance past blank lines at the front until a non-blank line,
+            // the end of the data, or the limit is reached.
+            int first = 0;
+            int skipped = 0;
+            while (skipped < leading) {
+                final int next = skipBlankForward(value, first, end);
+                if (next == first) {
+                    break;
+                }
+                first = next;
+                skipped++;
+            }
+
+            // Same from the back, never retreating past 'first'.
+            int last = end;
+            int dropped = 0;
+            while (dropped < trailing) {
+                final int previous = skipBlankBackward(value, first, last);
+                if (previous == last) {
+                    break;
+                }
+                last = previous;
+                dropped++;
+            }
+            return new LinesSpliterator(value, first, last - first);
+        }
+
+        /**
+         * Examines the line beginning at {@code start}.
+         *
+         * @return the index just past the line and its terminator when the
+         *         line is blank, {@code length} when only white space remains
+         *         up to {@code length}, or {@code start} when the line holds
+         *         a non-white-space character
+         */
+        private static int skipBlankForward(byte[] value, int start, int length) {
+            for (int pos = start; pos < length; pos++) {
+                final char ch = getChar(value, pos);
+                switch (ch) {
+                    case '\n':
+                        return pos + 1;
+                    case '\r': {
+                        // Treat "\r\n" as a single terminator.
+                        final int after = pos + 1;
+                        final boolean crlf = after < length && getChar(value, after) == '\n';
+                        return crlf ? after + 1 : after;
+                    }
+                    default:
+                        if (!(ch == ' ' || ch == '\t' || Character.isWhitespace(ch))) {
+                            return start;
+                        }
+                }
+            }
+            return length;
+        }
+
+        /**
+         * Examines the last line of the range {@code [start, fence)}.
+         *
+         * @return the index at which that line begins when it is blank,
+         *         {@code start} when only white space lies between
+         *         {@code start} and the line's terminator, or {@code fence}
+         *         when the line holds a non-white-space character
+         */
+        private static int skipBlankBackward(byte[] value, int start, int fence) {
+            int end = fence;
+            // Step over the terminator ("\n", "\r" or "\r\n") that closes the
+            // last line, if there is one.
+            if (end > start && getChar(value, end - 1) == '\n') {
+                end--;
+            }
+            if (end > start && getChar(value, end - 1) == '\r') {
+                end--;
+            }
+            for (int pos = end - 1; pos >= start; pos--) {
+                final char ch = getChar(value, pos);
+                if (ch == '\n' || ch == '\r') {
+                    // Terminator of the preceding line: the blank line starts after it.
+                    return pos + 1;
+                }
+                final boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
+                if (!blank) {
+                    return fence;
+                }
+            }
+            return start;
+        }
}
- static Stream<String> lines(byte[] value) {
- return StreamSupport.stream(new LinesSpliterator(value), false);
+    /**
+     * Returns a sequential stream of the lines in {@code value}, skipping at
+     * most {@code leading} blank lines at the start and at most
+     * {@code trailing} blank lines at the end.
+     */
+    static Stream<String> lines(byte[] value, int leading, int trailing) {
+        // With nothing to skip, use the spliterator over the whole array.
+        final boolean wholeValue = (leading == 0) && (trailing == 0);
+        final LinesSpliterator source = wholeValue
+                ? LinesSpliterator.spliterator(value)
+                : LinesSpliterator.spliterator(value, leading, trailing);
+        return StreamSupport.stream(source, false);
}
public static void putChar(byte[] val, int index, int c) {
--- a/src/java.base/share/classes/java/lang/StringUTF16.java Wed Sep 12 14:19:36 2018 -0300
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java Wed Sep 12 14:19:36 2018 -0300
@@ -859,7 +859,6 @@
null;
}
-
public static int indexOfNonWhitespace(byte[] value) {
int length = value.length >> 1;
int left = 0;
@@ -874,7 +873,7 @@
}
public static int lastIndexOfNonWhitespace(byte[] value) {
- int length = value.length >> 1;
+ int length = value.length >>> 1;
int right = length;
while (0 < right) {
int codepoint = codePointBefore(value, right);
@@ -887,17 +886,18 @@
}
public static String strip(byte[] value) {
- int length = value.length >> 1;
+ int length = value.length >>> 1;
int left = indexOfNonWhitespace(value);
if (left == length) {
return "";
}
int right = lastIndexOfNonWhitespace(value);
- return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null;
+ boolean ifChanged = (left > 0) || (right < length);
+ return ifChanged ? newString(value, left, right - left) : null;
}
public static String stripLeading(byte[] value) {
- int length = value.length >> 1;
+ int length = value.length >>> 1;
int left = indexOfNonWhitespace(value);
if (left == length) {
return "";
@@ -906,7 +906,7 @@
}
public static String stripTrailing(byte[] value) {
- int length = value.length >> 1;
+ int length = value.length >>> 1;
int right = lastIndexOfNonWhitespace(value);
if (right == 0) {
return "";
@@ -919,11 +919,7 @@
private int index; // current index, modified on advance/split
private final int fence; // one past last index
- LinesSpliterator(byte[] value) {
- this(value, 0, value.length >>> 1);
- }
-
- LinesSpliterator(byte[] value, int start, int length) {
+ private LinesSpliterator(byte[] value, int start, int length) {
this.value = value;
this.index = start;
this.fence = start + length;
@@ -1002,10 +998,80 @@
public int characteristics() {
return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
}
+
+        /**
+         * Creates a spliterator over every line of {@code value}, from char
+         * index 0 through {@code value.length >>> 1}.
+         */
+        static LinesSpliterator spliterator(byte[] value) {
+            final int start = 0;
+            final int length = value.length >>> 1;
+            return new LinesSpliterator(value, start, length);
+        }
+
+        /**
+         * Creates a spliterator over the lines of {@code value} after skipping
+         * blank lines at both ends. Indices are char indices.
+         *
+         * @param leading   maximum number of blank lines to skip at the start
+         * @param trailing  maximum number of blank lines to skip at the end
+         */
+        static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
+            final int end = value.length >>> 1;
+
+            // Advance past blank lines at the front until a non-blank line,
+            // the end of the data, or the limit is reached.
+            int first = 0;
+            int skipped = 0;
+            while (skipped < leading) {
+                final int next = skipBlankForward(value, first, end);
+                if (next == first) {
+                    break;
+                }
+                first = next;
+                skipped++;
+            }
+
+            // Same from the back, never retreating past 'first'.
+            int last = end;
+            int dropped = 0;
+            while (dropped < trailing) {
+                final int previous = skipBlankBackward(value, first, last);
+                if (previous == last) {
+                    break;
+                }
+                last = previous;
+                dropped++;
+            }
+            return new LinesSpliterator(value, first, last - first);
+        }
+
+        /**
+         * Examines the line beginning at char index {@code start}.
+         *
+         * @return the index just past the line and its terminator when the
+         *         line is blank, {@code length} when only white space remains
+         *         up to {@code length}, or {@code start} when the line holds
+         *         a non-white-space character
+         */
+        private static int skipBlankForward(byte[] value, int start, int length) {
+            for (int pos = start; pos < length; pos++) {
+                final char ch = getChar(value, pos);
+                switch (ch) {
+                    case '\n':
+                        return pos + 1;
+                    case '\r': {
+                        // Treat "\r\n" as a single terminator.
+                        final int after = pos + 1;
+                        final boolean crlf = after < length && getChar(value, after) == '\n';
+                        return crlf ? after + 1 : after;
+                    }
+                    default:
+                        if (!(ch == ' ' || ch == '\t' || Character.isWhitespace(ch))) {
+                            return start;
+                        }
+                }
+            }
+            return length;
+        }
+
+        /**
+         * Examines the last line of the char range {@code [start, fence)}.
+         *
+         * @return the index at which that line begins when it is blank,
+         *         {@code start} when only white space lies between
+         *         {@code start} and the line's terminator, or {@code fence}
+         *         when the line holds a non-white-space character
+         */
+        private static int skipBlankBackward(byte[] value, int start, int fence) {
+            int end = fence;
+            // Step over the terminator ("\n", "\r" or "\r\n") that closes the
+            // last line, if there is one.
+            if (end > start && getChar(value, end - 1) == '\n') {
+                end--;
+            }
+            if (end > start && getChar(value, end - 1) == '\r') {
+                end--;
+            }
+            for (int pos = end - 1; pos >= start; pos--) {
+                final char ch = getChar(value, pos);
+                if (ch == '\n' || ch == '\r') {
+                    // Terminator of the preceding line: the blank line starts after it.
+                    return pos + 1;
+                }
+                final boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
+                if (!blank) {
+                    return fence;
+                }
+            }
+            return start;
+        }
}
- static Stream<String> lines(byte[] value) {
- return StreamSupport.stream(new LinesSpliterator(value), false);
+    /**
+     * Returns a sequential stream of the lines in {@code value}, skipping at
+     * most {@code leading} blank lines at the start and at most
+     * {@code trailing} blank lines at the end.
+     */
+    static Stream<String> lines(byte[] value, int leading, int trailing) {
+        // With nothing to skip, use the spliterator over the whole array.
+        final boolean wholeValue = (leading == 0) && (trailing == 0);
+        final LinesSpliterator source = wholeValue
+                ? LinesSpliterator.spliterator(value)
+                : LinesSpliterator.spliterator(value, leading, trailing);
+        return StreamSupport.stream(source, false);
}
private static void putChars(byte[] val, int index, char[] str, int off, int end) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/String/AlignIndent.java Wed Sep 12 14:19:36 2018 -0300
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Unit tests for String#align and String#indent
+ * @run main AlignIndent
+ */
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public class AlignIndent {
+    /** Runs of blank lines used as the prefix and suffix of every generated input. */
+    static final List<String> ENDS = List.of("", "\n", " \n", "\n\n", "\n\n\n");
+
+    /**
+     * Candidate middle lines: empty, variously padded, and variants containing
+     * U+2022, a character outside Latin-1.
+     */
+    static final List<String> MIDDLES = List.of(
+        "",
+        "xyz",
+        " xyz",
+        " xyz",
+        "xyz ",
+        " xyz ",
+        " xyz ",
+        "xyz\u2022",
+        " xyz\u2022",
+        "xyz\u2022 ",
+        " xyz\u2022 ",
+        " // comment"
+    );
+
+    /** Indentation adjustments exercised by test2 and test3. */
+    static final int[] ADJUSTMENTS = {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8};
+
+    public static void main(String[] args) {
+        test1();
+        test2();
+        test3();
+    }
+
+    /*
+     * Build one test input: a fixed first and last line around the given
+     * middle line, wrapped in the given blank-line prefix and suffix.
+     */
+    private static String makeInput(String prefix, String middle, String suffix) {
+        return prefix + " abc \n" + middle + "\n def \n" + suffix;
+    }
+
+    /*
+     * Test String#align() functionality.
+     */
+    static void test1() {
+        for (String prefix : ENDS) {
+            for (String suffix : ENDS) {
+                for (String middle : MIDDLES) {
+                    String input = makeInput(prefix, middle, suffix);
+                    String output = input.align();
+
+                    String[] inLinesBody = getBody(input.split("\\R"));
+                    String[] outLines = output.split("\\R");
+
+                    // report() always throws, so the loop below only runs
+                    // when both arrays have the same length.
+                    if (inLinesBody.length < outLines.length) {
+                        report("String::align()", "Result has more lines than expected", input, output);
+                    } else if (inLinesBody.length > outLines.length) {
+                        report("String::align()", "Result has fewer lines than expected", input, output);
+                    }
+
+                    // Every non-blank output line must appear in its input
+                    // line at the same offset, i.e. the same margin was
+                    // removed from each of them.
+                    int indent = -1;
+                    for (int i = 0; i < inLinesBody.length; i++) {
+                        String in = inLinesBody[i];
+                        String out = outLines[i];
+                        if (!out.isBlank()) {
+                            int offset = in.indexOf(out);
+                            if (offset == -1) {
+                                report("String::align()", "Portions of line are missing", input, output);
+                            }
+                            if (indent == -1) {
+                                indent = offset;
+                            } else if (offset != indent) {
+                                report("String::align()",
+                                       "Inconsistent indentation in result", input, output);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Test String#align(int n) functionality.
+     */
+    static void test2() {
+        for (int adjust : ADJUSTMENTS) {
+            for (String prefix : ENDS) {
+                for (String suffix : ENDS) {
+                    for (String middle : MIDDLES) {
+                        String input = makeInput(prefix, middle, suffix);
+                        String output = input.align(adjust);
+                        String expected = input.align().indent(adjust);
+
+                        if (!output.equals(expected)) {
+                            report("String::align(int n)",
+                                   "Result inconsistent with align().indent(n)", expected, output);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Test String#indent(int n) functionality.
+     */
+    static void test3() {
+        for (int adjust : ADJUSTMENTS) {
+            for (String prefix : ENDS) {
+                for (String suffix : ENDS) {
+                    for (String middle : MIDDLES) {
+                        String input = makeInput(prefix, middle, suffix);
+                        String output = input.indent(adjust);
+
+                        // Reference result built from the public API only.
+                        Stream<String> stream = input.lines();
+                        if (adjust > 0) {
+                            final String spaces = " ".repeat(adjust);
+                            stream = stream.map(s -> s.isBlank() ? s : spaces + s);
+                        } else if (adjust < 0) {
+                            stream = stream.map(s -> s.substring(Math.min(-adjust, indexOfNonWhitespace(s))));
+                        }
+                        String expected = stream.collect(Collectors.joining("\n", "", "\n"));
+
+                        if (!output.equals(expected)) {
+                            report("String::indent(int n)",
+                                   "Result indentation not as expected", expected, output);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Index of the first char of s that is not white space, or s.length()
+     * if there is none.
+     */
+    public static int indexOfNonWhitespace(String s) {
+        int left = 0;
+        while (left < s.length()) {
+            char ch = s.charAt(left);
+            if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
+                break;
+            }
+            left++;
+        }
+        return left;
+    }
+
+    /*
+     * Return the lines from the first non-blank line through the last
+     * non-blank line, or an empty array when every line is blank.
+     */
+    private static String[] getBody(String[] inLines) {
+        int from = -1, to = -1;
+        for (int i = 0; i < inLines.length; i++) {
+            String line = inLines[i];
+            if (!line.isBlank()) {
+                if (from == -1) {
+                    from = i;
+                }
+                to = i + 1;
+            }
+        }
+        if (from == -1) {
+            // No non-blank line: copyOfRange(inLines, -1, -1) would throw
+            // ArrayIndexOutOfBoundsException.
+            return new String[0];
+        }
+        return Arrays.copyOfRange(inLines, from, to);
+    }
+
+    /*
+     * Report difference in result. Prints both strings with spaces shown as
+     * dots, then fails the test by throwing.
+     */
+    static void report(String test, String message, String input, String output) {
+        System.err.println("Testing " + test + ": " + message);
+        System.err.println();
+        System.err.println("Input: length = " + input.length());
+        System.err.println("_".repeat(40));
+        System.err.print(input.replace(" ", "."));
+        System.err.println("_".repeat(40));
+        System.err.println();
+        System.err.println("Output: length = " + output.length());
+        System.err.println("_".repeat(40));
+        System.err.print(output.replace(" ", "."));
+        System.err.println("_".repeat(40));
+        throw new RuntimeException(test + ": " + message);
+    }
+}