--- a/jdk/src/share/classes/java/util/regex/Pattern.java Wed Nov 13 11:06:57 2013 -0800
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java Wed Nov 13 11:26:01 2013 -0800
@@ -1142,9 +1142,15 @@
* input sequence that is terminated by another subsequence that matches
* this pattern or is terminated by the end of the input sequence. The
* substrings in the array are in the order in which they occur in the
- * input. If this pattern does not match any subsequence of the input then
+ * input. If this pattern does not match any subsequence of the input then
* the resulting array has just one element, namely the input sequence in
- * string form.
+ * string form. A zero-length input sequence always results in a
+ * zero-length array.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the resulting array. A zero-width match at the beginning, however,
+ * never produces such an empty leading substring.
*
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
@@ -1185,7 +1191,6 @@
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
* </table></blockquote>
*
- *
* @param input
* The character sequence to be split
*
@@ -1196,6 +1201,8 @@
* around matches of this pattern
*/
public String[] split(CharSequence input, int limit) {
+ if (input.length() == 0)
+ return new String[0];
int index = 0;
boolean matchLimited = limit > 0;
ArrayList<String> matchList = new ArrayList<>();
@@ -1204,6 +1211,11 @@
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
+ if (index == 0 && index == m.start() && m.start() == m.end()) {
+ // no empty leading substring included for zero-width match
+ // at the beginning of the input char sequence.
+ continue;
+ }
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();
@@ -5762,6 +5774,13 @@
* the resulting stream has just one element, namely the input sequence in
* string form.
*
+ * <p> A zero-length input sequence always results in an empty stream.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the stream. A zero-width match at the beginning, however, never
+ * produces such an empty leading substring.
+ *
* <p> If the input sequence is mutable, it must remain constant during the
* execution of the terminal stream operation. Otherwise, the result of the
* terminal stream operation is undefined.
@@ -5817,7 +5836,8 @@
current = matcher.end();
if (!nextElement.isEmpty()) {
return true;
- } else {
+ } else if (current > 0) { // no empty leading substring for zero-width
+ // match at the beginning of the input
emptyElementCount++;
}
}
--- a/jdk/test/java/util/regex/RegExTest.java Wed Nov 13 11:06:57 2013 -0800
+++ b/jdk/test/java/util/regex/RegExTest.java Wed Nov 13 11:26:01 2013 -0800
@@ -33,7 +33,8 @@
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
- * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647
+ * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
+ * 8027645
*/
import java.util.regex.*;
@@ -148,6 +149,7 @@
groupCurlyNotFoundSuppTest();
groupCurlyBackoffTest();
patternAsPredicate();
+
if (failure) {
throw new
RuntimeException("RegExTest failed, 1st failure: " +
@@ -1776,13 +1778,68 @@
failCount++;
}
// Check the case for limit == 0, source = "";
+ // split() now returns a 0-length array for an empty source ""; see #6559590
source = "";
result = source.split("e", 0);
- if (result.length != 1)
- failCount++;
- if (!result[0].equals(source))
- failCount++;
-
+ if (result.length != 0)
+ failCount++;
+
+ // Check both split() and splitAsStream(), especially for zero-length
+ // input and zero-length match cases
+ String[][] input = new String[][] {
+ { " ", "Abc Efg Hij" }, // normal non-zero-match
+ { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match
+ { " ", "Abc Efg Hij" }, // non-zero-match in the middle
+ { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match
+ { "(?=\\p{Lu})", "AbcEfg" },
+ { "(?=\\p{Lu})", "Abc" },
+ { " ", "" }, // zero-length input
+ { ".*", "" },
+
+ // some tests from PatternStreamTest.java
+ { "4", "awgqwefg1fefw4vssv1vvv1" },
+ { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
+ { "1", "awgqwefg1fefw4vssv1vvv1" },
+ { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
+ { "\u56da", "1\u56da23\u56da456\u56da7890" },
+ { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
+ { "\u56da", "" },
+ { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
+ { "o", "boo:and:foo" },
+ { "o", "booooo:and:fooooo" },
+ { "o", "fooooo:" },
+ };
+
+ String[][] expected = new String[][] {
+ { "Abc", "Efg", "Hij" },
+ { "", "Abc", "Efg", "Hij" },
+ { "Abc", "", "Efg", "Hij" },
+ { "Abc", "Efg", "Hij" },
+ { "Abc", "Efg" },
+ { "Abc" },
+ {},
+ {},
+
+ { "awgqwefg1fefw", "vssv1vvv1" },
+ { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
+ { "awgqwefg", "fefw4vssv", "vvv" },
+ { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
+ { "1", "23", "456", "7890" },
+ { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
+ {},
+ { "This", "is", "testing", "", "with", "different", "separators" },
+ { "b", "", ":and:f" },
+ { "b", "", "", "", "", ":and:f" },
+ { "f", "", "", "", "", ":" },
+ };
+ for (int i = 0; i < input.length; i++) {
+ pattern = Pattern.compile(input[i][0]);
+ if (!Arrays.equals(pattern.split(input[i][1]), expected[i]))
+ failCount++;
+ if (!Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
+ expected[i]))
+ failCount++;
+ }
report("Split");
}