6840246: Lightweight implementation of String.split for simple use case
Summary: Added a fastpath for simple use case
Reviewed-by: alanb, martin
--- a/jdk/src/share/classes/java/lang/String.java Tue Aug 11 20:06:52 2009 -0600
+++ b/jdk/src/share/classes/java/lang/String.java Thu Aug 13 10:50:23 2009 -0700
@@ -2301,6 +2301,54 @@
* @spec JSR-51
*/
public String[] split(String regex, int limit) {
+ /* Fastpath: avoid compiling a Pattern when the regex is either
+ (1) a one-char String whose character is not one of the
+ regex meta characters ".$|()[{^?*+\\", or
+ (2) a two-char String whose first char is the backslash and
+ whose second char is not an ASCII digit or ASCII letter.
+ In both cases the character must also not be a surrogate. */
+ char ch = 0; // the literal delimiter; assigned as a side effect of the condition below
+ if (((regex.count == 1 &&
+ ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
+ (regex.length() == 2 && // NOTE(review): length() here vs. the count field above -- presumably the same value; confirm
+ regex.charAt(0) == '\\' &&
+ (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && // negative iff ch is outside '0'..'9'
+ ((ch-'a')|('z'-ch)) < 0 && // negative iff ch is outside 'a'..'z'
+ ((ch-'A')|('Z'-ch)) < 0)) && // negative iff ch is outside 'A'..'Z'
+ (ch < Character.MIN_HIGH_SURROGATE ||
+ ch > Character.MAX_LOW_SURROGATE)) // ch is not in the surrogate range
+ {
+ int off = 0; // start of the segment currently being collected
+ int next = 0; // index of the most recent occurrence of ch
+ boolean limited = limit > 0; // only a positive limit caps the number of elements
+ ArrayList<String> list = new ArrayList<String>();
+ while ((next = indexOf(ch, off)) != -1) {
+ if (!limited || list.size() < limit - 1) {
+ list.add(substring(off, next));
+ off = next + 1;
+ } else { // last one: the rest of the string becomes the final element
+ //assert (list.size() == limit - 1);
+ list.add(substring(off, count));
+ off = count;
+ break;
+ }
+ }
+ // If no match was found, return an array holding only this string
+ if (off == 0)
+ return new String[] { this };
+
+ // Add remaining segment (skipped when the limit was already reached above)
+ if (!limited || list.size() < limit)
+ list.add(substring(off, count));
+
+ // Construct result; a limit of zero drops trailing empty strings
+ int resultSize = list.size();
+ if (limit == 0)
+ while (resultSize > 0 && list.get(resultSize-1).length() == 0)
+ resultSize--;
+ String[] result = new String[resultSize];
+ return list.subList(0, resultSize).toArray(result);
+ }
return Pattern.compile(regex).split(this, limit); // general case: compile the regex and split with it
}
--- a/jdk/test/java/lang/String/Split.java Tue Aug 11 20:06:52 2009 -0600
+++ b/jdk/test/java/lang/String/Split.java Thu Aug 13 10:50:23 2009 -0700
@@ -23,14 +23,18 @@
/**
* @test
+ * @bug 6840246
* @summary test String.split()
*/
+import java.util.Arrays;
+import java.util.Random;
import java.util.regex.*;
public class Split {
public static void main(String[] args) throws Exception {
String source = "0123456789";
+
for (int limit=-2; limit<3; limit++) {
for (int x=0; x<10; x++) {
String[] result = source.split(Integer.toString(x), limit);
@@ -80,5 +84,48 @@
throw new RuntimeException("String.split failure 8");
if (!result[0].equals(source))
throw new RuntimeException("String.split failure 9");
+
+ // Check the fastpath of String.split(): each code point, bare and backslash-escaped, must split exactly like Pattern.split()
+ source = "0123456789abcdefgABCDEFG";
+ Random r = new Random(); // NOTE(review): unseeded, so the random offset below differs from run to run
+
+ for (boolean doEscape: new boolean[] {false, true}) {
+ for (int cp = 0; cp < 0x11000; cp++) { // NOTE(review): stops at U+10FFF; confirm 0x110000 (all code points) was not intended
+ Pattern p = null;
+ String regex = new String(Character.toChars(cp));
+ if (doEscape)
+ regex = "\\" + regex;
+ try {
+ p = Pattern.compile(regex);
+ } catch (PatternSyntaxException pse) {
+ // illegal syntax: String.split() must reject the same regex
+ try {
+ "abc".split(regex);
+ } catch (PatternSyntaxException pse0) {
+ continue; // expected: both Pattern.compile() and String.split() threw
+ }
+ throw new RuntimeException("String.split failure 11");
+ }
+ int off = r.nextInt(source.length()); // random position at which one delimiter is inserted
+ String[] srcStrs = new String[] {
+ "",
+ source,
+ regex + source,
+ source + regex,
+ source.substring(0, 3)
+ + regex + source.substring(3, 9)
+ + regex + source.substring(9, 15)
+ + regex + source.substring(15),
+ source.substring(0, off) + regex + source.substring(off)
+ };
+ for (String src: srcStrs) {
+ for (int limit=-2; limit<3; limit++) {
+ if (!Arrays.equals(src.split(regex, limit),
+ p.split(src, limit)))
+ throw new RuntimeException("String.split failure 12");
+ }
+ }
+ }
+ }
}
}