--- a/src/java.base/share/classes/java/util/regex/Pattern.java Wed Jun 05 09:23:09 2019 +0200
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java Wed Jun 05 10:07:22 2019 +0200
@@ -2100,7 +2100,7 @@
private Node sequence(Node end) {
Node head = null;
Node tail = null;
- Node node = null;
+ Node node;
LOOP:
for (;;) {
int ch = peek();
@@ -2617,7 +2617,6 @@
CharPredicate prev = null;
CharPredicate curr = null;
BitClass bits = new BitClass();
- BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch];
boolean isNeg = false;
boolean hasBits = false;
@@ -2658,9 +2657,9 @@
if (hasBits) {
// bits used, union has high precedence
if (prev == null) {
- prev = curr = bitsP;
+ prev = curr = bits;
} else {
- prev = prev.union(bitsP);
+ prev = prev.union(bits);
}
hasBits = false;
}
@@ -2689,9 +2688,9 @@
if (consume)
next();
if (prev == null)
- prev = bitsP;
+ prev = bits;
else if (hasBits)
- prev = prev.union(bitsP);
+ prev = prev.union(bits);
if (isNeg)
return prev.negate();
return prev;
@@ -2947,8 +2946,8 @@
*/
private Node group0() {
boolean capturingGroup = false;
- Node head = null;
- Node tail = null;
+ Node head;
+ Node tail;
int save = flags0;
int saveTCNCount = topClosureNodes.size();
root = null;
@@ -2997,7 +2996,7 @@
head = createGroup(true);
tail = root;
head.next = expr(tail);
- tail.next = lookbehindEnd;
+ tail.next = LookBehindEndNode.INSTANCE;
TreeInfo info = new TreeInfo();
head.study(info);
if (info.maxValid == false) {
@@ -3253,7 +3252,6 @@
* Prev could be a single or a group, so it could be a chain of nodes.
*/
private Node closure(Node prev) {
- Node atom;
int ch = peek();
switch (ch) {
case '?':
@@ -3486,14 +3484,10 @@
* never matches values above Latin-1, and a complemented BitClass always
* matches values above Latin-1.
*/
- static final class BitClass extends BmpCharProperty {
+ static final class BitClass implements BmpCharPredicate {
final boolean[] bits;
BitClass() {
- this(new boolean[256]);
- }
- private BitClass(boolean[] bits) {
- super( ch -> ch < 256 && bits[ch]);
- this.bits = bits;
+ bits = new boolean[256];
}
BitClass add(int c, int flags) {
assert c >= 0 && c <= 255;
@@ -3509,8 +3503,12 @@
bits[c] = true;
return this;
}
+ public boolean is(int ch) {
+ return ch < 256 && bits[ch];
+ }
}
+
/**
* Utility method for creating a string slice matcher.
*/
@@ -3923,7 +3921,7 @@
* boolean property.
*/
static class CharProperty extends Node {
- CharPredicate predicate;
+ final CharPredicate predicate;
CharProperty (CharPredicate predicate) {
this.predicate = predicate;
@@ -4698,7 +4696,7 @@
* "next".
*/
static final class BranchConn extends Node {
- BranchConn() {};
+ BranchConn() {}
boolean match(Matcher matcher, int i, CharSequence seq) {
return next.match(matcher, i, seq);
}
@@ -4795,34 +4793,6 @@
matcher.locals[localIndex] = save;
return ret;
}
- boolean matchRef(Matcher matcher, int i, CharSequence seq) {
- int save = matcher.locals[localIndex];
- matcher.locals[localIndex] = ~i; // HACK
- boolean ret = next.match(matcher, i, seq);
- matcher.locals[localIndex] = save;
- return ret;
- }
- }
-
- /**
- * Recursive reference to a group in the regular expression. It calls
- * matchRef because if the reference fails to match we would not unset
- * the group.
- */
- static final class GroupRef extends Node {
- GroupHead head;
- GroupRef(GroupHead head) {
- this.head = head;
- }
- boolean match(Matcher matcher, int i, CharSequence seq) {
- return head.matchRef(matcher, i, seq)
- && next.match(matcher, matcher.last, seq);
- }
- boolean study(TreeInfo info) {
- info.maxValid = false;
- info.deterministic = false;
- return next.study(info);
- }
}
/**
@@ -4944,7 +4914,7 @@
}
boolean matchInit(Matcher matcher, int i, CharSequence seq) {
int save = matcher.locals[countIndex];
- boolean ret = false;
+ boolean ret;
if (posIndex != -1 && matcher.localsPos[posIndex] == null) {
matcher.localsPos[posIndex] = new IntHashSet();
}
@@ -5168,7 +5138,7 @@
}
boolean match(Matcher matcher, int i, CharSequence seq) {
int savedTo = matcher.to;
- boolean conditionMatched = false;
+ boolean conditionMatched;
// Relax transparent region boundaries for lookahead
if (matcher.transparentBounds)
@@ -5193,7 +5163,7 @@
}
boolean match(Matcher matcher, int i, CharSequence seq) {
int savedTo = matcher.to;
- boolean conditionMatched = false;
+ boolean conditionMatched;
// Relax transparent region boundaries for lookahead
if (matcher.transparentBounds)
@@ -5219,11 +5189,15 @@
* For use with lookbehinds; matches the position where the lookbehind
* was encountered.
*/
- static Node lookbehindEnd = new Node() {
+ static class LookBehindEndNode extends Node {
+ private LookBehindEndNode() {} // Singleton
+
+ static LookBehindEndNode INSTANCE = new LookBehindEndNode();
+
boolean match(Matcher matcher, int i, CharSequence seq) {
return i == matcher.lookbehindTo;
}
- };
+ }
/**
* Zero width positive lookbehind.
@@ -5491,7 +5465,7 @@
if (patternLength < 4) {
return node;
}
- int i, j, k;
+ int i, j;
int[] lastOcc = new int[128];
int[] optoSft = new int[patternLength];
// Precalculate part of the bad character shift
@@ -5646,7 +5620,7 @@
static interface BmpCharPredicate extends CharPredicate {
default CharPredicate and(CharPredicate p) {
- if(p instanceof BmpCharPredicate)
+ if (p instanceof BmpCharPredicate)
return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
return ch -> is(ch) && p.is(ch);
}
--- a/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java Wed Jun 05 09:23:09 2019 +0200
+++ b/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java Wed Jun 05 10:07:22 2019 +0200
@@ -45,12 +45,13 @@
public String fileTestString;
public String flagsString;
-
public Pattern graphemePattern;
public Pattern jmodPattern;
public Pattern jmodCanonicalPattern;
- public Pattern pattern;
+ public String charPatternRegex;
+ public String[] charPatternStrings;
+ public Pattern charPattern;
@Setup
public void setup() {
@@ -61,6 +62,10 @@
String jmodRegex = "^.*(?:(?:_the\\.[^/]*)|(?:_[^/]*\\.marker)|(?:[^/]*\\.diz)|(?:[^/]*\\.debuginfo)|(?:[^/]*\\.dSYM/.*)|(?:[^/]*\\.dSYM)|(?:[^/]*\\.pdb)|(?:[^/]*\\.map))$";
jmodCanonicalPattern = Pattern.compile(jmodRegex, Pattern.CANON_EQ);
jmodPattern = Pattern.compile(jmodRegex);
+
+ charPatternRegex = "[ a-zA-Z]*foo[ a-zA-Z0-9]*bar[ a-z]*";
+ charPatternStrings = new String[] {"avaaafooddddddbariiii", "lorem ipsum dolor foo bar", "fpp brr lorem ipsum dolor foo bar %", "lorem ipsum dolor foo bar lorem ipsum dolor foo bar lorem ipsum dolor foo bar /"};
+ charPattern = Pattern.compile(charPatternRegex);
}
@Benchmark
@@ -83,4 +88,30 @@
public boolean normalJmodMatch() {
return jmodPattern.matcher(fileTestString).matches();
}
+
+ @Benchmark
+ @Warmup(iterations = 3)
+ @Measurement(iterations = 3)
+ public boolean charPatternMatch() {
+ return charPattern.matcher(charPatternStrings[0]).matches()
+ && charPattern.matcher(charPatternStrings[1]).matches()
+ && charPattern.matcher(charPatternStrings[2]).matches();
+ }
+
+ @Benchmark
+ @Warmup(iterations = 3)
+ @Measurement(iterations = 3)
+ public boolean charPatternMatchWithCompile() {
+ Pattern p = Pattern.compile(charPatternRegex);
+ return p.matcher(charPatternStrings[0]).matches()
+ && p.matcher(charPatternStrings[1]).matches()
+ && p.matcher(charPatternStrings[2]).matches();
+ }
+
+ @Benchmark
+ @Warmup(iterations = 3)
+ @Measurement(iterations = 3)
+ public Pattern charPatternCompile() {
+ return Pattern.compile(charPatternRegex);
+ }
}