# HG changeset patch # User redestad # Date 1559722042 -7200 # Node ID 28c93b5fb056e8322e72d1e14b50aa34814dc03e # Parent cfd1e298ca330a8d63f3dd0f6dde797727f85f75 8225179: (regex) Minor Pattern cleanup Reviewed-by: igerasim diff -r cfd1e298ca33 -r 28c93b5fb056 src/java.base/share/classes/java/util/regex/Pattern.java --- a/src/java.base/share/classes/java/util/regex/Pattern.java Wed Jun 05 09:23:09 2019 +0200 +++ b/src/java.base/share/classes/java/util/regex/Pattern.java Wed Jun 05 10:07:22 2019 +0200 @@ -2100,7 +2100,7 @@ private Node sequence(Node end) { Node head = null; Node tail = null; - Node node = null; + Node node; LOOP: for (;;) { int ch = peek(); @@ -2617,7 +2617,6 @@ CharPredicate prev = null; CharPredicate curr = null; BitClass bits = new BitClass(); - BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch]; boolean isNeg = false; boolean hasBits = false; @@ -2658,9 +2657,9 @@ if (hasBits) { // bits used, union has high precedence if (prev == null) { - prev = curr = bitsP; + prev = curr = bits; } else { - prev = prev.union(bitsP); + prev = prev.union(bits); } hasBits = false; } @@ -2689,9 +2688,9 @@ if (consume) next(); if (prev == null) - prev = bitsP; + prev = bits; else if (hasBits) - prev = prev.union(bitsP); + prev = prev.union(bits); if (isNeg) return prev.negate(); return prev; @@ -2947,8 +2946,8 @@ */ private Node group0() { boolean capturingGroup = false; - Node head = null; - Node tail = null; + Node head; + Node tail; int save = flags0; int saveTCNCount = topClosureNodes.size(); root = null; @@ -2997,7 +2996,7 @@ head = createGroup(true); tail = root; head.next = expr(tail); - tail.next = lookbehindEnd; + tail.next = LookBehindEndNode.INSTANCE; TreeInfo info = new TreeInfo(); head.study(info); if (info.maxValid == false) { @@ -3253,7 +3252,6 @@ * Prev could be a single or a group, so it could be a chain of nodes. */ private Node closure(Node prev) { - Node atom; int ch = peek(); switch (ch) { case '?': @@ -3486,14 +3484,10 @@ * never matches values above Latin-1, and a complemented BitClass always * matches values above Latin-1. */ - static final class BitClass extends BmpCharProperty { + static final class BitClass implements BmpCharPredicate { final boolean[] bits; BitClass() { - this(new boolean[256]); - } - private BitClass(boolean[] bits) { - super( ch -> ch < 256 && bits[ch]); - this.bits = bits; + bits = new boolean[256]; } BitClass add(int c, int flags) { assert c >= 0 && c <= 255; @@ -3509,8 +3503,12 @@ bits[c] = true; return this; } + public boolean is(int ch) { + return ch < 256 && bits[ch]; + } } + /** * Utility method for creating a string slice matcher. */ @@ -3923,7 +3921,7 @@ * boolean property. */ static class CharProperty extends Node { - CharPredicate predicate; + final CharPredicate predicate; CharProperty (CharPredicate predicate) { this.predicate = predicate; @@ -4698,7 +4696,7 @@ * "next". */ static final class BranchConn extends Node { - BranchConn() {}; + BranchConn() {} boolean match(Matcher matcher, int i, CharSequence seq) { return next.match(matcher, i, seq); } @@ -4795,34 +4793,6 @@ matcher.locals[localIndex] = save; return ret; } - boolean matchRef(Matcher matcher, int i, CharSequence seq) { - int save = matcher.locals[localIndex]; - matcher.locals[localIndex] = ~i; // HACK - boolean ret = next.match(matcher, i, seq); - matcher.locals[localIndex] = save; - return ret; - } - } - - /** - * Recursive reference to a group in the regular expression. It calls - * matchRef because if the reference fails to match we would not unset - * the group. - */ - static final class GroupRef extends Node { - GroupHead head; - GroupRef(GroupHead head) { - this.head = head; - } - boolean match(Matcher matcher, int i, CharSequence seq) { - return head.matchRef(matcher, i, seq) - && next.match(matcher, matcher.last, seq); - } - boolean study(TreeInfo info) { - info.maxValid = false; - info.deterministic = false; - return next.study(info); - } } /** @@ -4944,7 +4914,7 @@ } boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; - boolean ret = false; + boolean ret; if (posIndex != -1 && matcher.localsPos[posIndex] == null) { matcher.localsPos[posIndex] = new IntHashSet(); } @@ -5168,7 +5138,7 @@ } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; - boolean conditionMatched = false; + boolean conditionMatched; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) @@ -5193,7 +5163,7 @@ } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; - boolean conditionMatched = false; + boolean conditionMatched; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) @@ -5219,11 +5189,15 @@ * For use with lookbehinds; matches the position where the lookbehind * was encountered. */ - static Node lookbehindEnd = new Node() { + static class LookBehindEndNode extends Node { + private LookBehindEndNode() {} // Singleton + + static LookBehindEndNode INSTANCE = new LookBehindEndNode(); + boolean match(Matcher matcher, int i, CharSequence seq) { return i == matcher.lookbehindTo; } - }; + } /** * Zero width positive lookbehind. @@ -5491,7 +5465,7 @@ if (patternLength < 4) { return node; } - int i, j, k; + int i, j; int[] lastOcc = new int[128]; int[] optoSft = new int[patternLength]; // Precalculate part of the bad character shift @@ -5646,7 +5620,7 @@ static interface BmpCharPredicate extends CharPredicate { default CharPredicate and(CharPredicate p) { - if(p instanceof BmpCharPredicate) + if (p instanceof BmpCharPredicate) return (BmpCharPredicate)(ch -> is(ch) && p.is(ch)); return ch -> is(ch) && p.is(ch); } diff -r cfd1e298ca33 -r 28c93b5fb056 test/micro/org/openjdk/bench/java/util/regex/PatternBench.java --- a/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java Wed Jun 05 09:23:09 2019 +0200 +++ b/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java Wed Jun 05 10:07:22 2019 +0200 @@ -45,12 +45,13 @@ public String fileTestString; public String flagsString; - public Pattern graphemePattern; public Pattern jmodPattern; public Pattern jmodCanonicalPattern; - public Pattern pattern; + public String charPatternRegex; + public String[] charPatternStrings; + public Pattern charPattern; @Setup public void setup() { @@ -61,6 +62,10 @@ String jmodRegex = "^.*(?:(?:_the\\.[^/]*)|(?:_[^/]*\\.marker)|(?:[^/]*\\.diz)|(?:[^/]*\\.debuginfo)|(?:[^/]*\\.dSYM/.*)|(?:[^/]*\\.dSYM)|(?:[^/]*\\.pdb)|(?:[^/]*\\.map))$"; jmodCanonicalPattern = Pattern.compile(jmodRegex, Pattern.CANON_EQ); jmodPattern = Pattern.compile(jmodRegex); + + charPatternRegex = "[ a-zA-Z]*foo[ a-zA-Z0-9]*bar[ a-z]*"; + charPatternStrings = new String[] {"avaaafooddddddbariiii", "lorem ipsum dolor foo bar", "fpp brr lorem ipsum dolor foo bar %", "lorem ipsum dolor foo bar lorem ipsum dolor foo bar lorem ipsum dolor foo bar /"}; + charPattern = Pattern.compile(charPatternRegex); } @Benchmark @@ -83,4 +88,30 @@ public boolean normalJmodMatch() { return jmodPattern.matcher(fileTestString).matches(); } + + @Benchmark + @Warmup(iterations = 3) + @Measurement(iterations = 3) + public boolean charPatternMatch() { + return charPattern.matcher(charPatternStrings[0]).matches() + && charPattern.matcher(charPatternStrings[1]).matches() + && charPattern.matcher(charPatternStrings[2]).matches(); + } + + @Benchmark + @Warmup(iterations = 3) + @Measurement(iterations = 3) + public boolean charPatternMatchWithCompile() { + Pattern p = Pattern.compile(charPatternRegex); + return p.matcher(charPatternStrings[0]).matches() + && p.matcher(charPatternStrings[1]).matches() + && p.matcher(charPatternStrings[2]).matches(); + } + + @Benchmark + @Warmup(iterations = 3) + @Measurement(iterations = 3) + public Pattern charPatternCompile() { + return Pattern.compile(charPatternRegex); + } }