8225179: (regex) Minor Pattern cleanup
authorredestad
Wed, 05 Jun 2019 10:07:22 +0200
changeset 55220 28c93b5fb056
parent 55219 cfd1e298ca33
child 55221 27d3b8e5c58b
8225179: (regex) Minor Pattern cleanup Reviewed-by: igerasim
src/java.base/share/classes/java/util/regex/Pattern.java
test/micro/org/openjdk/bench/java/util/regex/PatternBench.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java	Wed Jun 05 09:23:09 2019 +0200
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java	Wed Jun 05 10:07:22 2019 +0200
@@ -2100,7 +2100,7 @@
     private Node sequence(Node end) {
         Node head = null;
         Node tail = null;
-        Node node = null;
+        Node node;
     LOOP:
         for (;;) {
             int ch = peek();
@@ -2617,7 +2617,6 @@
         CharPredicate prev = null;
         CharPredicate curr = null;
         BitClass bits = new BitClass();
-        BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch];
 
         boolean isNeg = false;
         boolean hasBits = false;
@@ -2658,9 +2657,9 @@
                         if (hasBits) {
                             // bits used, union has high precedence
                             if (prev == null) {
-                                prev = curr = bitsP;
+                                prev = curr = bits;
                             } else {
-                                prev = prev.union(bitsP);
+                                prev = prev.union(bits);
                             }
                             hasBits = false;
                         }
@@ -2689,9 +2688,9 @@
                         if (consume)
                             next();
                         if (prev == null)
-                            prev = bitsP;
+                            prev = bits;
                         else if (hasBits)
-                            prev = prev.union(bitsP);
+                            prev = prev.union(bits);
                         if (isNeg)
                             return prev.negate();
                         return prev;
@@ -2947,8 +2946,8 @@
      */
     private Node group0() {
         boolean capturingGroup = false;
-        Node head = null;
-        Node tail = null;
+        Node head;
+        Node tail;
         int save = flags0;
         int saveTCNCount = topClosureNodes.size();
         root = null;
@@ -2997,7 +2996,7 @@
                 head = createGroup(true);
                 tail = root;
                 head.next = expr(tail);
-                tail.next = lookbehindEnd;
+                tail.next = LookBehindEndNode.INSTANCE;
                 TreeInfo info = new TreeInfo();
                 head.study(info);
                 if (info.maxValid == false) {
@@ -3253,7 +3252,6 @@
      * Prev could be a single or a group, so it could be a chain of nodes.
      */
     private Node closure(Node prev) {
-        Node atom;
         int ch = peek();
         switch (ch) {
         case '?':
@@ -3486,14 +3484,10 @@
      *  never matches values above Latin-1, and a complemented BitClass always
      *  matches values above Latin-1.
      */
-    static final class BitClass extends BmpCharProperty {
+    static final class BitClass implements BmpCharPredicate {
         final boolean[] bits;
         BitClass() {
-            this(new boolean[256]);
-        }
-        private BitClass(boolean[] bits) {
-            super( ch -> ch < 256 && bits[ch]);
-            this.bits = bits;
+            bits = new boolean[256];
         }
         BitClass add(int c, int flags) {
             assert c >= 0 && c <= 255;
@@ -3509,8 +3503,12 @@
             bits[c] = true;
             return this;
         }
+        public boolean is(int ch) {
+            return ch < 256 && bits[ch];
+        }
     }
 
+
     /**
      *  Utility method for creating a string slice matcher.
      */
@@ -3923,7 +3921,7 @@
      * boolean property.
      */
     static class CharProperty extends Node {
-        CharPredicate predicate;
+        final CharPredicate predicate;
 
         CharProperty (CharPredicate predicate) {
             this.predicate = predicate;
@@ -4698,7 +4696,7 @@
      * "next".
      */
     static final class BranchConn extends Node {
-        BranchConn() {};
+        BranchConn() {}
         boolean match(Matcher matcher, int i, CharSequence seq) {
             return next.match(matcher, i, seq);
         }
@@ -4795,34 +4793,6 @@
             matcher.locals[localIndex] = save;
             return ret;
         }
-        boolean matchRef(Matcher matcher, int i, CharSequence seq) {
-            int save = matcher.locals[localIndex];
-            matcher.locals[localIndex] = ~i; // HACK
-            boolean ret = next.match(matcher, i, seq);
-            matcher.locals[localIndex] = save;
-            return ret;
-        }
-    }
-
-    /**
-     * Recursive reference to a group in the regular expression. It calls
-     * matchRef because if the reference fails to match we would not unset
-     * the group.
-     */
-    static final class GroupRef extends Node {
-        GroupHead head;
-        GroupRef(GroupHead head) {
-            this.head = head;
-        }
-        boolean match(Matcher matcher, int i, CharSequence seq) {
-            return head.matchRef(matcher, i, seq)
-                && next.match(matcher, matcher.last, seq);
-        }
-        boolean study(TreeInfo info) {
-            info.maxValid = false;
-            info.deterministic = false;
-            return next.study(info);
-        }
     }
 
     /**
@@ -4944,7 +4914,7 @@
         }
         boolean matchInit(Matcher matcher, int i, CharSequence seq) {
             int save = matcher.locals[countIndex];
-            boolean ret = false;
+            boolean ret;
             if (posIndex != -1 && matcher.localsPos[posIndex] == null) {
                 matcher.localsPos[posIndex] = new IntHashSet();
             }
@@ -5168,7 +5138,7 @@
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {
             int savedTo = matcher.to;
-            boolean conditionMatched = false;
+            boolean conditionMatched;
 
             // Relax transparent region boundaries for lookahead
             if (matcher.transparentBounds)
@@ -5193,7 +5163,7 @@
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {
             int savedTo = matcher.to;
-            boolean conditionMatched = false;
+            boolean conditionMatched;
 
             // Relax transparent region boundaries for lookahead
             if (matcher.transparentBounds)
@@ -5219,11 +5189,15 @@
      * For use with lookbehinds; matches the position where the lookbehind
      * was encountered.
      */
-    static Node lookbehindEnd = new Node() {
+    static class LookBehindEndNode extends Node {
+        private LookBehindEndNode() {} // Singleton
+
+        static LookBehindEndNode INSTANCE = new LookBehindEndNode();
+
         boolean match(Matcher matcher, int i, CharSequence seq) {
             return i == matcher.lookbehindTo;
         }
-    };
+    }
 
     /**
      * Zero width positive lookbehind.
@@ -5491,7 +5465,7 @@
             if (patternLength < 4) {
                 return node;
             }
-            int i, j, k;
+            int i, j;
             int[] lastOcc = new int[128];
             int[] optoSft = new int[patternLength];
             // Precalculate part of the bad character shift
@@ -5646,7 +5620,7 @@
     static interface BmpCharPredicate extends CharPredicate {
 
         default CharPredicate and(CharPredicate p) {
-            if(p instanceof BmpCharPredicate)
+            if (p instanceof BmpCharPredicate)
                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
             return ch -> is(ch) && p.is(ch);
         }
--- a/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java	Wed Jun 05 09:23:09 2019 +0200
+++ b/test/micro/org/openjdk/bench/java/util/regex/PatternBench.java	Wed Jun 05 10:07:22 2019 +0200
@@ -45,12 +45,13 @@
     public String fileTestString;
     public String flagsString;
 
-
     public Pattern graphemePattern;
     public Pattern jmodPattern;
     public Pattern jmodCanonicalPattern;
 
-    public Pattern pattern;
+    public String charPatternRegex;
+    public String[] charPatternStrings;
+    public Pattern charPattern;
 
     @Setup
     public void setup() {
@@ -61,6 +62,10 @@
         String jmodRegex = "^.*(?:(?:_the\\.[^/]*)|(?:_[^/]*\\.marker)|(?:[^/]*\\.diz)|(?:[^/]*\\.debuginfo)|(?:[^/]*\\.dSYM/.*)|(?:[^/]*\\.dSYM)|(?:[^/]*\\.pdb)|(?:[^/]*\\.map))$";
         jmodCanonicalPattern = Pattern.compile(jmodRegex, Pattern.CANON_EQ);
         jmodPattern = Pattern.compile(jmodRegex);
+
+        charPatternRegex = "[ a-zA-Z]*foo[ a-zA-Z0-9]*bar[ a-z]*";
+        charPatternStrings = new String[] {"avaaafooddddddbariiii", "lorem ipsum dolor foo bar", "fpp brr lorem ipsum dolor foo bar %", "lorem ipsum dolor foo bar lorem ipsum dolor foo bar lorem ipsum dolor foo bar /"};
+        charPattern = Pattern.compile(charPatternRegex);
     }
 
     @Benchmark
@@ -83,4 +88,30 @@
     public boolean normalJmodMatch() {
         return jmodPattern.matcher(fileTestString).matches();
     }
+
+    @Benchmark
+    @Warmup(iterations = 3)
+    @Measurement(iterations = 3)
+    public boolean charPatternMatch() {
+        return charPattern.matcher(charPatternStrings[0]).matches()
+                && charPattern.matcher(charPatternStrings[1]).matches()
+                && charPattern.matcher(charPatternStrings[2]).matches();
+    }
+
+    @Benchmark
+    @Warmup(iterations = 3)
+    @Measurement(iterations = 3)
+    public boolean charPatternMatchWithCompile() {
+        Pattern p = Pattern.compile(charPatternRegex);
+        return p.matcher(charPatternStrings[0]).matches()
+                && p.matcher(charPatternStrings[1]).matches()
+                && p.matcher(charPatternStrings[2]).matches();
+    }
+
+    @Benchmark
+    @Warmup(iterations = 3)
+    @Measurement(iterations = 3)
+    public Pattern charPatternCompile() {
+        return Pattern.compile(charPatternRegex);
+    }
 }