8008093: Make RegExp engine pluggable
authorhannesw
Fri, 22 Feb 2013 16:31:10 +0100
changeset 16258 0e25f785df4d
parent 16257 781330354161
child 16259 bb504280c322
8008093: Make RegExp engine pluggable Reviewed-by: lagergren, attila
nashorn/src/jdk/nashorn/internal/objects/Global.java
nashorn/src/jdk/nashorn/internal/objects/NativeRegExp.java
nashorn/src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java
nashorn/src/jdk/nashorn/internal/objects/NativeString.java
nashorn/src/jdk/nashorn/internal/parser/AbstractParser.java
nashorn/src/jdk/nashorn/internal/runtime/RegExp.java
nashorn/src/jdk/nashorn/internal/runtime/RegExpMatch.java
nashorn/src/jdk/nashorn/internal/runtime/RegExpScanner.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/DefaultRegExp.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExp.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpMatcher.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpResult.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java
--- a/nashorn/src/jdk/nashorn/internal/objects/Global.java	Fri Feb 22 10:39:00 2013 -0400
+++ b/nashorn/src/jdk/nashorn/internal/objects/Global.java	Fri Feb 22 16:31:10 2013 +0100
@@ -50,7 +50,7 @@
 import jdk.nashorn.internal.runtime.NativeJavaPackage;
 import jdk.nashorn.internal.runtime.OptionsObject;
 import jdk.nashorn.internal.runtime.PropertyDescriptor;
-import jdk.nashorn.internal.runtime.RegExpMatch;
+import jdk.nashorn.internal.runtime.regexp.RegExpResult;
 import jdk.nashorn.internal.runtime.Scope;
 import jdk.nashorn.internal.runtime.ScriptFunction;
 import jdk.nashorn.internal.runtime.ScriptObject;
@@ -339,7 +339,7 @@
     private ClassCache classCache;
 
     // Used to store the last RegExp result to support deprecated RegExp constructor properties
-    private RegExpMatch lastRegExpMatch;
+    private RegExpResult lastRegExpResult;
 
     private static final MethodHandle EVAL    = findOwnMH("eval",    Object.class, Object.class, Object.class);
     private static final MethodHandle PRINT   = findOwnMH("print",   Object.class, Object.class, Object[].class);
@@ -1709,12 +1709,12 @@
         return MH.findStatic(MethodHandles.publicLookup(), Global.class, name, MH.type(rtype, types));
     }
 
-    RegExpMatch getLastRegExpMatch() {
-        return lastRegExpMatch;
+    RegExpResult getLastRegExpResult() {
+        return lastRegExpResult;
     }
 
-    void setLastRegExpMatch(RegExpMatch regExpMatch) {
-        this.lastRegExpMatch = regExpMatch;
+    void setLastRegExpResult(final RegExpResult regExpResult) {
+        this.lastRegExpResult = regExpResult;
     }
 
 }
--- a/nashorn/src/jdk/nashorn/internal/objects/NativeRegExp.java	Fri Feb 22 10:39:00 2013 -0400
+++ b/nashorn/src/jdk/nashorn/internal/objects/NativeRegExp.java	Fri Feb 22 16:31:10 2013 +0100
@@ -31,8 +31,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+
 import jdk.nashorn.internal.objects.annotations.Attribute;
 import jdk.nashorn.internal.objects.annotations.Constructor;
 import jdk.nashorn.internal.objects.annotations.Function;
@@ -44,8 +43,10 @@
 import jdk.nashorn.internal.runtime.BitVector;
 import jdk.nashorn.internal.runtime.JSType;
 import jdk.nashorn.internal.runtime.ParserException;
-import jdk.nashorn.internal.runtime.RegExp;
-import jdk.nashorn.internal.runtime.RegExpMatch;
+import jdk.nashorn.internal.runtime.regexp.RegExp;
+import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
+import jdk.nashorn.internal.runtime.regexp.RegExpResult;
+import jdk.nashorn.internal.runtime.regexp.RegExpMatcher;
 import jdk.nashorn.internal.runtime.ScriptFunction;
 import jdk.nashorn.internal.runtime.ScriptObject;
 import jdk.nashorn.internal.runtime.ScriptRuntime;
@@ -59,35 +60,15 @@
     @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
     public Object lastIndex;
 
-    /** Pattern string. */
-    private String input;
-
-    /** Global search flag for this regexp. */
-    private boolean global;
-
-    /** Case insensitive flag for this regexp */
-    private boolean ignoreCase;
-
-    /** Multi-line flag for this regexp */
-    private boolean multiline;
-
-    /** Java regex pattern to use for match. We compile to one of these */
-    private Pattern pattern;
-
-    private BitVector groupsInNegativeLookahead;
+    /** Compiled regexp */
+    private RegExp regexp;
 
     // Reference to global object needed to support static RegExp properties
     private Global globalObject;
 
-    /*
-    public NativeRegExp() {
-        init();
-    }*/
-
     NativeRegExp(final String input, final String flagString) {
-        RegExp regExp = null;
         try {
-            regExp = new RegExp(input, flagString);
+            this.regexp = RegExpFactory.create(input, flagString);
         } catch (final ParserException e) {
             // translate it as SyntaxError object and throw it
             e.throwAsEcmaException();
@@ -95,13 +76,6 @@
         }
 
         this.setLastIndex(0);
-        this.input = regExp.getInput();
-        this.global = regExp.isGlobal();
-        this.ignoreCase = regExp.isIgnoreCase();
-        this.multiline = regExp.isMultiline();
-        this.pattern = regExp.getPattern();
-        this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
-
         init();
     }
 
@@ -110,24 +84,8 @@
     }
 
     NativeRegExp(final NativeRegExp regExp) {
-        this.input      = regExp.getInput();
-        this.global     = regExp.getGlobal();
-        this.multiline  = regExp.getMultiline();
-        this.ignoreCase = regExp.getIgnoreCase();
         this.lastIndex  = regExp.getLastIndexObject();
-        this.pattern    = regExp.getPattern();
-        this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
-
-        init();
-    }
-
-    NativeRegExp(final Pattern pattern) {
-        this.input      = pattern.pattern();
-        this.multiline  = (pattern.flags() & Pattern.MULTILINE) != 0;
-        this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
-        this.lastIndex  = 0;
-        this.pattern    = pattern;
-
+        this.regexp      = regExp.getRegExp();
         init();
     }
 
@@ -232,16 +190,59 @@
         return new NativeRegExp(patternString, flagString);
     }
 
+    /**
+     * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped.
+     *
+     * @param string pattern string
+     * @return flat regexp
+     */
+    static NativeRegExp flatRegExp(String string) {
+        // escape special characters
+        StringBuilder sb = null;
+        final int length = string.length();
+
+        for (int i = 0; i < length; i++) {
+            final char c = string.charAt(i);
+            switch (c) {
+                case '^':
+                case '$':
+                case '\\':
+                case '.':
+                case '*':
+                case '+':
+                case '?':
+                case '(':
+                case ')':
+                case '[':
+                case '{':
+                case '|':
+                    if (sb == null) {
+                        sb = new StringBuilder(length * 2);
+                        sb.append(string, 0, i);
+                    }
+                    sb.append('\\');
+                    sb.append(c);
+                    break;
+                default:
+                    if (sb != null) {
+                        sb.append(c);
+                    }
+                    break;
+            }
+        }
+        return new NativeRegExp(sb == null ? string : sb.toString(), "");
+    }
+
     private String getFlagString() {
-        final StringBuilder sb = new StringBuilder();
+        final StringBuilder sb = new StringBuilder(3);
 
-        if (global) {
+        if (regexp.isGlobal()) {
             sb.append('g');
         }
-        if (ignoreCase) {
+        if (regexp.isIgnoreCase()) {
             sb.append('i');
         }
-        if (multiline) {
+        if (regexp.isMultiline()) {
             sb.append('m');
         }
 
@@ -255,7 +256,7 @@
 
     @Override
     public String toString() {
-        return "/" + input + "/" + getFlagString();
+        return "/" + regexp.getSource() + "/" + getFlagString();
     }
 
     /**
@@ -270,13 +271,8 @@
     public static Object compile(final Object self, final Object pattern, final Object flags) {
         final NativeRegExp regExp   = checkRegExp(self);
         final NativeRegExp compiled = newRegExp(pattern, flags);
-        // copy over fields to 'self'
-        regExp.setInput(compiled.getInput());
-        regExp.setGlobal(compiled.getGlobal());
-        regExp.setIgnoreCase(compiled.getIgnoreCase());
-        regExp.setMultiline(compiled.getMultiline());
-        regExp.setPattern(compiled.getPattern());
-        regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
+        // copy over regexp to 'self'
+        regExp.setRegExp(compiled.getRegExp());
 
         // Some implementations return undefined. Some return 'self'. Since return
         // value is most likely be ignored, we can play safe and return 'self'.
@@ -326,7 +322,7 @@
      */
     @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
     public static Object source(final Object self) {
-        return checkRegExp(self).input;
+        return checkRegExp(self).getRegExp().getSource();
     }
 
     /**
@@ -337,7 +333,7 @@
      */
     @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
     public static Object global(final Object self) {
-        return checkRegExp(self).global;
+        return checkRegExp(self).getRegExp().isGlobal();
     }
 
     /**
@@ -348,7 +344,7 @@
      */
     @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
     public static Object ignoreCase(final Object self) {
-        return checkRegExp(self).ignoreCase;
+        return checkRegExp(self).getRegExp().isIgnoreCase();
     }
 
     /**
@@ -359,7 +355,7 @@
      */
     @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
     public static Object multiline(final Object self) {
-        return checkRegExp(self).multiline;
+        return checkRegExp(self).getRegExp().isMultiline();
     }
 
     /**
@@ -369,7 +365,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input")
     public static Object getLastInput(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getInput();
     }
 
@@ -390,7 +386,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch")
     public static Object getLastMatch(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(0);
     }
 
@@ -401,7 +397,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen")
     public static Object getLastParen(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getLastParen();
     }
 
@@ -412,7 +408,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext")
     public static Object getLeftContext(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getInput().substring(0, match.getIndex());
     }
 
@@ -423,7 +419,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext")
     public static Object getRightContext(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getInput().substring(match.getIndex() + match.length());
     }
 
@@ -434,7 +430,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1")
     public static Object getGroup1(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(1);
     }
 
@@ -445,7 +441,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2")
     public static Object getGroup2(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(2);
     }
 
@@ -456,7 +452,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3")
     public static Object getGroup3(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(3);
     }
 
@@ -467,7 +463,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4")
     public static Object getGroup4(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(4);
     }
 
@@ -478,7 +474,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5")
     public static Object getGroup5(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(5);
     }
 
@@ -489,7 +485,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6")
     public static Object getGroup6(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(6);
     }
 
@@ -500,7 +496,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7")
     public static Object getGroup7(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(7);
     }
 
@@ -511,7 +507,7 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8")
     public static Object getGroup8(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(8);
     }
 
@@ -522,34 +518,30 @@
      */
     @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9")
     public static Object getGroup9(Object self) {
-        final RegExpMatch match = Global.instance().getLastRegExpMatch();
+        final RegExpResult match = Global.instance().getLastRegExpResult();
         return match == null ? "" : match.getGroup(9);
     }
 
-    private RegExpMatch execInner(final String string) {
-        if (this.pattern == null) {
-            return null; // never matches or similar, e.g. a[]
-        }
+    private RegExpResult execInner(final String string) {
 
-        final Matcher matcher = pattern.matcher(string);
-        final int start = this.global ? getLastIndex() : 0;
-
+        final int start = regexp.isGlobal() ? getLastIndex() : 0;
         if (start < 0 || start > string.length()) {
             setLastIndex(0);
             return null;
         }
 
-        if (!matcher.find(start)) {
+        final RegExpMatcher matcher = regexp.match(string);
+        if (matcher == null || !matcher.search(start)) {
             setLastIndex(0);
             return null;
         }
 
-        if (global) {
+        if (regexp.isGlobal()) {
             setLastIndex(matcher.end());
         }
 
-        final RegExpMatch match = new RegExpMatch(string, matcher.start(), groups(matcher));
-        globalObject.setLastRegExpMatch(match);
+        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
+        globalObject.setLastRegExpResult(match);
         return match;
     }
 
@@ -557,9 +549,11 @@
      * Convert java.util.regex.Matcher groups to JavaScript groups.
      * That is, replace null and groups that didn't match with undefined.
      */
-    private Object[] groups(final Matcher matcher) {
+    private Object[] groups(final RegExpMatcher matcher) {
         final int groupCount = matcher.groupCount();
         final Object[] groups = new Object[groupCount + 1];
+        final BitVector groupsInNegativeLookahead  = regexp.getGroupsInNegativeLookahead();
+
         for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
             final int groupStart = matcher.start(i);
             if (lastGroupStart > groupStart
@@ -586,7 +580,7 @@
      * @return NativeArray of matches, string or null.
      */
     public Object exec(final String string) {
-        final RegExpMatch match = execInner(string);
+        final RegExpResult match = execInner(string);
 
         if (match == null) {
             return null;
@@ -617,7 +611,12 @@
      * @return String with substitutions.
      */
     Object replace(final String string, final String replacement, final ScriptFunction function) {
-        final Matcher matcher = pattern.matcher(string);
+        final RegExpMatcher matcher = regexp.match(string);
+
+        if (matcher == null) {
+            return string;
+        }
+
         /*
          * $$ -> $
          * $& -> the matched substring
@@ -628,8 +627,8 @@
          */
         String replace = replacement;
 
-        if (!global) {
-            if (!matcher.find()) {
+        if (!regexp.isGlobal()) {
+            if (!matcher.search(0)) {
                 return string;
             }
 
@@ -642,45 +641,39 @@
             return sb.toString();
         }
 
-        int end = 0; // a.k.a. lastAppendPosition
         setLastIndex(0);
 
-        boolean found;
-        try {
-            found = matcher.find(end);
-        } catch (final IndexOutOfBoundsException e) {
-            found = false;
-        }
-
-        if (!found) {
+        if (!matcher.search(0)) {
             return string;
         }
 
+        int thisIndex = 0;
         int previousLastIndex = 0;
         final StringBuilder sb = new StringBuilder();
+
         do {
             if (function != null) {
                 replace = callReplaceValue(function, matcher, string);
             }
-            appendReplacement(matcher, string, replace, sb, end);
-            end = matcher.end();
+
+            appendReplacement(matcher, string, replace, sb, thisIndex);
 
             // ECMA 15.5.4.10 String.prototype.match(regexp)
-            final int thisIndex = end;
+            thisIndex = matcher.end();
             if (thisIndex == previousLastIndex) {
                 setLastIndex(thisIndex + 1);
                 previousLastIndex = thisIndex + 1;
             } else {
                 previousLastIndex = thisIndex;
             }
-        } while (matcher.find());
+        } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex));
 
-        sb.append(string, end, string.length());
+        sb.append(string, thisIndex, string.length());
 
         return sb.toString();
     }
 
-    private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
+    private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
         // Process substitution string to replace group references with groups
         int cursor = 0;
         final StringBuilder result = new StringBuilder();
@@ -748,7 +741,7 @@
         sb.append(result);
     }
 
-    private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
+    private String callReplaceValue(final ScriptFunction function, final RegExpMatcher matcher, final String string) {
         final Object[] groups = groups(matcher);
         final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
 
@@ -782,7 +775,7 @@
             return new NativeArray();
         }
 
-        RegExpMatch match;
+        RegExpResult match;
         final int inputLength = input.length();
         int lastLength = -1;
         int lastLastIndex = 0;
@@ -834,7 +827,7 @@
      * @return Index of match.
      */
     Object search(final String string) {
-        final RegExpMatch match = execInner(string);
+        final RegExpResult match = execInner(string);
 
         if (match == null) {
             return -1;
@@ -884,52 +877,20 @@
         }
     }
 
-    private String getInput() {
-        return input;
-    }
-
-    private void setInput(final String input) {
-        this.input = input;
+    private void setGlobal(final boolean global) {
+        regexp.setGlobal(global);
     }
 
     boolean getGlobal() {
-        return global;
-    }
-
-    private void setGlobal(final boolean global) {
-        this.global = global;
-    }
-
-    private boolean getIgnoreCase() {
-        return ignoreCase;
-    }
-
-    private void setIgnoreCase(final boolean ignoreCase) {
-        this.ignoreCase = ignoreCase;
-    }
-
-    private boolean getMultiline() {
-        return multiline;
+        return regexp.isGlobal();
     }
 
-    private void setMultiline(final boolean multiline) {
-        this.multiline = multiline;
-    }
-
-    private Pattern getPattern() {
-        return pattern;
+    private RegExp getRegExp() {
+        return regexp;
     }
 
-    private void setPattern(final Pattern pattern) {
-        this.pattern = pattern;
-    }
-
-    private BitVector getGroupsInNegativeLookahead() {
-        return groupsInNegativeLookahead;
-    }
-
-    private void setGroupsInNegativeLookahead(final BitVector groupsInNegativeLookahead) {
-        this.groupsInNegativeLookahead = groupsInNegativeLookahead;
+    private void setRegExp(final RegExp regexp) {
+        this.regexp = regexp;
     }
 
 }
--- a/nashorn/src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java	Fri Feb 22 10:39:00 2013 -0400
+++ b/nashorn/src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java	Fri Feb 22 16:31:10 2013 +0100
@@ -31,7 +31,7 @@
 import jdk.nashorn.internal.objects.annotations.ScriptClass;
 import jdk.nashorn.internal.objects.annotations.Setter;
 import jdk.nashorn.internal.runtime.JSType;
-import jdk.nashorn.internal.runtime.RegExpMatch;
+import jdk.nashorn.internal.runtime.regexp.RegExpResult;
 import jdk.nashorn.internal.runtime.ScriptObject;
 import jdk.nashorn.internal.runtime.arrays.ArrayData;
 
@@ -49,11 +49,11 @@
     @Property
     public Object input;
 
-    NativeRegExpExecResult(final RegExpMatch match) {
+    NativeRegExpExecResult(final RegExpResult result) {
         setProto(Global.instance().getArrayPrototype());
-        this.setArray(ArrayData.allocate(match.getGroups().clone()));
-        this.index = match.getIndex();
-        this.input = match.getInput();
+        this.setArray(ArrayData.allocate(result.getGroups().clone()));
+        this.index = result.getIndex();
+        this.input = result.getInput();
     }
 
     /**
--- a/nashorn/src/jdk/nashorn/internal/objects/NativeString.java	Fri Feb 22 10:39:00 2013 -0400
+++ b/nashorn/src/jdk/nashorn/internal/objects/NativeString.java	Fri Feb 22 16:31:10 2013 +0100
@@ -38,7 +38,6 @@
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.regex.Pattern;
 import jdk.internal.dynalink.CallSiteDescriptor;
 import jdk.internal.dynalink.linker.GuardedInvocation;
 import jdk.internal.dynalink.linker.LinkRequest;
@@ -712,7 +711,7 @@
         if (string instanceof NativeRegExp) {
             nativeRegExp = (NativeRegExp) string;
         } else {
-            nativeRegExp = new NativeRegExp(Pattern.compile(JSType.toString(string), Pattern.LITERAL));
+            nativeRegExp = NativeRegExp.flatRegExp(JSType.toString(string));
         }
 
         if (replacement instanceof ScriptFunction) {
--- a/nashorn/src/jdk/nashorn/internal/parser/AbstractParser.java	Fri Feb 22 10:39:00 2013 -0400
+++ b/nashorn/src/jdk/nashorn/internal/parser/AbstractParser.java	Fri Feb 22 16:31:10 2013 +0100
@@ -37,7 +37,7 @@
 import jdk.nashorn.internal.runtime.ErrorManager;
 import jdk.nashorn.internal.runtime.JSErrorType;
 import jdk.nashorn.internal.runtime.ParserException;
-import jdk.nashorn.internal.runtime.RegExp;
+import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
 import jdk.nashorn.internal.runtime.Source;
 
 /**
@@ -427,7 +427,7 @@
             if (value instanceof RegexToken) {
                 final RegexToken regex = (RegexToken)value;
                 try {
-                    RegExp.validate(regex.getExpression(), regex.getOptions());
+                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
                 } catch (final ParserException e) {
                     error(e.getMessage());
                 }
--- a/nashorn/src/jdk/nashorn/internal/runtime/RegExp.java	Fri Feb 22 10:39:00 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package jdk.nashorn.internal.runtime;
-
-import static java.util.regex.Pattern.CASE_INSENSITIVE;
-import static java.util.regex.Pattern.MULTILINE;
-import static java.util.regex.Pattern.UNICODE_CASE;
-
-import java.util.HashSet;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
-/**
- * This class is used to represent a parsed regular expression. Accepts input
- * pattern string and flagString. This is used by AbstractParser to validate
- * RegExp literals as well as by NativeRegExp to parse RegExp constructor arguments.
- */
-public final class RegExp {
-    /** Pattern string. */
-    private final String input;
-
-    /** Global search flag for this regexp.*/
-    private boolean global;
-
-    /** Case insensitive flag for this regexp */
-    private boolean ignoreCase;
-
-    /** Multi-line flag for this regexp */
-    private boolean multiline;
-
-    /** Java regexp pattern to use for match. We compile to one of these */
-    private Pattern pattern;
-
-    /** BitVector that keeps track of groups in negative lookahead */
-    private BitVector groupsInNegativeLookahead;
-
-    /**
-     * Creates RegExpLiteral object from given input and flagString.
-     *
-     * @param input RegExp pattern string
-     * @param flagString RegExp flags
-     * @throws ParserException if flagString is invalid or input string has syntax error.
-     */
-    public RegExp(final String input, final String flagString) throws ParserException {
-        this.input = input;
-        final HashSet<Character> usedFlags = new HashSet<>();
-        int flags = 0;
-
-        for (final char ch : flagString.toCharArray()) {
-            if (usedFlags.contains(ch)) {
-                throwParserException("repeated.flag", Character.toString(ch));
-            }
-
-            switch (ch) {
-            case 'g':
-                this.global = true;
-                usedFlags.add(ch);
-                break;
-            case 'i':
-                this.ignoreCase = true;
-                flags |= CASE_INSENSITIVE | UNICODE_CASE;
-                usedFlags.add(ch);
-                break;
-            case 'm':
-                this.multiline = true;
-                flags |= MULTILINE;
-                usedFlags.add(ch);
-                break;
-            default:
-                throwParserException("unsupported.flag", Character.toString(ch));
-            }
-        }
-
-        try {
-            RegExpScanner parsed;
-
-            try {
-                parsed = RegExpScanner.scan(input);
-            } catch (final PatternSyntaxException e) {
-                // refine the exception with a better syntax error, if this
-                // passes, just rethrow what we have
-                Pattern.compile(input, flags);
-                throw e;
-            }
-
-            if (parsed != null) {
-                this.pattern = Pattern.compile(parsed.getJavaPattern(), flags);
-                this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
-            }
-        } catch (final PatternSyntaxException e2) {
-            throwParserException("syntax", e2.getMessage());
-        }
-
-    }
-
-    /**
-     * @return the input
-     */
-    public String getInput() {
-        return input;
-    }
-
-    /**
-     * @return the global
-     */
-    public boolean isGlobal() {
-        return global;
-    }
-
-    /**
-     * @return the ignoreCase
-     */
-    public boolean isIgnoreCase() {
-        return ignoreCase;
-    }
-
-    /**
-     * @return the multiline
-     */
-    public boolean isMultiline() {
-        return multiline;
-    }
-
-    /**
-     * @return the pattern
-     */
-    public Pattern getPattern() {
-        return pattern;
-    }
-
-    /**
-     * @return the groupsInNegativeLookahead
-     */
-    public BitVector getGroupsInNegativeLookahead() {
-        return groupsInNegativeLookahead;
-    }
-
-    /**
-     * Validation method for RegExp input and flagString - we don't care about the RegExp object
-     *
-     * @param input        regexp input
-     * @param flagString   flag string
-     *
-     * @throws ParserException if invalid regexp and flags
-     */
-    @SuppressWarnings({"unused", "ResultOfObjectAllocationIgnored"})
-    public static void validate(final String input, final String flagString) throws ParserException {
-        new RegExp(input, flagString);
-    }
-
-    private static void throwParserException(final String key, final String str) throws ParserException {
-        throw new ParserException(ECMAErrors.getMessage("parser.error.regex." + key, str));
-    }
-}
--- a/nashorn/src/jdk/nashorn/internal/runtime/RegExpMatch.java	Fri Feb 22 10:39:00 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package jdk.nashorn.internal.runtime;
-
-/**
- * Match tuple to keep track of ongoing regexp match.
- */
-public final class RegExpMatch {
-    final Object[] groups;
-    final int      index;
-    final String   input;
-
-    /**
-     * Constructor
-     *
-     * @param input  regexp input
-     * @param index  index of match
-     * @param groups groups vector
-     */
-    public RegExpMatch(final String input, final int index, final Object[] groups) {
-        this.input  = input;
-        this.index  = index;
-        this.groups = groups;
-    }
-
-    /**
-     * Get the groups for the match
-     * @return group vector
-     */
-    public Object[] getGroups() {
-        return groups;
-    }
-
-    /**
-     * Get the input for the map
-     * @return input
-     */
-    public String getInput() {
-        return input;
-    }
-
-    /**
-     * Get the index for the match
-     * @return index
-     */
-    public int getIndex() {
-        return index;
-    }
-
-    /**
-     * Get the length of the match
-     * @return length
-     */
-    public int length() {
-        return ((String)groups[0]).length();
-    }
-
-    /**
-     * Get the group with the given index or the empty string if group index is not valid.
-     * @param index the group index
-     * @return the group or ""
-     */
-    public Object getGroup(int index) {
-        return index >= 0 && index < groups.length ? groups[index] : "";
-    }
-
-    /**
-     * Get the last parenthesis group, or the empty string if none exists.
-     * @return the last group or ""
-     */
-    public Object getLastParen() {
-        return groups.length > 1 ? groups[groups.length - 1] : "";
-    }
-
-}
--- a/nashorn/src/jdk/nashorn/internal/runtime/RegExpScanner.java	Fri Feb 22 10:39:00 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1411 +0,0 @@
-/*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package jdk.nashorn.internal.runtime;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.PatternSyntaxException;
-import jdk.nashorn.internal.parser.Lexer;
-import jdk.nashorn.internal.parser.Scanner;
-
-/**
- * Scan a JavaScript regexp, converting to Java regex if necessary.
- *
- */
-final class RegExpScanner extends Scanner {
-
-    /**
-     * String builder to accumulate the result - this contains verbatim parsed JavaScript.
-     * to get the java equivalent we need to create a Pattern token and return its toString()
-     */
-    private final StringBuilder sb;
-
-    /** An optional error message if one occurred during parse. */
-    private String errorMessage;
-
-    /** Is this the special case of a regexp that never matches anything */
-    private boolean neverMatches;
-
-    /** The resulting java.util.regex pattern string. */
-    private String javaPattern;
-
-    /** Expected token table */
-    private final Map<Character, Integer> expected = new HashMap<>();
-
-    /** Capturing parenthesis that have been found so far. */
-    private final List<Capture> caps = new LinkedList<>();
-
-    /** Forward references to capturing parenthesis to be resolved later.*/
-    private final Map<Integer, Token> forwardReferences = new LinkedHashMap<>();
-
-    /** Current level of zero-width negative lookahead assertions. */
-    private int negativeLookaheadLevel;
-
-    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";
-
-    private static class Capture {
-        /**
-         * Zero-width negative lookaheads enclosing the capture.
-         */
-        private final int negativeLookaheadLevel;
-        /**
-         * Captures that live inside a negative lookahead are dead after the
-         * lookahead and will be undefined if referenced from outside.
-         */
-        private boolean isDead;
-
-        Capture(final int negativeLookaheadLevel) {
-            this.negativeLookaheadLevel = negativeLookaheadLevel;
-        }
-
-        public int getNegativeLookaheadLevel() {
-            return negativeLookaheadLevel;
-        }
-
-        public boolean isDead() {
-            return isDead;
-        }
-
-        public void setDead() {
-            this.isDead = true;
-        }
-    }
-
-    /**
-     * This is a token - the JavaScript regexp is scanned into a token tree
-     * A token has other tokens as children as well as "atoms", i.e. Strings.
-     *
-     */
-    private static class Token {
-
-        private enum Type {
-            PATTERN,
-            DISJUNCTION,
-            ALTERNATIVE,
-            TERM,
-            ASSERTION,
-            QUANTIFIER,
-            QUANTIFIER_PREFIX,
-            ATOM,
-            PATTERN_CHARACTER,
-            ATOM_ESCAPE,
-            CHARACTER_ESCAPE,
-            CONTROL_ESCAPE,
-            CONTROL_LETTER,
-            IDENTITY_ESCAPE,
-            DECIMAL_ESCAPE,
-            CHARACTERCLASS_ESCAPE,
-            CHARACTERCLASS,
-            CLASSRANGES,
-            NON_EMPTY_CLASSRANGES,
-            NON_EMPTY_CLASSRANGES_NODASH,
-            CLASSATOM,
-            CLASSATOM_NODASH,
-            CLASS_ESCAPE,
-            DECIMALDIGITS,
-            HEX_ESCAPESEQUENCE,
-            UNICODE_ESCAPESEQUENCE,
-        }
-
-        /**
-         * Token tyoe
-         */
-        private final Token.Type type;
-
-        /**
-         * Child nodes
-         */
-        private final List<Object> children;
-
-        /**
-         * Parent node
-         */
-        private Token parent;
-
-        /**
-         * Dead code flag
-         */
-        private boolean isDead;
-
-        private static final Map<Type, ToString> toStringMap = new HashMap<>();
-        private static final ToString DEFAULT_TOSTRING = new ToString();
-
-        private static String unicode(final int value) {
-            final StringBuilder sb = new StringBuilder();
-            final String hex = Integer.toHexString(value);
-            sb.append('u');
-            for (int i = 0; i < 4 - hex.length(); i++) {
-                sb.append('0');
-            }
-            sb.append(hex);
-
-            return sb.toString();
-        }
-
-        static {
-            toStringMap.put(Type.CHARACTERCLASS, new ToString() {
-                @Override
-                public String toString(final Token token) {
-                    return super.toString(token).replace("\\b", "\b");
-                }
-            });
-
-            // for some reason java regexps don't like control characters on the
-            // form "\\ca".match([string with ascii 1 at char0]). Translating
-            // them to unicode does it though.
-            toStringMap.put(Type.CHARACTER_ESCAPE, new ToString() {
-                @Override
-                public String toString(final Token token) {
-                    final String str = super.toString(token);
-                    if (str.length() == 2) {
-                        return Token.unicode(Character.toLowerCase(str.charAt(1)) - 'a' + 1);
-                    }
-                    return str;
-                }
-            });
-
-            toStringMap.put(Type.DECIMAL_ESCAPE, new ToString() {
-                @Override
-                public String toString(final Token token) {
-                    final String str = super.toString(token);
-
-                    if ("\0".equals(str)) {
-                        return str;
-                    }
-
-                    int value;
-
-                    if (!token.hasParentOfType(Type.CLASSRANGES)) {
-                        return str;
-                    }
-
-                    value = Integer.parseInt(str, 8); //throws exception that leads to SyntaxError if not octal
-                    if (value > 0xff) {
-                        throw new NumberFormatException(str);
-                    }
-
-                    return Token.unicode(value);
-                }
-            });
-
-        }
-
-        /**
-         * JavaScript Token to Java regex substring framework.
-         *
-         */
-        private static class ToString {
-            String toString(final Token token) {
-                final StringBuilder sb = new StringBuilder();
-                for (final Object child : token.getChildren()) {
-                    sb.append(child);
-                }
-
-                //perform global substitutions that hold true for any evaluated form
-                String str = sb.toString();
-                switch (str) {
-                case "\\s":
-                    str = "[" + Lexer.getWhitespaceRegExp() + "]";
-                    break;
-                case "\\S":
-                    str = "[^" + Lexer.getWhitespaceRegExp() + "]";
-                    break;
-                case "[^]":
-                    str = "[\\s\\S]";
-                    break;
-                default:
-                    break;
-                }
-                return str;
-            }
-        }
-
-        /**
-         * Token iterator. Doesn't return "atom" children. i.e. string representations,
-         * just tokens
-         *
-         */
-        private static class TokenIterator implements Iterator<Token> {
-            private final List<Token> preorder;
-
-            private void init(final Token root) {
-                preorder.add(root);
-                for (final Object child : root.getChildren()) {
-                    if (child instanceof Token) {
-                        init((Token)child);
-                    }
-                }
-            }
-
-            TokenIterator(final Token root) {
-                preorder = new ArrayList<>();
-                init(root);
-            }
-
-            @Override
-            public boolean hasNext() {
-                return !preorder.isEmpty();
-            }
-
-            @Override
-            public Token next() {
-                return preorder.remove(0);
-            }
-
-            @Override
-            public void remove() {
-                next();
-            }
-        }
-
-        /**
-         * Constructor
-         * @param type the token type
-         */
-        Token(final Token.Type type) {
-            this.type = type;
-            children = new ArrayList<>();
-        }
-
-        /**
-         * Add a an "atom" child to a token
-         * @param child the child to add
-         * @return the token (for chaining)
-         */
-        public Token add(final String child) {
-            children.add(child);
-            return this;
-        }
-
-        /**
-         * Add a child to a token
-         * @param child the child
-         * @return the token (for chaining)
-         */
-        public Token add(final Token child) {
-            if (child != null) {
-                children.add(child);
-                child.setParent(this);
-            }
-            return this;
-        }
-
-        /**
-         * Remove a child from a token
-         * @param child the child to remove
-         * @return true if successful
-         */
-        public boolean remove(final Token child) {
-            return children.remove(child);
-        }
-
-        /**
-         * Remove the last child from a token
-         * @return the removed child
-         */
-        public Object removeLast() {
-            return children.remove(children.size() - 1);
-        }
-
-        /**
-         * Flag this token as dead code
-         * @param isDead is it dead or not
-         */
-        private void setIsDead(final boolean isDead) {
-            this.isDead = isDead;
-        }
-
-        /**
-         * Is this token dead code
-         * @return boolean
-         */
-        private boolean getIsDead() {
-            return isDead;
-        }
-
-        /**
-         * Get the parent of this token
-         * @return parent token
-         */
-        public Token getParent() {
-            return parent;
-        }
-
-        public boolean hasParentOfType(final Token.Type parentType) {
-            for (Token p = getParent(); p != null; p = p.getParent()) {
-                if (p.getType() == parentType) {
-                    return true;
-                }
-            }
-            return false;
-        }
-
-        public boolean hasChildOfType(final Token.Type childType) {
-            for (final Iterator<Token> iter = iterator() ; iter.hasNext() ; ) {
-                if (iter.next().getType() == childType) {
-                    return true;
-                }
-            }
-            return false;
-        }
-
-        /**
-         * Set the parent of this token
-         * @param parent
-         */
-        private void setParent(final Token parent) {
-            this.parent = parent;
-        }
-
-        /**
-         * Get the children of this token
-         * @return an array of children, never null
-         */
-        public Object[] getChildren() {
-            return children.toArray();
-        }
-
-        /**
-         * Reset this token, remove all children
-         */
-        public void reset() {
-            children.clear();
-        }
-
-        /**
-         * Get a preorder token iterator with this token as root
-         * @return an iterator
-         */
-        public Iterator<Token> iterator() {
-            return new TokenIterator(this);
-        }
-
-        /**
-         * Get the type of this token
-         * @return type
-         */
-        public Type getType() {
-            return type;
-        }
-
-        /**
-         * Turn this token into Java regexp compatible text
-         * @return part of a java regexp
-         */
-        @Override
-        public String toString() {
-            ToString t = toStringMap.get(getType());
-            if (t == null) {
-                t = DEFAULT_TOSTRING;
-            }
-            return t.toString(this);
-        }
-    }
-
-    /**
-     * Constructor
-     * @param string the JavaScript regexp to parse
-     */
-    private RegExpScanner(final String string) {
-        super(string);
-        sb = new StringBuilder(limit);
-        reset(0);
-        expected.put(']', 0);
-        expected.put('}', 0);
-    }
-
-    private void processForwardReferences() {
-        if (neverMatches()) {
-            return;
-        }
-
-        for (final Map.Entry<Integer, Token> fwdRef : forwardReferences.entrySet()) {
-            if (fwdRef.getKey().intValue() > caps.size()) {
-                neverMatches = true;
-                break;
-            }
-
-            fwdRef.getValue().setIsDead(true);
-        }
-
-        forwardReferences.clear();
-    }
-
-    /**
-     * Scan a JavaScript regexp string returning a Java safe regex string.
-     *
-     * @param string
-     *            JavaScript regexp string.
-     * @return Java safe regex string.
-     */
-    public static RegExpScanner scan(final String string) {
-        final RegExpScanner scanner = new RegExpScanner(string);
-
-        Token pattern;
-
-        try {
-            pattern = scanner.pattern();
-        } catch (final Exception e) {
-            throw new PatternSyntaxException(e.getMessage(), string, scanner.sb.length());
-        }
-
-        scanner.processForwardReferences();
-        if (scanner.neverMatches()) {
-            return null; // never matches
-        }
-
-        // go over the code and remove dead code
-        final Iterator<Token> iter = pattern.iterator();
-        while (iter.hasNext()) {
-            final Token next = iter.next();
-            if (next.getIsDead()) {
-                next.getParent().remove(next);
-            }
-        }
-
-        // turn the pattern into a string, p, the java equivalent string for our js regexp
-        final String p = pattern.toString();
-        // if builder contains all tokens that were sent in, we know
-        // we correctly parsed the entire JavaScript regexp without syntax errors
-        if (!string.equals(scanner.getStringBuilder().toString())) {
-            throw new PatternSyntaxException(string, p, p.length() + 1);
-        }
-
-        scanner.javaPattern = p;
-        return scanner;
-     }
-
-    /**
-     * Does this regexp ever match anything? Use of e.g. [], which is legal in JavaScript,
-     * is an example where we never match
-     *
-     * @return boolean
-     */
-    private boolean neverMatches() {
-        return neverMatches;
-    }
-
-    /**
-     * This is used to set better error messages that can be reused
-     * in NativeRegExp for augmenting e.g. SyntaxErrors.
-     *
-     * @return an error message or null if no extra info
-     */
-    public String getErrorMessage() {
-        return errorMessage;
-    }
-
-    final StringBuilder getStringBuilder() {
-        return sb;
-    }
-
-    String getJavaPattern() {
-        return javaPattern;
-    }
-
-    BitVector getGroupsInNegativeLookahead() {
-        BitVector vec = null;
-        for (int i = 0; i < caps.size(); i++) {
-            final Capture cap = caps.get(i);
-            if (cap.getNegativeLookaheadLevel() > 0) {
-                if (vec == null) {
-                    vec = new BitVector(caps.size() + 1);
-                }
-                vec.set(i + 1);
-            }
-        }
-        return vec;
-    }
-
-    /**
-     * Commit n characters to the builder and to a given token
-     * @param token Uncommitted token.
-     * @param n     Number of characters.
-     * @return Committed token
-     */
-    private Token commit(final Token token, final int n) {
-        final int startIn = position;
-
-        switch (n) {
-        case 1:
-            sb.append(ch0);
-            skip(1);
-            break;
-        case 2:
-            sb.append(ch0);
-            sb.append(ch1);
-            skip(2);
-            break;
-        case 3:
-            sb.append(ch0);
-            sb.append(ch1);
-            sb.append(ch2);
-            skip(3);
-            break;
-        default:
-            assert false : "Should not reach here";
-        }
-
-        if (token == null) {
-            return null;
-        }
-
-        return token.add(sb.substring(startIn, sb.length()));
-    }
-
-    /**
-     * Restart the buffers back at an earlier position.
-     *
-     * @param startIn
-     *            Position in the input stream.
-     * @param startOut
-     *            Position in the output stream.
-     */
-    private void restart(final int startIn, final int startOut) {
-        reset(startIn);
-        sb.setLength(startOut);
-    }
-
-    private void push(final char ch) {
-        expected.put(ch, expected.get(ch) + 1);
-    }
-
-    private void pop(final char ch) {
-        expected.put(ch, Math.min(0, expected.get(ch) - 1));
-    }
-
-    /*
-     * Recursive descent tokenizer starts below.
-     */
-
-    /*
-     * Pattern ::
-     *      Disjunction
-     */
-    private Token pattern() {
-        final Token token = new Token(Token.Type.PATTERN);
-
-        final Token child = disjunction();
-        return token.add(child);
-    }
-
-    /*
-     * Disjunction ::
-     *      Alternative
-     *      Alternative | Disjunction
-     */
-    private Token disjunction() {
-        final Token token = new Token(Token.Type.DISJUNCTION);
-
-        while (true) {
-            token.add(alternative());
-
-            if (ch0 == '|') {
-                commit(token, 1);
-            } else {
-                break;
-            }
-        }
-
-        return token;
-    }
-
-    /*
-     * Alternative ::
-     *      [empty]
-     *      Alternative Term
-     */
-    private Token alternative() {
-        final Token token = new Token(Token.Type.ALTERNATIVE);
-
-        Token child;
-        while ((child = term()) != null) {
-            token.add(child);
-        }
-
-        return token;
-    }
-
-    /*
-     * Term ::
-     *      Assertion
-     *      Atom
-     *      Atom Quantifier
-     */
-    private Token term() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.TERM);
-        Token child;
-
-        child = assertion();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = atom();
-        if (child != null) {
-            boolean emptyCharacterClass = false;
-            if ("[]".equals(child.toString())) {
-                emptyCharacterClass = true;
-            }
-
-            token.add(child);
-
-            final Token quantifier = quantifier();
-            if (quantifier != null) {
-                token.add(quantifier);
-            }
-
-            if (emptyCharacterClass) {
-                if (quantifier == null) {
-                    neverMatches = true; //never matches ever.
-                } else {
-                    //if we can get away with max zero, remove this entire token
-                    final String qs = quantifier.toString();
-                    if ("+".equals(qs) || "*".equals(qs) || qs.startsWith("{0,")) {
-                        token.setIsDead(true);
-                    }
-                }
-            }
-
-            return token;
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * Assertion ::
-     *      ^
-     *      $
-     *      \b
-     *      \B
-     *      ( ? = Disjunction )
-     *      ( ? ! Disjunction )
-     */
-    private Token assertion() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.ASSERTION);
-
-        switch (ch0) {
-        case '^':
-        case '$':
-            return commit(token, 1);
-
-        case '\\':
-            if (ch1 == 'b' || ch1 == 'B') {
-                return commit(token, 2);
-            }
-            break;
-
-        case '(':
-            if (ch1 != '?') {
-                break;
-            }
-            if (ch2 != '=' && ch2 != '!') {
-                break;
-            }
-            final boolean isNegativeLookahead = (ch2 == '!');
-            commit(token, 3);
-
-            if (isNegativeLookahead) {
-                negativeLookaheadLevel++;
-            }
-            final Token disjunction = disjunction();
-            if (isNegativeLookahead) {
-                for (final Capture cap : caps) {
-                    if (cap.getNegativeLookaheadLevel() >= negativeLookaheadLevel) {
-                        cap.setDead();
-                    }
-                }
-                negativeLookaheadLevel--;
-            }
-
-            if (disjunction != null && ch0 == ')') {
-                token.add(disjunction);
-                return commit(token, 1);
-            }
-            break;
-
-        default:
-            break;
-        }
-
-        restart(startIn, startOut);
-
-        return null;
-    }
-
-    /*
-     * Quantifier ::
-     *      QuantifierPrefix
-     *      QuantifierPrefix ?
-     */
-    private Token quantifier() {
-        final Token token = new Token(Token.Type.QUANTIFIER);
-        final Token child = quantifierPrefix();
-        if (child != null) {
-            token.add(child);
-            if (ch0 == '?') {
-                commit(token, 1);
-            }
-            return token;
-        }
-        return null;
-    }
-
-    /*
-     * QuantifierPrefix ::
-     *      *
-     *      +
-     *      ?
-     *      { DecimalDigits }
-     *      { DecimalDigits , }
-     *      { DecimalDigits , DecimalDigits }
-     */
-    private Token quantifierPrefix() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.QUANTIFIER_PREFIX);
-
-        switch (ch0) {
-        case '*':
-        case '+':
-        case '?':
-            return commit(token, 1);
-
-        case '{':
-            commit(token, 1);
-
-            final Token child = decimalDigits();
-            if (child == null) {
-                break; // not a quantifier - back out
-            }
-            push('}');
-            token.add(child);
-
-            if (ch0 == ',') {
-                commit(token, 1);
-                token.add(decimalDigits());
-            }
-
-            if (ch0 == '}') {
-                pop('}');
-                commit(token, 1);
-            }
-
-            return token;
-
-        default:
-            break;
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * Atom ::
-     *      PatternCharacter
-     *      .
-     *      \ AtomEscape
-     *      CharacterClass
-     *      ( Disjunction )
-     *      ( ? : Disjunction )
-     *
-     */
-    private Token atom() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.ATOM);
-        Token child;
-
-        child = patternCharacter();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        if (ch0 == '.') {
-            return commit(token, 1);
-        }
-
-        if (ch0 == '\\') {
-            commit(token, 1);
-            child = atomEscape();
-
-            if (child != null) {
-                if (child.hasChildOfType(Token.Type.IDENTITY_ESCAPE)) {
-                    final char idEscape = child.toString().charAt(0);
-                    if (NON_IDENT_ESCAPES.indexOf(idEscape) == -1) {
-                        token.reset();
-                    }
-                }
-
-                token.add(child);
-
-                // forward backreferences always match empty. JavaScript != Java
-                if (child.hasChildOfType(Token.Type.DECIMAL_ESCAPE) && !"\u0000".equals(child.toString())) {
-                    final int refNum = Integer.parseInt(child.toString());
-
-                    if (refNum - 1 < caps.size() && caps.get(refNum - 1).isDead()) {
-                        // reference to dead in-negative-lookahead capture
-                        token.setIsDead(true);
-                    } else if (caps.size() < refNum) {
-                        // forward reference: always matches against empty string (dead token).
-                        // invalid reference (non-existant capture): pattern never matches.
-                        forwardReferences.put(refNum, token);
-                    }
-                }
-
-                return token;
-            }
-        }
-
-        child = characterClass();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        if (ch0 == '(') {
-            boolean capturingParens = true;
-            commit(token, 1);
-            if (ch0 == '?' && ch1 == ':') {
-                capturingParens = false;
-                commit(token, 2);
-            }
-
-            child = disjunction();
-            if (child != null) {
-                token.add(child);
-                if (ch0 == ')') {
-                    final Token atom = commit(token, 1);
-                    if (capturingParens) {
-                        caps.add(new Capture(negativeLookaheadLevel));
-                    }
-                    return atom;
-                }
-            }
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * PatternCharacter ::
-     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
-     */
-    @SuppressWarnings("fallthrough")
-    private Token patternCharacter() {
-        if (atEOF()) {
-            return null;
-        }
-
-        switch (ch0) {
-        case '^':
-        case '$':
-        case '\\':
-        case '.':
-        case '*':
-        case '+':
-        case '?':
-        case '(':
-        case ')':
-        case '[':
-        case '|':
-            return null;
-
-        case '}':
-        case ']':
-            final int n = expected.get(ch0);
-            if (n != 0) {
-                return null;
-            }
-
-       case '{':
-           // if not a valid quantifier escape curly brace to match itself
-           // this ensures compatibility with other JS implementations
-           final Token quant = quantifierPrefix();
-           return (quant == null) ? commit(new Token(Token.Type.PATTERN_CHARACTER).add("\\"), 1) : null;
-
-        default:
-            return commit(new Token(Token.Type.PATTERN_CHARACTER), 1); // SOURCECHARACTER
-        }
-    }
-
-    /*
-     * AtomEscape ::
-     *      DecimalEscape
-     *      CharacterEscape
-     *      CharacterClassEscape
-     */
-    private Token atomEscape() {
-        final Token token = new Token(Token.Type.ATOM_ESCAPE);
-        Token child;
-
-        child = decimalEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = characterClassEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = characterEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-
-        return null;
-    }
-
-    /*
-     * CharacterEscape ::
-     *      ControlEscape
-     *      c ControlLetter
-     *      HexEscapeSequence
-     *      UnicodeEscapeSequence
-     *      IdentityEscape
-     */
-    private Token characterEscape() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-
-        final Token token = new Token(Token.Type.CHARACTER_ESCAPE);
-        Token child;
-
-        child = controlEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        if (ch0 == 'c') {
-            commit(token, 1);
-            child = controlLetter();
-            if (child != null) {
-                return token.add(child);
-            }
-            restart(startIn, startOut);
-        }
-
-        child = hexEscapeSequence();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = unicodeEscapeSequence();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = identityEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        restart(startIn, startOut);
-
-        return null;
-    }
-
-    private boolean scanEscapeSequence(final char leader, final int length, final Token token) {
-        final int startIn  = position;
-        final int startOut = sb.length();
-
-        if (ch0 != leader) {
-            return false;
-        }
-
-        commit(token, 1);
-        for (int i = 0; i < length; i++) {
-            final char ch0l = Character.toLowerCase(ch0);
-            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
-                commit(token, 1);
-            } else {
-                restart(startIn, startOut);
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    private Token hexEscapeSequence() {
-        final Token token = new Token(Token.Type.HEX_ESCAPESEQUENCE);
-        if (scanEscapeSequence('x', 2, token)) {
-            return token;
-        }
-        return null;
-    }
-
-    private Token unicodeEscapeSequence() {
-        final Token token = new Token(Token.Type.UNICODE_ESCAPESEQUENCE);
-        if (scanEscapeSequence('u', 4, token)) {
-            return token;
-        }
-        return null;
-    }
-
-    /*
-     * ControlEscape ::
-     *      one of fnrtv
-     */
-    private Token controlEscape() {
-        switch (ch0) {
-        case 'f':
-        case 'n':
-        case 'r':
-        case 't':
-        case 'v':
-            return commit(new Token(Token.Type.CONTROL_ESCAPE), 1);
-
-        default:
-            return null;
-        }
-    }
-
-    /*
-     * ControlLetter ::
-     *      one of abcdefghijklmnopqrstuvwxyz
-     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
-     */
-    private Token controlLetter() {
-        final char c = Character.toUpperCase(ch0);
-        if (c >= 'A' && c <= 'Z') {
-            final Token token = new Token(Token.Type.CONTROL_LETTER);
-            commit(token, 1);
-            return token;
-        }
-        return null;
-        /*
-        Token token = new Token(Token.Type.CONTROL_LETTER);
-        commit(null, 1);//add original char to builder not to token
-        this.neverMatches = c < 'A' || c > 'Z';
-        return token.add(""+c);*/
-    }
-
-    /*
-     * IdentityEscape ::
-     *      SourceCharacter but not IdentifierPart
-     *      <ZWJ>  (200c)
-     *      <ZWNJ> (200d)
-     */
-    private Token identityEscape() {
-        final Token token = new Token(Token.Type.IDENTITY_ESCAPE);
-        commit(token, 1);
-        return token;
-    }
-
-    /*
-     * DecimalEscape ::
-     *      DecimalIntegerLiteral [lookahead DecimalDigit]
-     */
-    private Token decimalEscape() {
-        final Token token = new Token(Token.Type.DECIMAL_ESCAPE);
-        final int startIn  = position;
-        final int startOut = sb.length();
-
-        if (ch0 == '0' && !isDecimalDigit(ch1)) {
-            commit(token, 1);
-            token.removeLast();
-            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
-            return token.add("\u0000");
-        }
-
-        if (isDecimalDigit(ch0)) {
-            while (isDecimalDigit(ch0)) {
-                commit(token, 1);
-            }
-            return token;
-        }
-
-        restart(startIn, startOut);
-
-        return null;
-    }
-
-    /*
-     * CharacterClassEscape ::
-     *  one of dDsSwW
-     */
-    private Token characterClassEscape() {
-        switch (ch0) {
-        case 's':
-        case 'S':
-        case 'd':
-        case 'D':
-        case 'w':
-        case 'W':
-            return commit(new Token(Token.Type.CHARACTERCLASS_ESCAPE), 1);
-
-        default:
-            return null;
-        }
-    }
-
-    /*
-     * CharacterClass ::
-     *      [ [lookahead {^}] ClassRanges ]
-     *      [ ^ ClassRanges ]
-     */
-    private Token characterClass() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.CHARACTERCLASS);
-
-        if (ch0 == '[') {
-            push(']');
-            commit(token, 1);
-
-            if (ch0 == '^') {
-                commit(token, 1);
-            }
-
-            final Token child = classRanges();
-            if (child != null && ch0 == ']') {
-                pop(']');
-                token.add(child);
-                return commit(token, 1);
-            }
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * ClassRanges ::
-     *      [empty]
-     *      NonemptyClassRanges
-     */
-    private Token classRanges() {
-        return new Token(Token.Type.CLASSRANGES).add(nonemptyClassRanges());
-    }
-
-    /*
-     * NonemptyClassRanges ::
-     *      ClassAtom
-     *      ClassAtom NonemptyClassRangesNoDash
-     *      ClassAtom - ClassAtom ClassRanges
-     */
-    private Token nonemptyClassRanges() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES);
-        Token child;
-
-        child = classAtom();
-        if (child != null) {
-            token.add(child);
-
-            if (ch0 == '-') {
-                commit(token, 1);
-
-                final Token child1 = classAtom();
-                final Token child2 = classRanges();
-                if (child1 != null && child2 != null) {
-                    token.add(child1);
-                    token.add(child2);
-
-                    return token;
-                }
-            }
-
-            child = nonemptyClassRangesNoDash();
-            if (child != null) {
-                token.add(child);
-                return token;
-            }
-
-            return token;
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * NonemptyClassRangesNoDash ::
-     *      ClassAtom
-     *      ClassAtomNoDash NonemptyClassRangesNoDash
-     *      ClassAtomNoDash - ClassAtom ClassRanges
-     */
-    private Token nonemptyClassRangesNoDash() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES_NODASH);
-        Token child;
-
-        child = classAtomNoDash();
-        if (child != null) {
-            token.add(child);
-
-            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
-            if (ch0 == '-') {
-               commit(token, 1);
-
-               final Token child1 = classAtom();
-               final Token child2 = classRanges();
-               if (child1 != null && child2 != null) {
-                   token.add(child1);
-                   return token.add(child2);
-               }
-               //fallthru
-           }
-
-            child = nonemptyClassRangesNoDash();
-            if (child != null) {
-                token.add(child);
-            }
-            return token; // still a class atom
-        }
-
-        child = classAtom();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        restart(startIn, startOut);
-        return null;
-    }
-
-    /*
-     * ClassAtom : - ClassAtomNoDash
-     */
-    private Token classAtom() {
-        final Token token = new Token(Token.Type.CLASSATOM);
-
-        if (ch0 == '-') {
-            return commit(token, 1);
-        }
-
-        final Token child = classAtomNoDash();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        return null;
-    }
-
-    /*
-     * ClassAtomNoDash ::
-     *      SourceCharacter but not one of \ or ] or -
-     *      \ ClassEscape
-     */
-    private Token classAtomNoDash() {
-        final int startIn  = position;
-        final int startOut = sb.length();
-        final Token token  = new Token(Token.Type.CLASSATOM_NODASH);
-
-        switch (ch0) {
-        case ']':
-        case '-':
-        case '\0':
-            return null;
-
-        case '[':
-            // unescaped left square bracket - add escape
-            return commit(token.add("\\"), 1);
-
-        case '\\':
-            commit(token, 1);
-            final Token child = classEscape();
-            if (child != null) {
-                return token.add(child);
-            }
-
-            restart(startIn, startOut);
-            return null;
-
-        default:
-            return commit(token, 1);
-        }
-    }
-
-    /*
-     * ClassEscape ::
-     *      DecimalEscape
-     *      b
-     *      CharacterEscape
-     *      CharacterClassEscape
-     */
-    private Token classEscape() {
-        final Token token = new Token(Token.Type.CLASS_ESCAPE);
-        Token child;
-
-        child = decimalEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        if (ch0 == 'b') {
-            return commit(token, 1);
-        }
-
-        child = characterEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        child = characterClassEscape();
-        if (child != null) {
-            return token.add(child);
-        }
-
-        return null;
-    }
-
-    /*
-     * DecimalDigits
-     */
-    private Token decimalDigits() {
-        if (!isDecimalDigit(ch0)) {
-            return null;
-        }
-
-        final Token token = new Token(Token.Type.DECIMALDIGITS);
-        while (isDecimalDigit(ch0)) {
-            commit(token, 1);
-        }
-
-        return token;
-    }
-
-    private static boolean isDecimalDigit(final char ch) {
-        return ch >= '0' && ch <= '9';
-    }
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/DefaultRegExp.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import jdk.nashorn.internal.runtime.ParserException;
+
+import static java.util.regex.Pattern.CASE_INSENSITIVE;
+import static java.util.regex.Pattern.MULTILINE;
+import static java.util.regex.Pattern.UNICODE_CASE;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Default regular expression implementation based on java.util.regex package.
+ *
+ * Note that this class is not thread-safe as it stores the current match result
+ * and the string being matched in instance fields.
+ */
+public class DefaultRegExp extends RegExp {
+
+    /** Java regexp pattern to use for match. We compile to one of these */
+    private Pattern pattern;
+
+    /** The matcher */
+    private RegExpMatcher matcher;
+
+    /**
+     * Construct a Regular expression from the given {@code source} and {@code flags} strings.
+     *
+     * @param source RegExp source string
+     * @param flags RegExp flag string
+     * @throws ParserException if flags is invalid or source string has syntax error.
+     */
+    public DefaultRegExp(final String source, final String flags) throws ParserException {
+        super(source, flags);
+
+        int intFlags = 0;
+
+        if (isIgnoreCase()) {
+            intFlags |= CASE_INSENSITIVE | UNICODE_CASE;
+        }
+        if (isMultiline()) {
+            intFlags |= MULTILINE;
+        }
+
+        try {
+            RegExpScanner parsed;
+
+            try {
+                parsed = RegExpScanner.scan(source);
+            } catch (final PatternSyntaxException e) {
+                // refine the exception with a better syntax error, if this
+                // passes, just rethrow what we have
+                Pattern.compile(source, intFlags);
+                throw e;
+            }
+
+            if (parsed != null) {
+                this.pattern = Pattern.compile(parsed.getJavaPattern(), intFlags);
+                this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
+            }
+        } catch (final PatternSyntaxException e2) {
+            throwParserException("syntax", e2.getMessage());
+        }
+    }
+
+    @Override
+    public RegExpMatcher match(final String str) {
+        if (pattern == null) {
+            return null; // never matches or similar, e.g. a[]
+        }
+
+        RegExpMatcher matcher = this.matcher;
+
+        if (matcher == null || matcher.getInput() != str) {
+            matcher = new DefaultMatcher(str);
+            this.matcher = matcher;
+        }
+
+        return matcher;
+    }
+
+    class DefaultMatcher implements RegExpMatcher {
+        final String input;
+        final Matcher matcher;
+
+        DefaultMatcher(final String input) {
+            this.input = input;
+            this.matcher = pattern.matcher(input);
+        }
+
+        @Override
+        public boolean search(final int start) {
+            return matcher.find(start);
+        }
+
+        @Override
+        public String getInput() {
+            return input;
+        }
+
+        @Override
+        public int start() {
+            return matcher.start();
+        }
+
+        @Override
+        public int start(final int group) {
+            return matcher.start(group);
+        }
+
+        @Override
+        public int end() {
+            return matcher.end();
+        }
+
+        @Override
+        public int end(final int group) {
+            return matcher.end(group);
+        }
+
+        @Override
+        public String group() {
+            return matcher.group();
+        }
+
+        @Override
+        public String group(final int group) {
+            return matcher.group(group);
+        }
+
+        @Override
+        public int groupCount() {
+            return matcher.groupCount();
+        }
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExp.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import jdk.nashorn.internal.runtime.BitVector;
+import jdk.nashorn.internal.runtime.ECMAErrors;
+import jdk.nashorn.internal.runtime.ParserException;
+
+import java.util.regex.MatchResult;
+
+/**
+ * This is the base class for representing a parsed regular expression.
+ *
+ * Instances of this class are created by a {@link RegExpFactory}.
+ */
+public abstract class RegExp {
+
+    /** Pattern string. */
+    private final String source;
+
+    /** Global search flag for this regexp.*/
+    private boolean global;
+
+    /** Case insensitive flag for this regexp */
+    private boolean ignoreCase;
+
+    /** Multi-line flag for this regexp */
+    private boolean multiline;
+
+    /** BitVector that keeps track of groups in negative lookahead */
+    protected BitVector groupsInNegativeLookahead;
+
+    /**
+     * Constructor.
+     *
+     * @param source the source string
+     * @param flags the flags string
+     */
+    protected RegExp(final String source, final String flags) {
+        this.source = source;
+        for (int i = 0; i < flags.length(); i++) {
+            final char ch = flags.charAt(i);
+            switch (ch) {
+            case 'g':
+                if (this.global) {
+                    throwParserException("repeated.flag", "g");
+                }
+                this.global = true;
+                break;
+            case 'i':
+                if (this.ignoreCase) {
+                    throwParserException("repeated.flag", "i");
+                }
+                this.ignoreCase = true;
+                break;
+            case 'm':
+                if (this.multiline) {
+                    throwParserException("repeated.flag", "m");
+                }
+                this.multiline = true;
+                break;
+            default:
+                throwParserException("unsupported.flag", Character.toString(ch));
+            }
+        }
+    }
+
+    /**
+     * Get the source pattern of this regular expression.
+     *
+     * @return the source string
+     */
+    public String getSource() {
+        return source;
+    }
+
+    /**
+     * Set the global flag of this regular expression to {@code global}.
+     *
+     * @param global the new global flag
+     */
+    public void setGlobal(final boolean global) {
+        this.global = global;
+    }
+
+    /**
+     * Get the global flag of this regular expression.
+     *
+     * @return the global flag
+     */
+    public boolean isGlobal() {
+        return global;
+    }
+
+    /**
+     * Get the ignore-case flag of this regular expression.
+     *
+     * @return the ignore-case flag
+     */
+    public boolean isIgnoreCase() {
+        return ignoreCase;
+    }
+
+    /**
+     * Get the multiline flag of this regular expression.
+     *
+     * @return the multiline flag
+     */
+    public boolean isMultiline() {
+        return multiline;
+    }
+
+    /**
+     * Get a bitset indicating which of the groups in this regular expression are inside a negative lookahead.
+     *
+     * @return the groups-in-negative-lookahead bitset
+     */
+    public BitVector getGroupsInNegativeLookahead() {
+        return groupsInNegativeLookahead;
+    }
+
+    /**
+     * Match this regular expression against {@code str}, starting at index {@code start}
+     * and return a {@link MatchResult} with the result.
+     *
+     * @param str the string
+     * @return the matcher
+     */
+    public abstract RegExpMatcher match(String str);
+
+    /**
+     * Throw a regexp parser exception.
+     *
+     * @param key the message key
+     * @param str string argument
+     * @throws jdk.nashorn.internal.runtime.ParserException
+     */
+    protected static void throwParserException(final String key, final String str) throws ParserException {
+        throw new ParserException(ECMAErrors.getMessage("parser.error.regex." + key, str));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import jdk.nashorn.internal.parser.Lexer;
+import jdk.nashorn.internal.runtime.ParserException;
+
+/**
+ * Factory class for regular expressions. This class creates instances of {@link DefaultRegExp}.
+ */
+public class RegExpFactory {
+
+
+    private final static RegExpFactory instance = new RegExpFactory();
+
+    /**
+     * Creates a Regular expression from the given {@code pattern} and {@code flags} strings.
+     *
+     * @param pattern RegExp pattern string
+     * @param flags RegExp flags string
+     * @throws ParserException if flags is invalid or pattern string has syntax error.
+     */
+    protected RegExp compile(final String pattern, final String flags) throws ParserException {
+        return new DefaultRegExp(pattern, flags);
+    }
+
+    /**
+     * Replace a regexp token as suitable for regexp instances created by this factory.
+     *
+     * @param str a regular expression token
+     * @return the replacement token
+     */
+    protected String replaceToken(final String str) {
+        switch (str) {
+            case "\\s":
+                return "[" + Lexer.getWhitespaceRegExp() + "]";
+            case "\\S":
+                return "[^" + Lexer.getWhitespaceRegExp() + "]";
+            case "[^]":
+                return "[\\s\\S]";
+            default:
+                return str;
+        }
+    }
+
+    /**
+     * Compile a regexp with the given {@code source} and {@code flags}.
+     *
+     * @param pattern RegExp pattern string
+     * @param flags  flag string
+     *
+     * @throws ParserException if invalid source or flags
+     */
+    public static RegExp create(final String pattern, final String flags) {
+        return instance.compile(pattern,  flags);
+    }
+
+    /**
+     * Replace a regexp token as needed by the currently installed factory instance.
+     *
+     * @param token a regexp token
+     * @return the replacement token
+     */
+    public static String replace(final String token) {
+        return instance.replaceToken(token);
+    }
+
+    /**
+     * Validate a regexp with the given {@code source} and {@code flags}.
+     *
+     * @param pattern RegExp pattern string
+     * @param flags  flag string
+     *
+     * @throws ParserException if invalid source or flags
+     */
+    // @SuppressWarnings({"unused"})
+    public static void validate(final String pattern, final String flags) throws ParserException {
+        instance.compile(pattern, flags);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpMatcher.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import java.util.regex.MatchResult;
+
+/**
+ * Interface for matching a regular expression against a string and retrieving the
+ * match result. Extends {@link MatchResult}.
+ */
+public interface RegExpMatcher extends MatchResult {
+
+    /**
+     * Searches for pattern starting at {@code start}. Returns {@code true} if a match was found.
+     *
+     * @param start the start index in the input string
+     * @return {@code true} if a match was found
+     */
+    boolean search(int start);
+
+    /**
+     * Get the input string.
+     *
+     * @return the input string
+     */
+    String getInput();
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpResult.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+/**
+ * Match tuple to keep track of ongoing regexp match.
+ */
+public final class RegExpResult {
+    final Object[] groups;
+    final int      index;
+    final String   input;
+
+    /**
+     * Constructor
+     *
+     * @param input  regexp input
+     * @param index  index of match
+     * @param groups groups vector
+     */
+    public RegExpResult(final String input, final int index, final Object[] groups) {
+        this.input  = input;
+        this.index  = index;
+        this.groups = groups;
+    }
+
+    /**
+     * Get the groups for the match
+     * @return group vector
+     */
+    public Object[] getGroups() {
+        return groups;
+    }
+
+    /**
+     * Get the input for the map
+     * @return input
+     */
+    public String getInput() {
+        return input;
+    }
+
+    /**
+     * Get the index for the match
+     * @return index
+     */
+    public int getIndex() {
+        return index;
+    }
+
+    /**
+     * Get the length of the match
+     * @return length
+     */
+    public int length() {
+        return ((String)groups[0]).length();
+    }
+
+    /**
+     * Get the group with the given index or the empty string if group index is not valid.
+     * @param index the group index
+     * @return the group or ""
+     */
+    public Object getGroup(int index) {
+        return index >= 0 && index < groups.length ? groups[index] : "";
+    }
+
+    /**
+     * Get the last parenthesis group, or the empty string if none exists.
+     * @return the last group or ""
+     */
+    public Object getLastParen() {
+        return groups.length > 1 ? groups[groups.length - 1] : "";
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Fri Feb 22 16:31:10 2013 +0100
@@ -0,0 +1,1391 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.PatternSyntaxException;
+
+import jdk.nashorn.internal.parser.Scanner;
+import jdk.nashorn.internal.runtime.BitVector;
+
+/**
+ * Scan a JavaScript regexp, converting to Java regex if necessary.
+ *
+ */
+final class RegExpScanner extends Scanner {
+
+    /**
+     * String builder to accumulate the result - this contains verbatim parsed JavaScript.
+     * to get the java equivalent we need to create a Pattern token and return its toString()
+     */
+    private final StringBuilder sb;
+
+    /** Is this the special case of a regexp that never matches anything */
+    private boolean neverMatches;
+
+    /** The resulting java.util.regex pattern string. */
+    private String javaPattern;
+
+    /** Expected token table */
+    private final Map<Character, Integer> expected = new HashMap<>();
+
+    /** Capturing parenthesis that have been found so far. */
+    private final List<Capture> caps = new LinkedList<>();
+
+    /** Forward references to capturing parenthesis to be resolved later.*/
+    private final Map<Integer, Token> forwardReferences = new LinkedHashMap<>();
+
+    /** Current level of zero-width negative lookahead assertions. */
+    private int negativeLookaheadLevel;
+
+    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";
+
+    private static class Capture {
+        /**
+         * Zero-width negative lookaheads enclosing the capture.
+         */
+        private final int negativeLookaheadLevel;
+        /**
+         * Captures that live inside a negative lookahead are dead after the
+         * lookahead and will be undefined if referenced from outside.
+         */
+        private boolean isDead;
+
+        Capture(final int negativeLookaheadLevel) {
+            this.negativeLookaheadLevel = negativeLookaheadLevel;
+        }
+
+        public int getNegativeLookaheadLevel() {
+            return negativeLookaheadLevel;
+        }
+
+        public boolean isDead() {
+            return isDead;
+        }
+
+        public void setDead() {
+            this.isDead = true;
+        }
+    }
+
+    /**
+     * This is a token - the JavaScript regexp is scanned into a token tree
+     * A token has other tokens as children as well as "atoms", i.e. Strings.
+     */
+    private static class Token {
+
+        private enum Type {
+            PATTERN,
+            DISJUNCTION,
+            ALTERNATIVE,
+            TERM,
+            ASSERTION,
+            QUANTIFIER,
+            QUANTIFIER_PREFIX,
+            ATOM,
+            PATTERN_CHARACTER,
+            ATOM_ESCAPE,
+            CHARACTER_ESCAPE,
+            CONTROL_ESCAPE,
+            CONTROL_LETTER,
+            IDENTITY_ESCAPE,
+            DECIMAL_ESCAPE,
+            CHARACTERCLASS_ESCAPE,
+            CHARACTERCLASS,
+            CLASSRANGES,
+            NON_EMPTY_CLASSRANGES,
+            NON_EMPTY_CLASSRANGES_NODASH,
+            CLASSATOM,
+            CLASSATOM_NODASH,
+            CLASS_ESCAPE,
+            DECIMALDIGITS,
+            HEX_ESCAPESEQUENCE,
+            UNICODE_ESCAPESEQUENCE,
+        }
+
+        /**
+         * Token tyoe
+         */
+        private final Token.Type type;
+
+        /**
+         * Child nodes
+         */
+        private final List<Object> children;
+
+        /**
+         * Parent node
+         */
+        private Token parent;
+
+        /**
+         * Dead code flag
+         */
+        private boolean isDead;
+
+        private static final Map<Type, ToString> toStringMap = new HashMap<>();
+        private static final ToString DEFAULT_TOSTRING = new ToString();
+
+        private static String unicode(final int value) {
+            final StringBuilder sb = new StringBuilder();
+            final String hex = Integer.toHexString(value);
+            sb.append('u');
+            for (int i = 0; i < 4 - hex.length(); i++) {
+                sb.append('0');
+            }
+            sb.append(hex);
+
+            return sb.toString();
+        }
+
+        static {
+            toStringMap.put(Type.CHARACTERCLASS, new ToString() {
+                @Override
+                public String toString(final Token token) {
+                    return super.toString(token).replace("\\b", "\b");
+                }
+            });
+
+            // for some reason java regexps don't like control characters on the
+            // form "\\ca".match([string with ascii 1 at char0]). Translating
+            // them to unicode does it though.
+            toStringMap.put(Type.CHARACTER_ESCAPE, new ToString() {
+                @Override
+                public String toString(final Token token) {
+                    final String str = super.toString(token);
+                    if (str.length() == 2) {
+                        return Token.unicode(Character.toLowerCase(str.charAt(1)) - 'a' + 1);
+                    }
+                    return str;
+                }
+            });
+
+            toStringMap.put(Type.DECIMAL_ESCAPE, new ToString() {
+                @Override
+                public String toString(final Token token) {
+                    final String str = super.toString(token);
+
+                    if ("\0".equals(str)) {
+                        return str;
+                    }
+
+                    int value;
+
+                    if (!token.hasParentOfType(Type.CLASSRANGES)) {
+                        return str;
+                    }
+
+                    value = Integer.parseInt(str, 8); //throws exception that leads to SyntaxError if not octal
+                    if (value > 0xff) {
+                        throw new NumberFormatException(str);
+                    }
+
+                    return Token.unicode(value);
+                }
+            });
+
+        }
+
+        /**
+         * JavaScript Token to Java regex substring framework.
+         */
+        private static class ToString {
+            String toString(final Token token) {
+                final Object[] children = token.getChildren();
+
+                // Allow the installed regexp factory to perform global substitutions.
+                switch (children.length) {
+                    case 0:
+                        return "";
+                    case 1:
+                        return RegExpFactory.replace(children[0].toString());
+                    default:
+                        final StringBuilder sb = new StringBuilder();
+                        for (final Object child : children) {
+                            sb.append(child);
+                        }
+                        return RegExpFactory.replace(sb.toString());
+                }
+            }
+        }
+
+        /**
+         * Token iterator. Doesn't return "atom" children. i.e. string representations,
+         * just tokens
+         *
+         */
+        private static class TokenIterator implements Iterator<Token> {
+            private final List<Token> preorder;
+
+            private void init(final Token root) {
+                preorder.add(root);
+                for (final Object child : root.getChildren()) {
+                    if (child instanceof Token) {
+                        init((Token)child);
+                    }
+                }
+            }
+
+            TokenIterator(final Token root) {
+                preorder = new ArrayList<>();
+                init(root);
+            }
+
+            @Override
+            public boolean hasNext() {
+                return !preorder.isEmpty();
+            }
+
+            @Override
+            public Token next() {
+                return preorder.remove(0);
+            }
+
+            @Override
+            public void remove() {
+                next();
+            }
+        }
+
+        /**
+         * Constructor
+         * @param type the token type
+         */
+        Token(final Token.Type type) {
+            this.type = type;
+            children = new ArrayList<>();
+        }
+
+        /**
+         * Add a an "atom" child to a token
+         * @param child the child to add
+         * @return the token (for chaining)
+         */
+        public Token add(final String child) {
+            children.add(child);
+            return this;
+        }
+
+        /**
+         * Add a child to a token
+         * @param child the child
+         * @return the token (for chaining)
+         */
+        public Token add(final Token child) {
+            if (child != null) {
+                children.add(child);
+                child.setParent(this);
+            }
+            return this;
+        }
+
+        /**
+         * Remove a child from a token
+         * @param child the child to remove
+         * @return true if successful
+         */
+        public boolean remove(final Token child) {
+            return children.remove(child);
+        }
+
+        /**
+         * Remove the last child from a token
+         * @return the removed child
+         */
+        public Object removeLast() {
+            return children.remove(children.size() - 1);
+        }
+
+        /**
+         * Flag this token as dead code
+         * @param isDead is it dead or not
+         */
+        private void setIsDead(final boolean isDead) {
+            this.isDead = isDead;
+        }
+
+        /**
+         * Is this token dead code
+         * @return boolean
+         */
+        private boolean getIsDead() {
+            return isDead;
+        }
+
+        /**
+         * Get the parent of this token
+         * @return parent token
+         */
+        public Token getParent() {
+            return parent;
+        }
+
+        public boolean hasParentOfType(final Token.Type parentType) {
+            for (Token p = getParent(); p != null; p = p.getParent()) {
+                if (p.getType() == parentType) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        public boolean hasChildOfType(final Token.Type childType) {
+            for (final Iterator<Token> iter = iterator() ; iter.hasNext() ; ) {
+                if (iter.next().getType() == childType) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /**
+         * Set the parent of this token
+         * @param parent
+         */
+        private void setParent(final Token parent) {
+            this.parent = parent;
+        }
+
+        /**
+         * Get the children of this token
+         * @return an array of children, never null
+         */
+        public Object[] getChildren() {
+            return children.toArray();
+        }
+
+        /**
+         * Reset this token, remove all children
+         */
+        public void reset() {
+            children.clear();
+        }
+
+        /**
+         * Get a preorder token iterator with this token as root
+         * @return an iterator
+         */
+        public Iterator<Token> iterator() {
+            return new TokenIterator(this);
+        }
+
+        /**
+         * Get the type of this token
+         * @return type
+         */
+        public Type getType() {
+            return type;
+        }
+
+        /**
+         * Turn this token into Java regexp compatible text
+         * @return part of a java regexp
+         */
+        @Override
+        public String toString() {
+            ToString t = toStringMap.get(getType());
+            if (t == null) {
+                t = DEFAULT_TOSTRING;
+            }
+            return t.toString(this);
+        }
+    }
+
+    /**
+     * Constructor
+     * @param string the JavaScript regexp to parse
+     */
+    private RegExpScanner(final String string) {
+        super(string);
+        sb = new StringBuilder(limit);
+        reset(0);
+        expected.put(']', 0);
+        expected.put('}', 0);
+    }
+
+    private void processForwardReferences() {
+        if (neverMatches()) {
+            return;
+        }
+
+        for (final Map.Entry<Integer, Token> fwdRef : forwardReferences.entrySet()) {
+            if (fwdRef.getKey().intValue() > caps.size()) {
+                neverMatches = true;
+                break;
+            }
+
+            fwdRef.getValue().setIsDead(true);
+        }
+
+        forwardReferences.clear();
+    }
+
+    /**
+     * Scan a JavaScript regexp string returning a Java safe regex string.
+     *
+     * @param string
+     *            JavaScript regexp string.
+     * @return Java safe regex string.
+     */
+    public static RegExpScanner scan(final String string) {
+        final RegExpScanner scanner = new RegExpScanner(string);
+
+        Token pattern;
+
+        try {
+            pattern = scanner.pattern();
+        } catch (final Exception e) {
+            throw new PatternSyntaxException(e.getMessage(), string, scanner.sb.length());
+        }
+
+        scanner.processForwardReferences();
+        if (scanner.neverMatches()) {
+            return null; // never matches
+        }
+
+        // go over the code and remove dead code
+        final Iterator<Token> iter = pattern.iterator();
+        while (iter.hasNext()) {
+            final Token next = iter.next();
+            if (next.getIsDead()) {
+                next.getParent().remove(next);
+            }
+        }
+
+        // turn the pattern into a string, p, the java equivalent string for our js regexp
+        final String p = pattern.toString();
+        // if builder contains all tokens that were sent in, we know
+        // we correctly parsed the entire JavaScript regexp without syntax errors
+        if (!string.equals(scanner.getStringBuilder().toString())) {
+            throw new PatternSyntaxException(string, p, p.length() + 1);
+        }
+
+        scanner.javaPattern = p;
+        return scanner;
+     }
+
+    /**
+     * Does this regexp ever match anything? Use of e.g. [], which is legal in JavaScript,
+     * is an example where we never match
+     *
+     * @return boolean
+     */
+    private boolean neverMatches() {
+        return neverMatches;
+    }
+
+    final StringBuilder getStringBuilder() {
+        return sb;
+    }
+
+    String getJavaPattern() {
+        return javaPattern;
+    }
+
+    BitVector getGroupsInNegativeLookahead() {
+        BitVector vec = null;
+        for (int i = 0; i < caps.size(); i++) {
+            final Capture cap = caps.get(i);
+            if (cap.getNegativeLookaheadLevel() > 0) {
+                if (vec == null) {
+                    vec = new BitVector(caps.size() + 1);
+                }
+                vec.set(i + 1);
+            }
+        }
+        return vec;
+    }
+
+    /**
+     * Commit n characters to the builder and to a given token
+     * @param token Uncommitted token.
+     * @param n     Number of characters.
+     * @return Committed token
+     */
+    private Token commit(final Token token, final int n) {
+        final int startIn = position;
+
+        switch (n) {
+        case 1:
+            sb.append(ch0);
+            skip(1);
+            break;
+        case 2:
+            sb.append(ch0);
+            sb.append(ch1);
+            skip(2);
+            break;
+        case 3:
+            sb.append(ch0);
+            sb.append(ch1);
+            sb.append(ch2);
+            skip(3);
+            break;
+        default:
+            assert false : "Should not reach here";
+        }
+
+        if (token == null) {
+            return null;
+        }
+
+        return token.add(sb.substring(startIn, sb.length()));
+    }
+
+    /**
+     * Restart the buffers back at an earlier position.
+     *
+     * @param startIn
+     *            Position in the input stream.
+     * @param startOut
+     *            Position in the output stream.
+     */
+    private void restart(final int startIn, final int startOut) {
+        reset(startIn);
+        sb.setLength(startOut);
+    }
+
+    private void push(final char ch) {
+        expected.put(ch, expected.get(ch) + 1);
+    }
+
+    private void pop(final char ch) {
+        expected.put(ch, Math.min(0, expected.get(ch) - 1));
+    }
+
+    /*
+     * Recursive descent tokenizer starts below.
+     */
+
+    /*
+     * Pattern ::
+     *      Disjunction
+     */
+    private Token pattern() {
+        final Token token = new Token(Token.Type.PATTERN);
+
+        final Token child = disjunction();
+        return token.add(child);
+    }
+
+    /*
+     * Disjunction ::
+     *      Alternative
+     *      Alternative | Disjunction
+     */
+    private Token disjunction() {
+        final Token token = new Token(Token.Type.DISJUNCTION);
+
+        while (true) {
+            token.add(alternative());
+
+            if (ch0 == '|') {
+                commit(token, 1);
+            } else {
+                break;
+            }
+        }
+
+        return token;
+    }
+
+    /*
+     * Alternative ::
+     *      [empty]
+     *      Alternative Term
+     */
+    private Token alternative() {
+        final Token token = new Token(Token.Type.ALTERNATIVE);
+
+        Token child;
+        while ((child = term()) != null) {
+            token.add(child);
+        }
+
+        return token;
+    }
+
+    /*
+     * Term ::
+     *      Assertion
+     *      Atom
+     *      Atom Quantifier
+     */
+    private Token term() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.TERM);
+        Token child;
+
+        child = assertion();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = atom();
+        if (child != null) {
+            boolean emptyCharacterClass = false;
+            if ("[]".equals(child.toString())) {
+                emptyCharacterClass = true;
+            }
+
+            token.add(child);
+
+            final Token quantifier = quantifier();
+            if (quantifier != null) {
+                token.add(quantifier);
+            }
+
+            if (emptyCharacterClass) {
+                if (quantifier == null) {
+                    neverMatches = true; //never matches ever.
+                } else {
+                    //if we can get away with max zero, remove this entire token
+                    final String qs = quantifier.toString();
+                    if ("+".equals(qs) || "*".equals(qs) || qs.startsWith("{0,")) {
+                        token.setIsDead(true);
+                    }
+                }
+            }
+
+            return token;
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * Assertion ::
+     *      ^
+     *      $
+     *      \b
+     *      \B
+     *      ( ? = Disjunction )
+     *      ( ? ! Disjunction )
+     */
+    private Token assertion() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.ASSERTION);
+
+        switch (ch0) {
+        case '^':
+        case '$':
+            return commit(token, 1);
+
+        case '\\':
+            if (ch1 == 'b' || ch1 == 'B') {
+                return commit(token, 2);
+            }
+            break;
+
+        case '(':
+            if (ch1 != '?') {
+                break;
+            }
+            if (ch2 != '=' && ch2 != '!') {
+                break;
+            }
+            final boolean isNegativeLookahead = (ch2 == '!');
+            commit(token, 3);
+
+            if (isNegativeLookahead) {
+                negativeLookaheadLevel++;
+            }
+            final Token disjunction = disjunction();
+            if (isNegativeLookahead) {
+                for (final Capture cap : caps) {
+                    if (cap.getNegativeLookaheadLevel() >= negativeLookaheadLevel) {
+                        cap.setDead();
+                    }
+                }
+                negativeLookaheadLevel--;
+            }
+
+            if (disjunction != null && ch0 == ')') {
+                token.add(disjunction);
+                return commit(token, 1);
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        restart(startIn, startOut);
+
+        return null;
+    }
+
+    /*
+     * Quantifier ::
+     *      QuantifierPrefix
+     *      QuantifierPrefix ?
+     */
+    private Token quantifier() {
+        final Token token = new Token(Token.Type.QUANTIFIER);
+        final Token child = quantifierPrefix();
+        if (child != null) {
+            token.add(child);
+            if (ch0 == '?') {
+                commit(token, 1);
+            }
+            return token;
+        }
+        return null;
+    }
+
+    /*
+     * QuantifierPrefix ::
+     *      *
+     *      +
+     *      ?
+     *      { DecimalDigits }
+     *      { DecimalDigits , }
+     *      { DecimalDigits , DecimalDigits }
+     */
+    private Token quantifierPrefix() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.QUANTIFIER_PREFIX);
+
+        switch (ch0) {
+        case '*':
+        case '+':
+        case '?':
+            return commit(token, 1);
+
+        case '{':
+            commit(token, 1);
+
+            final Token child = decimalDigits();
+            if (child == null) {
+                break; // not a quantifier - back out
+            }
+            push('}');
+            token.add(child);
+
+            if (ch0 == ',') {
+                commit(token, 1);
+                token.add(decimalDigits());
+            }
+
+            if (ch0 == '}') {
+                pop('}');
+                commit(token, 1);
+            }
+
+            return token;
+
+        default:
+            break;
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * Atom ::
+     *      PatternCharacter
+     *      .
+     *      \ AtomEscape
+     *      CharacterClass
+     *      ( Disjunction )
+     *      ( ? : Disjunction )
+     *
+     */
+    private Token atom() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.ATOM);
+        Token child;
+
+        child = patternCharacter();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        if (ch0 == '.') {
+            return commit(token, 1);
+        }
+
+        if (ch0 == '\\') {
+            commit(token, 1);
+            child = atomEscape();
+
+            if (child != null) {
+                if (child.hasChildOfType(Token.Type.IDENTITY_ESCAPE)) {
+                    final char idEscape = child.toString().charAt(0);
+                    if (NON_IDENT_ESCAPES.indexOf(idEscape) == -1) {
+                        token.reset();
+                    }
+                }
+
+                token.add(child);
+
+                // forward backreferences always match empty. JavaScript != Java
+                if (child.hasChildOfType(Token.Type.DECIMAL_ESCAPE) && !"\u0000".equals(child.toString())) {
+                    final int refNum = Integer.parseInt(child.toString());
+
+                    if (refNum - 1 < caps.size() && caps.get(refNum - 1).isDead()) {
+                        // reference to dead in-negative-lookahead capture
+                        token.setIsDead(true);
+                    } else if (caps.size() < refNum) {
+                        // forward reference: always matches against empty string (dead token).
+                        // invalid reference (non-existant capture): pattern never matches.
+                        forwardReferences.put(refNum, token);
+                    }
+                }
+
+                return token;
+            }
+        }
+
+        child = characterClass();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        if (ch0 == '(') {
+            boolean capturingParens = true;
+            commit(token, 1);
+            if (ch0 == '?' && ch1 == ':') {
+                capturingParens = false;
+                commit(token, 2);
+            }
+
+            child = disjunction();
+            if (child != null) {
+                token.add(child);
+                if (ch0 == ')') {
+                    final Token atom = commit(token, 1);
+                    if (capturingParens) {
+                        caps.add(new Capture(negativeLookaheadLevel));
+                    }
+                    return atom;
+                }
+            }
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * PatternCharacter ::
+     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
+     */
+    @SuppressWarnings("fallthrough")
+    private Token patternCharacter() {
+        if (atEOF()) {
+            return null;
+        }
+
+        switch (ch0) {
+        case '^':
+        case '$':
+        case '\\':
+        case '.':
+        case '*':
+        case '+':
+        case '?':
+        case '(':
+        case ')':
+        case '[':
+        case '|':
+            return null;
+
+        case '}':
+        case ']':
+            final int n = expected.get(ch0);
+            if (n != 0) {
+                return null;
+            }
+
+       case '{':
+           // if not a valid quantifier escape curly brace to match itself
+           // this ensures compatibility with other JS implementations
+           final Token quant = quantifierPrefix();
+           return (quant == null) ? commit(new Token(Token.Type.PATTERN_CHARACTER).add("\\"), 1) : null;
+
+        default:
+            return commit(new Token(Token.Type.PATTERN_CHARACTER), 1); // SOURCECHARACTER
+        }
+    }
+
+    /*
+     * AtomEscape ::
+     *      DecimalEscape
+     *      CharacterEscape
+     *      CharacterClassEscape
+     */
+    private Token atomEscape() {
+        final Token token = new Token(Token.Type.ATOM_ESCAPE);
+        Token child;
+
+        child = decimalEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = characterClassEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = characterEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+
+        return null;
+    }
+
+    /*
+     * CharacterEscape ::
+     *      ControlEscape
+     *      c ControlLetter
+     *      HexEscapeSequence
+     *      UnicodeEscapeSequence
+     *      IdentityEscape
+     */
+    private Token characterEscape() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+
+        final Token token = new Token(Token.Type.CHARACTER_ESCAPE);
+        Token child;
+
+        child = controlEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        if (ch0 == 'c') {
+            commit(token, 1);
+            child = controlLetter();
+            if (child != null) {
+                return token.add(child);
+            }
+            restart(startIn, startOut);
+        }
+
+        child = hexEscapeSequence();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = unicodeEscapeSequence();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = identityEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        restart(startIn, startOut);
+
+        return null;
+    }
+
+    private boolean scanEscapeSequence(final char leader, final int length, final Token token) {
+        final int startIn  = position;
+        final int startOut = sb.length();
+
+        if (ch0 != leader) {
+            return false;
+        }
+
+        commit(token, 1);
+        for (int i = 0; i < length; i++) {
+            final char ch0l = Character.toLowerCase(ch0);
+            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
+                commit(token, 1);
+            } else {
+                restart(startIn, startOut);
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    private Token hexEscapeSequence() {
+        final Token token = new Token(Token.Type.HEX_ESCAPESEQUENCE);
+        if (scanEscapeSequence('x', 2, token)) {
+            return token;
+        }
+        return null;
+    }
+
+    private Token unicodeEscapeSequence() {
+        final Token token = new Token(Token.Type.UNICODE_ESCAPESEQUENCE);
+        if (scanEscapeSequence('u', 4, token)) {
+            return token;
+        }
+        return null;
+    }
+
+    /*
+     * ControlEscape ::
+     *      one of fnrtv
+     */
+    private Token controlEscape() {
+        switch (ch0) {
+        case 'f':
+        case 'n':
+        case 'r':
+        case 't':
+        case 'v':
+            return commit(new Token(Token.Type.CONTROL_ESCAPE), 1);
+
+        default:
+            return null;
+        }
+    }
+
+    /*
+     * ControlLetter ::
+     *      one of abcdefghijklmnopqrstuvwxyz
+     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
+     */
+    private Token controlLetter() {
+        final char c = Character.toUpperCase(ch0);
+        if (c >= 'A' && c <= 'Z') {
+            final Token token = new Token(Token.Type.CONTROL_LETTER);
+            commit(token, 1);
+            return token;
+        }
+        return null;
+        /*
+        Token token = new Token(Token.Type.CONTROL_LETTER);
+        commit(null, 1);//add original char to builder not to token
+        this.neverMatches = c < 'A' || c > 'Z';
+        return token.add(""+c);*/
+    }
+
+    /*
+     * IdentityEscape ::
+     *      SourceCharacter but not IdentifierPart
+     *      <ZWJ>  (200c)
+     *      <ZWNJ> (200d)
+     */
+    private Token identityEscape() {
+        final Token token = new Token(Token.Type.IDENTITY_ESCAPE);
+        commit(token, 1);
+        return token;
+    }
+
+    /*
+     * DecimalEscape ::
+     *      DecimalIntegerLiteral [lookahead DecimalDigit]
+     */
+    private Token decimalEscape() {
+        final Token token = new Token(Token.Type.DECIMAL_ESCAPE);
+        final int startIn  = position;
+        final int startOut = sb.length();
+
+        if (ch0 == '0' && !isDecimalDigit(ch1)) {
+            commit(token, 1);
+            token.removeLast();
+            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
+            return token.add("\u0000");
+        }
+
+        if (isDecimalDigit(ch0)) {
+            while (isDecimalDigit(ch0)) {
+                commit(token, 1);
+            }
+            return token;
+        }
+
+        restart(startIn, startOut);
+
+        return null;
+    }
+
+    /*
+     * CharacterClassEscape ::
+     *  one of dDsSwW
+     */
+    private Token characterClassEscape() {
+        switch (ch0) {
+        case 's':
+        case 'S':
+        case 'd':
+        case 'D':
+        case 'w':
+        case 'W':
+            return commit(new Token(Token.Type.CHARACTERCLASS_ESCAPE), 1);
+
+        default:
+            return null;
+        }
+    }
+
+    /*
+     * CharacterClass ::
+     *      [ [lookahead {^}] ClassRanges ]
+     *      [ ^ ClassRanges ]
+     */
+    private Token characterClass() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.CHARACTERCLASS);
+
+        if (ch0 == '[') {
+            push(']');
+            commit(token, 1);
+
+            if (ch0 == '^') {
+                commit(token, 1);
+            }
+
+            final Token child = classRanges();
+            if (child != null && ch0 == ']') {
+                pop(']');
+                token.add(child);
+                return commit(token, 1);
+            }
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * ClassRanges ::
+     *      [empty]
+     *      NonemptyClassRanges
+     */
+    private Token classRanges() {
+        return new Token(Token.Type.CLASSRANGES).add(nonemptyClassRanges());
+    }
+
+    /*
+     * NonemptyClassRanges ::
+     *      ClassAtom
+     *      ClassAtom NonemptyClassRangesNoDash
+     *      ClassAtom - ClassAtom ClassRanges
+     */
+    private Token nonemptyClassRanges() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES);
+        Token child;
+
+        child = classAtom();
+        if (child != null) {
+            token.add(child);
+
+            if (ch0 == '-') {
+                commit(token, 1);
+
+                final Token child1 = classAtom();
+                final Token child2 = classRanges();
+                if (child1 != null && child2 != null) {
+                    token.add(child1);
+                    token.add(child2);
+
+                    return token;
+                }
+            }
+
+            child = nonemptyClassRangesNoDash();
+            if (child != null) {
+                token.add(child);
+                return token;
+            }
+
+            return token;
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * NonemptyClassRangesNoDash ::
+     *      ClassAtom
+     *      ClassAtomNoDash NonemptyClassRangesNoDash
+     *      ClassAtomNoDash - ClassAtom ClassRanges
+     */
+    private Token nonemptyClassRangesNoDash() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES_NODASH);
+        Token child;
+
+        child = classAtomNoDash();
+        if (child != null) {
+            token.add(child);
+
+            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
+            if (ch0 == '-') {
+               commit(token, 1);
+
+               final Token child1 = classAtom();
+               final Token child2 = classRanges();
+               if (child1 != null && child2 != null) {
+                   token.add(child1);
+                   return token.add(child2);
+               }
+               //fallthru
+           }
+
+            child = nonemptyClassRangesNoDash();
+            if (child != null) {
+                token.add(child);
+            }
+            return token; // still a class atom
+        }
+
+        child = classAtom();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        restart(startIn, startOut);
+        return null;
+    }
+
+    /*
+     * ClassAtom : - ClassAtomNoDash
+     */
+    private Token classAtom() {
+        final Token token = new Token(Token.Type.CLASSATOM);
+
+        if (ch0 == '-') {
+            return commit(token, 1);
+        }
+
+        final Token child = classAtomNoDash();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        return null;
+    }
+
+    /*
+     * ClassAtomNoDash ::
+     *      SourceCharacter but not one of \ or ] or -
+     *      \ ClassEscape
+     */
+    private Token classAtomNoDash() {
+        final int startIn  = position;
+        final int startOut = sb.length();
+        final Token token  = new Token(Token.Type.CLASSATOM_NODASH);
+
+        switch (ch0) {
+        case ']':
+        case '-':
+        case '\0':
+            return null;
+
+        case '[':
+            // unescaped left square bracket - add escape
+            return commit(token.add("\\"), 1);
+
+        case '\\':
+            commit(token, 1);
+            final Token child = classEscape();
+            if (child != null) {
+                return token.add(child);
+            }
+
+            restart(startIn, startOut);
+            return null;
+
+        default:
+            return commit(token, 1);
+        }
+    }
+
+    /*
+     * ClassEscape ::
+     *      DecimalEscape
+     *      b
+     *      CharacterEscape
+     *      CharacterClassEscape
+     */
+    private Token classEscape() {
+        final Token token = new Token(Token.Type.CLASS_ESCAPE);
+        Token child;
+
+        child = decimalEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        if (ch0 == 'b') {
+            return commit(token, 1);
+        }
+
+        child = characterEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        child = characterClassEscape();
+        if (child != null) {
+            return token.add(child);
+        }
+
+        return null;
+    }
+
+    /*
+     * DecimalDigits
+     */
+    private Token decimalDigits() {
+        if (!isDecimalDigit(ch0)) {
+            return null;
+        }
+
+        final Token token = new Token(Token.Type.DECIMALDIGITS);
+        while (isDecimalDigit(ch0)) {
+            commit(token, 1);
+        }
+
+        return token;
+    }
+
+    private static boolean isDecimalDigit(final char ch) {
+        return ch >= '0' && ch <= '9';
+    }
+}