8006028: Integrate Joni regexp engine with Nashorn
Reviewed-by: lagergren, attila
--- a/nashorn/THIRD_PARTY_README Fri Feb 22 16:31:10 2013 +0100
+++ b/nashorn/THIRD_PARTY_README Fri Feb 22 17:00:22 2013 +0100
@@ -98,3 +98,26 @@
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- end of LICENSE ---
+
+%% This notice is provided with respect to Joni library which is included
+with the Nashorn technology.
+
+--- begin of LICENSE ---
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+--- end of LICENSE ---
--- a/nashorn/docs/DEVELOPER_README Fri Feb 22 16:31:10 2013 +0100
+++ b/nashorn/docs/DEVELOPER_README Fri Feb 22 17:00:22 2013 +0100
@@ -338,6 +338,15 @@
this system property.
+SYSTEM_PROPERTY: nashorn.regexp.impl=[jdk|joni]
+
+This property defines the regular expression engine to be used by
+Nashorn. The default implementation is "jdk" which is based on the
+JDK's java.util.regex package. Set this property to "joni" to install
+an implementation based on Joni, the regular expression engine used by
+the JRuby project.
+
+
===============
2. The loggers.
===============
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/JoniRegExp.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.nashorn.internal.runtime.regexp;
+
+import jdk.nashorn.internal.runtime.ParserException;
+import jdk.nashorn.internal.runtime.regexp.joni.Matcher;
+import jdk.nashorn.internal.runtime.regexp.joni.Option;
+import jdk.nashorn.internal.runtime.regexp.joni.Regex;
+import jdk.nashorn.internal.runtime.regexp.joni.Region;
+import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Regular expression implementation based on the Joni engine from the JRuby project.
+ */
+public class JoniRegExp extends RegExp {
+
+ /** Compiled Joni Regex */
+ private Regex regex;
+
+ /** Matcher */
+ private RegExpMatcher matcher;
+
+ /**
+ * Construct a Regular expression from the given {@code pattern} and {@code flags} strings.
+ *
+ * @param pattern RegExp pattern string
+ * @param flags RegExp flag string
+ * @throws ParserException if flags is invalid or pattern string has syntax error.
+ */
+ public JoniRegExp(final String pattern, final String flags) throws ParserException {
+ super(pattern, flags);
+
+ int option = Option.SINGLELINE;
+
+ if (this.isIgnoreCase()) {
+ option |= Option.IGNORECASE;
+ }
+ if (this.isMultiline()) {
+ option &= ~Option.SINGLELINE;
+ option |= Option.NEGATE_SINGLELINE;
+ }
+
+ try {
+ RegExpScanner parsed;
+
+ try {
+ parsed = RegExpScanner.scan(pattern);
+ } catch (final PatternSyntaxException e) {
+ // refine the exception with a better syntax error, if this
+ // passes, just rethrow what we have
+ Pattern.compile(pattern, 0);
+ throw e;
+ }
+
+ if (parsed != null) {
+ char[] javaPattern = parsed.getJavaPattern().toCharArray();
+ this.regex = new Regex(javaPattern, 0, javaPattern.length, option, Syntax.JAVASCRIPT);
+ this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
+ }
+ } catch (final PatternSyntaxException e2) {
+ throwParserException("syntax", e2.getMessage());
+ } catch (JOniException e2) {
+ throwParserException("syntax", e2.getMessage());
+ }
+ }
+
+ @Override
+ public RegExpMatcher match(final String input) {
+ if (regex == null) {
+ return null;
+ }
+
+ RegExpMatcher matcher = this.matcher;
+
+ if (matcher == null || input != matcher.getInput()) {
+ matcher = new JoniMatcher(input);
+ this.matcher = matcher;
+ }
+
+ return matcher;
+ }
+
+ /**
+ * RegExp Factory class for Joni regexp engine.
+ */
+ public static class Factory extends RegExpFactory {
+
+ @Override
+ protected RegExp compile(final String pattern, final String flags) throws ParserException {
+ return new JoniRegExp(pattern, flags);
+ }
+
+ @Override
+ protected String replaceToken(final String str) {
+ return str.equals("[^]") ? "[\\s\\S]" : str;
+ }
+ }
+
+ class JoniMatcher implements RegExpMatcher {
+ final String input;
+ final Matcher matcher;
+
+ JoniMatcher(final String input) {
+ this.input = input;
+ this.matcher = regex.matcher(input.toCharArray());
+ }
+
+ @Override
+ public boolean search(final int start) {
+ return matcher.search(start, input.length(), Option.NONE) > -1;
+ }
+
+ @Override
+ public String getInput() {
+ return input;
+ }
+
+ @Override
+ public int start() {
+ return matcher.getBegin();
+ }
+
+ @Override
+ public int start(final int group) {
+ return group == 0 ? start() : matcher.getRegion().beg[group];
+ }
+
+ @Override
+ public int end() {
+ return matcher.getEnd();
+ }
+
+ @Override
+ public int end(final int group) {
+ return group == 0 ? end() : matcher.getRegion().end[group];
+ }
+
+ @Override
+ public String group() {
+ return input.substring(matcher.getBegin(), matcher.getEnd());
+ }
+
+ @Override
+ public String group(final int group) {
+ if (group == 0) {
+ return group();
+ }
+ final Region region = matcher.getRegion();
+ return input.substring(region.beg[group], region.end[group]);
+ }
+
+ @Override
+ public int groupCount() {
+ final Region region = matcher.getRegion();
+ return region == null ? 0 : region.numRegs - 1;
+ }
+ }
+}
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java Fri Feb 22 16:31:10 2013 +0100
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java Fri Feb 22 17:00:22 2013 +0100
@@ -27,14 +27,34 @@
import jdk.nashorn.internal.parser.Lexer;
import jdk.nashorn.internal.runtime.ParserException;
+import jdk.nashorn.internal.runtime.options.Options;
/**
* Factory class for regular expressions. This class creates instances of {@link DefaultRegExp}.
+ * An alternative factory can be installed using the {@code nashorn.regexp.impl} system property.
*/
public class RegExpFactory {
- private final static RegExpFactory instance = new RegExpFactory();
+ private final static RegExpFactory instance;
+
+ private final static String JDK = "jdk";
+ private final static String JONI = "joni";
+
+    // Pick the factory once, when this class is initialized, from the
+    // nashorn.regexp.impl system property; JDK is passed as the default value.
+    static {
+        final String impl = Options.getStringProperty("nashorn.regexp.impl", JDK);
+        switch (impl) {
+        case JONI:
+            instance = new JoniRegExp.Factory();
+            break;
+        case JDK:
+            instance = new RegExpFactory();
+            break;
+        default:
+            // NOTE(review): the null assignment is presumably only there for the
+            // final field's definite assignment; the throw below aborts initialization.
+            instance = null;
+            throw new InternalError("Unsupported RegExp factory: " + impl);
+        }
+    }
/**
* Creates a Regular expression from the given {@code pattern} and {@code flags} strings.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Analyser.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,2162 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll;
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsClear;
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt;
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAtSimple;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
+import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode;
+import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import java.util.HashSet;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr;
+
+final class Analyser extends Parser {
+
+    /**
+     * Creates an analyser; all arguments are passed unchanged to the {@link Parser}
+     * constructor.
+     *
+     * @param env   scan environment shared with the parser
+     * @param chars pattern characters
+     * @param p     presumably the start offset of the pattern in {@code chars} (confirm in Parser)
+     * @param end   presumably the end offset of the pattern in {@code chars} (confirm in Parser)
+     */
+    protected Analyser(ScanEnvironment env, char[] chars, int p, int end) {
+        super(env, chars, p, end);
+    }
+
+ protected final void compile() {
+ regex.state = RegexState.COMPILING;
+
+ if (Config.DEBUG) {
+ Config.log.println(new String(chars, getBegin(), getEnd()));
+ }
+
+ reset();
+
+ regex.numMem = 0;
+ regex.numRepeat = 0;
+ regex.numNullCheck = 0;
+ //regex.repeatRangeAlloc = 0;
+ regex.repeatRangeLo = null;
+ regex.repeatRangeHi = null;
+ regex.numCombExpCheck = 0;
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) regex.numCombExpCheck = 0;
+
+ parse();
+
+ if (Config.USE_NAMED_GROUP) {
+ /* mixed use named group and no-named group */
+ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) {
+ if (env.numNamed != env.numMem) {
+ root = disableNoNameGroupCapture(root);
+ } else {
+ numberedRefCheck(root);
+ }
+ }
+ } // USE_NAMED_GROUP
+
+ if (Config.USE_NAMED_GROUP) {
+ if (env.numCall > 0) {
+ env.unsetAddrList = new UnsetAddrList(env.numCall);
+ setupSubExpCall(root);
+ // r != 0 ???
+ subexpRecursiveCheckTrav(root);
+ // r < 0 -< err, FOUND_CALLED_NODE = 1
+ subexpInfRecursiveCheckTrav(root);
+ // r != 0 recursion infinite ???
+ regex.numCall = env.numCall;
+ } else {
+ regex.numCall = 0;
+ }
+ } // USE_NAMED_GROUP
+
+ if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) {
+ Config.log.println("<RAW TREE>");
+ Config.log.println(root + "\n");
+ }
+
+ root = setupTree(root, 0);
+ if (Config.DEBUG_PARSE_TREE) {
+ if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println("<TREE>");
+ root.verifyTree(new HashSet<Node>(), env.reg.warnings);
+ Config.log.println(root + "\n");
+ }
+
+ regex.captureHistory = env.captureHistory;
+ regex.btMemStart = env.btMemStart;
+ regex.btMemEnd = env.btMemEnd;
+
+ if (isFindCondition(regex.options)) {
+ regex.btMemEnd = bsAll();
+ } else {
+ regex.btMemEnd = env.btMemEnd;
+ regex.btMemEnd |= regex.captureHistory;
+ }
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) {
+ setupCombExpCheck(root, 0);
+
+ if (Config.USE_SUBEXP_CALL && env.hasRecursion) {
+ env.numCombExpCheck = 0;
+ } else { // USE_SUBEXP_CALL
+ if (env.combExpMaxRegNum > 0) {
+ for (int i=1; i<env.combExpMaxRegNum; i++) {
+ if (bsAt(env.backrefedMem, i)) {
+ env.numCombExpCheck = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ } // USE_SUBEXP_CALL
+ regex.numCombExpCheck = env.numCombExpCheck;
+ } // USE_COMBINATION_EXPLOSION_CHECK
+
+ regex.clearOptimizeInfo();
+
+ if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root);
+
+ env.memNodes = null;
+
+ new ArrayCompiler(this).compile();
+ //new AsmCompiler(this).compile();
+
+ if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
+ regex.stackPopLevel = StackPopLevel.ALL;
+ } else {
+ if (regex.btMemStart != 0) {
+ regex.stackPopLevel = StackPopLevel.MEM_START;
+ } else {
+ regex.stackPopLevel = StackPopLevel.FREE;
+ }
+ }
+
+ if (Config.DEBUG_COMPILE) {
+ if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString());
+ Config.log.println("stack used: " + regex.stackNeeded);
+ if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n");
+ Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
+
+ } // DEBUG_COMPILE
+
+ regex.state = RegexState.NORMAL;
+ }
+
+ private void noNameDisableMapFor_cosAlt(Node node, int[]map, Ptr counter) {
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ can.setCar(noNameDisableMap(can.car, map, counter));
+ } while ((can = can.cdr) != null);
+ }
+
+ private void noNameDisableMapFor_quantifier(Node node, int[]map, Ptr counter) {
+ QuantifierNode qn = (QuantifierNode)node;
+ Node target = qn.target;
+ Node old = target;
+ target = noNameDisableMap(target, map, counter);
+
+ if (target != old) {
+ qn.setTarget(target);
+ if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target);
+ }
+ }
+
+ private Node noNameDisableMapFor_enclose(Node node, int[]map, Ptr counter) {
+ EncloseNode en = (EncloseNode)node;
+ if (en.type == EncloseType.MEMORY) {
+ if (en.isNamedGroup()) {
+ counter.p++;
+ map[en.regNum] = counter.p;
+ en.regNum = counter.p;
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ } else {
+ node = en.target;
+ en.target = null; // remove first enclose: /(a)(?<b>c)/
+ node = noNameDisableMap(node, map, counter);
+ }
+ } else {
+ //en.target = noNameDisableMap(en.target, map, counter);
+ en.setTarget(noNameDisableMap(en.target, map, counter)); // ???
+ }
+ return node;
+ }
+
+    /**
+     * Applies {@link #noNameDisableMap} to the target of a look-ahead or look-behind
+     * anchor; all other anchor types are left untouched.
+     */
+    private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) {
+        AnchorNode an = (AnchorNode)node;
+        // Constants are read from AnchorType, as the other anchor switches in this class do.
+        switch (an.type) {
+        case AnchorType.PREC_READ:
+        case AnchorType.PREC_READ_NOT:
+        case AnchorType.LOOK_BEHIND:
+        case AnchorType.LOOK_BEHIND_NOT:
+            an.setTarget(noNameDisableMap(an.target, map, counter));
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    /**
+     * Dispatches on the node type to the matching {@code noNameDisableMapFor_*}
+     * helper. Only the ENCLOSE helper can return a different node; node types
+     * without a helper are returned unchanged.
+     *
+     * @return the node that should take {@code node}'s place
+     */
+    private Node noNameDisableMap(Node node, int[]map, Ptr counter) {
+        switch (node.getType()) {
+        case NodeType.ENCLOSE:
+            return noNameDisableMapFor_enclose(node, map, counter);
+
+        case NodeType.LIST:
+        case NodeType.ALT:
+            noNameDisableMapFor_cosAlt(node, map, counter);
+            return node;
+
+        case NodeType.QTFR:
+            noNameDisableMapFor_quantifier(node, map, counter);
+            return node;
+
+        case NodeType.ANCHOR:
+            noNameDisableMapFor_anchor(node, map, counter);
+            return node;
+
+        default:
+            return node;
+        }
+    }
+
+ private void renumberByMap(Node node, int[]map) {
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ renumberByMap(can.car, map);
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ renumberByMap(((QuantifierNode)node).target, map);
+ break;
+
+ case NodeType.ENCLOSE:
+ renumberByMap(((EncloseNode)node).target, map);
+ break;
+
+ case NodeType.BREF:
+ ((BackRefNode)node).renumber(map);
+ break;
+ } // switch
+ }
+
+    /**
+     * Walks LIST/ALT, quantifier and enclose nodes and raises
+     * {@code ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED} through
+     * {@code newValueException} for every back reference that is not a name reference.
+     */
+    protected final void numberedRefCheck(Node node) {
+        final int type = node.getType();
+
+        if (type == NodeType.LIST || type == NodeType.ALT) {
+            for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+                numberedRefCheck(cell.car);
+            }
+        } else if (type == NodeType.QTFR) {
+            numberedRefCheck(((QuantifierNode)node).target);
+        } else if (type == NodeType.ENCLOSE) {
+            numberedRefCheck(((EncloseNode)node).target);
+        } else if (type == NodeType.BREF) {
+            final BackRefNode backRef = (BackRefNode)node;
+            if (!backRef.isNameRef()) {
+                newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+            }
+        }
+    }
+
+    /**
+     * Removes unnamed capture groups from the tree and renumbers the named ones.
+     * Afterwards back references, {@code env.memNodes}, the capture history bits,
+     * the group counts and the regex name table are all brought in line with the
+     * new numbering.
+     *
+     * @param root tree to rewrite
+     * @return the rewritten tree
+     */
+    protected final Node disableNoNameGroupCapture(Node root) {
+        final int groupCount = env.numMem;
+        // old group number -> new group number; a fresh int[] is all zeros,
+        // i.e. every group starts out as "not mapped"
+        final int[] map = new int[groupCount + 1];
+
+        root = noNameDisableMap(root, map, new Ptr(0));
+        renumberByMap(root, map);
+
+        // Move the surviving memory nodes down to the front of the table.
+        int pos = 1;
+        for (int i = 1; i <= groupCount; i++) {
+            if (map[i] > 0) {
+                env.memNodes[pos++] = env.memNodes[i];
+            }
+        }
+
+        // Rebuild the capture history using the new group numbers.
+        final int previousHistory = env.captureHistory;
+        env.captureHistory = bsClear();
+        for (int i = 1; i <= Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
+            if (!bsAt(previousHistory, i)) {
+                continue;
+            }
+            env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]);
+        }
+
+        env.numMem = env.numNamed;
+        regex.numMem = env.numNamed;
+
+        regex.renumberNameTable(map);
+
+        return root;
+    }
+
+    /**
+     * Swaps two nodes via {@code a.swap(b)} and, if {@code root} was one of them,
+     * points {@code root} at the other.
+     */
+    private void swap(Node a, Node b) {
+        a.swap(b);
+
+        if (root != a && root != b) {
+            return; // root not involved
+        }
+        root = (root == b) ? a : b;
+    }
+
+    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+    /**
+     * Computes a {@link TargetInfo} value for the given subtree.
+     * A recursive call yields {@code IS_EMPTY_REC} and a MEMORY enclose yields
+     * {@code IS_EMPTY_MEM}, both returned immediately. LIST/ALT nodes yield the
+     * largest value among their elements; quantifiers with a non-zero upper
+     * bound and OPTION/STOP_BACKTRACK encloses yield the value of their target.
+     * All other nodes yield 0.
+     */
+    private int quantifiersMemoryInfo(Node node) {
+        int info = 0;
+
+        switch(node.getType()) {
+        case NodeType.LIST:
+        case NodeType.ALT:
+            ConsAltNode can = (ConsAltNode)node;
+            do {
+                int v = quantifiersMemoryInfo(can.car);
+                if (v > info) info = v;
+            } while ((can = can.cdr) != null);
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    return TargetInfo.IS_EMPTY_REC; /* tiny version */
+                } else {
+                    info = quantifiersMemoryInfo(cn.target);
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.upper != 0) {
+                info = quantifiersMemoryInfo(qn.target);
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                return TargetInfo.IS_EMPTY_MEM;
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK: // constant read from EncloseType like its siblings
+                info = quantifiersMemoryInfo(en.target);
+                break;
+
+            default:
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.BREF:
+        case NodeType.STR:
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return info;
+    }
+
+    /**
+     * Computes a lower bound for the number of characters the subtree matches.
+     *
+     * <p>Back references take the smallest value over the groups they refer to,
+     * lists add up their elements, alternations take the smallest branch, and
+     * quantifiers multiply the target's value by their lower bound. Capture groups
+     * cache their result on the node when {@code Config.USE_SUBEXP_CALL} is on.
+     * Anchors and unknown node types contribute 0.
+     *
+     * @param node subtree to measure
+     * @return minimum match length
+     */
+    private int getMinMatchLength(Node node) {
+        int min = 0;
+
+        switch (node.getType()) {
+        case NodeType.BREF:
+            BackRefNode br = (BackRefNode)node;
+            if (br.isRecursion()) break;
+
+            if (br.back[0] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+            min = getMinMatchLength(env.memNodes[br.back[0]]);
+
+            for (int i=1; i<br.backNum; i++) {
+                if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+                int tmin = getMinMatchLength(env.memNodes[br.back[i]]);
+                if (min > tmin) min = tmin;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (cn.isRecursion()) {
+                    EncloseNode en = (EncloseNode)cn.target;
+                    if (en.isMinFixed()) min = en.minLength;
+                } else {
+                    min = getMinMatchLength(cn.target);
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.LIST:
+            ConsAltNode can = (ConsAltNode)node;
+            do {
+                min += getMinMatchLength(can.car);
+            } while ((can = can.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode y = (ConsAltNode)node;
+            do {
+                Node x = y.car;
+                int tmin = getMinMatchLength(x);
+                if (y == node) {
+                    // first branch seeds the minimum
+                    min = tmin;
+                } else if (min > tmin) {
+                    min = tmin;
+                }
+            } while ((y = y.cdr) != null);
+            break;
+
+        case NodeType.STR:
+            min = ((StringNode)node).length();
+            break;
+
+        case NodeType.CTYPE:
+            min = 1;
+            break;
+
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            min = 1;
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.lower > 0) {
+                min = getMinMatchLength(qn.target);
+                min = MinMaxLen.distanceMultiply(min, qn.lower);
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isMinFixed()) {
+                        min = en.minLength;
+                    } else {
+                        min = getMinMatchLength(en.target);
+                        en.minLength = min;
+                        en.setMinFixed();
+                    }
+                } else {
+                    // Without the per-node cache a capture group is measured like
+                    // any other enclose instead of being reported as length 0.
+                    min = getMinMatchLength(en.target);
+                }
+                break;
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                min = getMinMatchLength(en.target);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return min;
+    }
+
+    /**
+     * Computes an upper bound for the number of characters the subtree matches.
+     *
+     * <p>Lists add up their elements, alternations and back references take the
+     * largest candidate, recursion yields {@code MinMaxLen.INFINITE_DISTANCE}, and
+     * quantifiers multiply by their upper bound unless that bound is infinite.
+     * Capture groups cache their result on the node when
+     * {@code Config.USE_SUBEXP_CALL} is on. Anchors and unknown node types
+     * contribute 0.
+     *
+     * @param node subtree to measure
+     * @return maximum match length
+     */
+    private int getMaxMatchLength(Node node) {
+        int max = 0;
+
+        switch (node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode ln = (ConsAltNode)node;
+            do {
+                int tmax = getMaxMatchLength(ln.car);
+                max = MinMaxLen.distanceAdd(max, tmax);
+            } while ((ln = ln.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode an = (ConsAltNode)node;
+            do {
+                int tmax = getMaxMatchLength(an.car);
+                if (max < tmax) max = tmax;
+            } while ((an = an.cdr) != null);
+            break;
+
+        case NodeType.STR:
+            max = ((StringNode)node).length();
+            break;
+
+        case NodeType.CTYPE:
+            max = 1;
+            break;
+
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            max = 1;
+            break;
+
+        case NodeType.BREF:
+            BackRefNode br = (BackRefNode)node;
+            if (br.isRecursion()) {
+                max = MinMaxLen.INFINITE_DISTANCE;
+                break;
+            }
+
+            for (int i=0; i<br.backNum; i++) {
+                if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF);
+                int tmax = getMaxMatchLength(env.memNodes[br.back[i]]);
+                if (max < tmax) max = tmax;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (!cn.isRecursion()) {
+                    max = getMaxMatchLength(cn.target);
+                } else {
+                    max = MinMaxLen.INFINITE_DISTANCE;
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.upper != 0) {
+                max = getMaxMatchLength(qn.target);
+                if (max != 0) {
+                    if (!isRepeatInfinite(qn.upper)) {
+                        max = MinMaxLen.distanceMultiply(max, qn.upper);
+                    } else {
+                        max = MinMaxLen.INFINITE_DISTANCE;
+                    }
+                }
+            }
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch (en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isMaxFixed()) {
+                        max = en.maxLength;
+                    } else {
+                        max = getMaxMatchLength(en.target);
+                        en.maxLength = max;
+                        en.setMaxFixed();
+                    }
+                } else {
+                    // Leaving max at 0 here would understate what the group can
+                    // match, so measure the target as for any other enclose.
+                    max = getMaxMatchLength(en.target);
+                }
+                break;
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                max = getMaxMatchLength(en.target);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+        default:
+            break;
+        } // switch
+
+        return max;
+    }
+
+    /** {@code returnCode} value: the subtree has no fixed character length. */
+    private static final int GET_CHAR_LEN_VARLEN            = -1;
+    /** {@code returnCode} value: a top-level alternation has branches of different length. */
+    private static final int GET_CHAR_LEN_TOP_ALT_VARLEN    = -2;
+
+    /**
+     * Computes the fixed character length of a subtree, starting at nesting level 0.
+     * The outcome is reported through the {@code returnCode} field: 0 means the
+     * returned length is valid, a {@code GET_CHAR_LEN_*} value means it is not.
+     *
+     * @param node subtree to measure
+     * @return the character length (meaningful only when {@code returnCode == 0})
+     */
+    protected final int getCharLengthTree(Node node) {
+        return getCharLengthTree(node, 0);
+    }
+
+    /**
+     * Recursive worker for {@link #getCharLengthTree(Node)}.
+     *
+     * @param node  subtree to measure
+     * @param level nesting depth of the caller; incremented on entry, so the
+     *              outermost node is seen at level 1
+     * @return the character length (meaningful only when {@code returnCode == 0})
+     */
+    private int getCharLengthTree(Node node, int level) {
+        level++;
+
+        int len = 0;
+        returnCode = 0;
+
+        switch(node.getType()) {
+        case NodeType.LIST:
+            ConsAltNode ln = (ConsAltNode)node;
+            do {
+                int tlen = getCharLengthTree(ln.car, level);
+                if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen);
+            } while (returnCode == 0 && (ln = ln.cdr) != null);
+            break;
+
+        case NodeType.ALT:
+            ConsAltNode an = (ConsAltNode)node;
+            boolean varLen = false;
+
+            int tlen = getCharLengthTree(an.car, level);
+            while (returnCode == 0 && (an = an.cdr) != null) {
+                int tlen2 = getCharLengthTree(an.car, level);
+                if (returnCode == 0) {
+                    if (tlen != tlen2) varLen = true;
+                }
+            }
+
+            if (returnCode == 0) {
+                if (varLen) {
+                    if (level == 1) {
+                        returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
+                    } else {
+                        returnCode = GET_CHAR_LEN_VARLEN;
+                    }
+                } else {
+                    len = tlen;
+                }
+            }
+            break;
+
+        case NodeType.STR:
+            StringNode sn = (StringNode)node;
+            len = sn.length();
+            break;
+
+        case NodeType.QTFR:
+            QuantifierNode qn = (QuantifierNode)node;
+            if (qn.lower == qn.upper) {
+                tlen = getCharLengthTree(qn.target, level);
+                if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower);
+            } else {
+                returnCode = GET_CHAR_LEN_VARLEN;
+            }
+            break;
+
+        case NodeType.CALL:
+            if (Config.USE_SUBEXP_CALL) {
+                CallNode cn = (CallNode)node;
+                if (!cn.isRecursion()) {
+                    len = getCharLengthTree(cn.target, level);
+                } else {
+                    returnCode = GET_CHAR_LEN_VARLEN;
+                }
+            } // USE_SUBEXP_CALL
+            break;
+
+        // all single-character nodes share one arm
+        case NodeType.CTYPE:
+        case NodeType.CCLASS:
+        case NodeType.CANY:
+            len = 1;
+            break;
+
+        case NodeType.ENCLOSE:
+            EncloseNode en = (EncloseNode)node;
+            switch(en.type) {
+            case EncloseType.MEMORY:
+                if (Config.USE_SUBEXP_CALL) {
+                    if (en.isCLenFixed()) {
+                        len = en.charLength;
+                    } else {
+                        len = getCharLengthTree(en.target, level);
+                        if (returnCode == 0) {
+                            en.charLength = len;
+                            en.setCLenFixed();
+                        }
+                    }
+                } else {
+                    // Without the per-node cache a capture group is measured like
+                    // any other enclose instead of being reported as length 0.
+                    len = getCharLengthTree(en.target, level);
+                }
+                break;
+
+            case EncloseType.OPTION:
+            case EncloseType.STOP_BACKTRACK:
+                len = getCharLengthTree(en.target, level);
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.ANCHOR:
+            break;
+
+        default:
+            returnCode = GET_CHAR_LEN_VARLEN;
+        } // switch
+        return len;
+    }
+
+    /**
+     * Tells whether {@code x} and {@code y} are known to be disjoint, i.e. what
+     * {@code x} matches is never matched by {@code y}.
+     *
+     * <p>Only CTYPE, CCLASS and STR nodes are analysed. Several combinations are
+     * handled by swapping the operands and retrying. {@code false} is the
+     * conservative answer for every combination that is not analysed.
+     *
+     * @return {@code true} if x is not included in y, {@code false} if it is or cannot be decided
+     */
+    private boolean isNotIncluded(Node x, Node y) {
+        Node tmp;
+
+        // !retry:!
+        retry: while(true) {
+
+        int yType = y.getType();
+
+        switch(x.getType()) {
+        case NodeType.CTYPE:
+            switch(yType) {
+            case NodeType.CTYPE:
+                CTypeNode cny = (CTypeNode)y;
+                CTypeNode cnx = (CTypeNode)x;
+                // same character type, opposite negation
+                return cny.ctype == cnx.ctype && cny.not != cnx.not;
+
+            case NodeType.CCLASS:
+                // !swap:!
+                tmp = x;
+                x = y;
+                y = tmp;
+                // !goto retry;!
+                continue retry;
+
+            case NodeType.STR:
+                // !goto swap;!
+                tmp = x;
+                x = y;
+                y = tmp;
+                continue retry;
+
+            default:
+                break;
+            } // inner switch
+            break;
+
+        case NodeType.CCLASS:
+            CClassNode xc = (CClassNode)x;
+
+            switch(yType) {
+            case NodeType.CTYPE:
+                switch(((CTypeNode)y).ctype) {
+                case CharacterType.WORD:
+                    if (!((CTypeNode)y).not) {
+                        // y is \w: disjoint only if the class holds no word character
+                        if (xc.mbuf == null && !xc.isNot()) {
+                            for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                                if (xc.bs.at(i)) {
+                                    if (EncodingHelper.isWord(i)) return false;
+                                }
+                            }
+                            return true;
+                        }
+                        return false;
+                    } else {
+                        // y is \W: disjoint only if the class holds no non-word character
+                        for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                            if (!EncodingHelper.isWord(i)) {
+                                if (!xc.isNot()) {
+                                    if (xc.bs.at(i)) return false;
+                                } else {
+                                    if (!xc.bs.at(i)) return false;
+                                }
+                            }
+                        }
+                        return true;
+                    }
+                    // break; not reached
+
+                default:
+                    break;
+                } // inner switch
+                break;
+
+            case NodeType.CCLASS:
+                CClassNode yc = (CClassNode)y;
+
+                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+                    boolean v = xc.bs.at(i);
+                    if ((v && !xc.isNot()) || (!v && xc.isNot())) {
+                        v = yc.bs.at(i);
+                        if ((v && !yc.isNot()) || (!v && yc.isNot())) return false;
+                    }
+                }
+                if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true;
+                return false;
+                // break; not reached
+
+            case NodeType.STR:
+                // !goto swap;!
+                tmp = x;
+                x = y;
+                y = tmp;
+                continue retry;
+
+            default:
+                break;
+
+            } // inner switch
+            break; // case NodeType.CCLASS
+
+        case NodeType.STR:
+            StringNode xs = (StringNode)x;
+            if (xs.length() == 0) break;
+
+            switch (yType) {
+            case NodeType.CTYPE:
+                CTypeNode cy = ((CTypeNode)y);
+                switch (cy.ctype) {
+                case CharacterType.WORD:
+                    // Decide on the string's first character: a word character is
+                    // covered by \w and excluded by \W, a non-word character the
+                    // other way round.
+                    if (EncodingHelper.isWord(xs.chars[xs.p])) {
+                        return cy.not;
+                    }
+                    return !cy.not;
+
+                default:
+                    break;
+
+                } // inner switch
+                break;
+
+            case NodeType.CCLASS:
+                CClassNode cc = (CClassNode)y;
+                int code = xs.chars[xs.p];
+                return !cc.isCodeInCC(code);
+
+            case NodeType.STR:
+                StringNode ys = (StringNode)y;
+                int len = xs.length();
+                if (len > ys.length()) len = ys.length();
+                if (xs.isAmbig() || ys.isAmbig()) {
+                    /* tiny version */
+                    return false;
+                } else {
+                    // disjoint as soon as the common prefix differs
+                    for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) {
+                        if (ys.chars[p] != xs.chars[q]) return true;
+                    }
+                }
+                break;
+
+            default:
+                break;
+            } // inner switch
+
+            break; // case NodeType.STR
+
+        } // switch
+
+        break;
+        } // retry: while
+        return false;
+    }
+
+ private Node getHeadValueNode(Node node, boolean exact) {
+ Node n = null;
+
+ switch(node.getType()) {
+ case NodeType.BREF:
+ case NodeType.ALT:
+ case NodeType.CANY:
+ break;
+
+ case NodeType.CALL:
+ break; // if (Config.USE_SUBEXP_CALL)
+
+ case NodeType.CTYPE:
+ case NodeType.CCLASS:
+ if (!exact) n = node;
+ break;
+
+ case NodeType.LIST:
+ n = getHeadValueNode(((ConsAltNode)node).car, exact);
+ break;
+
+ case NodeType.STR:
+ StringNode sn = (StringNode)node;
+ if (sn.end <= sn.p) break; // ???
+
+ if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){
+ // nothing
+ } else {
+ n = node;
+ }
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ if (qn.lower > 0) {
+ if (qn.headExact != null) {
+ n = qn.headExact;
+ } else {
+ n = getHeadValueNode(qn.target, exact);
+ }
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+
+ switch (en.type) {
+ case EncloseType.OPTION:
+ int options = regex.options;
+ regex.options = en.option;
+ n = getHeadValueNode(en.target, exact);
+ regex.options = options;
+ break;
+
+ case EncloseType.MEMORY:
+ case EncloseType.STOP_BACKTRACK:
+ n = getHeadValueNode(en.target, exact);
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact);
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return n;
+ }
+
+ // Returns true (invalid) when the tree contains a node kind that the given masks do not allow.
+ private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) {
+ // The node's own type bit must be present in typeMask.
+ if ((node.getType2Bit() & typeMask) == 0) {
+ return true;
+ }
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ // Walk the cons chain and stop at the first invalid element.
+ for (ConsAltNode cell = (ConsAltNode)node; cell != null; cell = cell.cdr) {
+ if (checkTypeTree(cell.car, typeMask, encloseMask, anchorMask)) {
+ return true;
+ }
+ }
+ return false;
+
+ case NodeType.QTFR:
+ return checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
+
+ case NodeType.ENCLOSE:
+ EncloseNode enclose = (EncloseNode)node;
+ // The enclose kind itself is filtered before its body is examined.
+ if ((enclose.type & encloseMask) == 0) return true;
+ return checkTypeTree(enclose.target, typeMask, encloseMask, anchorMask);
+
+ case NodeType.ANCHOR:
+ AnchorNode anchor = (AnchorNode)node;
+ if ((anchor.type & anchorMask) == 0) return true;
+ // An anchor without a target has no subtree to descend into.
+ if (anchor.target == null) return false;
+ return checkTypeTree(anchor.target, typeMask, encloseMask, anchorMask);
+
+ default:
+ // Any other node type is valid once its type bit has passed the mask.
+ return false;
+ } // switch
+ }
+
+ private static final int RECURSION_EXIST = 1; // result: the mark1 group is reached again after 'head' became false
+ private static final int RECURSION_INFINITE = 2; // result: the mark1 group is reached again while 'head' is still true
+ private int subexpInfRecursiveCheck(Node node, boolean head) { // classifies how 'node' leads back to an enclose carrying mark1; returns 0 or one of the constants above
+ int r = 0;
+ // 'head' stays true only while every list element passed so far has minimum match length 0 (see the LIST case).
+ switch (node.getType()) {
+ case NodeType.LIST:
+ int min;
+ ConsAltNode x = (ConsAltNode)node;
+ do {
+ int ret = subexpInfRecursiveCheck(x.car, head);
+ if (ret == RECURSION_INFINITE) return ret; // infinite recursion anywhere in the list decides the result
+ r |= ret;
+ if (head) {
+ min = getMinMatchLength(x.car);
+ if (min != 0) head = false; // later elements are no longer at the head
+ }
+ } while ((x = x.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ r = RECURSION_EXIST; // and-ed with each branch below: survives only if every branch reports it
+ do {
+ int ret = subexpInfRecursiveCheck(can.car, head);
+ if (ret == RECURSION_INFINITE) return ret;
+ r &= ret;
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ r = subexpInfRecursiveCheck(qn.target, head);
+ if (r == RECURSION_EXIST) {
+ if (qn.lower == 0) r = 0; // with lower bound 0 the recursion is not reported
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpInfRecursiveCheck(an.target, head); // only these anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ r = subexpInfRecursiveCheck(((CallNode)node).target, head); // follow the call into the called node
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMark2()) {
+ return 0; // mark2 is set while this group is being visited below: do not re-enter it
+ } else if (en.isMark1()) {
+ return !head ? RECURSION_EXIST : RECURSION_INFINITE;
+ // throw exception here ???
+ } else {
+ en.setMark2();
+ r = subexpInfRecursiveCheck(en.target, head);
+ en.clearMark2(); // mark2 only lives for the duration of the visit
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+ return r;
+ }
+
+ protected final int subexpInfRecursiveCheckTrav(Node node) { // walks the tree and raises ERR_NEVER_ENDING_RECURSION for recursive groups whose check result is non-zero
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ r = subexpInfRecursiveCheckTrav(can.car);
+ } while (r == 0 && (can = can.cdr) != null); // stop at the first non-zero result
+ break;
+
+ case NodeType.QTFR:
+ r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpInfRecursiveCheckTrav(an.target); // only these anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isRecursion()) {
+ en.setMark1(); // mark1 identifies this group to subexpInfRecursiveCheck
+ r = subexpInfRecursiveCheck(en.target, true);
+ if (r > 0) newValueException(ERR_NEVER_ENDING_RECURSION);
+ en.clearMark1();
+ }
+ r = subexpInfRecursiveCheckTrav(en.target); // overwrites the check result with the result for the body
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ private int subexpRecursiveCheck(Node node) { // returns non-zero when 'node' leads back to the enclose carrying mark1; flags call nodes on that path as recursive
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ r |= subexpRecursiveCheck(can.car); // every element is visited; results are or-ed together
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ r = subexpRecursiveCheck(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpRecursiveCheck(an.target); // only these anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ CallNode cn = (CallNode)node;
+ r = subexpRecursiveCheck(cn.target);
+ if (r != 0) cn.setRecursion(); // this call is part of a recursive cycle
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMark2()) {
+ return 0; // mark2 is set while this group is being visited below: do not re-enter it
+ } else if (en.isMark1()) {
+ return 1; /* recursion */
+ } else {
+ en.setMark2();
+ r = subexpRecursiveCheck(en.target);
+ en.clearMark2(); // mark2 only lives for the duration of the visit
+ }
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ private static final int FOUND_CALLED_NODE = 1; // result bit: the subtree contains an enclose for which isCalled() is true
+ protected final int subexpRecursiveCheckTrav(Node node) { // walks the tree, flags called groups that recurse, and reports whether a called group was found
+ int r = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ int ret = subexpRecursiveCheckTrav(can.car);
+ if (ret == FOUND_CALLED_NODE) {
+ r = FOUND_CALLED_NODE;
+ }
+ // else if (ret < 0) return ret; ???
+ } while ((can = can.cdr) != null); // every element is visited, even after a hit
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ r = subexpRecursiveCheckTrav(qn.target);
+ if (qn.upper == 0) {
+ if (r == FOUND_CALLED_NODE) qn.isRefered = true; // a zero-repeat quantifier whose target contains a called group
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ r = subexpRecursiveCheckTrav(an.target); // only these anchor types are descended into
+ break;
+ } // inner switch
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ if (!en.isRecursion()) {
+ if (en.isCalled()) {
+ en.setMark1(); // mark1 identifies this group to subexpRecursiveCheck
+ r = subexpRecursiveCheck(en.target);
+ if (r != 0) en.setRecursion();
+ en.clearMark1();
+ }
+ }
+ r = subexpRecursiveCheckTrav(en.target); // overwrites the check result with the result for the body
+ if (en.isCalled()) r |= FOUND_CALLED_NODE;
+ break;
+
+ default:
+ break;
+ } // switch
+
+ return r;
+ }
+
+ private void setCallAttr(CallNode cn) {
+ cn.target = env.memNodes[cn.groupNum]; // no setTarget in call nodes!
+ if (cn.target == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+
+ ((EncloseNode)cn.target).setCalled();
+ env.btMemStart = BitStatus.bsOnAt(env.btMemStart, cn.groupNum);
+ cn.unsetAddrList = env.unsetAddrList;
+ }
+
+ protected final void setupSubExpCall(Node node) { // walks the tree and resolves every call node to its target group via setCallAttr
+ // Call nodes with groupNum != 0 are resolved by number; the others are resolved by name.
+ switch(node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode ln = (ConsAltNode)node;
+ do {
+ setupSubExpCall(ln.car);
+ } while ((ln = ln.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode can = (ConsAltNode)node;
+ do {
+ setupSubExpCall(can.car);
+ } while ((can = can.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ setupSubExpCall(((QuantifierNode)node).target);
+ break;
+
+ case NodeType.ENCLOSE:
+ setupSubExpCall(((EncloseNode)node).target);
+ break;
+
+ case NodeType.CALL:
+ CallNode cn = (CallNode)node;
+
+ if (cn.groupNum != 0) {
+ int gNum = cn.groupNum;
+
+ if (Config.USE_NAMED_GROUP) {
+ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) {
+ newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+ }
+ } // USE_NAMED_GROUP
+ if (gNum > env.numMem) newValueException(ERR_UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); // group number beyond the defined groups
+ setCallAttr(cn);
+ } else {
+ if (Config.USE_NAMED_GROUP) {
+ NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd);
+
+ if (ne == null) {
+ newValueException(ERR_UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
+ } else if (ne.backNum > 1) {
+ newValueException(ERR_MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); // the name maps to more than one group
+ } else {
+ cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ?
+ setCallAttr(cn);
+ }
+ }
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND:
+ case AnchorType.LOOK_BEHIND_NOT:
+ setupSubExpCall(an.target); // only these anchor types are descended into
+ break;
+ }
+ break;
+
+ } // switch
+ }
+
+ /* divide different length alternatives in look-behind.
+ (?<=A|B) ==> (?<=A)|(?<=B)
+ (?<!A|B) ==> (?<!A)(?<!B)
+ */
+ private Node divideLookBehindAlternatives(Node node) { // rewrites a look-behind over an alternation into one look-behind per alternative; returns the alternation (or list) node
+ AnchorNode an = (AnchorNode)node;
+ int anchorType = an.type;
+ Node head = an.target; // the alternation that was the look-behind's body
+ Node np = ((ConsAltNode)head).car; // first alternative
+
+ swap(node, head); // NOTE(review): presumably exchanges the two nodes' positions in the tree - confirm against swap()
+
+ Node tmp = node; // from here on 'node' names the alternation and 'head' names the original anchor
+ node = head;
+ head = tmp;
+
+ ((ConsAltNode)node).setCar(head); // the original anchor becomes the first element ...
+ ((AnchorNode)head).setTarget(np); // ... and wraps only the first alternative
+ np = node;
+
+ while ((np = ((ConsAltNode)np).cdr) != null) { // wrap each remaining alternative in a new anchor of the same type
+ AnchorNode insert = new AnchorNode(anchorType);
+ insert.setTarget(((ConsAltNode)np).car);
+ ((ConsAltNode)np).setCar(insert);
+ }
+
+ if (anchorType == AnchorType.LOOK_BEHIND_NOT) { // negative look-behind: all parts must hold, so the chain becomes a list
+ np = node;
+ do {
+ ((ConsAltNode)np).toListNode(); /* alt -> list */
+ } while ((np = ((ConsAltNode)np).cdr) != null);
+ }
+
+ return node;
+ }
+
+ private Node setupLookBehind(Node node) {
+ AnchorNode an = (AnchorNode)node;
+ int len = getCharLengthTree(an.target);
+ switch(returnCode) {
+ case 0:
+ an.charLength = len;
+ break;
+ case GET_CHAR_LEN_VARLEN:
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ break;
+ case GET_CHAR_LEN_TOP_ALT_VARLEN:
+ if (syntax.differentLengthAltLookBehind()) {
+ return divideLookBehindAlternatives(node);
+ } else {
+ newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ }
+ }
+ return node;
+ }
+
+ private void nextSetup(Node node, Node nextNode) { // uses the node following a greedy unbounded quantifier to set up two optimizations on it
+ // retry:
+ retry: while(true) {
+
+ int type = node.getType();
+ if (type == NodeType.QTFR) {
+ QuantifierNode qn = (QuantifierNode)node;
+ if (qn.greedy && isRepeatInfinite(qn.upper)) {
+ if (Config.USE_QTFR_PEEK_NEXT) {
+ StringNode n = (StringNode)getHeadValueNode(nextNode, true); // exact head of what follows the quantifier
+ /* '\0': for UTF-16BE etc... */
+ if (n != null && n.chars[n.p] != 0) { // ?????????
+ qn.nextHeadExact = n;
+ }
+ } // USE_QTFR_PEEK_NEXT
+ /* automatic possessification a*b ==> (?>a*)b */
+ if (qn.lower <= 1) {
+ if (qn.target.isSimple()) {
+ Node x = getHeadValueNode(qn.target, false);
+ if (x != null) {
+ Node y = getHeadValueNode(nextNode, false);
+ if (y != null && isNotIncluded(x, y)) { // applied only when isNotIncluded holds for the two head values
+ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
+ en.setStopBtSimpleRepeat();
+ //en.setTarget(qn.target); // optimize it ??
+ swap(node, en); // NOTE(review): presumably puts 'en' where the quantifier was in the tree - confirm against swap()
+
+ en.setTarget(node); // the quantifier becomes the body of the stop-backtrack group
+ }
+ }
+ }
+ }
+ }
+ } else if (type == NodeType.ENCLOSE) {
+ EncloseNode en = (EncloseNode)node;
+ if (en.isMemory()) {
+ node = en.target; // look through a memory group and retry with its body
+ // !goto retry;!
+ continue retry;
+ }
+ }
+
+ break;
+ } // while
+ }
+
+ private void updateStringNodeCaseFoldMultiByte(StringNode sn) { // lower-cases the node's characters; the node gets a new buffer only if some character changes
+ char[] chars = sn.chars;
+ int end = sn.end;
+ value = sn.p; // 'value' is not declared in this method; NOTE(review): presumably an inherited field used as the read cursor - confirm
+ int sp = 0; // number of characters accepted so far, later the write index into sbuf
+ char buf;
+
+ while (value < end) {
+ int ovalue = value;
+ buf = Character.toLowerCase(chars[value++]);
+
+ if (chars[ovalue] != buf) { // first character that lower-casing changes
+
+ char[] sbuf = new char[sn.length() << 1];
+ System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p); // keep the unchanged prefix
+ value = ovalue; // re-read the changed character in the loop below
+ while (value < end) {
+ buf = Character.toLowerCase(chars[value++]);
+ if (sp >= sbuf.length) { // grow the buffer by doubling when it is full
+ char[]tmp = new char[sbuf.length << 1];
+ System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
+ sbuf = tmp;
+ }
+ sbuf[sp++] = buf;
+ }
+ sn.set(sbuf, 0, sp); // replace the node's content with the lower-cased copy
+ return;
+ }
+ sp++;
+ }
+ }
+
+ private void updateStringNodeCaseFold(Node node) {
+ // Casts to StringNode and delegates to the char-based case folder.
+ updateStringNodeCaseFoldMultiByte((StringNode)node);
+ }
+
+ private Node expandCaseFoldMakeRemString(char[] chars, int p, int end) {
+ StringNode node = new StringNode(chars, p, end);
+
+ updateStringNodeCaseFold(node);
+ node.setAmbig();
+ node.setDontGetOptInfo();
+ return node;
+ }
+
+ private boolean expandCaseFoldStringAlt(int itemNum, char[] items,
+ char[] chars, int p, int slen, int end, ObjPtr<Node> node) {
+
+ ConsAltNode altNode;
+ node.p = altNode = newAltNode(null, null);
+
+ StringNode snode = new StringNode(chars, p, p + slen);
+ altNode.setCar(snode);
+
+ for (int i=0; i<itemNum; i++) {
+ snode = new StringNode();
+
+ snode.catCode(items[i]);
+
+ ConsAltNode an = newAltNode(null, null);
+ an.setCar(snode);
+ altNode.setCdr(an);
+ altNode = an;
+ }
+ return false;
+ }
+
+ private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; // expansion stops once the product of alternative counts exceeds this
+ private Node expandCaseFoldString(Node node) { // replaces a string node by strings/alternations covering its case-fold variants; returns the replacement
+ StringNode sn = (StringNode)node;
+
+ if (sn.isAmbig() || sn.length() <= 0) return node; // already ambiguous or empty: nothing to expand
+
+ char[] chars = sn.chars;
+ int p = sn.p;
+ int end = sn.end;
+ int altNum = 1; // running product of (fold items + 1) over the expanded characters
+
+ ConsAltNode topRoot = null, root = null; // list root, created lazily once a second piece is needed
+ ObjPtr<Node> prevNode = new ObjPtr<Node>(); // the most recently created piece
+ StringNode stringNode = null; // current run of characters without fold items, or null
+
+ while (p < end) {
+ char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars[p]);
+
+ if (items.length == 0) { // no fold items: the character goes into a plain string run
+ if (stringNode == null) {
+ if (root == null && prevNode.p != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
+ }
+
+ prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL);
+
+ if (root != null) ConsAltNode.listAdd(root, stringNode);
+
+ }
+
+ stringNode.cat(chars, p, p + 1);
+ } else {
+ altNum *= (items.length + 1);
+ if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; // too many combinations: the rest is handled after the loop
+
+ if (root == null && prevNode.p != null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
+ }
+
+ expandCaseFoldStringAlt(items.length, items, chars, p, 1, end, prevNode); // stores the new alternation in prevNode.p
+ if (root != null) ConsAltNode.listAdd(root, prevNode.p);
+ stringNode = null; // the next plain character starts a new run
+ }
+ p++;
+ }
+
+ if (p < end) { // the loop stopped early: keep the remainder as one lower-cased ambiguous string
+ Node srem = expandCaseFoldMakeRemString(chars, p, end);
+
+ if (prevNode.p != null && root == null) {
+ topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
+ }
+
+ if (root == null) {
+ prevNode.p = srem;
+ } else {
+ ConsAltNode.listAdd(root, srem);
+ }
+ }
+ /* ending */
+ Node xnode = topRoot != null ? topRoot : prevNode.p; // the list if one was built, otherwise the single piece
+
+ swap(node, xnode); // NOTE(review): presumably puts xnode where the original string node was - confirm against swap()
+ return xnode;
+ }
+
+ private static final int CEC_THRES_NUM_BIG_REPEAT = 512;
+ private static final int CEC_INFINITE_NUM = 0x7fffffff;
+
+ private static final int CEC_IN_INFINITE_REPEAT = (1<<0);
+ private static final int CEC_IN_FINITE_REPEAT = (1<<1);
+ private static final int CEC_CONT_BIG_REPEAT = (1<<2);
+
+ protected final int setupCombExpCheck(Node node, int state) {
+ int r = state;
+ int ret;
+
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode ln = (ConsAltNode)node;
+
+ do {
+ r = setupCombExpCheck(ln.car, r);
+ //prev = ((ConsAltNode)node).car;
+ } while (r >= 0 && (ln = ln.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode an = (ConsAltNode)node;
+ do {
+ ret = setupCombExpCheck(an.car, state);
+ r |= ret;
+ } while (ret >= 0 && (an = an.cdr) != null);
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ int childState = state;
+ int addState = 0;
+ int varNum;
+
+ if (!isRepeatInfinite(qn.upper)) {
+ if (qn.upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ childState |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env.backrefedMem == 0) {
+ if (qn.target.getType() == NodeType.ENCLOSE) {
+ EncloseNode en = (EncloseNode)qn.target;
+ if (en.type == EncloseType.MEMORY) {
+ if (en.target.getType() == NodeType.QTFR) {
+ QuantifierNode q = (QuantifierNode)en.target;
+ if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) {
+ qn.upper = qn.lower == 0 ? 1 : qn.lower;
+ if (qn.upper == 1) childState = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if ((state & CEC_IN_FINITE_REPEAT) != 0) {
+ qn.combExpCheckNum = -1;
+ } else {
+ if (isRepeatInfinite(qn.upper)) {
+ varNum = CEC_INFINITE_NUM;
+ childState |= CEC_IN_INFINITE_REPEAT;
+ } else {
+ varNum = qn.upper - qn.lower;
+ }
+
+ if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn.combExpCheckNum == 0) {
+ env.numCombExpCheck++;
+ qn.combExpCheckNum = env.numCombExpCheck;
+ if (env.currMaxRegNum > env.combExpMaxRegNum) {
+ env.combExpMaxRegNum = env.currMaxRegNum;
+ }
+ }
+ }
+ }
+ r = setupCombExpCheck(qn.target, childState);
+ r |= addState;
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ switch( en.type) {
+ case EncloseNode.MEMORY:
+ if (env.currMaxRegNum < en.regNum) {
+ env.currMaxRegNum = en.regNum;
+ }
+ r = setupCombExpCheck(en.target, state);
+ break;
+
+ default:
+ r = setupCombExpCheck(en.target, state);
+ } // inner switch
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ CallNode cn = (CallNode)node;
+ if (cn.isRecursion()) {
+ env.hasRecursion = true;
+ } else {
+ r = setupCombExpCheck(cn.target, state);
+ }
+ } // USE_SUBEXP_CALL
+ break;
+
+ default:
+ break;
+
+ } // switch
+
+ return r;
+ }
+
+ private static final int IN_ALT = (1<<0); // state bit: inside an alternation branch
+ private static final int IN_NOT = (1<<1); // state bit: inside a negative look-ahead/look-behind
+ private static final int IN_REPEAT = (1<<2); // state bit: inside a quantifier target
+ private static final int IN_VAR_REPEAT = (1<<3); // state bit: inside a quantifier with lower != upper
+ private static final int EXPAND_STRING_MAX_LENGTH = 100; // upper limit for unrolling a repeated string
+
+ /* setup_tree does the following work.
+ 1. check empty loop. (set qn->target_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+ */
+ protected final Node setupTree(Node node, int state) { // returns the node that now stands for 'node' (it may have been replaced)
+ restart: while (true) { // re-entered only when a look-behind was rewritten into a non-anchor node
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode lin = (ConsAltNode)node;
+ Node prev = null;
+ do {
+ setupTree(lin.car, state);
+ if (prev != null) {
+ nextSetup(prev, lin.car); // let the previous element look at its successor
+ }
+ prev = lin.car;
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ ConsAltNode aln = (ConsAltNode)node;
+ do {
+ setupTree(aln.car, (state | IN_ALT));
+ } while ((aln = aln.cdr) != null);
+ break;
+
+ case NodeType.CCLASS:
+ break;
+
+ case NodeType.STR:
+ if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
+ node = expandCaseFoldString(node); // may replace the string by a list/alternation
+ }
+ break;
+
+ case NodeType.CTYPE:
+ case NodeType.CANY:
+ break;
+
+ case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ?
+ break;
+
+ case NodeType.BREF:
+ BackRefNode br = (BackRefNode)node;
+ for (int i=0; i<br.backNum; i++) {
+ if (br.back[i] > env.numMem) newValueException(ERR_INVALID_BACKREF); // reference to a group number beyond the defined groups
+ env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]);
+ env.btMemStart = bsOnAt(env.btMemStart, br.back[i]);
+ if (Config.USE_BACKREF_WITH_LEVEL) {
+ if (br.isNestLevel()) {
+ env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]);
+ }
+ } // USE_BACKREF_AT_LEVEL
+ ((EncloseNode)env.memNodes[br.back[i]]).setMemBackrefed();
+ }
+ break;
+
+ case NodeType.QTFR:
+ QuantifierNode qn = (QuantifierNode)node;
+ Node target = qn.target;
+
+ if ((state & IN_REPEAT) != 0) qn.setInRepeat();
+
+ if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
+ int d = getMinMatchLength(target);
+ if (d == 0) { // the target can match the empty string
+ qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
+ if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+ int info = quantifiersMemoryInfo(target);
+ if (info > 0) qn.targetEmptyInfo = info;
+ } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ // strange stuff here (turned off)
+ }
+ }
+
+ state |= IN_REPEAT;
+ if (qn.lower != qn.upper) state |= IN_VAR_REPEAT;
+
+ target = setupTree(target, state);
+
+ /* expand string */
+ if (target.getType() == NodeType.STR) {
+ if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
+ qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+ StringNode sn = (StringNode)target;
+ int len = sn.length();
+
+ if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
+ StringNode str = qn.convertToString(sn.flag);
+ int n = qn.lower;
+ for (int i = 0; i < n; i++) {
+ str.cat(sn.chars, sn.p, sn.end); // append one copy of the target per repetition
+ }
+ break; /* break case NT_QTFR: */
+ }
+
+ }
+ }
+ if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
+ if (qn.greedy && qn.targetEmptyInfo != 0) {
+ if (target.getType() == NodeType.QTFR) {
+ QuantifierNode tqn = (QuantifierNode)target;
+ if (tqn.headExact != null) { // move the head value from the inner quantifier to this one
+ qn.headExact = tqn.headExact;
+ tqn.headExact = null;
+ }
+ } else {
+ qn.headExact = getHeadValueNode(qn.target, true);
+ }
+ }
+ } // USE_OP_PUSH_OR_JUMP_EXACT
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode en = (EncloseNode)node;
+ switch (en.type) {
+ case EncloseType.OPTION:
+ int options = regex.options;
+ regex.options = en.option; // process the body under the group's options
+ setupTree(en.target, state);
+ regex.options = options; // restore the previous options
+ break;
+
+ case EncloseType.MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
+ /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+
+ }
+ setupTree(en.target, state);
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ setupTree(en.target, state);
+ if (en.target.getType() == NodeType.QTFR) {
+ QuantifierNode tqn = (QuantifierNode)en.target;
+ if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
+ /* (?>a*), a*+ etc... */
+ if (tqn.target.isSimple()) en.setStopBtSimpleRepeat();
+ }
+ }
+ break;
+
+ } // inner switch
+ break;
+
+ case NodeType.ANCHOR:
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.PREC_READ:
+ setupTree(an.target, state);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ setupTree(an.target, (state | IN_NOT));
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ node = setupLookBehind(node); // may return a different node when alternatives were divided
+ if (node.getType() != NodeType.ANCHOR) continue restart;
+ setupTree(((AnchorNode)node).target, state);
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ node = setupLookBehind(node); // may return a different node when alternatives were divided
+ if (node.getType() != NodeType.ANCHOR) continue restart;
+ setupTree(((AnchorNode)node).target, (state | IN_NOT));
+ break;
+
+ } // inner switch
+ break;
+ } // switch
+ return node;
+ } // restart: while
+ }
+
+ private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; // visits of one memory group after which only its fixed min/max lengths are used
+ private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd
+ opt.clear(); // 'opt' is an out-parameter: it is reset and then filled for 'node'
+ opt.setBoundNode(oenv.mmd);
+
+ switch (node.getType()) {
+ case NodeType.LIST: {
+ OptEnvironment nenv = new OptEnvironment();
+ NodeOptInfo nopt = new NodeOptInfo();
+ nenv.copy(oenv); // private copy: its mmd is advanced per element below
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ optimizeNodeLeft(lin.car, nopt, nenv);
+ nenv.mmd.add(nopt.length);
+ opt.concatLeftNode(nopt);
+ } while ((lin = lin.cdr) != null);
+ break;
+ }
+
+ case NodeType.ALT: {
+ NodeOptInfo nopt = new NodeOptInfo();
+ ConsAltNode aln = (ConsAltNode)node;
+ do {
+ optimizeNodeLeft(aln.car, nopt, oenv);
+ if (aln == node) { // first branch: take its info as is
+ opt.copy(nopt);
+ } else {
+ opt.altMerge(nopt, oenv);
+ }
+ } while ((aln = aln.cdr) != null);
+ break;
+ }
+
+ case NodeType.STR: {
+ StringNode sn = (StringNode)node;
+
+ int slen = sn.length();
+
+ if (!sn.isAmbig()) {
+ opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
+
+ if (slen > 0) {
+ opt.map.addChar(sn.chars[sn.p]); // only the first character goes into the map
+ }
+
+ opt.length.set(slen, slen);
+ } else {
+ int max;
+ if (sn.isDontGetOptInfo()) { // flagged strings contribute a length only
+ max = sn.length();
+ } else {
+ opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
+ opt.exb.ignoreCase = true;
+
+ if (slen > 0) {
+ opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag);
+ }
+
+ max = slen;
+ }
+ opt.length.set(slen, max);
+ }
+
+ if (opt.exb.length == slen) {
+ opt.exb.reachEnd = true;
+ }
+ break;
+ }
+
+ case NodeType.CCLASS: {
+ CClassNode cc = (CClassNode)node;
+ /* no need to check ignore case. (set in setup_tree()) */
+ if (cc.mbuf != null || cc.isNot()) {
+ opt.length.set(1, 1); // no map entries are added for classes with mbuf or negation
+ } else {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ boolean z = cc.bs.at(i);
+ if ((z && !cc.isNot()) || (!z && cc.isNot())) {
+ opt.map.addChar(i);
+ }
+ }
+ opt.length.set(1, 1);
+ }
+ break;
+ }
+
+ case NodeType.CTYPE: {
+ int min;
+ int max = 1; // always 1 here, so the else branch below is never taken
+ if (max == 1) {
+ min = 1;
+ CTypeNode cn = (CTypeNode)node;
+
+ switch (cn.ctype) {
+ case CharacterType.WORD:
+ if (cn.not) {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (!EncodingHelper.isWord(i)) {
+ opt.map.addChar(i);
+ }
+ }
+ } else {
+ for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
+ if (EncodingHelper.isWord(i)) {
+ opt.map.addChar(i);
+ }
+ }
+ }
+ break;
+ } // inner switch
+ } else {
+ min = 1;
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.CANY: {
+ opt.length.set(1, 1);
+ break;
+ }
+
+ case NodeType.ANCHOR: {
+ AnchorNode an = (AnchorNode)node;
+ switch (an.type) {
+ case AnchorType.BEGIN_BUF:
+ case AnchorType.BEGIN_POSITION:
+ case AnchorType.BEGIN_LINE:
+ case AnchorType.END_BUF:
+ case AnchorType.SEMI_END_BUF:
+ case AnchorType.END_LINE:
+ opt.anchor.add(an.type);
+ break;
+
+ case AnchorType.PREC_READ:
+ NodeOptInfo nopt = new NodeOptInfo();
+ optimizeNodeLeft(an.target, nopt, oenv);
+ if (nopt.exb.length > 0) { // prefer exb over exm when both are available
+ opt.expr.copy(nopt.exb);
+ } else if (nopt.exm.length > 0) {
+ opt.expr.copy(nopt.exm);
+ }
+ opt.expr.reachEnd = false;
+ if (nopt.map.value > 0) opt.map.copy(nopt.map);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */
+ case AnchorType.LOOK_BEHIND_NOT:
+ break;
+
+ } // inner switch
+ break;
+ }
+
+ case NodeType.BREF: {
+ BackRefNode br = (BackRefNode)node;
+
+ if (br.isRecursion()) {
+ opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+ break;
+ }
+
+ Node[]nodes = oenv.scanEnv.memNodes;
+
+ int min = getMinMatchLength(nodes[br.back[0]]);
+ int max = getMaxMatchLength(nodes[br.back[0]]);
+
+ for (int i=1; i<br.backNum; i++) { // widen to the smallest min and largest max over all referenced groups
+ int tmin = getMinMatchLength(nodes[br.back[i]]);
+ int tmax = getMaxMatchLength(nodes[br.back[i]]);
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.CALL: {
+ if (Config.USE_SUBEXP_CALL) {
+ CallNode cn = (CallNode)node;
+ if (cn.isRecursion()) {
+ opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
+ } else {
+ int safe = oenv.options;
+ oenv.options = ((EncloseNode)cn.target).option; // analyse the target under its own options
+ optimizeNodeLeft(cn.target, opt, oenv);
+ oenv.options = safe; // restore the caller's options
+ }
+ } // USE_SUBEXP_CALL
+ break;
+ }
+
+ case NodeType.QTFR: {
+ NodeOptInfo nopt = new NodeOptInfo();
+ QuantifierNode qn = (QuantifierNode)node;
+ optimizeNodeLeft(qn.target, nopt, oenv);
+ if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
+ if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { // a greedy any-char star with mmd.max == 0
+ if (isMultiline(oenv.options)) {
+ opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
+ } else {
+ opt.anchor.add(AnchorType.ANYCHAR_STAR);
+ }
+ }
+ } else {
+ if (qn.lower > 0) {
+ opt.copy(nopt);
+ if (nopt.exb.length > 0) {
+ if (nopt.exb.reachEnd) {
+ int i;
+ for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { // repeat the target's exb up to 'lower' times while it fits
+ opt.exb.concat(nopt.exb);
+ }
+ if (i < qn.lower) {
+ opt.exb.reachEnd = false;
+ }
+ }
+ }
+ if (qn.lower != qn.upper) {
+ opt.exb.reachEnd = false;
+ opt.exm.reachEnd = false;
+ }
+ if (qn.lower > 1) {
+ opt.exm.reachEnd = false;
+ }
+
+ }
+ }
+ int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
+ int max;
+ if (isRepeatInfinite(qn.upper)) {
+ max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
+ } else {
+ max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
+ }
+ opt.length.set(min, max);
+ break;
+ }
+
+ case NodeType.ENCLOSE: {
+ EncloseNode en = (EncloseNode)node;
+ switch (en.type) {
+ case EncloseType.OPTION:
+ int save = oenv.options;
+ oenv.options = en.option; // analyse the body under the group's options
+ optimizeNodeLeft(en.target, opt, oenv);
+ oenv.options = save; // restore the previous options
+ break;
+
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { // visited too often: fall back to the fixed lengths
+ int min = 0;
+ int max = MinMaxLen.INFINITE_DISTANCE;
+ if (en.isMinFixed()) min = en.minLength;
+ if (en.isMaxFixed()) max = en.maxLength;
+ opt.length.set(min, max);
+ } else { // USE_SUBEXP_CALL
+ optimizeNodeLeft(en.target, opt, oenv);
+ if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
+ if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { // the any-char-star anchor is dropped for back-referenced groups
+ opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
+ }
+ }
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ optimizeNodeLeft(en.target, opt, oenv);
+ break;
+ } // inner switch
+ break;
+ }
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ protected final void setOptimizedInfoFromTree(Node node) { // runs optimizeNodeLeft on the whole tree and stores the resulting anchors and exact/map info on 'regex'
+ NodeOptInfo opt = new NodeOptInfo();
+ OptEnvironment oenv = new OptEnvironment();
+
+ oenv.options = regex.options;
+ oenv.caseFoldFlag = regex.caseFoldFlag;
+ oenv.scanEnv = env;
+ oenv.mmd.clear(); // ??
+
+ optimizeNodeLeft(node, opt, oenv);
+ // Only the anchor types listed in the two masks below are carried over to regex.anchor.
+ regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
+ AnchorType.BEGIN_POSITION |
+ AnchorType.ANYCHAR_STAR |
+ AnchorType.ANYCHAR_STAR_ML);
+
+ regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
+ AnchorType.SEMI_END_BUF);
+
+ if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { // end-anchored: record the pattern's min/max length
+ regex.anchorDmin = opt.length.min;
+ regex.anchorDmax = opt.length.max;
+ }
+
+ if (opt.exb.length > 0 || opt.exm.length > 0) {
+ opt.exb.select(opt.exm); // NOTE(review): presumably leaves the preferred of exb/exm in exb - confirm against select()
+ if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
+ // !goto set_map;!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else {
+ regex.setExactInfo(opt.exb);
+ regex.setSubAnchor(opt.exb.anchor);
+ }
+ } else if (opt.map.value > 0) {
+ // !set_map:!
+ regex.setOptimizeMapInfo(opt.map);
+ regex.setSubAnchor(opt.map.anchor);
+ } else { // neither exact nor map info: only line anchors are recorded
+ regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
+ if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
+ }
+
+ if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
+ Config.log.println(regex.optimizeInfoToString());
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ApplyCaseFold.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,91 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
+
+final class ApplyCaseFold {
+
+ /** Shared instance; this class has no instance state. */
+ static final ApplyCaseFold INSTANCE = new ApplyCaseFold();
+
+ /**
+ * Adds the case-fold expansion of {@code from} to the character class carried by {@code o}
+ * (an {@link ApplyCaseFoldArg}). A single target code is merged into the class itself;
+ * a multi-code target is appended as a string alternative to the argument's alt list.
+ */
+ // i_apply_case_fold
+ public void apply(int from, int[]to, int length, Object o) {
+ final ApplyCaseFoldArg foldArg = (ApplyCaseFoldArg)o;
+ final ScanEnvironment scanEnv = foldArg.env;
+ final CClassNode cclass = foldArg.cc;
+ final BitSet bits = cclass.bs;
+ final boolean member = cclass.isCodeInCC(from);
+
+ if (length != 1) {
+ // Multi-code fold: applies only when 'from' is in the class and, with the
+ // negative-class flag on, the class is not negated.
+ if (!member || (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS && cclass.isNot())) return;
+
+ StringNode folded = null;
+ for (int i = 0; i < length; i++) {
+ if (folded == null) {
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ folded = new StringNode();
+ folded.setAmbig();
+ }
+ folded.catCode(to[i]);
+ }
+
+ // Append the new alternative to the list kept in the argument object.
+ final ConsAltNode alt = ConsAltNode.newAltNode(folded, null);
+ if (foldArg.tail != null) {
+ foldArg.tail.setCdr(alt);
+ } else {
+ foldArg.altRoot = alt;
+ }
+ foldArg.tail = alt;
+ return;
+ }
+
+ if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
+ // Applies only when membership and negation disagree.
+ if (member == cclass.isNot()) return;
+ if (to[0] < BitSet.SINGLE_BYTE_SIZE) {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ bits.set(to[0]);
+ } else {
+ cclass.addCodeRange(scanEnv, to[0], to[0]);
+ }
+ return;
+ } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+ if (!member) return;
+ if (to[0] < BitSet.SINGLE_BYTE_SIZE) {
+ if (cclass.isNot()) bits.clear(to[0]); else bits.set(to[0]);
+ } else {
+ if (cclass.isNot()) cclass.clearNotFlag();
+ cclass.addCodeRange(scanEnv, to[0], to[0]);
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ApplyCaseFoldArg.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+
+public final class ApplyCaseFoldArg { // argument bundle handed (as Object) to ApplyCaseFold.apply
+ final ScanEnvironment env; // passed on to CClassNode.addCodeRange
+ final CClassNode cc; // character class being updated
+ ConsAltNode altRoot; // first alternative created for a multi-code fold; null until one is added
+ ConsAltNode tail; // most recently appended alternative
+
+ public ApplyCaseFoldArg(ScanEnvironment env, CClassNode cc) { // altRoot and tail start out null
+ this.env = env;
+ this.cc = cc;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ArrayCompiler.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,1263 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDynamic;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
+import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+
+final class ArrayCompiler extends Compiler {
+ private int[] code; // emitted program: opcodes and operands, one int per slot
+ private int codeLength; // number of slots of code[] in use
+
+ private char[][] templates; // char arrays registered through addTemplate
+ private int templateNum; // number of entries of templates[] in use
+
+ ArrayCompiler(Analyser analyser) { // only forwards the analyser to the Compiler base class
+ super(analyser);
+ }
+
+ @Override
+ protected final void prepare() { // allocates a fresh code buffer and resets the write position
+ int codeSize = Config.USE_STRING_TEMPLATES ? 8 : ((analyser.getEnd() - analyser.getBegin()) * 2 + 2); // initial capacity only; addInt/ensure grow the array
+ code = new int[codeSize];
+ codeLength = 0;
+ }
+
+ @Override
+ protected final void finish() { // appends END/FINISH and publishes the program on the Regex
+ addOpcode(OPCode.END);
+ addOpcode(OPCode.FINISH); // for stack bottom
+
+ regex.code = code;
+ regex.codeLength = codeLength;
+ regex.templates = templates;
+ regex.templateNum = templateNum;
+ regex.factory = MatcherFactory.DEFAULT;
+
+ if (Config.USE_SUBEXP_CALL && analyser.env.unsetAddrList != null) { // presumably fills in the dummy CALL addresses -- confirm in unsetAddrList.fix
+ analyser.env.unsetAddrList.fix(regex);
+ analyser.env.unsetAddrList = null; // list is dropped once applied
+ }
+ }
+
+ @Override
+ protected void compileAltNode(ConsAltNode node) { // emits every branch of the car/cdr chain; all but the last get a leading PUSH and a trailing JUMP
+ ConsAltNode aln = node;
+ int len = 0;
+
+ do { // pass 1: total size of all branches plus one PUSH+JUMP pair per non-last branch
+ len += compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ len += OPSize.PUSH + OPSize.JUMP;
+ }
+ } while ((aln = aln.cdr) != null);
+
+ int pos = codeLength + len; /* goal position */
+
+ aln = node;
+ do { // pass 2: emit the code
+ len = compileLengthTree(aln.car);
+ if (aln.cdr != null) {
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP); // operand spans this branch and its trailing JUMP
+ }
+ compileTree(aln.car);
+ if (aln.cdr != null) {
+ len = pos - (codeLength + OPSize.JUMP); // distance from the end of this JUMP to the goal position
+ addOpcodeRelAddr(OPCode.JUMP, len);
+ }
+ } while ((aln = aln.cdr) != null);
+ }
+
+    private boolean isNeedStrLenOpExact(int op) {
+        switch (op) { // the EXACT* opcodes that are followed by a length operand
+        case OPCode.EXACTN: case OPCode.EXACTMB2N: case OPCode.EXACTMB3N:
+        case OPCode.EXACTMBN: case OPCode.EXACTN_IC: case OPCode.EXACTN_IC_SB:
+            return true;
+        default: return false;
+        }
+    }
+
+ private boolean opTemplated(int op) { // currently the same opcode set as isNeedStrLenOpExact
+ return isNeedStrLenOpExact(op);
+ }
+
+    /**
+     * Picks the EXACT* opcode for a literal string.
+     *
+     * <p>Case-insensitive strings only distinguish length 1 from anything else.
+     * Case-sensitive strings get a size-specialised opcode for short lengths of
+     * width-1 and width-2 characters and an N-suffixed opcode otherwise.
+     *
+     * @param mbLength   width of one character in code slots; ignored when
+     *                   {@code ignoreCase} is true
+     * @param strLength  number of characters in the string
+     * @param ignoreCase whether a case-insensitive opcode is wanted
+     * @return the selected {@code OPCode} constant
+     */
+    private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) {
+        if (ignoreCase) {
+            return strLength == 1 ? OPCode.EXACT1_IC : OPCode.EXACTN_IC;
+        }
+        if (mbLength == 1) {
+            switch (strLength) {
+            case 1:  return OPCode.EXACT1;
+            case 2:  return OPCode.EXACT2;
+            case 3:  return OPCode.EXACT3;
+            case 4:  return OPCode.EXACT4;
+            case 5:  return OPCode.EXACT5;
+            default: return OPCode.EXACTN;
+            }
+        }
+        if (mbLength == 2) {
+            switch (strLength) {
+            case 1:  return OPCode.EXACTMB2N1;
+            case 2:  return OPCode.EXACTMB2N2;
+            case 3:  return OPCode.EXACTMB2N3;
+            default: return OPCode.EXACTMB2N;
+            }
+        }
+        return mbLength == 3 ? OPCode.EXACTMB3N : OPCode.EXACTMBN;
+    }
+
+ private void compileTreeEmptyCheck(Node node, int emptyInfo) { // compiles node, bracketed by NULL_CHECK_START/END when emptyInfo != 0
+ int savedNumNullCheck = regex.numNullCheck; // id written after both the START and the END opcode
+
+ if (emptyInfo != 0) {
+ addOpcode(OPCode.NULL_CHECK_START);
+ addMemNum(regex.numNullCheck); /* NULL CHECK ID */
+ regex.numNullCheck++;
+ }
+
+ compileTree(node);
+
+ if (emptyInfo != 0) {
+ switch(emptyInfo) { // END variant depends on emptyInfo; any other non-zero value emits no END opcode, only the id below
+ case TargetInfo.IS_EMPTY:
+ addOpcode(OPCode.NULL_CHECK_END);
+ break;
+ case TargetInfo.IS_EMPTY_MEM:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST);
+ break;
+ case TargetInfo.IS_EMPTY_REC:
+ addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
+ break;
+ } // switch
+
+ addMemNum(savedNumNullCheck); /* NULL CHECK ID */
+ }
+ }
+
+ private int addCompileStringlength(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) { // size counterpart of addCompileString; chars and p are not read
+ int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+ int len = OPSize.OPCODE;
+
+ if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
+ // string length, template index, template string pointer
+ len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX;
+ } else {
+ if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
+ len += mbLength * strLength; // the characters themselves, stored inline
+ }
+ if (op == OPCode.EXACTMBN) len += OPSize.LENGTH; // extra slot: addCompileString also writes mbLength for this opcode
+ return len;
+ }
+
+ @Override
+ protected final void addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) { // emits one EXACT* instruction for chars[p ..]
+ int op = selectStrOpcode(mbLength, strLength, ignoreCase);
+ addOpcode(op);
+
+ if (op == OPCode.EXACTMBN) addLength(mbLength); // this opcode carries the character width as its first operand
+
+ if (isNeedStrLenOpExact(op)) {
+ if (op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB) {
+ addLength(mbLength * strLength); // case-insensitive forms store the total slot count
+ } else {
+ addLength(strLength); // the others store the character count
+ }
+ }
+
+ if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
+ addInt(templateNum); // index the array will get in templates[] (addTemplate increments templateNum)
+ addInt(p);
+ addTemplate(chars);
+ } else {
+ addChars(chars, p, mbLength * strLength); // copy the characters inline
+ }
+ }
+
+ private int compileLengthStringNode(Node node) {
+ StringNode sn = (StringNode)node;
+ if (sn.length() <= 0) return 0;
+ boolean ambig = sn.isAmbig();
+
+ int p, prev;
+ p = prev = sn.p;
+ int end = sn.end;
+ char[] chars = sn.chars;
+ p++;
+
+ int slen = 1;
+ int rlen = 0;
+
+ while (p < end) {
+ slen++;
+ p++;
+ }
+ int r = addCompileStringlength(chars, prev, 1, slen, ambig);
+ rlen += r;
+ return rlen;
+ }
+
+ private int compileLengthStringRawNode(StringNode sn) { // size of a raw string: always width 1 and case-sensitive
+ if (sn.length() <= 0) return 0;
+ return addCompileStringlength(sn.chars, sn.p, 1 /*sb*/, sn.length(), false);
+ }
+
+ private void addMultiByteCClass(CodeRangeBuffer mbuf) { // writes mbuf.used followed by that many ints taken from mbuf.p
+ addLength(mbuf.used);
+ addInts(mbuf.p, mbuf.used);
+ }
+
+ private int compileLengthCClassNode(CClassNode cc) {
+ if (cc.isShare()) return OPSize.OPCODE + OPSize.POINTER;
+
+ int len;
+ if (cc.mbuf == null) {
+ len = OPSize.OPCODE + BitSet.BITSET_SIZE;
+ } else {
+ if (cc.bs.isEmpty()) {
+ len = OPSize.OPCODE;
+ } else {
+ len = OPSize.OPCODE + BitSet.BITSET_SIZE;
+ }
+
+ len += OPSize.LENGTH + cc.mbuf.used;
+ }
+ return len;
+ }
+
+ @Override
+ protected void compileCClassNode(CClassNode cc) { // emits one of CCLASS_NODE, CCLASS[_NOT], CCLASS_MB[_NOT], CCLASS_MIX[_NOT] plus operands
+ if (cc.isShare()) { // shared char class
+ addOpcode(OPCode.CCLASS_NODE);
+ addPointer(cc); // the node itself is stored via addObject; the code holds only its index
+ return;
+ }
+
+ if (cc.mbuf == null) { // bit set only
+ if (cc.isNot()) {
+ addOpcode(OPCode.CCLASS_NOT);
+ } else {
+ addOpcode(OPCode.CCLASS);
+ }
+ addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+ } else {
+ if (cc.bs.isEmpty()) { // code ranges only
+ if (cc.isNot()) {
+ addOpcode(OPCode.CCLASS_MB_NOT);
+ } else {
+ addOpcode(OPCode.CCLASS_MB);
+ }
+ addMultiByteCClass(cc.mbuf);
+ } else { // both bit set and code ranges
+ if (cc.isNot()) {
+ addOpcode(OPCode.CCLASS_MIX_NOT);
+ } else {
+ addOpcode(OPCode.CCLASS_MIX);
+ }
+ // store the bit set and mbuf themselves
+ addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
+ addMultiByteCClass(cc.mbuf);
+ }
+ }
+ }
+
+    /**
+     * Emits the opcode for a character-type node.
+     *
+     * <p>Only {@code CharacterType.WORD} is handled: it becomes
+     * {@code NOT_WORD} when the node is negated and {@code WORD} otherwise.
+     * Any other character type is reported through
+     * {@code newInternalException(ERR_PARSER_BUG)} and nothing is emitted.
+     *
+     * @param node the character-type node to compile
+     */
+    @Override
+    protected void compileCTypeNode(CTypeNode node) {
+        if (node.ctype != CharacterType.WORD) {
+            newInternalException(ERR_PARSER_BUG);
+            return; // not reached
+        }
+
+        addOpcode(node.not ? OPCode.NOT_WORD : OPCode.WORD);
+    }
+
+ @Override
+ protected void compileAnyCharNode() {
+ if (isMultiline(regex.options)) {
+ addOpcode(OPCode.ANYCHAR_ML);
+ } else {
+ addOpcode(OPCode.ANYCHAR);
+ }
+ }
+
+ @Override
+ protected void compileCallNode(CallNode node) { // emits CALL with a placeholder address
+ addOpcode(OPCode.CALL);
+ node.unsetAddrList.add(codeLength, node.target); // records the operand's position and the call target; presumably resolved later -- confirm
+ addAbsAddr(0); /*dummy addr.*/
+ }
+
+    /** Emits the back-reference instruction for node; the opcode depends on nest level, ignore-case option and group count. */
+    @Override
+    protected void compileBackrefNode(BackRefNode node) {
+        if (Config.USE_BACKREF_WITH_LEVEL && node.isNestLevel()) {
+            addOpcode(OPCode.BACKREF_WITH_LEVEL);
+            addOption(regex.options & Option.IGNORECASE);
+            addLength(node.nestLevel);
+            // operands: group count, then the group numbers from last to first
+            addLength(node.backNum);
+            for (int i = node.backNum - 1; i >= 0; i--) addMemNum(node.back[i]);
+            return;
+        } else { // USE_BACKREF_AT_LEVEL
+            if (node.backNum == 1) {
+                if (isIgnoreCase(regex.options)) {
+                    addOpcode(OPCode.BACKREFN_IC);
+                    addMemNum(node.back[0]);
+                } else {
+                    switch (node.back[0]) {
+                    case 1:
+                        addOpcode(OPCode.BACKREF1);
+                        break;
+                    case 2:
+                        addOpcode(OPCode.BACKREF2);
+                        break;
+                    default:
+                        addOpcode(OPCode.BACKREFN);
+                        addMemNum(node.back[0]); // a group number, not an opcode: keep it out of addOpcode's stackNeeded switch
+                        break;
+                    } // switch
+                }
+            } else {
+                if (isIgnoreCase(regex.options)) {
+                    addOpcode(OPCode.BACKREF_MULTI_IC);
+                } else {
+                    addOpcode(OPCode.BACKREF_MULTI);
+                }
+                // operands: group count, then the group numbers from last to first
+                addLength(node.backNum);
+                for (int i = node.backNum - 1; i >= 0; i--) addMemNum(node.back[i]);
+            }
+        }
+    }
+
+ private static final int REPEAT_RANGE_ALLOC = 8; // initial size and growth step of the repeat-range arrays
+ private void entryRepeatRange(int id, int lower, int upper) { // stores the bounds of repeat number id in regex.repeatRangeLo/Hi
+ if (regex.repeatRangeLo == null) {
+ regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC];
+ regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC];
+ } else if (id >= regex.repeatRangeLo.length){ // grows by one step only, so id must not exceed the old length by more than REPEAT_RANGE_ALLOC - 1
+ int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC];
+ System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length);
+ regex.repeatRangeLo = tmp;
+ tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC];
+ System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length);
+ regex.repeatRangeHi = tmp;
+ }
+
+ regex.repeatRangeLo[id] = lower;
+ regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper; // an infinite upper bound is stored as the largest int
+ }
+
+ private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) { // emits REPEAT[_NG] id reladdr, the target, then REPEAT_INC* id
+ int numRepeat = regex.numRepeat; // id of this repeat; the counter is bumped below
+ addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ regex.numRepeat++;
+ addRelAddr(targetLen + OPSize.REPEAT_INC); // operand spans the target and the closing REPEAT_INC
+
+ entryRepeatRange(numRepeat, qn.lower, qn.upper); // bounds are kept in a side table under the same id
+
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+
+ if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) { // _SG variants when the regex has calls or qn.isInRepeat()
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG);
+ } else {
+ addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG);
+ }
+
+ addMemNum(numRepeat); /* OP_REPEAT ID */
+ }
+
+ private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // size threshold compared against tlen-based products in the non-CEC quantifier methods
+
+ private static boolean cknOn(int ckn) { // a state-check number is in effect only when it is positive
+ return ckn > 0;
+ }
+
+ private int compileCECLengthQuantifierNode(QuantifierNode qn) { // code size of a quantifier; called by compileLengthTree when Config.USE_COMBINATION_EXPLOSION_CHECK is set
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; // state-check number; 0 when the regex has none
+ int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0;
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen;
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
+ }
+ }
+ }
+
+ int modTLen; // target size including the NULL_CHECK_START/END pair when emptyInfo is set
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && qn.lower <= 1) { // unbounded repeat with lower bound at most 1: PUSH/JUMP loop
+ if (qn.greedy) {
+ if (qn.lower == 1) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
+ } else {
+ if (qn.lower == 0) {
+ len = OPSize.JUMP;
+ } else {
+ len = 0;
+ }
+ len += modTLen + OPSize.PUSH + cklen;
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else {
+ len = 0;
+ }
+ } else if (qn.upper == 1 && qn.greedy) {
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ len = OPSize.STATE_CHECK_PUSH + tlen;
+ } else {
+ len = OPSize.PUSH + tlen;
+ }
+ } else {
+ len = tlen;
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
+ } else { // everything else: REPEAT ... REPEAT_INC
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+
+ if (cknOn(ckn)) {
+ len += OPSize.STATE_CHECK;
+ }
+ }
+ return len;
+ }
+
+ @Override
+ protected void compileCECQuantifierNode(QuantifierNode qn) { // emits a quantifier, using STATE_CHECK* opcodes when a state-check number is in effect
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+
+ int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; // state-check number; 0 when the regex has none
+
+ if (qn.isAnyCharStar()) {
+ compileTreeNTimes(qn.target, qn.lower); // mandatory copies first
+ if (qn.nextHeadExact != null && !cknOn(ckn)) {
+ if (isMultiline(regex.options)) {
+ addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ if (cknOn(ckn)) { // unreachable: the enclosing condition already requires !cknOn(ckn)
+ addStateCheckNum(ckn);
+ }
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addChars(sn.chars, sn.p, 1); // operand: first char of the string that follows
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(OPCode.ANYCHAR_ML_STAR);
+ }
+ } else {
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_ANYCHAR_STAR);
+ } else {
+ addOpcode(OPCode.ANYCHAR_STAR);
+ }
+ }
+ if (cknOn(ckn)) {
+ addStateCheckNum(ckn);
+ }
+ return;
+ }
+ }
+
+ int modTLen; // target size including the NULL_CHECK_START/END pair when emptyInfo is set
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && qn.lower <= 1) { // unbounded repeat with lower bound at most 1: PUSH/JUMP loop
+ if (qn.greedy) {
+ if (qn.lower == 1) {
+ addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH); // jump over the push below, straight to the target
+ }
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(modTLen + OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); // operand spans the target and the closing JUMP
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ?
+ OPSize.STATE_CHECK_PUSH :
+ OPSize.PUSH))); // back to the start of the push instruction
+ } else {
+ if (qn.lower == 0) {
+ addOpcodeRelAddr(OPCode.JUMP, modTLen); // skip the target, landing on the push that follows it
+ }
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
+ addStateCheckNum(ckn);
+ addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); // operand points back to the start of the target
+ }
+ }
+ } else if (qn.upper == 0) {
+ if (qn.isRefered) { /* /(?<n>..){0}/ */
+ addOpcodeRelAddr(OPCode.JUMP, tlen); // the target is emitted but jumped over
+ compileTree(qn.target);
+ } // else r=0 ???
+ } else if (qn.upper == 1 && qn.greedy) {
+ if (qn.lower == 0) {
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(tlen);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, tlen);
+ }
+ }
+ compileTree(qn.target);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK_PUSH);
+ addStateCheckNum(ckn);
+ addRelAddr(OPSize.JUMP);
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); // operand spans the JUMP below, i.e. points at the target
+ }
+
+ addOpcodeRelAddr(OPCode.JUMP, tlen); // skips the target
+ compileTree(qn.target);
+ } else { // everything else: REPEAT ... REPEAT_INC
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ if (cknOn(ckn)) {
+ addOpcode(OPCode.STATE_CHECK);
+ addStateCheckNum(ckn);
+ }
+ }
+ }
+
+ private int compileNonCECLengthQuantifierNode(QuantifierNode qn) { // code size of a quantifier; called by compileLengthTree when Config.USE_COMBINATION_EXPLOSION_CHECK is off
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+
+ /* anychar repeat */
+ if (qn.target.getType() == NodeType.CANY) {
+ if (qn.greedy && infinite) {
+ if (qn.nextHeadExact != null) {
+ return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
+ } else {
+ return OPSize.ANYCHAR_STAR + tlen * qn.lower;
+ }
+ }
+ }
+
+ int modTLen = 0; // target size including the NULL_CHECK_START/END pair when emptyInfo is set
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+
+ int len;
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { // unbounded: mandatory copies inline, then a PUSH/JUMP loop
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ len = OPSize.JUMP; // large target: a JUMP into the loop replaces the inline copy
+ } else {
+ len = tlen * qn.lower;
+ }
+
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
+ } else if (qn.nextHeadExact != null) {
+ len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
+ } else {
+ len += OPSize.PUSH + modTLen + OPSize.JUMP;
+ }
+ } else {
+ len += OPSize.JUMP + modTLen + OPSize.PUSH;
+ }
+
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ len = OPSize.JUMP + tlen;
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { // small bounded greedy repeat: fully expanded
+ len = tlen * qn.lower;
+ len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ len = OPSize.PUSH + OPSize.JUMP + tlen;
+ } else { // everything else: REPEAT ... REPEAT_INC
+ len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
+ }
+ return len;
+ }
+
+ @Override
+ protected void compileNonCECQuantifierNode(QuantifierNode qn) { // emits a quantifier without state-check opcodes
+ boolean infinite = isRepeatInfinite(qn.upper);
+ int emptyInfo = qn.targetEmptyInfo;
+
+ int tlen = compileLengthTree(qn.target); // size of one copy of the target
+
+ if (qn.isAnyCharStar()) {
+ compileTreeNTimes(qn.target, qn.lower); // mandatory copies first
+ if (qn.nextHeadExact != null) {
+ if (isMultiline(regex.options)) {
+ addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
+ } else {
+ addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
+ }
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addChars(sn.chars, sn.p, 1); // operand: first char of the string that follows
+ return;
+ } else {
+ if (isMultiline(regex.options)) {
+ addOpcode(OPCode.ANYCHAR_ML_STAR);
+ } else {
+ addOpcode(OPCode.ANYCHAR_STAR);
+ }
+ return;
+ }
+ }
+
+ int modTLen; // target size including the NULL_CHECK_START/END pair when emptyInfo is set
+ if (emptyInfo != 0) {
+ modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
+ } else {
+ modTLen = tlen;
+ }
+ if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { // unbounded: mandatory part, then a PUSH/JUMP loop
+ if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { // large target: jump into the loop body instead of emitting a second copy
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
+ }
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP); // skips the loop's leading JUMP, landing on the target
+ }
+ } else {
+ compileTreeNTimes(qn.target, qn.lower);
+ }
+
+ if (qn.greedy) {
+ if (qn.headExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.headExact;
+ addChars(sn.chars, sn.p, 1); // operand: first char of the target's leading string
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
+ } else if (qn.nextHeadExact != null) {
+ addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
+ StringNode sn = (StringNode)qn.nextHeadExact;
+ addChars(sn.chars, sn.p, 1); // operand: first char of the string that follows
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
+ } else {
+ addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); // operand spans the target and the closing JUMP
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH)); // back to the start of the PUSH
+ }
+ } else {
+ addOpcodeRelAddr(OPCode.JUMP, modTLen); // skip the target, landing on the PUSH that follows it
+ compileTreeEmptyCheck(qn.target, emptyInfo);
+ addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); // operand points back to the start of the target
+ }
+ } else if (qn.upper == 0 && qn.isRefered) { /* /(?<n>..){0}/ */
+ addOpcodeRelAddr(OPCode.JUMP, tlen); // the target is emitted but jumped over
+ compileTree(qn.target);
+ } else if (!infinite && qn.greedy &&
+ (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { // small bounded greedy repeat: fully expanded
+ int n = qn.upper - qn.lower; // number of optional copies
+ compileTreeNTimes(qn.target, qn.lower);
+
+ for (int i=0; i<n; i++) {
+ addOpcodeRelAddr(OPCode.PUSH, (n - i) * tlen + (n - i - 1) * OPSize.PUSH); // operand spans all code still to be emitted for the optional copies
+ compileTree(qn.target);
+ }
+ } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
+ addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); // operand spans the JUMP below, i.e. points at the target
+ addOpcodeRelAddr(OPCode.JUMP, tlen); // skips the target
+ compileTree(qn.target);
+ } else { // everything else: REPEAT ... REPEAT_INC
+ compileRangeRepeatNode(qn, modTLen, emptyInfo);
+ }
+ }
+
+ private int compileLengthOptionNode(EncloseNode node) { // code size of an option group
+ int prev = regex.options;
+ regex.options = node.option; // size the target under the group's options ...
+ int tlen = compileLengthTree(node.target);
+ regex.options = prev; // ... then restore (not restored if compileLengthTree throws)
+
+ if (isDynamic(prev ^ node.option)) { // prev ^ node.option = the option bits that differ
+ return OPSize.SET_OPTION_PUSH + OPSize.SET_OPTION + OPSize.FAIL + tlen + OPSize.SET_OPTION;
+ } else {
+ return tlen;
+ }
+ }
+
+ @Override
+ protected void compileOptionNode(EncloseNode node) { // compiles the target under node.option, adding SET_OPTION code when isDynamic() reports the changed bits as dynamic
+ int prev = regex.options;
+
+ if (isDynamic(prev ^ node.option)) { // prev ^ node.option = the option bits that differ
+ addOpcodeOption(OPCode.SET_OPTION_PUSH, node.option);
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ addOpcode(OPCode.FAIL);
+ }
+
+ regex.options = node.option; // compile-time options for the target ...
+ compileTree(node.target);
+ regex.options = prev; // ... restored afterwards (not restored if compileTree throws)
+
+ if (isDynamic(prev ^ node.option)) {
+ addOpcodeOption(OPCode.SET_OPTION, prev);
+ }
+ }
+
+ private int compileLengthEncloseNode(EncloseNode node) { // code size of an enclose node (option, MEMORY or STOP_BACKTRACK)
+ if (node.isOption()) {
+ return compileLengthOptionNode(node);
+ }
+
+ int tlen; // size of the target, 0 when there is none
+ if (node.target != null) {
+ tlen = compileLengthTree(node.target);
+ } else {
+ tlen = 0;
+ }
+
+ int len;
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ len = OPSize.MEMORY_START_PUSH + tlen + OPSize.CALL + OPSize.JUMP + OPSize.RETURN; // NOTE(review): sized as MEMORY_START_PUSH even if compileEncloseNode emits MEMORY_START -- fine only if both sizes are equal; confirm in OPSize
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ } else { // USE_SUBEXP_CALL
+ if (bsAt(regex.btMemStart, node.regNum)) {
+ len = OPSize.MEMORY_START_PUSH;
+ } else {
+ len = OPSize.MEMORY_START;
+ }
+ len += tlen + (bsAt(regex.btMemEnd, node.regNum) ? OPSize.MEMORY_END_PUSH : OPSize.MEMORY_END);
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) {
+ QuantifierNode qn = (QuantifierNode)node.target;
+ tlen = compileLengthTree(qn.target); // from here on tlen is the size of the quantifier's own target
+ len = tlen * qn.lower + OPSize.PUSH + tlen + OPSize.POP + OPSize.JUMP;
+ } else {
+ len = OPSize.PUSH_STOP_BT + tlen + OPSize.POP_STOP_BT;
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ return 0; // not reached
+ } // switch
+ return len;
+ }
+
+ @Override
+ protected void compileEncloseNode(EncloseNode node) { // emits code for a MEMORY or STOP_BACKTRACK enclose node
+ int len;
+ switch (node.type) {
+ case EncloseType.MEMORY:
+ if (Config.USE_SUBEXP_CALL) {
+ if (node.isCalled()) { // called group: CALL body; JUMP over body; body ... RETURN
+ addOpcode(OPCode.CALL);
+ node.callAddr = codeLength + OPSize.ABSADDR + OPSize.JUMP; // address just past the CALL operand and the JUMP below, where the body starts
+ node.setAddrFixed();
+ addAbsAddr(node.callAddr);
+ len = compileLengthTree(node.target);
+ len += OPSize.MEMORY_START_PUSH + OPSize.RETURN;
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ len += node.isRecursion() ? OPSize.MEMORY_END_PUSH_REC : OPSize.MEMORY_END_PUSH;
+ } else {
+ len += node.isRecursion() ? OPSize.MEMORY_END_REC : OPSize.MEMORY_END;
+ }
+ addOpcodeRelAddr(OPCode.JUMP, len); // len = size of the body emitted below, including RETURN
+ }
+ } // USE_SUBEXP_CALL
+
+ if (bsAt(regex.btMemStart, node.regNum)) {
+ addOpcode(OPCode.MEMORY_START_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_START);
+ }
+
+ addMemNum(node.regNum);
+ compileTree(node.target);
+
+ if (Config.USE_SUBEXP_CALL && node.isCalled()) {
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_PUSH_REC : OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(node.isRecursion() ? OPCode.MEMORY_END_REC : OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ addOpcode(OPCode.RETURN);
+ } else { // USE_SUBEXP_CALL
+ if (bsAt(regex.btMemEnd, node.regNum)) {
+ addOpcode(OPCode.MEMORY_END_PUSH);
+ } else {
+ addOpcode(OPCode.MEMORY_END);
+ }
+ addMemNum(node.regNum);
+ }
+ break;
+
+ case EncloseType.STOP_BACKTRACK:
+ if (node.isStopBtSimpleRepeat()) { // target is a QuantifierNode (cast below): emitted as an explicit PUSH / target / POP / JUMP loop
+ QuantifierNode qn = (QuantifierNode)node.target;
+
+ compileTreeNTimes(qn.target, qn.lower); // mandatory copies first
+
+ len = compileLengthTree(qn.target);
+ addOpcodeRelAddr(OPCode.PUSH, len + OPSize.POP + OPSize.JUMP); // operand spans the rest of the loop
+ compileTree(qn.target);
+ addOpcode(OPCode.POP);
+ addOpcodeRelAddr(OPCode.JUMP, -(OPSize.PUSH + len + OPSize.POP + OPSize.JUMP)); // back to the start of the PUSH
+ } else {
+ addOpcode(OPCode.PUSH_STOP_BT);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_STOP_BT);
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ break;
+ } // switch
+ }
+
+ private int compileLengthAnchorNode(AnchorNode node) {
+ int tlen;
+ if (node.target != null) {
+ tlen = compileLengthTree(node.target);
+ } else {
+ tlen = 0;
+ }
+
+ int len;
+ switch (node.type) {
+ case AnchorType.PREC_READ:
+ len = OPSize.PUSH_POS + tlen + OPSize.POP_POS;
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ len = OPSize.PUSH_POS_NOT + tlen + OPSize.FAIL_POS;
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ len = OPSize.LOOK_BEHIND + tlen;
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ len = OPSize.PUSH_LOOK_BEHIND_NOT + tlen + OPSize.FAIL_LOOK_BEHIND_NOT;
+ break;
+
+ default:
+ len = OPSize.OPCODE;
+ break;
+ } // switch
+ return len;
+ }
+
+ @Override
+ protected void compileAnchorNode(AnchorNode node) { // emits the opcode(s) for an anchor; PREC_READ* and LOOK_BEHIND* also compile their target
+ int len;
+ int n;
+
+ switch (node.type) {
+ case AnchorType.BEGIN_BUF: addOpcode(OPCode.BEGIN_BUF); break;
+ case AnchorType.END_BUF: addOpcode(OPCode.END_BUF); break;
+ case AnchorType.BEGIN_LINE: addOpcode(OPCode.BEGIN_LINE); break;
+ case AnchorType.END_LINE: addOpcode(OPCode.END_LINE); break;
+ case AnchorType.SEMI_END_BUF: addOpcode(OPCode.SEMI_END_BUF); break;
+ case AnchorType.BEGIN_POSITION: addOpcode(OPCode.BEGIN_POSITION); break;
+
+ case AnchorType.WORD_BOUND:
+ addOpcode(OPCode.WORD_BOUND);
+ break;
+
+ case AnchorType.NOT_WORD_BOUND:
+ addOpcode(OPCode.NOT_WORD_BOUND);
+ break;
+
+ case AnchorType.WORD_BEGIN:
+ if (Config.USE_WORD_BEGIN_END) // nothing is emitted when the flag is off
+ addOpcode(OPCode.WORD_BEGIN);
+ break;
+
+ case AnchorType.WORD_END:
+ if (Config.USE_WORD_BEGIN_END) // nothing is emitted when the flag is off
+ addOpcode(OPCode.WORD_END);
+ break;
+
+ case AnchorType.PREC_READ:
+ addOpcode(OPCode.PUSH_POS);
+ compileTree(node.target);
+ addOpcode(OPCode.POP_POS);
+ break;
+
+ case AnchorType.PREC_READ_NOT:
+ len = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS); // operand spans the target and the closing FAIL_POS
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_POS);
+ break;
+
+ case AnchorType.LOOK_BEHIND:
+ addOpcode(OPCode.LOOK_BEHIND);
+ if (node.charLength < 0) { // negative charLength: ask the analyser for the target's char length
+ n = analyser.getCharLengthTree(node.target);
+ if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ } else {
+ n = node.charLength;
+ }
+ addLength(n); // operand: char length used for the look-behind
+ compileTree(node.target);
+ break;
+
+ case AnchorType.LOOK_BEHIND_NOT:
+ len = compileLengthTree(node.target);
+ addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT); // operand spans the target and the closing FAIL_LOOK_BEHIND_NOT
+ if (node.charLength < 0) { // negative charLength: ask the analyser for the target's char length
+ n = analyser.getCharLengthTree(node.target);
+ if (analyser.returnCode != 0) newSyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
+ } else {
+ n = node.charLength;
+ }
+ addLength(n);
+ compileTree(node.target);
+ addOpcode(OPCode.FAIL_LOOK_BEHIND_NOT);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ private int compileLengthTree(Node node) { // returns the code size of node, dispatching on node.getType(); emits nothing itself
+ int len = 0;
+
+ switch (node.getType()) {
+ case NodeType.LIST: // sum of the elements
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ len += compileLengthTree(lin.car);
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT: // sum of the branches plus a PUSH+JUMP pair for all but one of them
+ ConsAltNode aln = (ConsAltNode)node;
+ int n = 0;
+ do {
+ len += compileLengthTree(aln.car);
+ n++;
+ } while ((aln = aln.cdr) != null);
+ len += (OPSize.PUSH + OPSize.JUMP) * (n - 1);
+ break;
+
+ case NodeType.STR:
+ StringNode sn = (StringNode)node;
+ if (sn.isRaw()) {
+ len = compileLengthStringRawNode(sn);
+ } else {
+ len = compileLengthStringNode(sn);
+ }
+ break;
+
+ case NodeType.CCLASS:
+ len = compileLengthCClassNode((CClassNode)node);
+ break;
+
+ case NodeType.CTYPE:
+ case NodeType.CANY:
+ len = OPSize.OPCODE;
+ break;
+
+ case NodeType.BREF:
+ BackRefNode br = (BackRefNode)node;
+
+ if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
+ len = OPSize.OPCODE + OPSize.OPTION + OPSize.LENGTH +
+ OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
+ } else { // USE_BACKREF_AT_LEVEL
+ if (br.backNum == 1) {
+ len = ((!isIgnoreCase(regex.options) && br.back[0] <= 2)
+ ? OPSize.OPCODE : (OPSize.OPCODE + OPSize.MEMNUM)); // case-sensitive refs to group 1 or 2 have dedicated opcodes without operand
+ } else {
+ len = OPSize.OPCODE + OPSize.LENGTH + (OPSize.MEMNUM * br.backNum);
+ }
+ }
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ len = OPSize.CALL;
+ break;
+ } // USE_SUBEXP_CALL
+ break; // len stays 0 when USE_SUBEXP_CALL is off
+
+ case NodeType.QTFR:
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ len = compileCECLengthQuantifierNode((QuantifierNode)node);
+ } else {
+ len = compileNonCECLengthQuantifierNode((QuantifierNode)node);
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ len = compileLengthEncloseNode((EncloseNode)node);
+ break;
+
+ case NodeType.ANCHOR:
+ len = compileLengthAnchorNode((AnchorNode)node);
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } //switch
+ return len;
+ }
+
+ private void ensure(int size) { // grows code[] so that its length is strictly greater than size
+ if (size >= code.length) {
+ int length = code.length << 1; // start at double the current capacity ...
+ while (length <= size) length <<= 1; // ... and keep doubling until size fits
+ int[]tmp = new int[length];
+ System.arraycopy(code, 0, tmp, 0, code.length);
+ code = tmp;
+ }
+ }
+
+ private void addInt(int i) { // appends one slot to the program, doubling code[] when it is full
+ if (codeLength >= code.length) {
+ int[]tmp = new int[code.length << 1];
+ System.arraycopy(code, 0, tmp, 0, code.length);
+ code = tmp;
+ }
+ code[codeLength++] = i;
+ }
+
+ void setInt(int i, int offset) { // overwrites the slot at offset with i
+ ensure(offset); // NOTE(review): this may replace the field `code`, yet the store below goes to regex.code -- confirm both always refer to the same array here
+ regex.code[offset] = i;
+ }
+
+ private void addObject(Object o) { // stores o in regex.operands and emits its index as the operand
+ if (regex.operands == null) {
+ regex.operands = new Object[4];
+ } else if (regex.operandLength >= regex.operands.length) { // full: double the array
+ Object[]tmp = new Object[regex.operands.length << 1];
+ System.arraycopy(regex.operands, 0, tmp, 0, regex.operands.length);
+ regex.operands = tmp;
+ }
+ addInt(regex.operandLength); // index the object gets on the next line
+ regex.operands[regex.operandLength++] = o;
+ }
+
+ private void addChars(char[] chars, int p ,int length) { // appends chars[p .. p+length) to the program, one char per int slot
+ ensure(codeLength + length);
+ int end = p + length;
+
+ while (p < end) code[codeLength++] = chars[p++];
+ }
+
+ private void addInts(int[]ints, int length) { // appends the first `length` elements of ints to the program
+ ensure(codeLength + length);
+ System.arraycopy(ints, 0, code, codeLength, length);
+ codeLength += length;
+ }
+
+ private void addOpcode(int opcode) { // appends opcode and sets regex.stackNeeded for the opcodes listed below
+ addInt(opcode);
+
+ switch(opcode) { // every case falls through to the single assignment at the end; other opcodes leave the flag untouched
+ case OPCode.ANYCHAR_STAR:
+ case OPCode.ANYCHAR_STAR_SB:
+ case OPCode.ANYCHAR_ML_STAR:
+ case OPCode.ANYCHAR_ML_STAR_SB:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
+ case OPCode.STATE_CHECK_ANYCHAR_STAR:
+ case OPCode.STATE_CHECK_ANYCHAR_STAR_SB:
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:
+ case OPCode.MEMORY_START_PUSH:
+ case OPCode.MEMORY_END_PUSH:
+ case OPCode.MEMORY_END_PUSH_REC:
+ case OPCode.MEMORY_END_REC:
+ case OPCode.NULL_CHECK_START:
+ case OPCode.NULL_CHECK_END_MEMST_PUSH:
+ case OPCode.PUSH:
+ case OPCode.STATE_CHECK_PUSH:
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+ case OPCode.STATE_CHECK:
+ case OPCode.PUSH_OR_JUMP_EXACT1:
+ case OPCode.PUSH_IF_PEEK_NEXT:
+ case OPCode.REPEAT:
+ case OPCode.REPEAT_NG:
+ case OPCode.REPEAT_INC_SG:
+ case OPCode.REPEAT_INC_NG:
+ case OPCode.REPEAT_INC_NG_SG:
+ case OPCode.PUSH_POS:
+ case OPCode.PUSH_POS_NOT:
+ case OPCode.PUSH_STOP_BT:
+ case OPCode.PUSH_LOOK_BEHIND_NOT:
+ case OPCode.CALL:
+ case OPCode.RETURN: // it will appear only with CALL though
+ regex.stackNeeded = true;
+ }
+ }
+
+ private void addStateCheckNum(int num) { // state-check number operand: one plain int slot
+ addInt(num);
+ }
+
+ private void addRelAddr(int addr) { // relative-address operand: one plain int slot
+ addInt(addr);
+ }
+
+ private void addAbsAddr(int addr) { // absolute-address operand: one plain int slot
+ addInt(addr);
+ }
+
+ private void addLength(int length) { // length operand: one plain int slot
+ addInt(length);
+ }
+
+ private void addMemNum(int num) { // number/id operand (group number, repeat id, null-check id): one plain int slot
+ addInt(num);
+ }
+
+ private void addPointer(Object o) { // object operand: stored through addObject, which emits an index into regex.operands
+ addObject(o);
+ }
+
+ private void addOption(int option) { // option-bits operand: one plain int slot
+ addInt(option);
+ }
+
+ private void addOpcodeRelAddr(int opcode, int addr) { // opcode followed by its relative-address operand
+ addOpcode(opcode);
+ addRelAddr(addr);
+ }
+
+ private void addOpcodeOption(int opcode, int option) { // opcode followed by its option-bits operand
+ addOpcode(opcode);
+ addOption(option);
+ }
+
+ private void addTemplate(char[] chars) { // appends chars (by reference, not copied) to templates[]
+ if (templateNum == 0) { // first template: allocate the table
+ templates = new char[2][];
+ } else if (templateNum == templates.length) { // full: double the table
+ char[][] tmp = new char[templateNum * 2][];
+ System.arraycopy(templates, 0, tmp, 0, templateNum);
+ templates = tmp;
+ }
+ templates[templateNum++] = chars;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/AsmCompiler.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,109 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+
+/**
+ * Compiler subclass that generates JVM classes through the AsmCompilerSupport helpers.
+ * NOTE(review): apart from installing templates and bit sets, every node handler below is an
+ * empty stub, so this compiler does not appear to emit matching code yet.
+ */
+final class AsmCompiler extends AsmCompilerSupport {
+
+    public AsmCompiler(Analyser analyser) {
+        super(analyser);
+    }
+
+    /** Bumps the shared class-name counter, then opens the machine and factory classes. */
+    @Override
+    protected void prepare() {
+        REG_NUM++;
+        prepareMachine();
+        prepareMachineInit();
+        prepareMachineMatch();
+
+        prepareFactory();
+        prepareFactoryInit();
+    }
+
+    /** Closes the generated constructors and matchAt, then defines the classes via setupClasses(). */
+    @Override
+    protected void finish() {
+        setupFactoryInit();
+
+        setupMachineInit();
+        setupMachineMatch();
+
+        setupClasses();
+    }
+
+    /** Installs the literal as a template field of the factory; the generated name is not used yet. */
+    @Override
+    protected void addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase) {
+        installTemplate(chars, p, strLength);
+    }
+
+    /** Installs the node's bit set, if it has one, as a field of the factory. */
+    @Override
+    protected void compileCClassNode(CClassNode node) {
+        if (node.bs != null) installBitSet(node.bs.bits);
+    }
+
+    // ---- Handlers below are not implemented yet: they accept the node and emit nothing. ----
+    // ---- They are kept as explicit overrides of the compiler's node hooks.              ----
+
+    @Override
+    protected void compileAltNode(ConsAltNode node) {}
+
+    @Override
+    protected void compileCTypeNode(CTypeNode node) {}
+
+    @Override
+    protected void compileAnyCharNode() {}
+
+    @Override
+    protected void compileBackrefNode(BackRefNode node) {}
+
+    @Override
+    protected void compileCallNode(CallNode node) {}
+
+    @Override
+    protected void compileCECQuantifierNode(QuantifierNode node) {}
+
+    @Override
+    protected void compileNonCECQuantifierNode(QuantifierNode node) {}
+
+    @Override
+    protected void compileOptionNode(EncloseNode node) {}
+
+    @Override
+    protected void compileEncloseNode(EncloseNode node) {}
+
+    @Override
+    protected void compileAnchorNode(AnchorNode node) {}
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/AsmCompilerSupport.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,267 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AsmConstants;
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import jdk.internal.org.objectweb.asm.Opcodes;
+
+/**
+ * ASM plumbing shared by AsmCompiler: class writers for the generated machine and factory classes plus emit helpers.
+ * NOTE(review): method descriptors below still name org/joni types and byte[] ("[B") - confirm before enabling.
+ */
+abstract class AsmCompilerSupport extends Compiler implements Opcodes, AsmConstants {
+    protected ClassWriter factory; // matcher allocator, also bit set, code range and string template container
+    protected MethodVisitor factoryInit;// factory constructor
+    protected String factoryName;
+
+    protected ClassWriter machine; // matcher
+    protected MethodVisitor machineInit;// matcher constructor
+    protected MethodVisitor match; // actual matcher implementation (the matchAt method)
+    protected String machineName;
+
+    // passed to visitMaxs for matchAt; both writers are created with COMPUTE_MAXS
+    protected int maxStack = 1;
+    protected int maxVars = LAST_INDEX;
+
+    // for field generation
+    protected int bitsets, ranges, templates;
+
+    // simple class name postfix scheme for now
+    static int REG_NUM = 0;
+
+    // dummy class loader for now
+    private static final class DummyClassLoader extends ClassLoader {
+        public Class<?> defineClass(String name, byte[] bytes) {
+            return super.defineClass(name, bytes, 0, bytes.length);
+        }
+    }
+
+    private static final DummyClassLoader loader = new DummyClassLoader();
+
+    AsmCompilerSupport(Analyser analyser) {
+        super(analyser);
+    }
+
+    /** Starts the factory class and emits its create(...) method, which instantiates the machine class. */
+    protected final void prepareFactory() {
+        factory = new ClassWriter(ClassWriter.COMPUTE_MAXS);
+        factoryName = "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory" + REG_NUM;
+        factory.visit(V1_4, ACC_PUBLIC + ACC_FINAL, factoryName, null, "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory", null);
+        MethodVisitor create = factory.visitMethod(ACC_SYNTHETIC, "create", "(Lorg/joni/Regex;[BII)Lorg/joni/Matcher;", null, null);
+        create.visitTypeInsn(NEW, machineName);
+        create.visitInsn(DUP); // instance
+        create.visitVarInsn(ALOAD, 1); // Regex
+        create.visitVarInsn(ALOAD, 2); // bytes[]
+        create.visitVarInsn(ILOAD, 3); // p
+        create.visitVarInsn(ILOAD, 4); // end
+        create.visitMethodInsn(INVOKESPECIAL, machineName, "<init>", "(Lorg/joni/Regex;[BII)V");
+        create.visitInsn(ARETURN);
+        create.visitMaxs(0, 0);
+        create.visitEnd();
+    }
+
+    /** Opens the factory constructor with its super() call; the installArray helpers append to it. */
+    protected final void prepareFactoryInit() {
+        factoryInit = factory.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null);
+        factoryInit.visitVarInsn(ALOAD, 0);
+        factoryInit.visitMethodInsn(INVOKESPECIAL, "jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory", "<init>", "()V");
+    }
+
+    /** Terminates the factory constructor. */
+    protected final void setupFactoryInit() {
+        factoryInit.visitInsn(RETURN);
+        factoryInit.visitMaxs(0, 0);
+        factoryInit.visitEnd();
+    }
+
+    /** Creates the writer for the machine class and picks its numbered internal name. */
+    protected final void prepareMachine() {
+        machine = new ClassWriter(ClassWriter.COMPUTE_MAXS);
+        machineName = "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine" + REG_NUM;
+    }
+
+    /** Starts the machine class and a constructor that forwards its four arguments to the superclass. */
+    protected final void prepareMachineInit() {
+        machine.visit(V1_4, ACC_PUBLIC + ACC_FINAL, machineName, null, "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine", null);
+        machineInit = machine.visitMethod(ACC_PROTECTED, "<init>", "(Lorg/joni/Regex;[BII)V", null, null);
+        machineInit.visitVarInsn(ALOAD, THIS); // this
+        machineInit.visitVarInsn(ALOAD, 1); // Regex
+        machineInit.visitVarInsn(ALOAD, 2); // bytes[]
+        machineInit.visitVarInsn(ILOAD, 3); // p
+        machineInit.visitVarInsn(ILOAD, 4); // end
+        machineInit.visitMethodInsn(INVOKESPECIAL, "jdk/nashorn/internal/runtime/regexp/joni/NativeMachine", "<init>", "(Lorg/joni/Regex;[BII)V");
+    }
+
+    /** Finishes the machine constructor, first caching the factory in a field if any arrays were installed. */
+    protected final void setupMachineInit() {
+        if (bitsets + ranges + templates > 0) { // ok, some of these are in use, we'd like to cache the factory
+            machine.visitField(ACC_PRIVATE + ACC_FINAL, "factory", "L" + factoryName + ";", null, null);
+            machineInit.visitVarInsn(ALOAD, THIS); // this
+            machineInit.visitVarInsn(ALOAD, 1); // this, Regex
+            machineInit.visitFieldInsn(GETFIELD, "jdk/nashorn/internal/runtime/regexp/joni/Regex", "factory", "Lorg/joni/MatcherFactory;"); // this, factory
+            machineInit.visitTypeInsn(CHECKCAST, factoryName);
+            machineInit.visitFieldInsn(PUTFIELD, machineName, "factory", "L" + factoryName + ";"); // []
+        }
+        machineInit.visitInsn(RETURN);
+        machineInit.visitMaxs(0, 0);
+        machineInit.visitEnd();
+    }
+
+    /** Opens matchAt(III)I and emits its prologue: s = sstart, then bytes = this.bytes. */
+    protected final void prepareMachineMatch() {
+        match = machine.visitMethod(ACC_SYNTHETIC, "matchAt", "(III)I", null, null);
+        move(S, SSTART); // s = sstart
+        load("bytes", "[B"); // this.bytes
+        astore(BYTES); // byte[]bytes = this.bytes
+    }
+
+    /** Appends a trailing "return -1" to matchAt and closes the method. */
+    protected final void setupMachineMatch() {
+        match.visitInsn(ICONST_M1);
+        match.visitInsn(IRETURN);
+        match.visitMaxs(maxStack, maxVars);
+        match.visitEnd();
+    }
+
+    /** Serializes both classes, dumps them when Config.DEBUG_ASM is set, defines them and stores a new factory in regex.factory. */
+    protected final void setupClasses() {
+        byte[]factoryCode = factory.toByteArray();
+        byte[]machineCode = machine.toByteArray();
+        if (Config.DEBUG_ASM) {
+            try {
+                dumpClass(factoryName, factoryCode);
+                dumpClass(machineName, machineCode);
+            } catch (IOException ioe) {
+                ioe.printStackTrace(Config.err);
+            }
+        }
+
+        loader.defineClass(machineName.replace('/', '.'), machineCode);
+        Class<?> cls = loader.defineClass(factoryName.replace('/', '.'), factoryCode);
+        try {
+            regex.factory = (MatcherFactory)cls.newInstance();
+        } catch(Exception e) {
+            e.printStackTrace(Config.err);
+        }
+    }
+
+    /** Writes bytecode to "(simple class name).class" in the working directory; the stream is closed even if the write fails. */
+    private static void dumpClass(String internalName, byte[] bytecode) throws IOException {
+        final FileOutputStream fos = new FileOutputStream(internalName.substring(internalName.lastIndexOf('/') + 1) + ".class");
+        try {
+            fos.write(bytecode);
+        } finally {
+            fos.close();
+        }
+    }
+
+    // ---- one-instruction helpers that emit into the match method ----
+    protected final void aload(int var) { match.visitVarInsn(ALOAD, var); }
+    protected final void astore(int var) { match.visitVarInsn(ASTORE, var); }
+    protected final void loadThis() { match.visitVarInsn(ALOAD, THIS); }
+    protected final void load(int var) { match.visitVarInsn(ILOAD, var); }
+    protected final void store(int var) { match.visitVarInsn(ISTORE, var); }
+
+    /** Emits ILOAD from followed by ISTORE to, i.e. copies one int local to another. */
+    protected final void move(int to, int from) {
+        load(from);
+        store(to);
+    }
+
+    /** Emits code pushing this.field of the machine class, typed by the given descriptor. */
+    protected final void load(String field, String signature) {
+        loadThis();
+        match.visitFieldInsn(GETFIELD, machineName, field, signature);
+    }
+    protected final void load(String field) { load(field, "I"); }
+
+    /** Emits ALOAD this followed by PUTFIELD for the given field of the machine class. */
+    // NOTE(review): PUTFIELD expects the object reference below the value on the stack - confirm the intended operand order.
+    protected final void store(String field, String signature) {
+        loadThis();
+        match.visitFieldInsn(PUTFIELD, machineName, field, signature);
+    }
+    protected final void store(String field) { store(field, "I"); }
+
+    /** Adds a numbered template field filled from arr, starting at p, and returns the field name. */
+    protected final String installTemplate(char[] arr, int p, int length) {
+        final String templateName = TEMPLATE + ++templates;
+        installArray(templateName, arr, p, length);
+        return templateName;
+    }
+    /** Adds a numbered code-range field holding arr and returns the field name. */
+    protected final String installCodeRange(int[]arr) {
+        final String codeRangeName = CODERANGE + ++ranges;
+        installArray(codeRangeName, arr);
+        return codeRangeName;
+    }
+    /** Adds a numbered bit-set field holding arr and returns the field name. */
+    protected final String installBitSet(int[]arr) {
+        final String bitsetName = BITSET + ++bitsets;
+        installArray(bitsetName, arr);
+        return bitsetName;
+    }
+
+    /** Declares an int[] field on the factory and extends its constructor to build and assign it. */
+    private void installArray(String name, int[]arr) {
+        factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[I", null, null);
+        factoryInit.visitVarInsn(ALOAD, THIS); // this;
+        loadInt(factoryInit, arr.length); // this, length
+        factoryInit.visitIntInsn(NEWARRAY, T_INT); // this, arr
+        for (int i=0;i < arr.length; i++) buildArray(i, arr[i], IASTORE);
+        factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[I");
+    }
+
+    /** byte[] variant. NOTE(review): the array is sized by arr.length though only length elements are stored, each cut to 8 bits. */
+    private void installArray(String name, char[]arr, int p, int length) {
+        factory.visitField(ACC_PRIVATE + ACC_FINAL, name, "[B", null, null);
+        factoryInit.visitVarInsn(ALOAD, THIS); // this;
+        loadInt(factoryInit, arr.length); // this, length
+        factoryInit.visitIntInsn(NEWARRAY, T_BYTE); // this, arr
+        for (int i=p, j=0; i < p + length; i++, j++) buildArray(j, arr[i] & 0xff, BASTORE);
+        factoryInit.visitFieldInsn(PUTFIELD, factoryName, name, "[B");
+    }
+
+    /** Emits "arr[index] = value" with the given array-store opcode, leaving the array reference on the stack. */
+    private void buildArray(int index, int value, int type) {
+        factoryInit.visitInsn(DUP); // ... arr, arr
+        loadInt(factoryInit, index); // ... arr, arr, index
+        loadInt(factoryInit, value); // ... arr, arr, index, value
+        factoryInit.visitInsn(type); // ... arr
+    }
+
+    /** Pushes an int constant using ICONST_x, BIPUSH, SIPUSH or LDC, whichever is the first that fits. */
+    private void loadInt(MethodVisitor mv, int value) {
+        if (value >= -1 && value <= 5) {
+            mv.visitInsn(value + ICONST_0); // ICONST_0 == 3
+        } else if (value >= 6 && value <= 127 || value >= -128 && value <= -2) {
+            mv.visitIntInsn(BIPUSH, value);
+        } else if (value >= 128 && value <= 32767 || value >= -32768 && value <= -129) {
+            mv.visitIntInsn(SIPUSH, value);
+        } else {
+            mv.visitLdcInsn(Integer.valueOf(value));
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/BitSet.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,115 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/** Fixed-size set of SINGLE_BYTE_SIZE (256) bits packed into an int array; positions are not range-checked. */
+public final class BitSet {
+    static final int BITS_PER_BYTE = 8;
+    public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE);
+    private static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE;
+    static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM);
+    static final int ROOM_SHIFT = log2(BITS_IN_ROOM);
+    private static final int BITS_TO_STRING_WRAP = 4;
+
+    final int[] bits = new int[BITSET_SIZE];
+
+    /** Dumps every bit as 0/1, starting a new line before each quarter of the set. */
+    public String toString() {
+        final int perLine = SINGLE_BYTE_SIZE / BITS_TO_STRING_WRAP;
+        final StringBuilder out = new StringBuilder("BitSet");
+        for (int pos = 0; pos < SINGLE_BYTE_SIZE; pos++) {
+            if (pos % perLine == 0) out.append("\n ");
+            out.append(at(pos) ? '1' : '0');
+        }
+        return out.toString();
+    }
+
+    /** Tests the bit at pos. */
+    public boolean at(int pos) {
+        return 0 != (bits[pos >>> ROOM_SHIFT] & bit(pos));
+    }
+    /** Turns the bit at pos on. */
+    public void set(int pos) {
+        bits[pos >>> ROOM_SHIFT] |= bit(pos);
+    }
+    /** Turns the bit at pos off. */
+    public void clear(int pos) {
+        bits[pos >>> ROOM_SHIFT] &= ~bit(pos);
+    }
+    /** Flips the bit at pos. */
+    public void invert(int pos) {
+        bits[pos >>> ROOM_SHIFT] ^= bit(pos);
+    }
+
+    /** Turns every bit off. */
+    public void clear() {
+        for (int room = 0; room < BITSET_SIZE; room++) bits[room] = 0;
+    }
+    /** @return true when no bit is on */
+    public boolean isEmpty() {
+        int union = 0;
+        for (int room : bits) union |= room;
+        return union == 0;
+    }
+    /** Turns on the bits from..to (inclusive); positions at or beyond SINGLE_BYTE_SIZE are ignored. */
+    public void setRange(int from, int to) {
+        final int stop = Math.min(to, SINGLE_BYTE_SIZE - 1);
+        for (int pos = from; pos <= stop; pos++) set(pos);
+    }
+    /** Turns every bit on. */
+    public void setAll() {
+        for (int room = 0; room < BITSET_SIZE; room++) bits[room] = ~0;
+    }
+    /** Flips every bit in place. */
+    public void invert() {
+        for (int room = 0; room < BITSET_SIZE; room++) bits[room] ^= ~0;
+    }
+    /** Writes the complement of this set into to. */
+    public void invertTo(BitSet to) {
+        for (int room = 0; room < BITSET_SIZE; room++) to.bits[room] = ~bits[room];
+    }
+    /** Intersects this set with other, in place. */
+    public void and(BitSet other) {
+        for (int room = 0; room < BITSET_SIZE; room++) bits[room] &= other.bits[room];
+    }
+    /** Unions other into this set, in place. */
+    public void or(BitSet other) {
+        for (int room = 0; room < BITSET_SIZE; room++) bits[room] |= other.bits[room];
+    }
+    /** Overwrites this set with the contents of other. */
+    public void copy(BitSet other) {
+        System.arraycopy(other.bits, 0, bits, 0, BITSET_SIZE);
+    }
+    /** @return how many bits are on */
+    public int numOn() {
+        int count = 0;
+        for (int room : bits) count += Integer.bitCount(room);
+        return count;
+    }
+
+    /** Mask selecting pos inside its int; an int shift only uses the low five bits of the distance. */
+    static int bit(int pos){
+        return 1 << (pos % SINGLE_BYTE_SIZE);
+    }
+    /** Index of the highest set bit of n (floor of log2 for positive n); 0 when n is 0. */
+    private static int log2(int n){
+        return n == 0 ? 0 : 31 - Integer.numberOfLeadingZeros(n);
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/BitStatus.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,55 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/** Static helpers that treat an int as a status word of BIT_STATUS_BITS_NUM bits. */
+final class BitStatus {
+    public static final int BIT_STATUS_BITS_NUM = 4 * 8;
+
+    /** @return a status word with no bit set */
+    public static int bsClear() {
+        return 0;
+    }
+    /** @return a status word with every bit set */
+    public static int bsAll() {
+        return -1;
+    }
+
+    /** Tests bit n of stats; for n at or beyond BIT_STATUS_BITS_NUM bit 0 is tested instead. */
+    public static boolean bsAt(int stats, int n) {
+        final int mask = n < BIT_STATUS_BITS_NUM ? 1 << n : 1;
+        return (stats & mask) != 0;
+    }
+
+    /** Returns stats with bit n set; for n at or beyond BIT_STATUS_BITS_NUM bit 0 is set instead. */
+    public static int bsOnAt(int stats, int n) {
+        return stats | (n < BIT_STATUS_BITS_NUM ? 1 << n : 1);
+    }
+
+    /** Returns stats with bit n set; for n at or beyond BIT_STATUS_BITS_NUM stats is returned unchanged. */
+    public static int bsOnAtSimple(int stats, int n) {
+        return n < BIT_STATUS_BITS_NUM ? stats | (1 << n) : stats;
+    }
+
+    /** Returns v with the bits of f cleared when negative is true, and with them set otherwise. */
+    public static int bsOnOff(int v, int f, boolean negative) {
+        return negative ? v & ~f : v | f;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,1462 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
+import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl;
+import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+
+class ByteCodeMachine extends StackMachine {
+ private int bestLen; // return value
+ private int s = 0; // current char
+
+ private int range; // right range
+ private int sprev;
+ private int sstart;
+ private int sbegin;
+
+ private final int[]code; // byte code
+ private int ip; // instruction pointer
+
+ // Creates a machine for the given compiled regex; chars, p and end go straight to StackMachine.
+ ByteCodeMachine(Regex regex, char[] chars, int p, int end) {
+     super(regex, chars, p, end); code = regex.code; // keep a direct reference to the compiled program
+ }
+
+ protected int stkp; // a temporary
+ /**
+  * Builds node's capture-history children from the stack entries, starting at the cursor field stkp.
+  * When false is returned, stkp indexes the MEM_END entry that closed node.
+  * @return false once the MEM_END entry for node.group is found, true if the stack runs out first
+  */
+ private boolean makeCaptureHistoryTree(CaptureTreeNode node) {
+     int k = stkp;
+     while (k < stk) {
+         final StackEntry e = stack[k];
+         if (e.type == MEM_START) {
+             final int n = e.getMemNum();
+             if (n <= Config.MAX_CAPTURE_HISTORY_GROUP && bsAt(regex.captureHistory, n)) {
+                 final CaptureTreeNode child = new CaptureTreeNode();
+                 child.group = n;
+                 child.beg = e.getMemPStr() - str;
+                 node.addChild(child);
+                 stkp = k + 1;
+                 if (makeCaptureHistoryTree(child)) return true;
+                 k = stkp; // the recursion stopped on the entry that closed the child
+                 child.end = stack[k].getMemPStr() - str; // read that entry, not the stale start entry e
+             }
+         } else if (e.type == MEM_END && e.getMemNum() == node.group) {
+             node.end = e.getMemPStr() - str;
+             stkp = k;
+             return false;
+         }
+         k++; // always advance: without this the loop never left an entry that did not return
+     }
+     return true; /* 1: root node ending. */
+ }
+
+ /** Prepares region.historyRoot (creating or clearing it) for the current match and rebuilds its subtree. */
+ private void checkCaptureHistory(Region region) {
+     CaptureTreeNode root = region.historyRoot;
+     if (root != null) {
+         root.clear();
+     } else {
+         root = new CaptureTreeNode();
+         region.historyRoot = root;
+     }
+     // the root is group 0 and spans sstart..s, stored as offsets relative to str
+     root.group = 0;
+     root.beg = sstart - str;
+     root.end = s - str;
+
+     stkp = 0; // scan the stack from its first entry
+     makeCaptureHistoryTree(root);
+ }
+
+ /**
+  * Compares mbLen chars starting at chars[s1] with the chars starting at chars[ps2.value], folding
+  * both sides with Character.toLowerCase. caseFlodFlag and textEnd are accepted but never read.
+  *
+  * @return true if every char matched (ps2.value then points past the compared run); false on the first mismatch
+  */
+ private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) {
+     final int limit = s1 + mbLen;
+     int left = s1;
+     int right = ps2.value;
+     for (; left < limit; left++, right++) {
+         if (Character.toLowerCase(chars[left]) != Character.toLowerCase(chars[right])) return false;
+     }
+     ps2.value = right;
+     return true;
+ }
+
+ // Logs the coordinates of the match attempt that is about to start (called when Config.DEBUG_MATCH is on).
+ private void debugMatchBegin() {
+     final String header = "match_at: " + "str: " + str + ", end: " + end
+             + ", start: " + sstart + ", sprev: " + sprev;
+     Config.log.println(header);
+     final String sizes = "size: " + (end - str) + ", start offset: " + (sstart - str);
+     Config.log.println(sizes);
+ }
+
+ // Traces one step: the input offset, up to seven upcoming chars, then the instruction at ip.
+ private void debugMatchLoop() {
+     if (!Config.DEBUG_MATCH) return;
+     Config.log.printf("%4d", (s - str)).print("> \"");
+     int q = s;
+     for (int shown = 0; shown < 7 && q < end && s >= 0; shown++) {
+         Config.log.print(new String(new char[]{chars[q++]}));
+     }
+     final String closer = q < end ? "...\"" : "\"";
+     q += closer.length();
+     Config.log.print(closer);
+     for (int pad = 0; pad < 20 - (q - s); pad++) Config.log.print(" ");
+     final StringBuilder sb = new StringBuilder();
+     new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
+     Config.log.println(sb.toString());
+ }
+
+ /**
+  * Runs the compiled program from instruction 0 with the input cursor at sstart, executing one
+  * opcode handler per loop iteration. Returns finish()'s value once opEnd() accepts or FINISH runs.
+  */
+ protected final int matchAt(int range, int sstart, int sprev) {
+ this.range = range;
+ this.sstart = sstart;
+ this.sprev = sprev;
+ stk = 0;
+ ip = 0;
+ if (Config.DEBUG_MATCH) debugMatchBegin();
+ init();
+ bestLen = -1;
+ s = sstart;
+
+ final int[]code = this.code;
+ while (true) {
+ if (Config.DEBUG_MATCH) debugMatchLoop();
+ // break and continue are interchangeable in the cases below: the switch is the last statement of the loop body
+ sbegin = s;
+ switch (code[ip++]) {
+ case OPCode.END: if (opEnd()) return finish(); break;
+ case OPCode.EXACT1: opExact1(); break;
+ case OPCode.EXACT2: opExact2(); continue;
+ case OPCode.EXACT3: opExact3(); continue;
+ case OPCode.EXACT4: opExact4(); continue;
+ case OPCode.EXACT5: opExact5(); continue;
+ case OPCode.EXACTN: opExactN(); continue;
+
+ case OPCode.EXACTMB2N1: opExactMB2N1(); break;
+ case OPCode.EXACTMB2N2: opExactMB2N2(); continue;
+ case OPCode.EXACTMB2N3: opExactMB2N3(); continue;
+ case OPCode.EXACTMB2N: opExactMB2N(); continue;
+ case OPCode.EXACTMB3N: opExactMB3N(); continue;
+ case OPCode.EXACTMBN: opExactMBN(); continue;
+
+ case OPCode.EXACT1_IC: opExact1IC(); break;
+ case OPCode.EXACTN_IC: opExactNIC(); continue;
+
+ case OPCode.CCLASS: opCClass(); break;
+ case OPCode.CCLASS_MB: opCClassMB(); break;
+ case OPCode.CCLASS_MIX: opCClassMIX(); break;
+ case OPCode.CCLASS_NOT: opCClassNot(); break;
+ case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break;
+ case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break;
+ case OPCode.CCLASS_NODE: opCClassNode(); break;
+
+ case OPCode.ANYCHAR: opAnyChar(); break;
+ case OPCode.ANYCHAR_ML: opAnyCharML(); break;
+ case OPCode.ANYCHAR_STAR: opAnyCharStar(); break;
+ case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break;
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break;
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_STAR: opStateCheckAnyCharStar(); break;
+ case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:opStateCheckAnyCharMLStar();break;
+
+ case OPCode.WORD: opWord(); break;
+ case OPCode.NOT_WORD: opNotWord(); break;
+ case OPCode.WORD_BOUND: opWordBound(); continue;
+ case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue;
+ case OPCode.WORD_BEGIN: opWordBegin(); continue;
+ case OPCode.WORD_END: opWordEnd(); continue;
+
+ case OPCode.BEGIN_BUF: opBeginBuf(); continue;
+ case OPCode.END_BUF: opEndBuf(); continue;
+ case OPCode.BEGIN_LINE: opBeginLine(); continue;
+ case OPCode.END_LINE: opEndLine(); continue;
+ case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
+ case OPCode.BEGIN_POSITION: opBeginPosition(); continue;
+
+ case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue;
+ case OPCode.MEMORY_START: opMemoryStart(); continue;
+ case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
+ case OPCode.MEMORY_END: opMemoryEnd(); continue;
+ case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
+ case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;
+
+ case OPCode.BACKREF1: opBackRef1(); continue;
+ case OPCode.BACKREF2: opBackRef2(); continue;
+ case OPCode.BACKREFN: opBackRefN(); continue;
+ case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
+ case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
+ case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
+ case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;
+
+ case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
+ case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
+ case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
+ case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;
+
+ case OPCode.JUMP: opJump(); continue;
+ case OPCode.PUSH: opPush(); continue;
+
+ // CEC
+ case OPCode.STATE_CHECK_PUSH: opStateCheckPush(); continue;
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP: opStateCheckPushOrJump(); continue;
+ case OPCode.STATE_CHECK: opStateCheck(); continue;
+
+ case OPCode.POP: opPop(); continue;
+ case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
+ case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;
+
+ case OPCode.REPEAT: opRepeat(); continue;
+ case OPCode.REPEAT_NG: opRepeatNG(); continue;
+ case OPCode.REPEAT_INC: opRepeatInc(); continue;
+ case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
+ case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
+ case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;
+
+ case OPCode.PUSH_POS: opPushPos(); continue;
+ case OPCode.POP_POS: opPopPos(); continue;
+ case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
+ case OPCode.FAIL_POS: opFailPos(); continue;
+ case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
+ case OPCode.POP_STOP_BT: opPopStopBT(); continue;
+
+ case OPCode.LOOK_BEHIND: opLookBehind(); continue;
+ case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
+ case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
+
+ // USE_SUBEXP_CALL
+ case OPCode.CALL: opCall(); continue;
+ case OPCode.RETURN: opReturn(); continue;
+
+ case OPCode.FINISH:
+ return finish();
+
+ case OPCode.FAIL: opFail(); continue;
+
+ default:
+ throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
+
+ } // main switch
+ } // main while
+ }
+
+ /**
+  * Handles the END opcode. If the match ending at s is longer than bestLen it is recorded: the
+  * overall span goes to msaBegin/msaEnd and, when a Region is attached, group 0 and every
+  * capture group are stored as offsets relative to str (Region.REGION_NOTPOS for groups without
+  * a recorded end). Otherwise the recorded result is cleared, unless the POSIX-region option
+  * applies. In both cases endBestLength() then decides whether matching stops.
+  *
+  * @return true if the caller should finish; false if opFail() was invoked to retry
+  */
+ private boolean opEnd() {
+     final int n = s - sstart;
+     final Region region = msaRegion;
+
+     if (n > bestLen) {
+         if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+             if (isFindLongest(regex.options)) {
+                 if (n <= msaBestLen) {
+                     // not longer than the length already recorded in msaBestLen
+                     return endBestLength();
+                 }
+                 msaBestLen = n;
+                 msaBestS = sstart;
+             }
+         }
+
+         bestLen = n;
+         if (region != null) {
+             region.beg[0] = msaBegin = sstart - str;
+             region.end[0] = msaEnd = s - str;
+             for (int i = 1; i <= regex.numMem; i++) {
+                 if (repeatStk[memEndStk + i] == INVALID_INDEX) {
+                     region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
+                     continue;
+                 }
+                 // The bt* status bit selects how the slot is read: as the index of a stack entry
+                 // holding the position, or as the position itself.
+                 region.beg[i] = bsAt(regex.btMemStart, i)
+                         ? stack[repeatStk[memStartStk + i]].getMemPStr() - str
+                         : repeatStk[memStartStk + i] - str;
+                 // Fixed: this stack-entry branch used to skip "- str", unlike beg[i] above and the
+                 // direct branch below, so the end offset was not made relative to str.
+                 region.end[i] = bsAt(regex.btMemEnd, i)
+                         ? stack[repeatStk[memEndStk + i]].getMemPStr() - str
+                         : repeatStk[memEndStk + i] - str;
+             }
+
+             if (Config.USE_CAPTURE_HISTORY) {
+                 if (regex.captureHistory != 0) checkCaptureHistory(region);
+             }
+         } else {
+             msaBegin = sstart - str;
+             msaEnd = s - str;
+         }
+     } else {
+         // Both arms of the former USE_POSIX_API_REGION_OPTION test did the same clearing; only
+         // the isPosixRegion() guard differed, so they are folded into one condition here.
+         if (!Config.USE_POSIX_API_REGION_OPTION || !isPosixRegion(regex.options)) {
+             if (region != null) {
+                 region.clear();
+             } else {
+                 msaBegin = msaEnd = 0;
+             }
+         }
+     }
+
+     // end_best_len:
+     /* default behavior: return first-matching result. */
+     return endBestLength();
+ }
+
+ // Decides whether an accepted END really ends matching. Returns true to finish; returns false
+ // after calling opFail() when the find options ask for another attempt.
+ private boolean endBestLength() {
+     if (!isFindCondition(regex.options)) return true;
+
+     final boolean rejectEmpty = isFindNotEmpty(regex.options) && s == sstart;
+     if (rejectEmpty) bestLen = -1; // the empty match is discarded
+     if (rejectEmpty || (isFindLongest(regex.options) && s < range)) {
+         opFail(); /* for retry */
+         return false;
+     }
+     return true;
+ }
+
+ private void opExact1() { // EXACT1: match the single literal stored inline at code[ip]
+ if (s >= range || code[ip] != chars[s++]) {opFail(); return;} // on a mismatch s has already been advanced when opFail() runs
+ //if (s > range) {opFail(); return;}
+ ip++; // step over the inline operand
+ sprev = sbegin; // sbegin is where this instruction started (set by the dispatch loop)
+ }
+
+ private void opExact2() { // EXACT2: match two literals stored inline at code[ip], code[ip + 1] (unrolled)
+ if (s + 2 > range) {opFail(); return;} // fewer than two chars left before range
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ sprev = s; // index of the last char matched
+ ip++; s++;
+ }
+
+ private void opExact3() { // EXACT3: match three literals stored inline in code (unrolled)
+ if (s + 3 > range) {opFail(); return;} // fewer than three chars left before range
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ sprev = s; // index of the last char matched
+ ip++; s++;
+ }
+
+ private void opExact4() { // EXACT4: match four literals stored inline in code (unrolled)
+ if (s + 4 > range) {opFail(); return;} // fewer than four chars left before range
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ sprev = s; // index of the last char matched
+ ip++; s++;
+ }
+
+ private void opExact5() { // EXACT5: match five literals stored inline in code (unrolled)
+ if (s + 5 > range) {opFail(); return;} // fewer than five chars left before range
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ sprev = s; // index of the last char matched
+ ip++; s++;
+ }
+
+ private void opExactN() { // compare a literal run whose length is the first operand
+ int tlen = code[ip++]; // number of chars to compare
+ if (s + tlen > range) {opFail(); return;}
+
+ if (Config.USE_STRING_TEMPLATES) {
+ char[] bs = regex.templates[code[ip++]]; // literal lives in the template array picked by this operand
+ int ps = code[ip++]; // start offset inside that template
+
+ while (tlen-- > 0) if (bs[ps++] != chars[s++]) {opFail(); return;}
+
+ } else {
+ while (tlen-- > 0) if (code[ip++] != chars[s++]) {opFail(); return;} // literal is inlined in the bytecode
+ }
+ sprev = s - 1; // last char consumed
+ }
+
+ private void opExactMB2N1() { // one two-unit literal: code[ip], code[ip + 1] against two chars at s
+ if (s + 2 > range) {opFail(); return;}
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opExactMB2N2() { // two two-unit literals: four inline code units against four chars at s
+ if (s + 4 > range) {opFail(); return;}
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ sprev = s; // start of the second two-unit group
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ }
+
+ private void opExactMB2N3() { // three two-unit literals: six inline code units against six chars at s
+ if (s + 6 > range) {opFail(); return;}
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ sprev = s; // start of the third two-unit group
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ }
+
+ private void opExactMB2N() { // tlen two-unit literals, i.e. tlen * 2 code units compared at s
+ int tlen = code[ip++]; // number of two-unit groups
+ if (s + tlen * 2 > range) {opFail(); return;}
+
+ if (Config.USE_STRING_TEMPLATES) {
+ char[] bs = regex.templates[code[ip++]]; // literal taken from a template array
+ int ps = code[ip++]; // start offset inside the template
+
+ while(tlen-- > 0) {
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ }
+ } else {
+ while(tlen-- > 0) { // literal is inlined in the bytecode
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ }
+ sprev = s - 2; // start of the last two-unit group consumed
+ }
+
+ private void opExactMB3N() { // tlen three-unit literals, i.e. tlen * 3 code units compared at s
+ int tlen = code[ip++]; // number of three-unit groups
+ if (s + tlen * 3 > range) {opFail(); return;}
+
+ if (Config.USE_STRING_TEMPLATES) {
+ char[] bs = regex.templates[code[ip++]]; // literal taken from a template array
+ int ps = code[ip++]; // start offset inside the template
+
+ while (tlen-- > 0) {
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ }
+ } else {
+ while (tlen-- > 0) { // literal is inlined in the bytecode
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ }
+
+ sprev = s - 3; // start of the last three-unit group consumed
+ }
+
+ private void opExactMBN() { // literal of tlen2 groups, each tlen code units wide
+ int tlen = code[ip++]; /* mb-len */
+ int tlen2= code[ip++]; /* string len */
+
+ tlen2 *= tlen; // from here on tlen2 is the total number of code units to compare
+ if (s + tlen2 > range) {opFail(); return;}
+
+ if (Config.USE_STRING_TEMPLATES) {
+ char[] bs = regex.templates[code[ip++]]; // literal taken from a template array
+ int ps = code[ip++]; // start offset inside the template
+
+ while (tlen2-- > 0) {
+ if (bs[ps] != chars[s]) {opFail(); return;}
+ ps++; s++;
+ }
+ } else {
+ while (tlen2-- > 0) { // literal is inlined in the bytecode
+ if (code[ip] != chars[s]) {opFail(); return;}
+ ip++; s++;
+ }
+ }
+
+ sprev = s - tlen; // start of the last group consumed
+ }
+
+ private void opExact1IC() { // one literal, compared against the lower-cased input char
+ if (s >= range || code[ip] != Character.toLowerCase(chars[s++])) {opFail(); return;} // NOTE(review): presumably the operand is stored lower-cased - confirm in the compiler
+ ip++; // step over the literal operand
+ sprev = sbegin; // break;
+ }
+
+ private void opExactNIC() { // literal run of operand-given length, compared against lower-cased input chars
+ int tlen = code[ip++]; // number of chars to compare
+ if (s + tlen > range) {opFail(); return;}
+
+ if (Config.USE_STRING_TEMPLATES) {
+ char[] bs = regex.templates[code[ip++]]; // literal taken from a template array
+ int ps = code[ip++]; // start offset inside the template
+
+ while (tlen-- > 0) if (bs[ps++] != Character.toLowerCase(chars[s++])) {opFail(); return;}
+ } else {
+ // literal is inlined in the bytecode
+ while (tlen-- > 0) if (code[ip++] != Character.toLowerCase(chars[s++])) {opFail(); return;}
+ }
+ sprev = s - 1; // last char consumed
+ }
+
+    private boolean isInBitSet() { // tests the char at s against the bit set stored inline at code[ip..]
+        if (chars[s] > 0xff) return false; // chars above 0xff are never members of the bit set
+        return (code[ip + (chars[s] >>> BitSet.ROOM_SHIFT)] & (1 << chars[s])) != 0; // shift count is masked to 5 bits by Java
+    }
+
+ private void opCClass() { // char class given as an inline bit set: succeed if the char at s is a member
+ if (s >= range || !isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE; // step over the inline bit set
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private boolean isInClassMB() {
+ int tlen = code[ip++];
+ if (s >= range) return false;
+ int ss = s;
+ s++;
+ int c = chars[ss];
+ if (!EncodingHelper.isInCodeRange(code, ip, c)) return false;
+ ip += tlen;
+ return true;
+ }
+
+ private void opCClassMB() { // char class given only as a code-range table; chars <= 0xff never match
+ // beyond string check
+ if (s >= range || chars[s] <= 0xff) {opFail(); return;}
+ if (!isInClassMB()) {opFail(); return;} // not!!!
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassMIX() { // char class with both an inline bit set and a code-range table
+ if (s >= range) {opFail(); return;}
+ if (chars[s] > 0xff) { // char above 0xff: decided by the range table
+ ip += BitSet.BITSET_SIZE; // skip the bit set to reach the table
+ if (!isInClassMB()) {opFail(); return;}
+ } else { // char up to 0xff: decided by the bit set
+ if (!isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ int tlen = code[ip++]; // by code range length
+ ip += tlen; // skip the unused range table
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassNot() { // negated bit-set class: succeed if the char at s is NOT a member
+ if (s >= range || isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE; // step over the inline bit set
+ s++;
+ sprev = sbegin; // break;
+ }
+
+    private boolean isNotInClassMB() { // negated code-range test for the char at s; true = not a member
+        int tlen = code[ip++]; // length of the code-range table that follows
+
+        // The guard here used to read "!(s + 1 <= range)". For any valid index
+        // that is exactly "s >= range", so the nested "if (s >= range) return false"
+        // always fired and the tail "s = end; ip += tlen; return true;" could
+        // never execute. The unreachable branch is removed; the result and the
+        // state left behind (ip advanced past the length operand only) are the
+        // same as before for every input.
+        if (s >= range) return false;
+
+        int c = chars[s];
+        s++; // advanced even if the test below fails, exactly as before
+
+        if (EncodingHelper.isInCodeRange(code, ip, c)) return false;
+        ip += tlen; // skip the range table on success
+        return true;
+    }
+
+ private void opCClassMBNot() { // negated range-table class; any char <= 0xff matches without a lookup
+ if (s >= range) {opFail(); return;}
+ if (chars[s] <= 0xff) {
+ s++;
+ int tlen = code[ip++]; // table length operand
+ ip += tlen; // skip the table unread
+ sprev = sbegin; // break;
+ return;
+ }
+ if (!isNotInClassMB()) {opFail(); return;}
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassMIXNot() { // negated class with both an inline bit set and a code-range table
+ if (s >= range) {opFail(); return;}
+ if (chars[s] > 0xff) { // char above 0xff: decided by the range table
+ ip += BitSet.BITSET_SIZE; // skip the bit set to reach the table
+ if (!isNotInClassMB()) {opFail(); return;}
+ } else { // char up to 0xff: decided by the bit set
+ if (isInBitSet()) {opFail(); return;}
+ ip += BitSet.BITSET_SIZE;
+ int tlen = code[ip++]; // table length operand
+ ip += tlen; // skip the unused range table
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opCClassNode() { // char class delegated to a CClassNode held in regex.operands
+ if (s >= range) {opFail(); return;}
+ CClassNode cc = (CClassNode)regex.operands[code[ip++]]; // operand is an index into regex.operands
+ int ss = s;
+ s++;
+ int c = chars[ss]; // the char being tested (s already points past it)
+ if (!cc.isCodeInCCLength(c)) {opFail(); return;}
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyChar() {
+ if (s >= range) {opFail(); return;}
+ if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharML() { // consume one char of any value (no newline exclusion here)
+ if (s >= range) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharStar() { // run over chars up to range, leaving a backtrack point before each one
+ final char[] chars = this.chars;
+ while (s < range) {
+ pushAlt(ip, s, sprev); // alternative: continue at ip from this position
+ if (isNewLine(chars, s, end)) {opFail(); return;} // a newline ends the run
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharMLStar() { // like opAnyCharStar but without the newline stop
+ while (s < range) {
+ pushAlt(ip, s, sprev); // alternative: continue at ip from this position
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharStarPeekNext() { // any-char run that only records backtrack points where the operand char occurs
+ final char c = (char)code[ip]; // char operand to look for
+ final char[] chars = this.chars;
+
+ while (s < range) {
+ char b = chars[s];
+ if (c == b) pushAlt(ip + 1, s, sprev); // ip + 1: the alternative resumes after the char operand
+ if (b == EncodingHelper.NEW_LINE) {opFail(); return;} // a newline ends the run
+ sprev = s;
+ s++;
+ }
+ ip++; // step over the char operand
+ sprev = sbegin; // break;
+ }
+
+ private void opAnyCharMLStarPeekNext() { // as opAnyCharStarPeekNext, without the newline stop
+ final char c = (char)code[ip]; // char operand to look for
+ final char[] chars = this.chars;
+
+ while (s < range) {
+ if (c == chars[s]) pushAlt(ip + 1, s, sprev); // ip + 1: the alternative resumes after the char operand
+ sprev = s;
+ s++;
+ }
+ ip++; // step over the char operand
+ sprev = sbegin; // break;
+ }
+
+ // CEC
+ private void opStateCheckAnyCharStar() { // opAnyCharStar plus a state check at every position
+ int mem = code[ip++]; // state-check id operand
+ final char[] chars = this.chars;
+
+ while (s < range) {
+ if (stateCheckVal(s, mem)) {opFail(); return;} // check already set for (s, mem): fail at once
+ pushAltWithStateCheck(ip, s, sprev, mem);
+ if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;} // a newline ends the run
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ // CEC
+ private void opStateCheckAnyCharMLStar() { // opAnyCharMLStar plus a state check at every position
+ int mem = code[ip++]; // state-check id operand
+
+ while (s < range) {
+ if (stateCheckVal(s, mem)) {opFail(); return;} // check already set for (s, mem): fail at once
+ pushAltWithStateCheck(ip, s, sprev, mem);
+ sprev = s;
+ s++;
+ }
+ sprev = sbegin; // break;
+ }
+
+ private void opWord() { // consume one char for which EncodingHelper.isWord is true
+ if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opNotWord() { // consume one char for which EncodingHelper.isWord is false
+ if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;}
+ s++;
+ sprev = sbegin; // break;
+ }
+
+ private void opWordBound() { // zero-width: succeed where word-ness changes between sprev and s
+ if (s == str) { // at the start: boundary iff the first char is a word char
+ if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
+ } else if (s == end) { // at the end: boundary iff the previous char is a word char
+ if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
+ } else { // in the middle: both neighbours must differ in word-ness
+ if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
+ }
+ }
+
+ private void opNotWordBound() { // zero-width: fail exactly where opWordBound's conditions would hold
+ if (s == str) { // at the start: fail if the first char is a word char
+ if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;}
+ } else if (s == end) { // at the end: fail if the previous char is a word char
+ if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
+ } else { // in the middle: fail if the neighbours differ in word-ness
+ if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
+ }
+ }
+
+ private void opWordBegin() {
+ if (s < range && EncodingHelper.isWord(chars[s])) {
+ if (s == str || !EncodingHelper.isWord(chars[sprev])) return;
+ }
+ opFail();
+ }
+
+ private void opWordEnd() {
+ if (s != str && EncodingHelper.isWord(chars[sprev])) {
+ if (s == end || !EncodingHelper.isWord(chars[s])) return;
+ }
+ opFail();
+ }
+
+ private void opBeginBuf() { // zero-width: succeed only when s is at str
+ if (s != str) opFail();
+ }
+
+ private void opEndBuf() { // zero-width: succeed only when s is at end
+ if (s != end) opFail();
+ }
+
+ private void opBeginLine() {
+ if (s == str) {
+ if (isNotBol(msaOptions)) opFail();
+ return;
+ } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) {
+ return;
+ }
+ opFail();
+ }
+
+ private void opEndLine() { // zero-width: end of input, or just before a line terminator
+ if (s == end) {
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) { // empty input, or input not ending in a newline
+ if (isNotEol(msaOptions)) opFail();
+ }
+ return; // note: also succeeds at end when the input ends in a newline
+ } else {
+ if (isNotEol(msaOptions)) opFail(); // the not-EOL match option vetoes the end of input
+ return;
+ }
+ } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) {
+ return;
+ }
+ opFail();
+ }
+
+ private void opSemiEndBuf() { // zero-width: end of input, or just before one final line terminator
+ if (s == end) {
+ if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ if (str == end || !isNewLine(chars, sprev, end)) { // empty input, or input not ending in a newline
+ if (isNotEol(msaOptions)) opFail();
+ }
+ return;
+ } else {
+ if (isNotEol(msaOptions)) opFail(); // the not-EOL match option vetoes the end of input
+ return;
+ }
+ } else if (isNewLine(chars, s, end) && s + 1 == end) { // newline is the very last char
+ return;
+ } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) {
+ int ss = s + 2; // position just after the two-char terminator
+ if (ss == end) return;
+ }
+ opFail();
+ }
+
+ private void opBeginPosition() { // zero-width: succeed only when s equals msaStart
+ if (s != msaStart) opFail();
+ }
+
+ private void opMemoryStartPush() { // record the start of group mem at s via a stack entry
+ int mem = code[ip++]; // group number operand
+ pushMemStart(mem, s);
+ }
+
+ private void opMemoryStart() { // record the start of group mem at s directly in repeatStk (no stack push)
+ int mem = code[ip++]; // group number operand
+ repeatStk[memStartStk + mem] = s;
+ }
+
+ private void opMemoryEndPush() { // record the end of group mem at s via a stack entry
+ int mem = code[ip++]; // group number operand
+ pushMemEnd(mem, s);
+ }
+
+ private void opMemoryEnd() { // record the end of group mem at s directly in repeatStk (no stack push)
+ int mem = code[ip++]; // group number operand
+ repeatStk[memEndStk + mem] = s;
+ }
+
+ private void opMemoryEndPushRec() { // push the end of group mem, then re-point its start slot at getMemStart's result
+ int mem = code[ip++]; // group number operand
+ int stkp = getMemStart(mem); /* should be before push mem-end. */
+ pushMemEnd(mem, s);
+ repeatStk[memStartStk + mem] = stkp; // the start slot now holds the value looked up above
+ }
+
+ private void opMemoryEndRec() { // store the end of group mem, refresh its start slot, then push an end mark
+ int mem = code[ip++]; // group number operand
+ repeatStk[memEndStk + mem] = s;
+ int stkp = getMemStart(mem); // stack index of the matching start entry (used as such below)
+
+ if (BitStatus.bsAt(regex.btMemStart, mem)) {
+ repeatStk[memStartStk + mem] = stkp; // this group's start slot holds a stack index
+ } else {
+ repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); // this group's start slot holds a position
+ }
+
+ pushMemEndMark(mem);
+ }
+
+ private boolean backrefInvalid(int mem) { // true when either the start or the end slot of group mem is INVALID_INDEX
+ return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX;
+ }
+
+ private int backrefStart(int mem) { // start position of group mem; the slot is a stack index when the btMemStart bit is set
+ return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem];
+ }
+
+ private int backrefEnd(int mem) { // end position of group mem; the slot is a stack index when the btMemEnd bit is set
+ return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem];
+ }
+
+    private void backref(int mem) { // match the text captured by group mem again at s (exact comparison)
+        /* if you want to remove following line,
+           you should check in parse and compile time. (numMem) */
+        if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+        int pstart = backrefStart(mem);
+        int pend = backrefEnd(mem);
+
+        int n = pend - pstart; // captured length
+        if (s + n > range) {opFail(); return;}
+        sprev = s;
+
+        // STRING_CMP
+        while (n-- > 0) if (chars[pstart++] != chars[s++]) {opFail(); return;}
+
+        // beyond string check: put sprev on the last char consumed (it stays at s
+        // for an empty capture). An unused "int len" local was removed here, and the
+        // one-step-at-a-time "while (sprev + 1 < s) sprev++" walk became an assignment.
+        if (sprev < range && sprev + 1 < s) {
+            sprev = s - 1;
+        }
+    }
+
+ private void opBackRef1() { // back-reference to group 1 (no operand)
+ backref(1);
+ }
+
+ private void opBackRef2() { // back-reference to group 2 (no operand)
+ backref(2);
+ }
+
+ private void opBackRefN() { // back-reference to the group numbered by the operand
+ backref(code[ip++]);
+ }
+
+    private void opBackRefNIC() { // case-insensitive back-reference to the group numbered by the operand
+        int mem = code[ip++];
+        /* if you want to remove following line,
+           you should check in parse and compile time. (numMem) */
+        if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
+
+        int pstart = backrefStart(mem);
+        int pend = backrefEnd(mem);
+
+        int n = pend - pstart; // captured length
+        if (s + n > range) {opFail(); return;}
+        sprev = s;
+
+        value = s; // cursor handed over through the value field (stringCmpIC receives this matcher)
+        if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;}
+        s = value; // read the cursor back; presumably advanced by stringCmpIC
+
+        // Put sprev on the last char consumed (it stays at s for an empty capture). Replaces
+        // an unused "int len" local and a one-step-at-a-time "while (sprev + 1 < s) sprev++".
+        if (sprev + 1 < s) sprev = s - 1;
+    }
+
+    private void opBackRefMulti() { // try each listed group in turn; the first whose capture matches at s wins
+        int tlen = code[ip++]; // number of group-number operands that follow
+
+        int i;
+        loop:for (i=0; i<tlen; i++) {
+            int mem = code[ip++];
+            if (backrefInvalid(mem)) continue; // start or end slot is INVALID_INDEX: try the next group
+
+            int pstart = backrefStart(mem);
+            int pend = backrefEnd(mem);
+
+            int n = pend - pstart; // captured length
+            if (s + n > range) {opFail(); return;}
+
+            sprev = s;
+            int swork = s; // scratch cursor, so s is untouched when this candidate mismatches
+
+            while (n-- > 0) {
+                if (chars[pstart++] != chars[swork++]) continue loop;
+            }
+
+            s = swork;
+
+            // (an unused "int len" local was removed here)
+
+            // beyond string check: sprev = last char consumed; was an O(n) "sprev++" walk
+            if (sprev < range && sprev + 1 < s) {
+                sprev = s - 1;
+            }
+
+            ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+            break; /* success */
+        }
+        if (i == tlen) {opFail(); return;} // no candidate matched
+    }
+
+    private void opBackRefMultiIC() { // case-insensitive variant of opBackRefMulti
+        int tlen = code[ip++]; // number of group-number operands that follow
+
+        int i;
+        loop:for (i=0; i<tlen; i++) {
+            int mem = code[ip++];
+            if (backrefInvalid(mem)) continue; // start or end slot is INVALID_INDEX: try the next group
+
+            int pstart = backrefStart(mem);
+            int pend = backrefEnd(mem);
+
+            int n = pend - pstart; // captured length
+            if (s + n > range) {opFail(); return;}
+
+            sprev = s;
+
+            value = s; // cursor handed over through the value field (stringCmpIC receives this matcher)
+            if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC
+            s = value;
+
+            // sprev = last char consumed (it stays at s for an empty capture). Replaces an
+            // unused "int len" local and a one-step-at-a-time "while (sprev + 1 < s) sprev++".
+            if (sprev + 1 < s) sprev = s - 1;
+
+            ip += tlen - i - 1; // * SIZE_MEMNUM (1)
+            break; /* success */
+        }
+        if (i == tlen) {opFail(); return;} // no candidate matched
+    }
+
+    private boolean memIsInMemp(int mem, int num, int memp) { // is mem one of the num values at code[memp..]?
+        // linear scan over the operand slots code[memp] .. code[memp + num - 1]
+        for (int at = memp, stop = memp + num; at < stop; at++) {
+            if (code[at] == mem) return true;
+        }
+        return false;
+    }
+
+ // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
+ private boolean backrefMatchAtNestedLevel(boolean ignoreCase, int caseFoldFlag, // scans the stack top-down for a capture at call depth nest
+ int nest, int memNum, int memp) { // memNum group numbers are listed at code[memp..]
+ int pend = -1; // end of the candidate capture; -1 until a MEM_END entry is seen
+ int level = 0; // relative call depth while walking down the stack
+ int k = stk - 1;
+
+ while (k >= 0) {
+ StackEntry e = stack[k];
+
+ if (e.type == CALL_FRAME) {
+ level--;
+ } else if (e.type == RETURN) {
+ level++;
+ } else if (level == nest) { // only entries at the requested depth are considered
+ if (e.type == MEM_START) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ int pstart = e.getMemPStr();
+ if (pend != -1) { // an end was seen earlier in the scan: compare that capture
+ if (pend - pstart > end - s) return false; /* or goto next_mem; */
+ int p = pstart;
+
+ value = s; // working cursor; s is only updated on success
+ if (ignoreCase) {
+ if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) {
+ return false; /* or goto next_mem; */
+ }
+ } else {
+ while (p < pend) {
+ if (chars[p++] != chars[value++]) return false; /* or goto next_mem; */
+ }
+ }
+ s = value; // commit: input cursor moves past the matched text
+
+ return true;
+ }
+ }
+ } else if (e.type == MEM_END) {
+ if (memIsInMemp(e.getMemNum(), memNum, memp)) {
+ pend = e.getMemPStr(); // remember the end; the matching start lies deeper in the stack
+ }
+ }
+ }
+ k--;
+ }
+ return false; // stack exhausted without a usable capture
+ }
+
+    private void opBackRefAtLevel() { // back-reference resolved at a given nesting level of the stack
+        int ic = code[ip++]; // non-zero: ignore case
+        int level = code[ip++]; // nesting level passed on as "nest"
+        int tlen = code[ip++]; // number of group numbers at code[ip..]
+
+        sprev = s;
+        if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
+            // sprev = last char consumed; replaces an unused "int len" and an O(n) "sprev++" walk
+            if (sprev + 1 < s) sprev = s - 1;
+            ip += tlen; // * SIZE_MEMNUM
+        } else {
+            {opFail(); return;}
+        }
+    }
+
+ /* no need: IS_DYNAMIC_OPTION() == 0 */
+ private void opSetOptionPush() { // options are fixed here: only pushes an alternative and skips ahead
+ // option = code[ip++]; // final for now
+ pushAlt(ip, s, sprev);
+ ip += OPSize.SET_OPTION + OPSize.FAIL; // step over the SET_OPTION and FAIL instructions that follow
+ }
+
+ private void opSetOption() { // intentionally a no-op: the option assignment below is disabled
+ // option = code[ip++]; // final for now
+ }
+
+ private void opNullCheckStart() { // push a null-check start marker for id mem at position s
+ int mem = code[ip++]; // null check id operand
+ pushNullCheckStart(mem, s);
+ }
+
+ private void nullCheckFound() {
+ // null_check_found:
+ /* empty loop founded, skip next instruction */
+ switch(code[ip++]) {
+ case OPCode.JUMP:
+ case OPCode.PUSH:
+ ip++; // p += SIZE_RELADDR;
+ break;
+ case OPCode.REPEAT_INC:
+ case OPCode.REPEAT_INC_NG:
+ case OPCode.REPEAT_INC_SG:
+ case OPCode.REPEAT_INC_NG_SG:
+ ip++; // p += SIZE_MEMNUM;
+ break;
+ default:
+ throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
+ } // switch
+ }
+
+ private void opNullCheckEnd() { // if nullCheck reports an empty loop, skip the next instruction
+ int mem = code[ip++];
+ int isNull = nullCheck(mem, s); /* mem: null check id */
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s);
+ }
+
+ nullCheckFound();
+ }
+ }
+
+ // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+ private void opNullCheckEndMemST() { // null check via nullCheckMemSt: -1 fails, other non-zero skips the next instruction
+ int mem = code[ip++]; /* mem: null check id */
+ int isNull = nullCheckMemSt(mem, s);
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;}
+ nullCheckFound();
+ }
+ }
+
+ // USE_SUBEXP_CALL
+ private void opNullCheckEndMemSTPush() { // recursive-variant null check; pushes an end marker when the loop is not empty
+ int mem = code[ip++]; /* mem: null check id */
+
+ int isNull;
+ if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
+ isNull = nullCheckMemStRec(mem, s);
+ } else {
+ isNull = nullCheckRec(mem, s);
+ }
+
+ if (isNull != 0) {
+ if (Config.DEBUG_MATCH) {
+ Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s);
+ }
+
+ if (isNull == -1) {opFail(); return;} // -1: fail instead of skipping
+ nullCheckFound();
+ } else {
+ pushNullCheckEnd(mem);
+ }
+ }
+
+ private void opJump() { // relative jump: the offset operand counts from the slot after the operand
+ ip += code[ip] + 1;
+ }
+
+ private void opPush() { // push an alternative at ip + addr and fall through to the next instruction
+ int addr = code[ip++]; // relative address operand
+ pushAlt(ip + addr, s, sprev);
+ }
+
+ // CEC
+ private void opStateCheckPush() { // like opPush, but fails at once when the state check for (s, mem) is set
+ int mem = code[ip++]; // state-check id operand
+ if (stateCheckVal(s, mem)) {opFail(); return;}
+ int addr = code[ip++]; // relative address operand
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+
+ // CEC
+ private void opStateCheckPushOrJump() { // jump when the state check for (s, mem) is set, else push the alternative
+ int mem = code[ip++]; // state-check id operand
+ int addr= code[ip++]; // relative address operand
+
+ if (stateCheckVal(s, mem)) {
+ ip += addr; // go straight to the alternative
+ } else {
+ pushAltWithStateCheck(ip + addr, s, sprev, mem);
+ }
+ }
+
+ // CEC
+ private void opStateCheck() { // fail when the state check for (s, mem) is set, else push a state-check entry
+ int mem = code[ip++]; // state-check id operand
+ if (stateCheckVal(s, mem)) {opFail(); return;}
+ pushStateCheck(s, mem);
+ }
+
+ private void opPop() { // delegates to popOne()
+ popOne();
+ }
+
+ private void opPushOrJumpExact1() {
+ int addr = code[ip++];
+ // beyond string check
+ if (s < range && code[ip] == chars[s]) {
+ ip++;
+ pushAlt(ip + addr, s, sprev);
+ return;
+ }
+ ip += addr + 1;
+ }
+
+ private void opPushIfPeekNext() {
+ int addr = code[ip++];
+ // beyond string check
+ if (s < range && code[ip] == chars[s]) {
+ ip++;
+ pushAlt(ip + addr, s, sprev);
+ return;
+ }
+ ip++;
+ }
+
+ private void opRepeat() { // enter a counted repeat: push its repeat entry and note where it sits
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++]; // relative address operand (used for the alternative below)
+
+ // ensure1();
+ repeatStk[mem] = stk; // stack index the repeat entry is about to occupy
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) { // lower
+ pushAlt(ip + addr, s, sprev); // zero iterations allowed: alternative at ip + addr
+ }
+ }
+
+ private void opRepeatNG() { // as opRepeat, but with lower bound 0 it jumps ahead and keeps the body as the alternative
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int addr= code[ip++]; // relative address operand
+
+ // ensure1();
+ repeatStk[mem] = stk; // stack index the repeat entry is about to occupy
+ pushRepeat(mem, ip);
+
+ if (regex.repeatRangeLo[mem] == 0) {
+ pushAlt(ip, s, sprev); // alternative: run the body
+ ip += addr; // preferred path: jump by addr
+ }
+ }
+
+ private void repeatInc(int mem, int si) { // bump the counter of the repeat entry at stack[si] and pick the next ip
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
+ /* end of repeat. Nothing to do. */
+ } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { // between bounds: loop again, keep "stop here" as alternative
+ pushAlt(ip, s, sprev);
+ ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
+ } else { // below the lower bound: must loop again
+ ip = e.getRepeatPCode();
+ }
+ pushRepeatInc(si); // pushed in all three cases
+ }
+
+ private void opRepeatInc() { // repeatInc with the entry index taken from repeatStk
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = repeatStk[mem];
+ repeatInc(mem, si);
+ }
+
+ private void opRepeatIncSG() { // repeatInc with the entry index obtained from getRepeat(mem)
+ int mem = code[ip++]; /* mem: OP_REPEAT ID */
+ int si = getRepeat(mem);
+ repeatInc(mem, si);
+ }
+
+ private void repeatIncNG(int mem, int si) { // counterpart of repeatInc that prefers leaving the loop
+ StackEntry e = stack[si];
+
+ e.increaseRepeatCount();
+
+ if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
+ if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { // between bounds: fall through, keep "loop again" as alternative
+ int pcode = e.getRepeatPCode(); // read before the pushes below
+ pushRepeatInc(si);
+ pushAlt(pcode, s, sprev);
+ } else { // below the lower bound: must loop again
+ ip = e.getRepeatPCode();
+ pushRepeatInc(si);
+ }
+ } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { // upper bound reached exactly
+ pushRepeatInc(si);
+ }
+ }
+
+ private void opRepeatIncNG() { // repeatIncNG with the entry index taken from repeatStk
+ int mem = code[ip++]; // repeat id operand
+ int si = repeatStk[mem];
+ repeatIncNG(mem, si);
+ }
+
+ private void opRepeatIncNGSG() { // repeatIncNG with the entry index obtained from getRepeat(mem)
+ int mem = code[ip++]; // repeat id operand
+ int si = getRepeat(mem);
+ repeatIncNG(mem, si);
+ }
+
+ private void opPushPos() { // save the current s and sprev in a position entry (restored by opPopPos)
+ pushPos(s, sprev);
+ }
+
+ private void opPopPos() { // restore s and sprev from the stack entry located by posEnd()
+ StackEntry e = stack[posEnd()];
+ s = e.getStatePStr();
+ sprev= e.getStatePStrPrev();
+ }
+
+ private void opPushPosNot() { // push a pos-not entry whose resume address is ip + addr
+ int addr = code[ip++]; // relative address operand
+ pushPosNot(ip + addr, s, sprev);
+ }
+
+ private void opFailPos() { // unwind via popTilPosNot(), then fail
+ popTilPosNot();
+ opFail();
+ }
+
+ private void opPushStopBT() { // delegates to pushStopBT()
+ pushStopBT();
+ }
+
+ private void opPopStopBT() { // delegates to stopBtEnd()
+ stopBtEnd();
+ }
+
+ private void opLookBehind() { // move s back by tlen via EncodingHelper.stepBack; fail if that returns -1
+ int tlen = code[ip++]; // look-behind length operand
+ s = EncodingHelper.stepBack(str, s, tlen);
+ if (s == -1) {opFail(); return;}
+ sprev = EncodingHelper.prevCharHead(str, s);
+ }
+
+ private void opLookBehindSb() { // move s back by tlen with plain index arithmetic; fail if that passes str
+ int tlen = code[ip++]; // look-behind length operand
+ s -= tlen;
+ if (s < str) {opFail(); return;}
+ sprev = s == str ? -1 : s - 1; // -1 marks "no previous char"
+ }
+
+ private void opPushLookBehindNot() { // start a negative look-behind of length tlen
+ int addr = code[ip++]; // relative address operand
+ int tlen = code[ip++]; // look-behind length operand
+ int q = EncodingHelper.stepBack(str, s, tlen);
+ if (q == -1) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ ip += addr;
+ // return FAIL;
+ } else {
+ pushLookBehindNot(ip + addr, s, sprev); // saves the current position with resume address ip + addr
+ s = q; // run the look-behind body from the stepped-back position
+ sprev = EncodingHelper.prevCharHead(str, s);
+ }
+ }
+
+ private void opFailLookBehindNot() { // unwind via popTilLookBehindNot(), then fail
+ popTilLookBehindNot();
+ opFail();
+ }
+
+ private void opCall() { // subexpression call: push a call frame holding the return ip, then jump
+ int addr = code[ip++]; // call target operand
+ pushCallFrame(ip); // ip now points at the instruction after the call
+ ip = addr; // absolute address
+ }
+
+ private void opReturn() { // continue at the address supplied by sreturn() and push a return marker
+ ip = sreturn();
+ pushReturn();
+ }
+
+ private void opFail() { // backtrack: resume from the entry returned by pop()
+ if (stack == null) { // no backtrack stack: continue at the last instruction of the program
+ ip = regex.codeLength - 1;
+ return;
+ }
+
+
+ StackEntry e = pop();
+ ip = e.getStatePCode(); // restore the instruction pointer and both input cursors from the entry
+ s = e.getStatePStr();
+ sprev = e.getStatePStrPrev();
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.getStateCheck() != 0) {
+ e.type = STATE_CHECK_MARK; // retype the popped entry as a state-check mark
+ stk++; // NOTE(review): presumably puts that entry back on the stack - confirm against pop()
+ }
+ }
+ }
+
+ private int finish() { // result of the match run: the bestLen field as it stands
+ return bestLen;
+ }
+}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodePrinter.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,416 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+
+class ByteCodePrinter {
+ final int[]code;
+ final int codeLength;
+ final char[][] templates;
+
+ Object[]operands;
+ int operantCount;
+ WarnCallback warnings;
+
+ public ByteCodePrinter(Regex regex) {
+ code = regex.code;
+ codeLength = regex.codeLength;
+ operands = regex.operands;
+ operantCount = regex.operandLength;
+
+ templates = regex.templates;
+ warnings = regex.warnings;
+ }
+
+ public String byteCodeListToString() {
+ return compiledByteCodeListToString();
+ }
+
+ private void pString(StringBuilder sb, int len, int s) {
+ sb.append(":");
+ while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
+ }
+
+ private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) {
+ sb.append(":T:");
+ while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]}));
+ }
+
+ private void pLenString(StringBuilder sb, int len, int mbLen, int s) {
+ int x = len * mbLen;
+ sb.append(":" + len + ":");
+ while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]}));
+ }
+
+ private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, char[] tm, int idx) {
+ int x = len * mbLen;
+ sb.append(":T:" + len + ":");
+ while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]}));
+ }
+
+ public int compiledByteCodeToString(StringBuilder sb, int bp) {
+ int len, n, mem, addr, scn, cod;
+ BitSet bs;
+ CClassNode cc;
+ int tm, idx;
+
+ sb.append("[" + OPCode.OpCodeNames[code[bp]]);
+ int argType = OPCode.OpCodeArgTypes[code[bp]];
+ int ip = bp;
+ if (argType != Arguments.SPECIAL) {
+ bp++;
+ switch (argType) {
+ case Arguments.NON:
+ break;
+
+ case Arguments.RELADDR:
+ sb.append(":(" + code[bp] + ")");
+ bp += OPSize.RELADDR;
+ break;
+
+ case Arguments.ABSADDR:
+ sb.append(":(" + code[bp] + ")");
+ bp += OPSize.ABSADDR;
+ break;
+
+ case Arguments.LENGTH:
+ sb.append(":" + code[bp]);
+ bp += OPSize.LENGTH;
+ break;
+
+ case Arguments.MEMNUM:
+ sb.append(":" + code[bp]);
+ bp += OPSize.MEMNUM;
+ break;
+
+ case Arguments.OPTION:
+ sb.append(":" + code[bp]);
+ bp += OPSize.OPTION;
+ break;
+
+ case Arguments.STATE_CHECK:
+ sb.append(":" + code[bp]);
+ bp += OPSize.STATE_CHECK;
+ break;
+ }
+ } else {
+ switch (code[bp++]) {
+ case OPCode.EXACT1:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
+ case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB:
+ case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB:
+ pString(sb, 1, bp++);
+ break;
+
+ case OPCode.EXACT2:
+ pString(sb, 2, bp);
+ bp += 2;
+ break;
+
+ case OPCode.EXACT3:
+ pString(sb, 3, bp);
+ bp += 3;
+ break;
+
+ case OPCode.EXACT4:
+ pString(sb, 4, bp);
+ bp += 4;
+ break;
+
+ case OPCode.EXACT5:
+ pString(sb, 5, bp);
+ bp += 5;
+ break;
+
+ case OPCode.EXACTN:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ if (Config.USE_STRING_TEMPLATES) {
+ tm = code[bp];
+ bp += OPSize.INDEX;
+ idx = code[bp];
+ bp += OPSize.INDEX;
+ pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
+ } else {
+ pLenString(sb, len, 1, bp);
+ bp += len;
+ }
+ break;
+
+ case OPCode.EXACTMB2N1:
+ pString(sb, 2, bp);
+ bp += 2;
+ break;
+
+ case OPCode.EXACTMB2N2:
+ pString(sb, 4, bp);
+ bp += 4;
+ break;
+
+ case OPCode.EXACTMB2N3:
+ pString(sb, 6, bp);
+ bp += 6;
+ break;
+
+ case OPCode.EXACTMB2N:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ if (Config.USE_STRING_TEMPLATES) {
+ tm = code[bp];
+ bp += OPSize.INDEX;
+ idx = code[bp];
+ bp += OPSize.INDEX;
+ pLenStringFromTemplate(sb, len, 2, templates[tm], idx);
+ } else {
+ pLenString(sb, len, 2, bp);
+ bp += len * 2;
+ }
+ break;
+
+ case OPCode.EXACTMB3N:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ if (Config.USE_STRING_TEMPLATES) {
+ tm = code[bp];
+ bp += OPSize.INDEX;
+ idx = code[bp];
+ bp += OPSize.INDEX;
+ pLenStringFromTemplate(sb, len, 3, templates[tm], idx);
+ } else {
+ pLenString(sb, len, 3, bp);
+ bp += len * 3;
+ }
+ break;
+
+ case OPCode.EXACTMBN:
+ int mbLen = code[bp];
+ bp += OPSize.LENGTH;
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ n = len * mbLen;
+
+ if (Config.USE_STRING_TEMPLATES) {
+ tm = code[bp];
+ bp += OPSize.INDEX;
+ idx = code[bp];
+ bp += OPSize.INDEX;
+ sb.append(":T:" + mbLen + ":" + len + ":");
+
+ while (n-- > 0) sb.append(new String(new char[]{templates[tm][idx++]}));
+ } else {
+ sb.append(":" + mbLen + ":" + len + ":");
+
+ while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]}));
+ }
+
+ break;
+
+ case OPCode.EXACT1_IC:
+ case OPCode.EXACT1_IC_SB:
+ final int MAX_CHAR_LENGTH = 6;
+ byte[]bytes = new byte[MAX_CHAR_LENGTH];
+ for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i];
+ pString(sb, 1, bp);
+ bp++;
+ break;
+
+ case OPCode.EXACTN_IC:
+ case OPCode.EXACTN_IC_SB:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ if (Config.USE_STRING_TEMPLATES) {
+ tm = code[bp];
+ bp += OPSize.INDEX;
+ idx = code[bp];
+ bp += OPSize.INDEX;
+ pLenStringFromTemplate(sb, len, 1, templates[tm], idx);
+ } else {
+ pLenString(sb, len, 1, bp);
+ bp += len;
+ }
+ break;
+
+ case OPCode.CCLASS:
+ case OPCode.CCLASS_SB:
+ bs = new BitSet();
+ System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
+ n = bs.numOn();
+ bp += BitSet.BITSET_SIZE;
+ sb.append(":" + n);
+ break;
+
+ case OPCode.CCLASS_NOT:
+ case OPCode.CCLASS_NOT_SB:
+ bs = new BitSet();
+ System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
+ n = bs.numOn();
+ bp += BitSet.BITSET_SIZE;
+ sb.append(":" + n);
+ break;
+
+ case OPCode.CCLASS_MB:
+ case OPCode.CCLASS_MB_NOT:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ cod = code[bp];
+ //bp += OPSize.CODE_POINT;
+ bp += len;
+ sb.append(":" + cod + ":" + len);
+ break;
+
+ case OPCode.CCLASS_MIX:
+ case OPCode.CCLASS_MIX_NOT:
+ bs = new BitSet();
+ System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
+ n = bs.numOn();
+ bp += BitSet.BITSET_SIZE;
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ cod = code[bp];
+ //bp += OPSize.CODE_POINT;
+ bp += len;
+ sb.append(":" + n + ":" + cod + ":" + len);
+ break;
+
+ case OPCode.CCLASS_NODE:
+ cc = (CClassNode)operands[code[bp]];
+ bp += OPSize.POINTER;
+ n = cc.bs.numOn();
+ sb.append(":" + cc + ":" + n);
+ break;
+
+ case OPCode.BACKREFN_IC:
+ mem = code[bp];
+ bp += OPSize.MEMNUM;
+ sb.append(":" + mem);
+ break;
+
+ case OPCode.BACKREF_MULTI_IC:
+ case OPCode.BACKREF_MULTI:
+ sb.append(" ");
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ for (int i=0; i<len; i++) {
+ mem = code[bp];
+ bp += OPSize.MEMNUM;
+ if (i > 0) sb.append(", ");
+ sb.append(mem);
+ }
+ break;
+
+ case OPCode.BACKREF_WITH_LEVEL: {
+ int option = code[bp];
+ bp += OPSize.OPTION;
+ sb.append(":" + option);
+ int level = code[bp];
+ bp += OPSize.LENGTH;
+ sb.append(":" + level);
+ sb.append(" ");
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ for (int i=0; i<len; i++) {
+ mem = code[bp];
+ bp += OPSize.MEMNUM;
+ if (i > 0) sb.append(", ");
+ sb.append(mem);
+ }
+ break;
+ }
+
+ case OPCode.REPEAT:
+ case OPCode.REPEAT_NG:
+ mem = code[bp];
+ bp += OPSize.MEMNUM;
+ addr = code[bp];
+ bp += OPSize.RELADDR;
+ sb.append(":" + mem + ":" + addr);
+ break;
+
+ case OPCode.PUSH_OR_JUMP_EXACT1:
+ case OPCode.PUSH_IF_PEEK_NEXT:
+ addr = code[bp];
+ bp += OPSize.RELADDR;
+ sb.append(":(" + addr + ")");
+ pString(sb, 1, bp);
+ bp++;
+ break;
+
+ case OPCode.LOOK_BEHIND:
+ case OPCode.LOOK_BEHIND_SB:
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ sb.append(":" + len);
+ break;
+
+ case OPCode.PUSH_LOOK_BEHIND_NOT:
+ addr = code[bp];
+ bp += OPSize.RELADDR;
+ len = code[bp];
+ bp += OPSize.LENGTH;
+ sb.append(":" + len + ":(" + addr + ")");
+ break;
+
+ case OPCode.STATE_CHECK_PUSH:
+ case OPCode.STATE_CHECK_PUSH_OR_JUMP:
+ scn = code[bp];
+ bp += OPSize.STATE_CHECK_NUM;
+ addr = code[bp];
+ bp += OPSize.RELADDR;
+ sb.append(":" + scn + ":(" + addr + ")");
+ break;
+
+ default:
+ throw new InternalException("undefined code: " + code[--bp]);
+ }
+ }
+
+ sb.append("]");
+
+ // @opcode_address(opcode_size)
+ if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append("@" + ip + "(" + (bp - ip) + ")");
+
+ return bp;
+ }
+
+ private String compiledByteCodeListToString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("code length: " + codeLength + "\n");
+
+ int ncode = 0;
+ int bp = 0;
+ int end = codeLength;
+
+ while (bp < end) {
+ ncode++;
+
+ if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " ");
+
+ bp = compiledByteCodeToString(sb, bp);
+ }
+ sb.append("\n");
+ return sb.toString();
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CaptureTreeNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,74 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public class CaptureTreeNode {
+ // A node of a capture tree: a group number, a begin/end pair and a growable array of child nodes.
+ // A node that has not been filled in carries group == -1 and beg == end == Region.REGION_NOTPOS.
+ int group;
+ int beg;
+ int end;
+ // int allocated;
+ int numChildren; // count of used slots at the front of children
+ CaptureTreeNode[]children; // null until the first addChild call
+
+ CaptureTreeNode() {
+ beg = Region.REGION_NOTPOS;
+ end = Region.REGION_NOTPOS;
+ group = -1;
+ }
+ // Initial capacity of the children array; the array doubles whenever it is full.
+ static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8;
+ void addChild(CaptureTreeNode child) {
+ if (children == null) {
+ children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE];
+ } else if (numChildren >= children.length) {
+ CaptureTreeNode[]tmp = new CaptureTreeNode[children.length << 1];
+ System.arraycopy(children, 0, tmp, 0, children.length);
+ children = tmp;
+ }
+ // append at the first free slot
+ children[numChildren] = child;
+ numChildren++;
+ }
+ // Resets this node to the freshly constructed state; the children array itself is kept for reuse.
+ void clear() {
+ for (int i=0; i<numChildren; i++) {
+ children[i] = null; // drop the reference so the detached child is no longer reachable from here
+ }
+ numChildren = 0;
+ beg = end = Region.REGION_NOTPOS;
+ group = -1;
+ }
+ // Returns a deep copy of this node: group, beg and end are copied and every child is cloned recursively.
+ CaptureTreeNode cloneTree() {
+ CaptureTreeNode clone = new CaptureTreeNode();
+ clone.beg = beg;
+ clone.end = end;
+ clone.group = group; // without this the copy would keep the constructor default of -1
+ for (int i=0; i<numChildren; i++) {
+ CaptureTreeNode child = children[i].cloneTree();
+ clone.addChild(child);
+ }
+ return clone;
+ }
+
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,378 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
+
+public final class CodeRangeBuffer {
+ private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; // initial length of p
+ private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff; // upper end of the "everything" range
+ // Layout: p[0] holds the range count n; range i is the inclusive pair p[i*2+1] (from), p[i*2+2] (to).
+ int[]p;
+ int used; // number of leading ints of p that carry data
+ // Wraps (does not copy) an array that already follows the layout above.
+ public CodeRangeBuffer(int[]ranges) {
+ p = ranges;
+ used = ranges[0] * 2 + 1; // the count slot plus two ints per range
+ }
+ // Creates an empty buffer: zero ranges, used == 1.
+ public CodeRangeBuffer() {
+ p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
+ writeCodePoint(0, 0);
+ }
+ // Returns the backing array itself, not a copy.
+ public int[]getCodeRange() {
+ return p;
+ }
+ // Copy constructor behind clone(): duplicates the whole backing array.
+ private CodeRangeBuffer(CodeRangeBuffer orig) {
+ p = new int[orig.p.length];
+ System.arraycopy(orig.p, 0, p, 0, p.length);
+ used = orig.used;
+ }
+ // Debug rendering: the used count, the range count and every range in hexadecimal.
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("CodeRange");
+ buf.append("\n used: " + used);
+ buf.append("\n code point: " + p[0]);
+ buf.append("\n ranges: ");
+
+ for (int i=0; i<p[0]; i++) {
+ buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]");
+ if (i > 0 && i % 6 == 0) buf.append("\n ");
+ }
+
+ return buf.toString();
+ }
+ // Formats num as "0x" followed by its base-16 digits.
+ private static String rangeNumToString(int num){
+ return "0x" + Integer.toString(num, 16);
+ }
+ // Reallocates p: the length is doubled at least once and until it is >= low; the first 'used' ints are kept.
+ public void expand(int low) {
+ int length = p.length;
+ do { length <<= 1; } while (length < low);
+ int[]tmp = new int[length];
+ System.arraycopy(p, 0, tmp, 0, used);
+ p = tmp;
+ }
+ // Grows p by repeated doubling until its length is >= size; a no-op when p is already long enough.
+ public void ensureSize(int size) {
+ int length = p.length;
+ while (length < size ) { length <<= 1; }
+ if (p.length != length) {
+ int[]tmp = new int[length];
+ System.arraycopy(p, 0, tmp, 0, used);
+ p = tmp;
+ }
+ }
+ // Copies n ints from index 'from' to index 'to', growing p first if needed, and raises 'used' to cover the target.
+ private void moveRight(int from, int to, int n) {
+ if (to + n > p.length) expand(to + n);
+ System.arraycopy(p, from, p, to, n);
+ if (to + n > used) used = to + n;
+ }
+ // Copies n ints from index 'from' to index 'to'; 'used' is left unchanged.
+ protected void moveLeft(int from, int to, int n) {
+ System.arraycopy(p, from, p, to, n);
+ }
+ // Moves the ints in [from, used) down to index 'to' and shrinks 'used' by the distance moved.
+ private void moveLeftAndReduce(int from, int to) {
+ System.arraycopy(p, from, p, to, used - from);
+ used -= from - to;
+ }
+ // Stores b at index pos, growing p and raising 'used' when pos lies beyond them.
+ public void writeCodePoint(int pos, int b) {
+ int u = pos + 1;
+ if (p.length < u) expand(u);
+ p[pos] = b;
+ if (used < u) used = u;
+ }
+ // Returns an independent copy (separate backing array).
+ public CodeRangeBuffer clone() {
+ return new CodeRangeBuffer(this);
+ }
+
+ // ugly part: these methods should be made OO
+ // add_code_range_to_buf: inserts [from, to] (swapped if reversed), merging overlapping or adjacent ranges
+ public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) {
+ if (from > to) {
+ int n = from;
+ from = to;
+ to = n;
+ }
+
+ if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode
+
+ int[]p = pbuf.p;
+ int n = p[0];
+ // binary search: low becomes the index of the first range whose end is >= from
+ int low = 0;
+ int bound = n;
+
+ while (low < bound) {
+ int x = (low + bound) >>> 1;
+ if (from > p[x * 2 + 2]) {
+ low = x + 1;
+ } else {
+ bound = x;
+ }
+ }
+ // binary search: high becomes the index of the first range (at or after low) whose start is > to + 1
+ int high = low;
+ bound = n;
+
+ while (high < bound) {
+ int x = (high + bound) >>> 1;
+ if (to >= p[x * 2 + 1] - 1) {
+ high = x + 1;
+ } else {
+ bound = x;
+ }
+ }
+ // ranges low..high-1 are replaced by a single merged range, so the count changes by incN
+ int incN = low + 1 - high;
+
+ if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
+ // when at least one existing range is absorbed, widen [from, to] so that it covers the absorbed ranges
+ if (incN != 1) {
+ if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
+ if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
+ }
+ // shift the ranges from index high onwards so that they start right after slot low
+ if (incN != 0 && high < n) {
+ int fromPos = 1 + high * 2;
+ int toPos = 1 + (low + 1) * 2;
+ int size = (n - high) * 2;
+
+ if (incN > 0) {
+ pbuf.moveRight(fromPos, toPos, size);
+ } else {
+ pbuf.moveLeftAndReduce(fromPos, toPos);
+ }
+ }
+ // write the merged range into slot low and store the new count in p[0]
+ int pos = 1 + low * 2;
+ // pbuf.ensureSize(pos + 2);
+ pbuf.writeCodePoint(pos, from);
+ pbuf.writeCodePoint(pos + 1, to);
+ n += incN;
+ pbuf.writeCodePoint(0, n);
+
+ return pbuf;
+ }
+
+ // add_code_range, be aware of it returning null!
+ public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
+ if (from >to) {
+ if (env.syntax.allowEmptyRangeInCC()) {
+ return pbuf;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+ }
+ return addCodeRangeToBuff(pbuf, from, to);
+ }
+
+ // SET_ALL_MULTI_BYTE_RANGE
+ protected static CodeRangeBuffer setAllMultiByteRange(CodeRangeBuffer pbuf) {
+ return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
+ }
+
+ // ADD_ALL_MULTI_BYTE_RANGE
+ public static CodeRangeBuffer addAllMultiByteRange(CodeRangeBuffer pbuf) {
+ return setAllMultiByteRange(pbuf);
+ }
+
+ // not_code_range_buf: complement of bbuf within [mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE]; null if nothing is left
+ public static CodeRangeBuffer notCodeRangeBuff(CodeRangeBuffer bbuf) {
+ CodeRangeBuffer pbuf = null;
+
+ if (bbuf == null) return setAllMultiByteRange(pbuf);
+
+ int[]p = bbuf.p;
+ int n = p[0];
+
+ if (n <= 0) return setAllMultiByteRange(pbuf);
+
+ int pre = EncodingHelper.mbcodeStartPosition();
+ // emit the gap in front of every range; 'pre' is the first value not yet accounted for
+ int from;
+ int to = 0;
+ for (int i=0; i<n; i++) {
+ from = p[i * 2 + 1];
+ to = p[i * 2 + 2];
+ if (pre <= from - 1) {
+ pbuf = addCodeRangeToBuff(pbuf, pre, from - 1);
+ }
+ if (to == ALL_MULTI_BYTE_RANGE) break;
+ pre = to + 1;
+ }
+ // emit the gap after the last range, if any
+ if (to < ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE);
+ return pbuf;
+ }
+
+ // or_code_range_buf: notN marks operand N as negated; a null buffer stands for "no ranges"
+ public static CodeRangeBuffer orCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
+ CodeRangeBuffer bbuf2, boolean not2) {
+ CodeRangeBuffer pbuf = null;
+
+ if (bbuf1 == null && bbuf2 == null) {
+ if (not1 || not2) {
+ return setAllMultiByteRange(pbuf);
+ }
+ return null;
+ }
+ // make sure that, if exactly one buffer is null, it is bbuf1
+ if (bbuf2 == null) {
+ CodeRangeBuffer tbuf;
+ boolean tnot;
+ // swap
+ tnot = not1; not1 = not2; not2 = tnot;
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+ }
+
+ if (bbuf1 == null) {
+ if (not1) {
+ return setAllMultiByteRange(pbuf);
+ } else {
+ if (!not2) {
+ return bbuf2.clone();
+ } else {
+ return notCodeRangeBuff(bbuf2);
+ }
+ }
+ }
+ // both buffers are non-null from here on; move a negated operand into position 2
+ if (not1) {
+ CodeRangeBuffer tbuf;
+ boolean tnot;
+ // swap
+ tnot = not1; not1 = not2; not2 = tnot;
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+ }
+ // NOTE(review): if both flags are set, pbuf stays null here and only bbuf1's ranges are returned - confirm callers never pass that
+ if (!not2 && !not1) { /* 1 OR 2 */
+ pbuf = bbuf2.clone();
+ } else if (!not1) { /* 1 OR (not 2) */
+ pbuf = notCodeRangeBuff(bbuf2);
+ }
+
+ int[]p1 = bbuf1.p;
+ int n1 = p1[0];
+
+ for (int i=0; i<n1; i++) {
+ int from = p1[i * 2 + 1];
+ int to = p1[i * 2 + 2];
+ pbuf = addCodeRangeToBuff(pbuf, from, to);
+ }
+
+ return pbuf;
+ }
+
+ // and_code_range1: adds to pbuf the parts of [from1, to1] that none of the n ranges in data covers
+ public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, int[]data, int n) {
+ for (int i=0; i<n; i++) {
+ int from2 = data[i * 2 + 1];
+ int to2 = data[i * 2 + 2];
+ if (from2 < from1) {
+ if (to2 < from1) {
+ continue;
+ } else {
+ if (to2 >= to1) return pbuf; // everything left is excluded; also keeps to2 + 1 from overflowing int
+ from1 = to2 + 1;
+ }
+ } else if (from2 <= to1) {
+ if (to2 < to1) {
+ if (from1 <= from2 - 1) {
+ pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1);
+ }
+ from1 = to2 + 1;
+ } else {
+ to1 = from2 - 1;
+ }
+ } else {
+ break; // assumes data is ascending (as addCodeRangeToBuff keeps it): nothing later can intersect
+ }
+ if (from1 > to1) break;
+ }
+ // whatever is left of [from1, to1] was not excluded by any range
+ if (from1 <= to1) {
+ pbuf = addCodeRangeToBuff(pbuf, from1, to1);
+ }
+ return pbuf;
+ }
+
+ // and_code_range_buf: notN marks operand N as negated; the result may be null
+ public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
+ CodeRangeBuffer bbuf2, boolean not2) {
+ CodeRangeBuffer pbuf = null;
+
+ if (bbuf1 == null) {
+ if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */
+ return null;
+ } else if (bbuf2 == null) {
+ if (not2) return bbuf1.clone();
+ return null;
+ }
+ // both buffers are non-null from here on; move a negated operand into position 2
+ if (not1) {
+ CodeRangeBuffer tbuf;
+ boolean tnot;
+ // swap
+ tnot = not1; not1 = not2; not2 = tnot;
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
+ }
+
+ int[]p1 = bbuf1.p;
+ int n1 = p1[0];
+ int[]p2 = bbuf2.p;
+ int n2 = p2[0];
+ // if not1 is still set after the swap (both operands negated) neither branch runs and null is returned
+ if (!not2 && !not1) { /* 1 AND 2 */
+ for (int i=0; i<n1; i++) {
+ int from1 = p1[i * 2 + 1];
+ int to1 = p1[i * 2 + 2];
+
+ for (int j=0; j<n2; j++) {
+ int from2 = p2[j * 2 + 1];
+ int to2 = p2[j * 2 + 2];
+
+ if (from2 > to1) break;
+ if (to2 < from1) continue;
+ int from = from1 > from2 ? from1 : from2;
+ int to = to1 < to2 ? to1 : to2;
+ pbuf = addCodeRangeToBuff(pbuf, from, to);
+ }
+ }
+ } else if (!not1) { /* 1 AND (not 2) */
+ for (int i=0; i<n1; i++) {
+ int from1 = p1[i * 2 + 1];
+ int to1 = p1[i * 2 + 2];
+ pbuf = andCodeRange1(pbuf, from1, to1, p2, n2);
+ }
+ }
+
+ return pbuf;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Compiler.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,178 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
+
+abstract class Compiler implements ErrorMessages {
+ // Skeleton of a compiler back end: walks the Analyser's parse tree and hands each node to the matching hook.
+ protected final Analyser analyser;
+ protected final Regex regex;
+
+ protected Compiler(Analyser analyser) {
+ this.analyser = analyser;
+ this.regex = analyser.regex;
+ }
+
+ // Runs one compilation: prepare(), then compileTree(analyser.root), then finish().
+ final void compile() {
+ prepare();
+ compileTree(analyser.root);
+ finish();
+ }
+
+ // Hooks called by compile() before and after the tree walk.
+ protected abstract void prepare();
+ protected abstract void finish();
+
+ // Called by compileTree for NodeType.ALT nodes.
+ protected abstract void compileAltNode(ConsAltNode node);
+
+ // Compiles a raw string node; ignoreCase is always passed as false for raw strings.
+ private void compileStringRawNode(StringNode sn) {
+ if (sn.length() <= 0) return;
+ addCompileString(sn.chars, sn.p, 1 /*sb*/, sn.length(), false);
+ }
+
+ // Compiles a non-raw string node as a single string; isAmbig() is passed on as ignoreCase.
+ private void compileStringNode(StringNode node) {
+ if (node.length() <= 0) return;
+ // Each char counts as one unit (mbLength is 1), so the string length is end - p. The lower bound
+ // of 1 keeps the value identical to the counting loop this replaces, which counted the first char unconditionally.
+ int slen = Math.max(1, node.end - node.p);
+ addCompileString(node.chars, node.p, 1, slen, node.isAmbig());
+ }
+
+ // Receives a string as strLength chars of 'chars' starting at index p; both callers here pass 1 for mbLength.
+ protected abstract void addCompileString(char[] chars, int p, int mbLength, int strLength, boolean ignoreCase);
+
+ // Per-node-type hooks; compileTree picks one according to node.getType().
+ protected abstract void compileCClassNode(CClassNode node);
+ protected abstract void compileCTypeNode(CTypeNode node);
+ protected abstract void compileAnyCharNode();
+ protected abstract void compileCallNode(CallNode node);
+ protected abstract void compileBackrefNode(BackRefNode node);
+ protected abstract void compileCECQuantifierNode(QuantifierNode node);
+ protected abstract void compileNonCECQuantifierNode(QuantifierNode node);
+ protected abstract void compileOptionNode(EncloseNode node);
+ protected abstract void compileEncloseNode(EncloseNode node);
+ protected abstract void compileAnchorNode(AnchorNode node);
+
+ // Compiles the subtree rooted at node. A LIST is compiled element by element in order; every other
+ // node type is forwarded to its hook. An unknown type ends in newInternalException(ERR_PARSER_BUG).
+ protected final void compileTree(Node node) {
+ switch (node.getType()) {
+ case NodeType.LIST:
+ ConsAltNode lin = (ConsAltNode)node;
+ do {
+ compileTree(lin.car);
+ } while ((lin = lin.cdr) != null);
+ break;
+
+ case NodeType.ALT:
+ compileAltNode((ConsAltNode)node);
+ break;
+
+ case NodeType.STR:
+ StringNode sn = (StringNode)node;
+ if (sn.isRaw()) {
+ compileStringRawNode(sn);
+ } else {
+ compileStringNode(sn);
+ }
+ break;
+
+ case NodeType.CCLASS:
+ compileCClassNode((CClassNode)node);
+ break;
+
+ case NodeType.CTYPE:
+ compileCTypeNode((CTypeNode)node);
+ break;
+
+ case NodeType.CANY:
+ compileAnyCharNode();
+ break;
+
+ case NodeType.BREF:
+ compileBackrefNode((BackRefNode)node);
+ break;
+
+ case NodeType.CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ compileCallNode((CallNode)node);
+ break;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case NodeType.QTFR:
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ compileCECQuantifierNode((QuantifierNode)node);
+ } else {
+ compileNonCECQuantifierNode((QuantifierNode)node);
+ }
+ break;
+
+ case NodeType.ENCLOSE:
+ EncloseNode enode = (EncloseNode)node;
+ if (enode.isOption()) {
+ compileOptionNode(enode);
+ } else {
+ compileEncloseNode(enode);
+ }
+ break;
+
+ case NodeType.ANCHOR:
+ compileAnchorNode((AnchorNode)node);
+ break;
+
+ default:
+ // undefined node type
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+ }
+
+ // Compiles the same subtree n times in a row.
+ protected final void compileTreeNTimes(Node node, int n) {
+ for (int i=0; i<n; i++) compileTree(node);
+ }
+
+ // Exception helpers: despite the "new" prefix both throw immediately and never return normally.
+ protected void newSyntaxException(String message) {
+ throw new SyntaxException(message);
+ }
+
+ protected void newInternalException(String message) {
+ throw new InternalException(message);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,100 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import java.io.PrintStream;
+
+public interface Config {
+ final int CHAR_TABLE_SIZE = 256;
+ // Compile-time switches and limits; as interface fields they are all implicitly public static final.
+ /* from jcodings */
+ final boolean VANILLA = false;
+ final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
+ final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
+ final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
+ final boolean USE_CRNL_AS_LINE_TERMINATOR = false;
+
+ final boolean USE_NAMED_GROUP = true;
+ final boolean USE_SUBEXP_CALL = true; // gates the compileCallNode call in Compiler.compileTree
+ final boolean USE_BACKREF_WITH_LEVEL = true; /* \k<name+n>, \k<name-n> */
+
+ final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
+ final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */
+ final boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = false;
+
+ final boolean CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true;
+
+ final boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false;
+ final boolean USE_CAPTURE_HISTORY = false;
+ final boolean USE_VARIABLE_META_CHARS = true;
+ final boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */
+ final boolean USE_POSIX_API_REGION_OPTION = true; /* needed for POSIX API support */
+ final boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
+ final boolean USE_COMBINATION_EXPLOSION_CHECK = false; // Compiler.compileTree picks the CEC or non-CEC quantifier hook from this
+
+ final int NREGION = 10;
+ final int MAX_BACKREF_NUM = 1000;
+ final int MAX_REPEAT_NUM = 100000;
+ final int MAX_MULTI_BYTE_RANGES_NUM = 10000; // CodeRangeBuffer.addCodeRangeToBuff throws ERR_TOO_MANY_MULTI_BYTE_RANGES above this
+
+
+ final boolean USE_WARN = true;
+
+ // internal config
+ final boolean USE_PARSE_TREE_NODE_RECYCLE = true;
+ final boolean USE_OP_PUSH_OR_JUMP_EXACT = true;
+ final boolean USE_SHARED_CCLASS_TABLE = false;
+ final boolean USE_QTFR_PEEK_NEXT = true;
+
+ final int INIT_MATCH_STACK_SIZE = 64;
+ final int DEFAULT_MATCH_STACK_LIMIT_SIZE = 0; /* unlimited */
+ final int NUMBER_OF_POOLED_STACKS = 4;
+
+
+
+ final boolean DONT_OPTIMIZE = false;
+
+ final boolean USE_STRING_TEMPLATES = true; // use embedded string templates in Regex object as byte arrays instead of compiling them into int bytecode array
+
+
+ final int MAX_CAPTURE_HISTORY_GROUP = 31;
+
+
+ final int CHECK_STRING_THRESHOLD_LEN = 7;
+ final int CHECK_BUFF_MAX_SIZE = 0x4000;
+
+ final boolean NON_UNICODE_SDW = true;
+
+ // Output streams: log is standard output, err is standard error.
+ final PrintStream log = System.out;
+ final PrintStream err = System.err;
+ // DEBUG_ALL is the master switch for every DEBUG* flag below that is assigned from it.
+ final boolean DEBUG_ALL = false;
+ // NOTE(review): DEBUG_PARSE_TREE_RAW, DEBUG_ASM and DEBUG_ASM_EXEC are hard-coded to true and do not follow DEBUG_ALL.
+ final boolean DEBUG = DEBUG_ALL;
+ final boolean DEBUG_PARSE_TREE = DEBUG_ALL;
+ final boolean DEBUG_PARSE_TREE_RAW = true;
+ final boolean DEBUG_COMPILE = DEBUG_ALL;
+ final boolean DEBUG_COMPILE_BYTE_CODE_INFO = DEBUG_ALL;
+ final boolean DEBUG_SEARCH = DEBUG_ALL;
+ final boolean DEBUG_MATCH = DEBUG_ALL;
+ final boolean DEBUG_ASM = true;
+ final boolean DEBUG_ASM_EXEC = true;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,285 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
+
+import java.util.Arrays;
+
+public class EncodingHelper {
+ // Static character helpers for the regexp engine: digit values, line terminators, case folding and ctype ranges.
+ public final static char NEW_LINE = 0xa;
+ public final static char RETURN = 0xd;
+
+ final static char[] EMPTYCHARS = new char[0];
+ final static int[][] codeRanges = new int[15][]; // ctypeCodeRange results, indexed by ctype and filled on first use
+ // Value of a decimal digit character; the caller is expected to have checked isDigit first.
+ public static int digitVal(int code) {
+ return code - '0';
+ }
+ // Same computation as digitVal.
+ public static int odigitVal(int code) {
+ return digitVal(code);
+ }
+ // True only for the ASCII hex digits 0-9, a-f and A-F; other Unicode digits are rejected (xdigitVal cannot map them).
+ public static boolean isXDigit(int code) {
+ return isDigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F');
+ }
+ // Value 0..15 of a hex digit; the result is only meaningful when isXDigit(code) is true.
+ public static int xdigitVal(int code) {
+ if (isDigit(code)) {
+ return code - '0';
+ } else if (code >= 'a' && code <= 'f') {
+ return code - 'a' + 10;
+ } else {
+ return code - 'A' + 10;
+ }
+ }
+ // True for the ASCII digits '0'..'9' only.
+ public static boolean isDigit(int code) {
+ return code >= '0' && code <= '9';
+ }
+
+ public static boolean isWord(int code) {
+ // letter, digit, or '_'
+ return (1 << Character.getType(code) & CharacterType.WORD_MASK) != 0;
+ }
+ // Only 0xa counts as a newline here.
+ public static boolean isNewLine(int code) {
+ return code == NEW_LINE;
+ }
+ // True when p is inside [0, end) bounds checked against end only, and chars[p] is 0xa.
+ public static boolean isNewLine(char[] chars, int p, int end) {
+ return p < end && chars[p] == NEW_LINE;
+ }
+ // True when chars[p], chars[p + 1] are 0xd, 0xa and both indices are below end.
+ public static boolean isCrnl(char[] chars, int p, int end) {
+ return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
+ }
+
+ // Encoding.prevCharHead: s - 1, or -1 when s is not above p
+ public static int prevCharHead(int p, int s) {
+ return s <= p ? -1 : s - 1;
+ }
+
+ /* onigenc_get_right_adjust_char_head_with_prev */
+ public static int rightAdjustCharHeadWithPrev(int s, IntHolder prev) {
+ if (prev != null) prev.value = -1; /* Sorry */
+ return s;
+ }
+
+ // Encoding.stepBack: s moved back n positions, or -1 if that would pass p (or if s is already -1)
+ public static int stepBack(int p, int s, int n) {
+ while (s != -1 && n-- > 0) {
+ if (s <= p) return -1;
+ s--;
+ }
+ return s;
+ }
+
+ /* onigenc_with_ascii_strncmp */
+ public static int strNCmp(char[] chars1, int p1, int end, char[] chars2, int p2, int n) {
+ while (n-- > 0) {
+ if (p1 >= end) return chars2[p2];
+ int c = chars1[p1];
+ int x = chars2[p2] - c;
+ if (x != 0) return x;
+
+ p2++;
+ p1++;
+ }
+ return 0;
+ }
+ // Folds bytes[p .. end) into one int, first byte most significant.
+ public static int mbcToCode(byte[] bytes, int p, int end) {
+ int code = 0;
+ for (int i = p; i < end; i++) {
+ code = (code << 8) | (bytes[i] & 0xff);
+ }
+ return code;
+ }
+ // Lower bound used by CodeRangeBuffer for its "all multi-byte" range.
+ public static int mbcodeStartPosition() {
+ return 0x80;
+ }
+ // Returns the opposite-case form of c as a one-element array, or an empty array when c is neither upper nor lower case.
+ public static char[] caseFoldCodesByString(int flag, char c) {
+ if (Character.isUpperCase(c)) {
+ return new char[] {Character.toLowerCase(c)};
+ } else if (Character.isLowerCase(c)) {
+ return new char[] {Character.toUpperCase(c)};
+ } else {
+ return EMPTYCHARS;
+ }
+ }
+ // For every LOWERCASE_LETTER c in [0, 0xffff) calls fun with (c -> upper) and then with (upper -> c).
+ public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) {
+ int[] code = new int[1];
+
+ for (int c = 0; c < 0xffff; c++) {
+ if (Character.getType(c) == Character.LOWERCASE_LETTER) {
+
+ int upper = code[0] = Character.toUpperCase(c);
+ fun.apply(c, code, 1, arg);
+
+ code[0] = c;
+ fun.apply(upper, code, 1, arg);
+ }
+ }
+ }
+
+ // CodeRange.isInCodeRange: p is [count, from0, to0, from1, to1, ...] with inclusive, ascending ranges
+ public static boolean isInCodeRange(int[]p, int code) {
+ int low = 0;
+ int n = p[0];
+ int high = n;
+
+ while (low < high) {
+ int x = (low + high) >> 1;
+ if (code > p[(x << 1) + 2]) {
+ low = x + 1;
+ } else {
+ high = x;
+ }
+ }
+ return low < n && code >= p[(low << 1) + 1];
+ }
+ // Returns the cached range table for ctype (built over 0..0xffff on first use), or null if ctype >= codeRanges.length.
+ public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
+ sbOut.value = 0x100; // use bitset for codes smaller than 256
+ int[] range = null;
+
+ if (ctype < codeRanges.length) {
+ range = codeRanges[ctype];
+
+ if (range == null) {
+ // format: [numberOfRanges, rangeStart, rangeEnd, ...]
+ range = new int[16];
+ int rangeCount = 0;
+ int lastCode = -2;
+
+ for (int code = 0; code <= 0xffff; code++) {
+ if (isCodeCType(code, ctype)) {
+ if (lastCode < code -1) {
+ if (rangeCount * 2 + 2 >= range.length) {
+ range = Arrays.copyOf(range, range.length * 2);
+ }
+ range[rangeCount * 2 + 1] = code;
+ rangeCount++;
+ }
+ range[rangeCount * 2] = lastCode = code;
+ }
+ }
+
+ if (rangeCount * 2 + 1 < range.length) {
+ range = Arrays.copyOf(range, rangeCount * 2 + 1);
+ }
+
+ range[0] = rangeCount;
+ codeRanges[ctype] = range;
+ }
+ }
+
+ return range;
+ }
+
+ // CodeRange.isInCodeRange: same search as above on a table that starts at p[offset]
+ public static boolean isInCodeRange(int[]p, int offset, int code) {
+ int low = 0;
+ int n = p[offset];
+ int high = n ;
+
+ while (low < high) {
+ int x = (low + high) >> 1;
+ if (code > p[(x << 1) + 2 + offset]) {
+ low = x + 1;
+ } else {
+ high = x;
+ }
+ }
+ return low < n && code >= p[(low << 1) + 1 + offset];
+ }
+
+ /**
+ * @see [http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt]
+ */
+ public static boolean isCodeCType(int code, int ctype) {
+ int type;
+ switch (ctype) {
+ case CharacterType.NEWLINE:
+ return code == EncodingHelper.NEW_LINE;
+ case CharacterType.ALPHA:
+ return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
+ case CharacterType.BLANK:
+ return code == 0x09 || Character.getType(code) == Character.SPACE_SEPARATOR;
+ case CharacterType.CNTRL:
+ type = Character.getType(code);
+ return (1 << type & CharacterType.CNTRL_MASK) != 0 || type == Character.UNASSIGNED;
+ case CharacterType.DIGIT:
+ return EncodingHelper.isDigit(code);
+ case CharacterType.GRAPH:
+ switch (code) {
+ case 0x09:
+ case 0x0a:
+ case 0x0b:
+ case 0x0c:
+ case 0x0d:
+ return false;
+ default:
+ type = Character.getType(code);
+ return (1 << type & CharacterType.GRAPH_MASK) == 0 && type != Character.UNASSIGNED;
+ }
+ case CharacterType.LOWER:
+ return Character.isLowerCase(code);
+ case CharacterType.PRINT:
+ type = Character.getType(code);
+ return (1 << type & CharacterType.PRINT_MASK) == 0 && type != Character.UNASSIGNED;
+ case CharacterType.PUNCT:
+ return (1 << Character.getType(code) & CharacterType.PUNCT_MASK) != 0;
+ case CharacterType.SPACE:
+ // ECMA 7.2 and 7.3
+ switch (code) {
+ case 0x09:
+ case 0x0a:
+ case 0x0b:
+ case 0x0c:
+ case 0x0d:
+ return true;
+ default:
+ // true if Unicode separator or BOM
+ return (1 << Character.getType(code) & CharacterType.SPACE_MASK) != 0 || code == 0xfeff;
+ }
+ case CharacterType.UPPER:
+ return Character.isUpperCase(code);
+ case CharacterType.XDIGIT:
+ return EncodingHelper.isXDigit(code);
+ case CharacterType.WORD:
+ return (1 << Character.getType(code) & CharacterType.WORD_MASK) != 0;
+ case CharacterType.ALNUM:
+ return (1 << Character.getType(code) & CharacterType.ALNUM_MASK) != 0;
+ case CharacterType.ASCII:
+ return code < 0x80;
+ default:
+ throw new RuntimeException("illegal character type: " + ctype);
+ }
+ }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,1274 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline;
+import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.PosixBracket;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException;
+
+class Lexer extends ScannerSupport {
+    /** Scan environment supplying the syntax, options and capture-group state used while lexing. */
+    protected final ScanEnvironment env;
+    /** Cached copy of {@code env.syntax} for fast access. */
+    protected final Syntax syntax;
+    /** The token that the fetch methods fill in. */
+    protected final Token token = new Token();
+
+    /**
+     * Creates a lexer; {@code chars}, {@code p} and {@code end} are handed unchanged to the
+     * {@code ScannerSupport} constructor.
+     */
+    protected Lexer(final ScanEnvironment env, final char[] chars, final int p, final int end) {
+        super(chars, p, end);
+        this.syntax = env.syntax;
+        this.env = env;
+    }
+
+ /**
+ * Scans the body of an interval quantifier; the caller has already consumed the opening brace.
+ * On success, sets {@code token} to an INTERVAL carrying the lower and upper repeat counts.
+ *
+ * @return 0: normal {n,m}, 1: not an interval (only when the syntax allows invalid
+ * intervals; the input position is restored where needed and the brace is a normal char),
+ * 2: fixed {n}
+ * TODO: introduce a dedicated return-code type here
+ */
+ private int fetchRangeQuantifier() {
+ mark();
+ boolean synAllow = syntax.allowInvalidInterval();
+
+ if (!left()) {
+ if (synAllow) {
+ return 1; /* "....{" : OK! */
+ } else {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ if (!synAllow) {
+ c = peek();
+ if (c == ')' || c == '(' || c == '|') {
+ newSyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
+ }
+ }
+
+ int low = scanUnsignedNumber();
+ if (low < 0) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ boolean nonLow = false;
+ if (p == _p) { /* can't read low */
+ if (syntax.allowIntervalLowAbbrev()) {
+ low = 0;
+ nonLow = true;
+ } else {
+ return invalidRangeQuantifier(synAllow);
+ }
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+
+ fetch();
+ int up;
+ int ret = 0;
+ if (c == ',') {
+ int prev = p; // position before the upper bound; unchanged p means no digits were read
+ up = scanUnsignedNumber();
+ if (up < 0) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+ if (up > Config.MAX_REPEAT_NUM) newValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
+
+ if (p == prev) {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ }
+ } else {
+ if (nonLow) return invalidRangeQuantifier(synAllow);
+ unfetch();
+ up = low; /* {n} : exact n times */
+ ret = 2; /* fixed */
+ }
+
+ if (!left()) return invalidRangeQuantifier(synAllow);
+ fetch();
+
+ // When intervals are written with an escaped brace, the closing brace must be escaped too.
+ if (syntax.opEscBraceInterval()) {
+ if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow);
+ fetch();
+ }
+
+ if (c != '}') return invalidRangeQuantifier(synAllow);
+
+ if (!isRepeatInfinite(up) && low > up) {
+ newValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
+ }
+
+ token.type = TokenType.INTERVAL;
+ token.setRepeatLower(low);
+ token.setRepeatUpper(up);
+
+ return ret; /* 0: normal {n,m}, 2: fixed {n} */
+ }
+
+    /**
+     * Handles a malformed interval quantifier.
+     *
+     * @param synAllow whether the syntax tolerates invalid intervals
+     * @return 1 after restoring the marked position when tolerated; otherwise a syntax
+     *         exception is raised
+     */
+    private int invalidRangeQuantifier(boolean synAllow) {
+        if (!synAllow) {
+            newSyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
+            return 0; // not reached
+        }
+        restore();
+        return 1;
+    }
+
+    /**
+     * Reads the character after an escape and computes the code it denotes:
+     * \M-x (meta), \C-x or \cx (control), or any other escaped character.
+     * The result is stored in {@code c} and also returned.
+     *
+     * @return the resolved character code
+     */
+    private int fetchEscapedValue() {
+        if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+        fetch();
+
+        switch (c) {
+
+        case 'M':
+            if (syntax.op2EscCapitalMBarMeta()) {
+                if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+                fetch();
+                if (c != '-') newSyntaxException(ERR_META_CODE_SYNTAX);
+                if (!left()) newSyntaxException(ERR_END_PATTERN_AT_META);
+                fetch();
+                if (c == syntax.metaCharTable.esc) {
+                    c = fetchEscapedValue();
+                }
+                c = ((c & 0xff) | 0x80);
+            } else {
+                fetchEscapedValueBackSlash();
+            }
+            break;
+
+        case 'C':
+            if (syntax.op2EscCapitalCBarControl()) {
+                if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+                fetch();
+                if (c != '-') newSyntaxException(ERR_CONTROL_CODE_SYNTAX);
+                fetchEscapedValueControl();
+            } else {
+                fetchEscapedValueBackSlash();
+            }
+            break;
+
+        case 'c':
+            if (syntax.opEscCControl()) {
+                fetchEscapedValueControl();
+                // The control code is final. Previously this fell through and ran the
+                // result through the backslash conversion a second time.
+                break;
+            }
+            /* control escapes disabled: fall through and treat as an ordinary escape */
+
+        default:
+            fetchEscapedValueBackSlash();
+        } // switch
+
+        return c;
+    }
+
+ private void fetchEscapedValueBackSlash() {
+ c = env.convertBackslashValue(c);
+ }
+
+    /**
+     * Reads the character that follows a control prefix and stores the control code in {@code c}:
+     * '?' yields 0x7f; anything else (itself possibly an escape) is masked with 0x9f.
+     */
+    private void fetchEscapedValueControl() {
+        if (!left()) newSyntaxException(ERR_END_PATTERN_AT_CONTROL);
+        fetch();
+        if (c != '?') {
+            if (c == syntax.metaCharTable.esc) {
+                c = fetchEscapedValue();
+            }
+            c &= 0x9f;
+            return;
+        }
+        c = 0x7f; // same value as octal 0177
+    }
+
+    /**
+     * Maps the character that opens a group name to the character that closes it.
+     *
+     * @param start the opening character
+     * @return '>' for '<', a single quote for a single quote, otherwise 0
+     */
+    private int nameEndCodePoint(int start) {
+        if (start == '<') {
+            return '>';
+        }
+        return start == '\'' ? '\'' : 0;
+    }
+
+    // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
+    /*
+     * Parses the name part of a named back-reference that may carry a nesting level:
+     *   \k<name+n>, \k<name-n>
+     *   \k<num+n>,  \k<num-n>
+     *   \k<-num+n>, \k<-num-n>
+     *
+     * startCode : the opening delimiter that was just consumed
+     * rbackNum  : receives the signed group number when the name is numeric
+     * rlevel    : receives the signed level when one is present
+     * returns   : true when a level was present
+     *
+     * The end position of the name is passed back through the 'value' field (rnameEnd).
+     * Any malformed name raises a value exception carrying the specific error recorded
+     * while scanning.
+     */
+    private boolean fetchNameWithLevel(int startCode, Ptr rbackNum, Ptr rlevel) {
+        int src = p;
+        boolean existLevel = false;
+        int isNum = 0; // 0: name, 1: number, 2: '-' seen but no digit yet
+        int sign = 1;
+
+        int endCode = nameEndCodePoint(startCode);
+        int pnumHead = p;
+        int nameEnd = stop;
+
+        String err = null;
+        if (!left()) {
+            newValueException(ERR_EMPTY_GROUP_NAME);
+        } else {
+            fetch();
+            if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+            // Same digit test as the rest of this method and the sibling name parsers.
+            if (EncodingHelper.isDigit(c)) {
+                isNum = 1;
+            } else if (c == '-') {
+                isNum = 2;
+                sign = -1;
+                pnumHead = p;
+            } else if (!EncodingHelper.isWord(c)) {
+                err = ERR_INVALID_GROUP_NAME;
+            }
+        }
+
+        while (left()) {
+            nameEnd = p;
+            fetch();
+            if (c == endCode || c == ')' || c == '+' || c == '-') {
+                if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+                break;
+            }
+
+            if (isNum != 0) {
+                if (EncodingHelper.isDigit(c)) {
+                    isNum = 1;
+                } else {
+                    err = ERR_INVALID_GROUP_NAME;
+                }
+            } else if (!EncodingHelper.isWord(c)) {
+                err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+            }
+        }
+
+        boolean isEndCode = false;
+        if (err == null && c != endCode) {
+            if (c == '+' || c == '-') {
+                int flag = c == '-' ? -1 : 1;
+
+                // Every other fetch() in this class is preceded by a left() check; a pattern
+                // ending right after the sign must be reported, not read past the end.
+                if (!left()) newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+                fetch();
+                if (!EncodingHelper.isDigit(c)) newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+                unfetch();
+                int level = scanUnsignedNumber();
+                if (level < 0) newValueException(ERR_TOO_BIG_NUMBER);
+                rlevel.p = level * flag;
+                existLevel = true;
+
+                // A pattern ending right after the level has no terminator: isEndCode stays false.
+                if (left()) {
+                    fetch();
+                    isEndCode = c == endCode;
+                }
+            }
+
+            if (!isEndCode) {
+                err = ERR_INVALID_GROUP_NAME;
+                nameEnd = stop;
+            }
+        }
+
+        if (err == null) {
+            if (isNum != 0) {
+                // Re-scan the digits from the head of the number, then return to where we were.
+                mark();
+                p = pnumHead;
+                int backNum = scanUnsignedNumber();
+                restore();
+                if (backNum < 0) {
+                    newValueException(ERR_TOO_BIG_NUMBER);
+                } else if (backNum == 0) {
+                    newValueException(ERR_INVALID_GROUP_NAME, src, stop);
+                }
+                rbackNum.p = backNum * sign;
+            }
+            value = nameEnd;
+            return existLevel;
+        } else {
+            // Report the error that was actually recorded, as the sibling name parsers do.
+            newValueException(err, src, nameEnd);
+            return false; // not reached
+        }
+    }
+
+    // USE_NAMED_GROUP
+    /**
+     * Parses a group name (or, for references, a possibly negative group number) up to the
+     * closing delimiter. The end position of the name is passed back through {@code value}.
+     *
+     * @param startCode the opening delimiter that was just consumed
+     * @param ref {@code false}: defining a name (numeric names are rejected);
+     *            {@code true}: referencing a name (numeric names are allowed)
+     * @return the signed group number for a numeric reference, 0 for a symbolic name
+     */
+    private int fetchNameForNamedGroup(int startCode, boolean ref) {
+        int src = p;
+        value = 0;
+
+        int isNum = 0; // 0: name, 1: number, 2: '-' seen but no digit yet
+        int sign = 1;
+
+        int endCode = nameEndCodePoint(startCode);
+        int pnumHead = p;
+        int nameEnd = stop;
+
+        String err = null;
+        if (!left()) {
+            newValueException(ERR_EMPTY_GROUP_NAME);
+        } else {
+            fetch();
+            if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+            if (EncodingHelper.isDigit(c)) {
+                if (ref) {
+                    isNum = 1;
+                } else {
+                    err = ERR_INVALID_GROUP_NAME;
+                }
+            } else if (c == '-') {
+                if (ref) {
+                    isNum = 2;
+                    sign = -1;
+                    pnumHead = p;
+                } else {
+                    err = ERR_INVALID_GROUP_NAME;
+                }
+            } else if (!EncodingHelper.isWord(c)) {
+                err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+            }
+        }
+
+        if (err == null) {
+            while (left()) {
+                nameEnd = p;
+                fetch();
+                if (c == endCode || c == ')') {
+                    if (isNum == 2) err = ERR_INVALID_GROUP_NAME;
+                    break;
+                }
+
+                if (isNum != 0) {
+                    if (EncodingHelper.isDigit(c)) {
+                        isNum = 1;
+                    } else {
+                        if (!EncodingHelper.isWord(c)) {
+                            err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+                        } else {
+                            err = ERR_INVALID_GROUP_NAME;
+                        }
+                    }
+                } else {
+                    if (!EncodingHelper.isWord(c)) {
+                        err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+                    }
+                }
+            }
+
+            if (c != endCode) {
+                err = ERR_INVALID_GROUP_NAME;
+                nameEnd = stop;
+            }
+
+            int backNum = 0;
+            if (isNum != 0) {
+                // Re-scan the digits from the head of the number, then return to where we were.
+                mark();
+                p = pnumHead;
+                backNum = scanUnsignedNumber();
+                restore();
+                if (backNum < 0) {
+                    newValueException(ERR_TOO_BIG_NUMBER);
+                } else if (backNum == 0) {
+                    newValueException(ERR_INVALID_GROUP_NAME, src, nameEnd);
+                }
+                backNum *= sign;
+            }
+
+            // Errors recorded while scanning the rest of the name (invalid character, missing
+            // terminator) used to be dropped here, so malformed names were accepted.
+            if (err != null) {
+                newValueException(err, src, nameEnd);
+            }
+
+            value = nameEnd;
+            return backNum;
+        } else {
+            // The first character was already invalid: skip to the end of the name for the message.
+            while (left()) {
+                nameEnd = p;
+                fetch();
+                if (c == endCode || c == ')') break;
+            }
+            if (!left()) nameEnd = stop;
+            newValueException(err, src, nameEnd);
+            return 0; // not reached
+        }
+    }
+
+    // #else USE_NAMED_GROUP
+    // TODO: make it return nameEnd!
+    /**
+     * Parses a purely numeric (optionally negative) group reference up to the closing
+     * delimiter. The end position of the name is passed back through {@code value}.
+     *
+     * @param startCode the opening delimiter that was just consumed
+     * @param ref not consulted by this variant
+     * @return the signed group number
+     */
+    private final int fetchNameForNoNamedGroup(int startCode, boolean ref) {
+        final int nameStart = p;
+        value = 0;
+
+        final int endCode = nameEndCodePoint(startCode);
+        int numberStart = p;
+        int sign = 1;
+        String err = null;
+
+        if (!left()) {
+            newValueException(ERR_EMPTY_GROUP_NAME);
+        } else {
+            fetch();
+            if (c == endCode) newValueException(ERR_EMPTY_GROUP_NAME);
+
+            if (!EncodingHelper.isDigit(c)) {
+                if (c == '-') {
+                    sign = -1;
+                    numberStart = p;
+                } else {
+                    err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+                }
+            }
+        }
+
+        int nameEnd = stop;
+        while (left()) {
+            nameEnd = p;
+
+            fetch();
+            if (c == endCode || c == ')') break;
+            if (!EncodingHelper.isDigit(c)) err = ERR_INVALID_CHAR_IN_GROUP_NAME;
+        }
+
+        if (err == null && c != endCode) {
+            err = ERR_INVALID_GROUP_NAME;
+            nameEnd = stop;
+        }
+
+        if (err != null) {
+            newValueException(err, nameStart, nameEnd);
+            return 0; // not reached
+        }
+
+        // Re-scan the digits from the head of the number, then return to where we were.
+        mark();
+        p = numberStart;
+        final int magnitude = scanUnsignedNumber();
+        restore();
+        if (magnitude < 0) {
+            newValueException(ERR_TOO_BIG_NUMBER);
+        } else if (magnitude == 0) {
+            newValueException(ERR_INVALID_GROUP_NAME, nameStart, nameEnd);
+        }
+
+        value = nameEnd;
+        return magnitude * sign;
+    }
+
+    /**
+     * Parses a group name using the variant selected by {@code Config.USE_NAMED_GROUP}.
+     *
+     * @param startCode the opening delimiter that was just consumed
+     * @param ref whether the name is a reference rather than a definition
+     * @return the value returned by the selected variant
+     */
+    protected final int fetchName(int startCode, boolean ref) {
+        return Config.USE_NAMED_GROUP
+                ? fetchNameForNamedGroup(startCode, ref)
+                : fetchNameForNoNamedGroup(startCode, ref);
+    }
+
+ private boolean strExistCheckWithEsc(int[]s, int n, int bad) {
+ int p = this.p;
+ int to = this.stop;
+
+ boolean inEsc = false;
+ int i=0;
+ while(p < to) {
+ if (inEsc) {
+ inEsc = false;
+ p ++;
+ } else {
+ int x = chars[p];
+ int q = p + 1;
+ if (x == s[0]) {
+ for (i=1; i<n && q < to; i++) {
+ x = chars[q];
+ if (x != s[i]) break;
+ q++;
+ }
+ if (i >= n) return true;
+ p++;
+ } else {
+ x = chars[p];
+ if (x == bad) return false;
+ else if (x == syntax.metaCharTable.esc) inEsc = true;
+ p = q;
+ }
+ }
+ }
+ return false;
+ }
+
+ private static final int send[] = new int[]{':', ']'};
+
+ private void fetchTokenInCCFor_charType(boolean flag, int type) {
+ token.type = TokenType.CHAR_TYPE;
+ token.setPropCType(type);
+ token.setPropNot(flag);
+ }
+
+    /**
+     * Handles an escaped 'p' or 'P' inside a character class. When followed by an opening
+     * brace (and the syntax enables it) the token becomes a CHAR_PROPERTY, negated for 'P' and
+     * toggled again by a leading '^' where the syntax allows; otherwise a warning is issued.
+     */
+    private void fetchTokenInCCFor_p() {
+        int c2 = peek(); // !!! migrate to peekIs
+        if (c2 == '{' && syntax.op2EscPBraceCharProperty()) {
+            inc();
+            token.type = TokenType.CHAR_PROPERTY;
+            token.setPropNot(c == 'P');
+
+            // Only look for '^' when input remains; every other fetch in this class is
+            // preceded by a left() check, and the pattern may end right after the brace.
+            if (left() && syntax.op2EscPBraceCircumflexNot()) {
+                c2 = fetchTo();
+                if (c2 == '^') {
+                    token.setPropNot(!token.getPropNot());
+                } else {
+                    unfetch();
+                }
+            }
+        } else {
+            syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c);
+        }
+    }
+
+ private void fetchTokenInCCFor_x() {
+ if (!left()) return;
+ int last = p;
+
+ if (peekIs('{') && syntax.opEscXBraceHex8()) {
+ inc();
+ int num = scanUnsignedHexadecimalNumber(8);
+ if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ if (left()) {
+ int c2 = peek();
+ if (EncodingHelper.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+ }
+
+ if (p > last + 1 && left() && peekIs('}')) {
+ inc();
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ } else {
+ /* can't read nothing or invalid format */
+ p = last;
+ }
+ } else if (syntax.opEscXHex2()) {
+ int num = scanUnsignedHexadecimalNumber(2);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 16;
+ token.setC(num);
+ }
+ }
+
+ private void fetchTokenInCCFor_u() {
+ if (!left()) return;
+ int last = p;
+
+ if (syntax.op2EscUHex4()) {
+ int num = scanUnsignedHexadecimalNumber(4);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ }
+ }
+
+ private void fetchTokenInCCFor_digit() {
+ if (syntax.opEscOctal3()) {
+ unfetch();
+ int last = p;
+ int num = scanUnsignedOctalNumber(3);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 8;
+ token.setC(num);
+ }
+ }
+
+    /**
+     * Handles '[' inside a character class. If it starts a POSIX bracket (a ':' follows and a
+     * closing ":]" exists ahead) the token becomes POSIX_BRACKET_OPEN; otherwise it is either a
+     * nested class opener (CC_CC_OPEN) or, when set operations are disabled, only warned about.
+     */
+    private void fetchTokenInCCFor_posixBracket() {
+        boolean posixOpen = false;
+
+        if (syntax.opPosixBracket() && peekIs(':')) {
+            token.backP = p; /* position after the '[' that has been read */
+            inc();
+            posixOpen = strExistCheckWithEsc(send, send.length, ']');
+            if (posixOpen) {
+                token.type = TokenType.POSIX_BRACKET_OPEN;
+            } else {
+                unfetch();
+            }
+        }
+
+        if (posixOpen) return;
+
+        // cc_in_cc:
+        if (syntax.op2CClassSetOp()) {
+            token.type = TokenType.CC_CC_OPEN;
+        } else {
+            env.ccEscWarn("[");
+        }
+    }
+
+ private void fetchTokenInCCFor_and() {
+ if (syntax.op2CClassSetOp() && left() && peekIs('&')) {
+ inc();
+ token.type = TokenType.CC_AND;
+ }
+ }
+
+ /**
+ * Fetches the next token while inside a character class and stores it in {@code token}.
+ * The token starts out as a plain CHAR holding the fetched character and is then refined
+ * for the class terminator, the range dash, escapes, a nested opening bracket and '&'.
+ *
+ * @return the type of the fetched token; EOT when no input is left
+ */
+ protected final TokenType fetchTokenInCC() {
+ if (!left()) {
+ token.type = TokenType.EOT;
+ return token.type;
+ }
+
+ fetch();
+ token.type = TokenType.CHAR;
+ token.base = 0;
+ token.setC(c);
+ token.escaped = false;
+
+ if (c == ']') {
+ token.type = TokenType.CC_CLOSE;
+ } else if (c == '-') {
+ token.type = TokenType.CC_RANGE;
+ } else if (c == syntax.metaCharTable.esc) {
+ // Without escape support inside classes the escape character is an ordinary CHAR.
+ if (!syntax.backSlashEscapeInCC()) return token.type;
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+ fetch();
+ token.escaped = true;
+ token.setC(c);
+
+ switch (c) {
+ case 'w':
+ fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+ break;
+ case 'W':
+ fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+ break;
+ case 'd':
+ fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+ break;
+ case 'D':
+ fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+ break;
+ case 's':
+ fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+ break;
+ case 'S':
+ fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+ break;
+ case 'h':
+ if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
+ break;
+ case 'H':
+ if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
+ break;
+ case 'p':
+ case 'P':
+ fetchTokenInCCFor_p();
+ break;
+ case 'x':
+ fetchTokenInCCFor_x();
+ break;
+ case 'u':
+ fetchTokenInCCFor_u();
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ fetchTokenInCCFor_digit();
+ break;
+
+ default:
+ // Re-read the escaped character through the generic escape decoder; the token only
+ // becomes a CODE_POINT when decoding changed its value.
+ unfetch();
+ int num = fetchEscapedValue();
+ if (token.getC() != num) {
+ token.setCode(num);
+ token.type = TokenType.CODE_POINT;
+ }
+ break;
+ } // switch
+
+ } else if (c == '[') {
+ fetchTokenInCCFor_posixBracket();
+ } else if (c == '&') {
+ fetchTokenInCCFor_and();
+ }
+ return token.type;
+ }
+
+    /**
+     * Converts a relative back-reference number to an absolute one.
+     *
+     * @param relNo the relative number
+     * @return {@code env.numMem + 1 + relNo}
+     */
+    protected final int backrefRelToAbs(int relNo) {
+        final int next = env.numMem + 1;
+        return next + relNo;
+    }
+
+ private void fetchTokenFor_repeat(int lower, int upper) {
+ token.type = TokenType.OP_REPEAT;
+ token.setRepeatLower(lower);
+ token.setRepeatUpper(upper);
+ greedyCheck();
+ }
+
+ private void fetchTokenFor_openBrace() {
+ switch (fetchRangeQuantifier()) {
+ case 0:
+ greedyCheck();
+ break;
+ case 2:
+ if (syntax.fixedIntervalIsGreedyOnly()) {
+ possessiveCheck();
+ } else {
+ greedyCheck();
+ }
+ break;
+ default: /* 1 : normal char */
+ } // inner switch
+ }
+
+ private void fetchTokenFor_anchor(int subType) {
+ token.type = TokenType.ANCHOR;
+ token.setAnchor(subType);
+ }
+
+ private void fetchTokenFor_xBrace() {
+ if (!left()) return;
+
+ int last = p;
+ if (peekIs('{') && syntax.opEscXBraceHex8()) {
+ inc();
+ int num = scanUnsignedHexadecimalNumber(8);
+ if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
+ if (left()) {
+ if (EncodingHelper.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
+ }
+
+ if (p > last + 1 && left() && peekIs('}')) {
+ inc();
+ token.type = TokenType.CODE_POINT;
+ token.setCode(num);
+ } else {
+ /* can't read nothing or invalid format */
+ p = last;
+ }
+ } else if (syntax.opEscXHex2()) {
+ int num = scanUnsignedHexadecimalNumber(2);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 16;
+ token.setC(num);
+ }
+ }
+
+ private void fetchTokenFor_uHex() {
+ if (!left()) return;
+ int last = p;
+
+ if (syntax.op2EscUHex4()) {
+ int num = scanUnsignedHexadecimalNumber(4);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.CODE_POINT;
+ token.base = 16;
+ token.setCode(num);
+ }
+ }
+
+ private void fetchTokenFor_digit() {
+ unfetch();
+ int last = p;
+ int num = scanUnsignedNumber();
+ if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
+ } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
+ if (syntax.strictCheckBackref()) {
+ if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF);
+ }
+ token.type = TokenType.BACKREF;
+ token.setBackrefNum(1);
+ token.setBackrefRef1(num);
+ token.setBackrefByName(false);
+ if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false);
+ return;
+ }
+
+ if (c == '8' || c == '9') { /* normal char */ // skip_backref:
+ p = last;
+ inc();
+ return;
+ }
+ p = last;
+
+ fetchTokenFor_zero(); /* fall through */
+ }
+
+ private void fetchTokenFor_zero() {
+ if (syntax.opEscOctal3()) {
+ int last = p;
+ int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
+ if (num < 0) newValueException(ERR_TOO_BIG_NUMBER);
+ if (p == last) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ token.type = TokenType.RAW_BYTE;
+ token.base = 8;
+ token.setC(num);
+ } else if (c != '0') {
+ inc();
+ }
+ }
+
+    /**
+     * Handles an escaped 'k': a back-reference given by name or by (possibly relative) number,
+     * delimited by angle brackets or single quotes. Produces a BACKREF token, or warns when
+     * no delimiter follows. Does nothing unless the syntax enables named back-references.
+     */
+    private void fetchTokenFor_namedBackref() {
+        if (!syntax.op2EscKNamedBackref()) return;
+
+        if (!left()) {
+            syntaxWarn(Warnings.INVALID_BACKREFERENCE);
+            return;
+        }
+
+        fetch();
+        if (c != '<' && c != '\'') {
+            unfetch();
+            syntaxWarn(Warnings.INVALID_BACKREFERENCE);
+            return;
+        }
+
+        int last = p;
+        int backNum;
+        if (Config.USE_BACKREF_WITH_LEVEL) {
+            Ptr rbackNum = new Ptr();
+            Ptr rlevel = new Ptr();
+            token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel));
+            token.setBackrefLevel(rlevel.p);
+            backNum = rbackNum.p;
+        } else {
+            backNum = fetchName(c, true);
+        } // USE_BACKREF_WITH_LEVEL
+        int nameEnd = value; // set by fetchNameWithLevel/fetchName
+
+        if (backNum != 0) {
+            // Numeric reference; a negative number is relative to the groups seen so far.
+            if (backNum < 0) {
+                backNum = backrefRelToAbs(backNum);
+                if (backNum <= 0) newValueException(ERR_INVALID_BACKREF);
+            }
+
+            // Same strict check as for plain decimal back-references: the referenced group
+            // itself must exist, not merely the group table.
+            if (syntax.strictCheckBackref() &&
+                    (backNum > env.numMem || env.memNodes == null || env.memNodes[backNum] == null)) {
+                newValueException(ERR_INVALID_BACKREF);
+            }
+            token.type = TokenType.BACKREF;
+            token.setBackrefByName(false);
+            token.setBackrefNum(1);
+            token.setBackrefRef1(backNum);
+            return;
+        }
+
+        // Symbolic reference: resolve the name to its group number(s).
+        NameEntry e = env.reg.nameToGroupNumbers(chars, last, nameEnd);
+        if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, last, nameEnd);
+
+        if (syntax.strictCheckBackref()) {
+            if (e.backNum == 1) {
+                if (e.backRef1 > env.numMem ||
+                    env.memNodes == null ||
+                    env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF);
+            } else {
+                for (int i = 0; i < e.backNum; i++) {
+                    if (e.backRefs[i] > env.numMem ||
+                        env.memNodes == null ||
+                        env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF);
+                }
+            }
+        }
+
+        token.type = TokenType.BACKREF;
+        token.setBackrefByName(true);
+
+        if (e.backNum == 1) {
+            token.setBackrefNum(1);
+            token.setBackrefRef1(e.backRef1);
+        } else {
+            token.setBackrefNum(e.backNum);
+            token.setBackrefRefs(e.backRefs);
+        }
+    }
+
+ private void fetchTokenFor_subexpCall() {
+ if (syntax.op2EscGSubexpCall()) {
+ if (left()) {
+ fetch();
+ if (c == '<' || c == '\'') {
+ int last = p;
+ int gNum = fetchName(c, true);
+ int nameEnd = value;
+ token.type = TokenType.CALL;
+ token.setCallNameP(last);
+ token.setCallNameEnd(nameEnd);
+ token.setCallGNum(gNum);
+ } else {
+ unfetch();
+ syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
+ }
+ } else {
+ syntaxWarn(Warnings.INVALID_SUBEXP_CALL);
+ }
+ }
+ }
+
+    /**
+     * Handles an escaped 'p' or 'P' outside a character class. When followed by an opening
+     * brace (and the syntax enables it) the token becomes a CHAR_PROPERTY, negated for 'P' and
+     * toggled again by a leading '^' where the syntax allows; otherwise a warning is issued.
+     */
+    private void fetchTokenFor_charProperty() {
+        if (peekIs('{') && syntax.op2EscPBraceCharProperty()) {
+            inc();
+            token.type = TokenType.CHAR_PROPERTY;
+            token.setPropNot(c == 'P');
+
+            // Only look for '^' when input remains; every other fetch in this class is
+            // preceded by a left() check, and the pattern may end right after the brace.
+            if (left() && syntax.op2EscPBraceCircumflexNot()) {
+                fetch();
+                if (c == '^') {
+                    token.setPropNot(!token.getPropNot());
+                } else {
+                    unfetch();
+                }
+            }
+        } else {
+            syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c);
+        }
+    }
+
+    /**
+     * Classifies the current character against the syntax's variable meta-character table
+     * and sets the token accordingly; characters not in the table leave the token untouched.
+     */
+    private void fetchTokenFor_metaChars() {
+        if (c == syntax.metaCharTable.anyChar) {
+            token.type = TokenType.ANYCHAR;
+            return;
+        }
+        if (c == syntax.metaCharTable.anyTime) {
+            fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
+            return;
+        }
+        if (c == syntax.metaCharTable.zeroOrOneTime) {
+            fetchTokenFor_repeat(0, 1);
+            return;
+        }
+        if (c == syntax.metaCharTable.oneOrMoreTime) {
+            fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
+            return;
+        }
+        if (c == syntax.metaCharTable.anyCharAnyTime) {
+            token.type = TokenType.ANYCHAR_ANYTIME;
+        }
+    }
+
+ /**
+ * Fetches the next token outside a character class and stores it in {@code token}.
+ * The token starts out as a STRING holding the fetched character and is refined by the
+ * escape switch or the plain-character switch below, each case gated by the active syntax.
+ * Group comments and, in extended mode, line comments and whitespace are skipped by
+ * restarting the loop.
+ *
+ * @return the type of the fetched token; EOT when no input is left
+ */
+ protected final TokenType fetchToken() {
+ // mark(); // out
+ start:
+ while(true) {
+ if (!left()) {
+ token.type = TokenType.EOT;
+ return token.type;
+ }
+
+ token.type = TokenType.STRING;
+ token.base = 0;
+ token.backP = p;
+
+ fetch();
+
+ if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
+ if (!left()) newSyntaxException(ERR_END_PATTERN_AT_ESCAPE);
+
+ token.backP = p;
+ fetch();
+
+ token.setC(c);
+ token.escaped = true;
+ switch(c) {
+
+ case '*':
+ if (syntax.opEscAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
+ break;
+ case '+':
+ if (syntax.opEscPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
+ break;
+ case '?':
+ if (syntax.opEscQMarkZeroOne()) fetchTokenFor_repeat(0, 1);
+ break;
+ case '{':
+ if (syntax.opEscBraceInterval()) fetchTokenFor_openBrace();
+ break;
+ case '|':
+ if (syntax.opEscVBarAlt()) token.type = TokenType.ALT;
+ break;
+ case '(':
+ if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_OPEN;
+ break;
+ case ')':
+ if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE;
+ break;
+ case 'w':
+ if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+ break;
+ case 'W':
+ if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
+ break;
+ case 'b':
+ if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.WORD_BOUND);
+ break;
+ case 'B':
+ if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
+ break;
+ case '<':
+ if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
+ break;
+ case '>':
+ if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_END);
+ break;
+ case 's':
+ if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+ break;
+ case 'S':
+ if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
+ break;
+ case 'd':
+ if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+ break;
+ case 'D':
+ if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
+ break;
+ case 'h':
+ if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
+ break;
+ case 'H':
+ if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
+ break;
+ case 'A':
+ if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
+ break;
+ case 'Z':
+ if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.SEMI_END_BUF);
+ break;
+ case 'z':
+ if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF);
+ break;
+ case 'G':
+ if (syntax.opEscCapitalGBeginAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_POSITION);
+ break;
+ case '`':
+ if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
+ break;
+ case '\'':
+ if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF);
+ break;
+ case 'x':
+ fetchTokenFor_xBrace();
+ break;
+ case 'u':
+ fetchTokenFor_uHex();
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ fetchTokenFor_digit();
+ break;
+ case '0':
+ fetchTokenFor_zero();
+ break;
+ case 'k':
+ if (Config.USE_NAMED_GROUP) fetchTokenFor_namedBackref();
+ break;
+ case 'g':
+ if (Config.USE_SUBEXP_CALL) fetchTokenFor_subexpCall();
+ break;
+ case 'Q':
+ if (syntax.op2EscCapitalQQuote()) token.type = TokenType.QUOTE_OPEN;
+ break;
+ case 'p':
+ case 'P':
+ fetchTokenFor_charProperty();
+ break;
+
+ default:
+ // Re-read the escaped character through the generic escape decoder.
+ unfetch();
+ int num = fetchEscapedValue();
+
+ /* set_raw: */
+ if (token.getC() != num) {
+ token.type = TokenType.CODE_POINT;
+ token.setCode(num);
+ } else { /* string */
+ // Decoding left the value unchanged: continue right after the escaped character.
+ p = token.backP + 1;
+ }
+ break;
+
+ } // switch (c)
+
+ } else {
+ token.setC(c);
+ token.escaped = false;
+
+ // With variable meta characters the syntax's table decides; the break leaves the loop.
+ if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) {
+ fetchTokenFor_metaChars();
+ break;
+ }
+
+ {
+ switch(c) {
+ case '.':
+ if (syntax.opDotAnyChar()) token.type = TokenType.ANYCHAR;
+ break;
+ case '*':
+ if (syntax.opAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
+ break;
+ case '+':
+ if (syntax.opPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
+ break;
+ case '?':
+ if (syntax.opQMarkZeroOne()) fetchTokenFor_repeat(0, 1);
+ break;
+ case '{':
+ if (syntax.opBraceInterval()) fetchTokenFor_openBrace();
+ break;
+ case '|':
+ if (syntax.opVBarAlt()) token.type = TokenType.ALT;
+ break;
+
+ case '(':
+ if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+ inc();
+ if (peekIs('#')) {
+ // Group comment: skip to the first unescaped ')' and fetch the next token.
+ fetch();
+ while (true) {
+ if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+ fetch();
+ if (c == syntax.metaCharTable.esc) {
+ if (left()) fetch();
+ } else {
+ if (c == ')') break;
+ }
+ }
+ continue start; // goto start
+ }
+ unfetch();
+ }
+
+ if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_OPEN;
+ break;
+ case ')':
+ if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE;
+ break;
+ case '^':
+ if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
+ break;
+ case '$':
+ if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
+ break;
+ case '[':
+ if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;
+ break;
+ case ']':
+ //if (*src > env->pattern) /* /].../ is allowed. */
+ //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
+ break;
+ case '#':
+ if (Option.isExtend(env.option)) {
+ // Extended mode: '#' starts a comment running to the end of the line.
+ while (left()) {
+ fetch();
+ if (EncodingHelper.isNewLine(c)) break;
+ }
+ continue start; // goto start
+ }
+ break;
+
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ // Extended mode: whitespace is skipped.
+ if (Option.isExtend(env.option)) continue start; // goto start
+ break;
+
+ default: // string
+ break;
+
+ } // switch
+ }
+ }
+
+ break;
+ } // while
+ return token.type;
+ }
+
+ private void greedyCheck() {
+ if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) {
+
+ fetch();
+
+ token.setRepeatGreedy(false);
+ token.setRepeatPossessive(false);
+ } else {
+ possessiveCheck();
+ }
+ }
+
+ private void possessiveCheck() {
+ if (left() && peekIs('+') &&
+ (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL ||
+ syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) {
+
+ fetch();
+
+ token.setRepeatGreedy(true);
+ token.setRepeatPossessive(true);
+ } else {
+ token.setRepeatGreedy(true);
+ token.setRepeatPossessive(false);
+ }
+ }
+
+    /**
+     * Reads a character property name up to the closing brace and maps it to a character
+     * type via {@code PosixBracket.propertyNameToCType}.
+     *
+     * @return the character type for the property name
+     * @throws JOniException if '(', ')', '{' or '|' appears before the closing brace
+     */
+    protected final int fetchCharPropertyToCType() {
+        mark();
+
+        while (left()) {
+            int last = p;
+            fetch();
+            if (c == '}') {
+                String name = new String(chars, _p, last - _p);
+                return PosixBracket.propertyNameToCType(name);
+            } else if (c == '(' || c == ')' || c == '{' || c == '|') {
+                String name = new String(chars, _p, last - _p);
+                // Literal substitution: the name comes from the pattern and may contain '$' or
+                // a backslash, which replaceAll would interpret as replacement syntax.
+                throw new JOniException(ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", name));
+            }
+        }
+        // No closing brace before the end of the pattern.
+        newInternalException(ERR_PARSER_BUG);
+        return 0; // not reached
+    }
+
+    /**
+     * Issues a syntax warning after substituting {@code c} for the {@code <%n>} placeholder.
+     *
+     * @param message the warning template
+     * @param c the character to insert
+     */
+    protected final void syntaxWarn(String message, char c) {
+        final String shown = Character.toString(c);
+        syntaxWarn(message.replace("<%n>", shown));
+    }
+
+    /**
+     * Records a syntax warning, followed by the pattern text between slashes, when
+     * {@code Config.USE_WARN} is set.
+     *
+     * @param message the warning text
+     */
+    protected final void syntaxWarn(String message) {
+        if (Config.USE_WARN) {
+            // String(char[], offset, count) takes a length, not an end index.
+            // NOTE(review): assumes getBegin()/getEnd() are the start and end indices given
+            // to the constructor - confirm against ScannerSupport.
+            final int begin = getBegin();
+            env.reg.warnings.warn(message + ": /" + new String(chars, begin, getEnd() - begin) + "/");
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,556 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
+
+public abstract class Matcher extends IntHolder {
+ protected final Regex regex;
+
+ protected final char[] chars;
+ protected final int str;
+ protected final int end;
+
+ protected int msaStart;
+ protected int msaOptions;
+ protected final Region msaRegion;
+ protected int msaBestLen;
+ protected int msaBestS;
+
+ protected int msaBegin;
+ protected int msaEnd;
+
+ public Matcher(Regex regex, char[] chars) { // convenience form: covers the whole array (0 .. chars.length)
+ this(regex, chars, 0, chars.length);
+ }
+
+ public Matcher(Regex regex, char[] chars, int p, int end) { // p becomes str (subject start), end is stored as given
+ this.regex = regex;
+
+ this.chars = chars;
+ this.str = p;
+ this.end = end;
+
+ this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); // left null when regex.numMem is 0
+ }
+
+ // main matching method; matchCheck() treats a result of -1 as "no match"
+ protected abstract int matchAt(int range, int sstart, int sprev);
+ // the two hooks below are only called when Config.USE_COMBINATION_EXPLOSION_CHECK is set
+ protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum);
+ protected abstract void stateCheckBuffClear();
+
+ public final Region getRegion() { // may return null: msaRegion is only allocated when regex.numMem != 0
+ return msaRegion;
+ }
+
+ public final Region getEagerRegion() { // never null: falls back to a new Region built from msaBegin/msaEnd
+ return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd);
+ }
+
+ public final int getBegin() { // current value of msaBegin
+ return msaBegin;
+ }
+
+ public final int getEnd() { // current value of msaEnd
+ return msaEnd;
+ }
+
+ protected final void msaInit(int option, int start) { // resets the per-call match state before match()/search() run
+ msaOptions = option;
+ msaStart = start;
+ if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; // -1 = no best match yet (mismatch() tests for >= 0)
+ }
+
+ public final int match(int at, int range, int option) { // runs matchAt once at position 'at'; no scanning
+     msaInit(option, at);
+
+     if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+         int offset = at - str; // offset relative to the subject start, as in search(); must not overwrite 'at'
+         stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction?
+     } // USE_COMBINATION_EXPLOSION_CHECK
+
+     int prev = EncodingHelper.prevCharHead(str, at);
+
+     if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+         return matchAt(end /*range*/, at, prev);
+     } else {
+         return matchAt(range /*range*/, at, prev);
+     }
+ }
+
+ int low, high; // these are the return values of forwardSearchRange/backwardSearchRange, read back by search()
+ private boolean forwardSearchRange(char[] chars, int str, int end, int s, int range, IntHolder lowPrev) { // true when a usable candidate below 'range' was found; low/high (and lowPrev.value, if non-null on the dMax == 0 path) are then set
+ int pprev = -1; // last rejected candidate position, -1 while there is none
+ int p = s;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("forward_search_range: "+
+ "str: " + str +
+ ", end: " + end +
+ ", s: " + s +
+ ", range: " + range);
+ }
+
+ if (regex.dMin > 0) {
+ p += regex.dMin; // start scanning dMin positions after s
+ }
+
+ retry:while (true) {
+ p = regex.searchAlgorithm.search(regex, chars, p, end, range);
+
+ if (p != -1 && p < range) { // a hit at or beyond range counts as failure
+ if (p - regex.dMin < s) { // hit lies less than dMin after s: step one position and search again
+ // retry_gate:
+ pprev = p;
+ p++;
+ continue retry;
+ }
+
+ if (regex.subAnchor != 0) { // additionally require the line anchor recorded for the searched substring
+ switch (regex.subAnchor) {
+ case AnchorType.BEGIN_LINE:
+ if (p != str) {
+ int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
+ if (!EncodingHelper.isNewLine(chars, prev, end)) {
+ // goto retry_gate;
+ pprev = p;
+ p++;
+ continue retry;
+ }
+ }
+ break;
+
+ case AnchorType.END_LINE:
+ if (p == end) {
+ if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
+ if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) {
+ // goto retry_gate;
+ pprev = p;
+ p++;
+ continue retry;
+ }
+ }
+ } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+ //if () break;
+ // goto retry_gate;
+ pprev = p;
+ p++;
+ continue retry;
+ }
+ break;
+ } // switch
+ }
+
+ if (regex.dMax == 0) {
+ low = p;
+ if (lowPrev != null) { // ??? // remove null checks
+ if (low > s) {
+ lowPrev.value = EncodingHelper.prevCharHead(s, p);
+ } else {
+ lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
+ }
+ }
+ } else {
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { // with an infinite dMax, low is left unchanged
+ low = p - regex.dMax;
+
+ if (low > s) {
+ low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
+ if (lowPrev != null && lowPrev.value == -1) {
+ lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
+ }
+ } else {
+ if (lowPrev != null) {
+ lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low);
+ }
+ }
+ }
+ }
+ /* no needs to adjust *high, *high is used as range check only */
+ high = p - regex.dMin;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("forward_search_range success: "+
+ "low: " + (low - str) +
+ ", high: " + (high - str) +
+ ", dmin: " + regex.dMin +
+ ", dmax: " + regex.dMax);
+ }
+
+ return true; /* success */
+ }
+
+ return false; /* fail */
+ } //while
+ }
+
+ // low, high: both are assigned here only when regex.dMax is finite
+ private boolean backwardSearchRange(char[] chars, int str, int end, int s, int range, int adjrange) { // true when a candidate was found scanning backward from s
+ range += regex.dMin;
+ int p = s;
+
+ retry:while (true) {
+ p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range);
+
+ if (p != -1) {
+ if (regex.subAnchor != 0) { // additionally require the line anchor recorded for the searched substring
+ switch (regex.subAnchor) {
+ case AnchorType.BEGIN_LINE:
+ if (p != str) {
+ int prev = EncodingHelper.prevCharHead(str, p);
+ if (!EncodingHelper.isNewLine(chars, prev, end)) {
+ p = prev; // not at a line start: continue the backward search from the previous position
+ continue retry;
+ }
+ }
+ break;
+
+ case AnchorType.END_LINE:
+ if (p == end) {
+ if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
+ int prev = EncodingHelper.prevCharHead(adjrange, p);
+ if (prev == -1) return false;
+ if (EncodingHelper.isNewLine(chars, prev, end)) {
+ p = prev;
+ continue retry;
+ }
+ }
+ } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+ p = EncodingHelper.prevCharHead(adjrange, p);
+ if (p == -1) return false;
+ continue retry;
+ }
+ break;
+ } // switch
+ }
+
+ /* no needs to adjust *high, *high is used as range check only */
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ low = p - regex.dMax;
+ high = p - regex.dMin;
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("backward_search_range: "+
+ "low: " + (low - str) +
+ ", high: " + (high - str));
+ }
+
+ return true;
+ }
+
+ if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
+ return false;
+ } // while
+ }
+
+ // MATCH_AND_RETURN_CHECK
+ /**
+  * Attempts a match at {@code s} and tells the caller whether to stop searching.
+  *
+  * The upper bound handed to matchAt is {@code upperRange} when
+  * Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is set, otherwise {@code end}.
+  * With Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE set, a successful match on a
+  * find-longest regex still yields false, so the caller keeps trying other positions.
+  *
+  * @param upperRange upper bound used in the range-restricted configuration
+  * @param s position to match at
+  * @param prev value passed through to matchAt as its third argument
+  * @return true if matchAt succeeded and the search should end at s
+  */
+ private boolean matchCheck(int upperRange, int s, int prev) {
+     final int limit = Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE ? upperRange : end;
+     // -1 is matchAt's "no match" result
+     if (matchAt(limit, s, prev) == -1) {
+         return false;
+     }
+     if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
+         return !isFindLongest(regex.options);
+     }
+     return true;
+ }
+
+ public final int search(int start, int range, int option) { // returns the match position relative to str, or -1; range > start searches forward, otherwise backward
+ int s, prev;
+ int origStart = start;
+ int origRange = range;
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search (entry point): "+
+ "str: " + str +
+ ", end: " + (end - str) +
+ ", start: " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ if (start > end || start < str) return -1; // start must lie within [str, end]
+
+ /* anchor optimize: resume search range */
+ if (regex.anchor != 0 && str < end) {
+ int minSemiEnd, maxSemiEnd;
+
+ if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
+ /* search start-position only */
+ // !begin_position:!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) return -1; // mismatch_no_msa;
+ range = str + 1;
+ } else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ } else {
+ return -1; // mismatch_no_msa;
+ }
+ }
+ } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
+ minSemiEnd = maxSemiEnd = end;
+ // !end_buf:! NOTE(review): endBuf adjusts its own copies of start/range; only its boolean result is used here
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
+ int preEnd = EncodingHelper.stepBack(str, end, 1);
+ maxSemiEnd = end;
+ if (EncodingHelper.isNewLine(chars, preEnd, end)) {
+ minSemiEnd = preEnd;
+ if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
+ preEnd = EncodingHelper.stepBack(str, preEnd, 1);
+ if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
+ minSemiEnd = preEnd;
+ }
+ }
+ if (minSemiEnd > str && start <= minSemiEnd) {
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else {
+ minSemiEnd = end;
+ // !goto end_buf;!
+ if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
+ }
+ } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
+ // goto !begin_position;!
+ if (range > start) {
+ range = start + 1;
+ } else {
+ range = start;
+ }
+ }
+
+ } else if (str == end) { /* empty string */
+ // empty address ?
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search: empty string.");
+ }
+
+ if (regex.thresholdLength == 0) {
+ s = start = str;
+ prev = -1;
+ msaInit(option, start);
+
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear();
+
+ if (matchCheck(end, s, prev)) return match(s);
+ return mismatch();
+ }
+ return -1; // goto mismatch_no_msa;
+ }
+
+ if (Config.DEBUG_SEARCH) {
+ Config.log.println("onig_search(apply anchor): " +
+ "end: " + (end - str) +
+ ", start " + (start - str) +
+ ", range " + (range - str));
+ }
+
+ msaInit(option, origStart); // note: initialised with the caller's start, not the anchor-adjusted one
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ int offset = Math.min(start, range) - str;
+ stateCheckBuffInit(end - str, offset, regex.numCombExpCheck);
+ }
+
+ s = start;
+ if (range > start) { /* forward search */
+ if (s > str) {
+ prev = EncodingHelper.prevCharHead(str, s);
+ } else {
+ prev = 0; // -1
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) { // use the search algorithm to skip positions that cannot match
+ int schRange = range;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schRange = end;
+ } else {
+ schRange += regex.dMax;
+ if (schRange > end) schRange = end;
+ }
+ }
+ if ((end - start) < regex.thresholdLength) return mismatch();
+
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
+ do {
+ if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
+ if (s < low) {
+ s = low;
+ prev = value; // 'value' is this matcher's IntHolder field, filled in as lowPrev
+ }
+ while (s <= high) {
+ if (matchCheck(origRange, s, prev)) return match(s); // ???
+ prev = s;
+ s++;
+ }
+ } while (s < range);
+ return mismatch();
+
+ } else { /* check only. */
+ if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch();
+
+ if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
+ do {
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s++;
+ } while (s < range);
+ return mismatch();
+ }
+
+ }
+ }
+
+ do { // plain scan: try every position in [s, range)
+ if (matchCheck(origRange, s, prev)) return match(s);
+ prev = s;
+ s++;
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ if (matchCheck(origRange, s, prev)) return match(s);
+ }
+ } else { /* backward search */
+ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
+ if (origStart < end) {
+ origStart++; // /* is upper range */
+ }
+ }
+
+ if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
+ int adjrange;
+ if (range < end) {
+ adjrange = range;
+ } else {
+ adjrange = end;
+ }
+ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
+ do {
+ int schStart = s + regex.dMax;
+ if (schStart > end) schStart = end;
+ if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high
+ if (s > high) s = high;
+ while (s != -1 && s >= low) {
+ prev = EncodingHelper.prevCharHead(str, s);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ }
+ } while (s >= range);
+ return mismatch();
+ } else { /* check only. */
+ if ((end - range) < regex.thresholdLength) return mismatch();
+
+ int schStart = s;
+ if (regex.dMax != 0) {
+ if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
+ schStart = end;
+ } else {
+ schStart += regex.dMax;
+ if (schStart > end) {
+ schStart = end;
+ }
+ }
+ }
+ if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch();
+ }
+ }
+
+ do { // plain backward scan: step s down through prevCharHead until it drops below range
+ prev = EncodingHelper.prevCharHead(str, s);
+ if (matchCheck(origStart, s, prev)) return match(s);
+ s = prev;
+ } while (s >= range);
+
+ }
+ return mismatch();
+ }
+
+ private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) { // true means "cannot match": every caller in search() then returns -1
+ if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
+ // NOTE(review): start and range are by-value parameters, so the adjustments below never reach search(); only the boolean result has an effect
+ if (range > start) {
+ if ((minSemiEnd - start) > regex.anchorDmax) {
+ start = minSemiEnd - regex.anchorDmax;
+ if (start >= end) {
+ /* match with empty at end */
+ start = EncodingHelper.prevCharHead(str, end);
+ }
+ }
+ if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
+ range = maxSemiEnd - regex.anchorDmin + 1;
+ }
+ if (start >= range) return true; // mismatch_no_msa;
+ } else {
+ if ((minSemiEnd - range) > regex.anchorDmax) {
+ range = minSemiEnd - regex.anchorDmax;
+ }
+ if ((maxSemiEnd - start) < regex.anchorDmin) {
+ start = maxSemiEnd - regex.anchorDmin;
+ }
+ if (range > start) return true; // mismatch_no_msa;
+ }
+ return false;
+ }
+
+ private int match(int s) { // converts an absolute match position into one relative to str
+ return s - str; // sstart ???
+ }
+
+ private int mismatch() {
+     // In find-longest-over-range mode msaInit resets msaBestLen to -1, so a
+     // non-negative value here means a best candidate was recorded during this
+     // search (presumably by matchAt - confirm); that candidate is the result.
+     if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE && msaBestLen >= 0) {
+         return match(msaBestS);
+     }
+     // falls through finish:
+     return -1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/MatcherFactory.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public abstract class MatcherFactory { // factory for Matcher instances
+ public abstract Matcher create(Regex regex, char[] chars, int p, int end); // arguments mirror the four-argument Matcher constructor
+
+ static final MatcherFactory DEFAULT = new MatcherFactory() { // default factory: always creates a ByteCodeMachine
+ @Override
+ public Matcher create(Regex regex, char[] chars, int p, int end) {
+ return new ByteCodeMachine(regex, chars, p, end);
+ }
+ };
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/MinMaxLen.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,139 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+final class MinMaxLen {
+    int min; /* min byte length */
+    int max; /* max byte length */
+
+    /** Sentinel distance meaning "unbounded". */
+    static final int INFINITE_DISTANCE = 0x7FFFFFFF;
+
+    /* 1000 / (min-max-dist + 1) */
+    private static final short[] DIST_VALUES = {
+        1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
+        91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
+        48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
+        32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
+        24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
+        20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
+        16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
+        14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+        12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+        11, 11, 11, 11, 11, 10, 10, 10, 10, 10
+    };
+
+    MinMaxLen() {}
+
+    MinMaxLen(int min, int max) {
+        this.min = min;
+        this.max = max;
+    }
+
+    /** Weight of the max - min spread: 0 when max is unbounded, a table value for small spreads, else 1. */
+    int distanceValue() {
+        if (max == INFINITE_DISTANCE) {
+            return 0;
+        }
+        final int spread = max - min;
+        /* return dist_vals[d] * 16 / (mm->min + 12); */
+        if (spread >= DIST_VALUES.length) {
+            return 1;
+        }
+        return DIST_VALUES[spread];
+    }
+
+    /** Compares v1 (this) with v2 (other), each scaled by its side's distanceValue(); ties are decided by min. */
+    int compareDistanceValue(MinMaxLen other, int v1, int v2) {
+        if (v2 <= 0) {
+            return -1;
+        }
+        if (v1 <= 0) {
+            return 1;
+        }
+        final int mine = v1 * distanceValue();
+        final int theirs = v2 * other.distanceValue();
+        if (theirs != mine) {
+            return theirs > mine ? 1 : -1;
+        }
+        if (other.min != min) {
+            return other.min < min ? 1 : -1;
+        }
+        return 0;
+    }
+
+    boolean equal(MinMaxLen other) {
+        return max == other.max && min == other.min;
+    }
+
+    void set(int min, int max) {
+        this.min = min;
+        this.max = max;
+    }
+
+    void clear() {
+        min = 0;
+        max = 0;
+    }
+
+    void copy(MinMaxLen other) {
+        set(other.min, other.max);
+    }
+
+    /** Adds other's bounds to this one's, bound by bound, using distanceAdd. */
+    void add(MinMaxLen other) {
+        min = distanceAdd(min, other.min);
+        max = distanceAdd(max, other.max);
+    }
+
+    void addLength(int len) {
+        min = distanceAdd(min, len);
+        max = distanceAdd(max, len);
+    }
+
+    /** Widens this range so that it also covers other's range. */
+    void altMerge(MinMaxLen other) {
+        min = Math.min(min, other.min);
+        max = Math.max(max, other.max);
+    }
+
+    /** Adds two distances; the result sticks at INFINITE_DISTANCE instead of overflowing. */
+    static int distanceAdd(int d1, int d2) {
+        if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) {
+            return INFINITE_DISTANCE;
+        }
+        return d1 <= INFINITE_DISTANCE - d2 ? d1 + d2 : INFINITE_DISTANCE;
+    }
+
+    /** Multiplies distance d by m; returns INFINITE_DISTANCE once d reaches INFINITE_DISTANCE / m. */
+    static int distanceMultiply(int d, int m) {
+        if (m == 0) {
+            return 0;
+        }
+        return d < INFINITE_DISTANCE / m ? d * m : INFINITE_DISTANCE;
+    }
+
+    /** Formats the pair as "a-b", writing finite values as "(n)" and INFINITE_DISTANCE as "inf". */
+    static String distanceRangeToString(int a, int b) {
+        final String lo = a == INFINITE_DISTANCE ? "inf" : "(" + a + ")";
+        final String hi = b == INFINITE_DISTANCE ? "inf" : "(" + b + ")";
+        return lo + "-" + hi;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/NameEntry.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,97 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public final class NameEntry {
+    /** Initial capacity of the backRefs array. */
+    static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8;
+
+    public final char[] name;
+    public final int nameP;
+    public final int nameEnd;
+
+    // A single back reference lives in backRef1; from the second one on, all live in backRefs.
+    int backNum;    // how many back references have been added
+    int backRef1;   // the first back reference added
+    int backRefs[]; // allocated on the second add
+
+    /** Creates an entry named by {@code chars} between p and end; the array is stored, not copied. */
+    public NameEntry(char[] chars, int p, int end) {
+        name = chars;
+        nameP = p;
+        nameEnd = end;
+    }
+
+    /** Returns a newly allocated array with the back references in the order they were added. */
+    public int[] getBackRefs() {
+        if (backNum == 0) {
+            return new int[]{};
+        }
+        if (backNum == 1) {
+            return new int[]{backRef1};
+        }
+        final int[] copy = new int[backNum];
+        System.arraycopy(backRefs, 0, copy, 0, backNum);
+        return copy;
+    }
+
+    /** Records one more back reference, doubling the array whenever it is full. */
+    public void addBackref(int backRef) {
+        backNum++;
+        if (backNum == 1) {
+            backRef1 = backRef;
+            return;
+        }
+        if (backNum == 2) {
+            // second reference: move from the single slot to the array
+            backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM];
+            backRefs[0] = backRef1;
+        } else if (backNum > backRefs.length) {
+            // array is full: double it
+            final int[] grown = new int[backRefs.length << 1];
+            System.arraycopy(backRefs, 0, grown, 0, backRefs.length);
+            backRefs = grown;
+        }
+        backRefs[backNum - 1] = backRef;
+    }
+
+    /** Renders the name, a space, then "-" or the comma-separated back references. */
+    @Override
+    public String toString() {
+        final StringBuilder buff = new StringBuilder(new String(name, nameP, nameEnd - nameP) + " ");
+        switch (backNum) {
+        case 0:
+            buff.append("-");
+            break;
+        case 1:
+            buff.append(backRef1);
+            break;
+        default:
+            for (int i = 0; i < backNum; i++) {
+                if (i > 0) {
+                    buff.append(", ");
+                }
+                buff.append(backRefs[i]);
+            }
+        }
+        return buff.toString();
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/NativeMachine.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public abstract class NativeMachine extends Matcher { // adds no members of its own; only forwards construction to Matcher
+
+ protected NativeMachine(Regex regex, char[] chars, int p, int end) {
+ super(regex, chars, p, end);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/NodeOptInfo.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,125 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public final class NodeOptInfo { // bundle of optimizer facts: length bounds, anchors, exact-string and map candidates
+ final MinMaxLen length = new MinMaxLen();
+ final OptAnchorInfo anchor = new OptAnchorInfo();
+ final OptExactInfo exb = new OptExactInfo(); /* boundary */
+ final OptExactInfo exm = new OptExactInfo(); /* middle */
+ final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) */
+ final OptMapInfo map = new OptMapInfo(); /* boundary */
+
+ public void setBoundNode(MinMaxLen mmd) { // copies mmd into exb, expr and map; same body as setBound(MinMaxLen)
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+ public void clear() { // clears every component
+ length.clear();
+ anchor.clear();
+ exb.clear();
+ exm.clear();
+ expr.clear();
+ map.clear();
+ }
+
+ public void copy(NodeOptInfo other) { // component-wise copy from other
+ length.copy(other.length);
+ anchor.copy(other.anchor);
+ exb.copy(other.exb);
+ exm.copy(other.exm);
+ expr.copy(other.expr);
+ map.copy(other.map);
+ }
+
+ public void concatLeftNode(NodeOptInfo other) { // appends other's info to this one; note that it also modifies other (other.exb, other.map.anchor)
+ OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ?
+ tanchor.concat(anchor, other.anchor, length.max, other.length.max);
+ anchor.copy(tanchor);
+
+ if (other.exb.length > 0 && length.max == 0) { // this side has max length 0: fold the already-updated anchor into other's boundary string
+ tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max);
+ other.exb.anchor.copy(tanchor);
+ }
+
+ if (other.map.value > 0 && length.max == 0) {
+ if (other.map.mmd.max == 0) {
+ other.map.anchor.leftAnchor |= anchor.leftAnchor;
+ }
+ }
+
+ boolean exbReach = exb.reachEnd; // remember the flags before they are reset below
+ boolean exmReach = exm.reachEnd;
+
+ if (other.length.max != 0) {
+ exb.reachEnd = exm.reachEnd = false;
+ }
+
+ if (other.exb.length > 0) { // extend whichever of exb/exm had reachEnd set with other's boundary string
+ if (exbReach) {
+ exb.concat(other.exb);
+ other.exb.clear();
+ } else if (exmReach) {
+ exm.concat(other.exb);
+ other.exb.clear();
+ }
+ }
+
+ exm.select(other.exb);
+ exm.select(other.exm);
+
+ if (expr.length > 0) {
+ if (other.length.max > 0) {
+ // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison)
+ int otherLengthMax = other.length.max;
+ if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1;
+ if (expr.length > otherLengthMax) expr.length = otherLengthMax;
+ if (expr.mmd.max == 0) {
+ exb.select(expr);
+ } else {
+ exm.select(expr);
+ }
+ }
+ } else if (other.expr.length > 0) {
+ expr.copy(other.expr);
+ }
+
+ map.select(other.map);
+ length.add(other.length);
+ }
+
+ public void altMerge(NodeOptInfo other, OptEnvironment env) { // component-wise altMerge with other; env is passed only to the exact-string merges
+ anchor.altMerge(other.anchor);
+ exb.altMerge(other.exb, env);
+ exm.altMerge(other.exm, env);
+ expr.altMerge(other.expr, env);
+ map.altMerge(other.map);
+ length.altMerge(other.length);
+ }
+
+ public void setBound(MinMaxLen mmd) { // copies mmd into exb, expr and map; same body as setBoundNode(MinMaxLen)
+ exb.mmd.copy(mmd);
+ expr.mmd.copy(mmd);
+ map.mmd.copy(mmd);
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/OptAnchorInfo.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+
+final class OptAnchorInfo implements AnchorType {
+    int leftAnchor;  // AnchorType bits recorded for the left side
+    int rightAnchor; // AnchorType bits recorded for the right side
+
+    void clear() {
+        leftAnchor = rightAnchor = 0;
+    }
+
+    void copy(OptAnchorInfo other) {
+        leftAnchor = other.leftAnchor;
+        rightAnchor = other.rightAnchor;
+    }
+
+    void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) {
+        leftAnchor = left.leftAnchor;
+        if (leftLength == 0) leftAnchor |= right.leftAnchor; // a zero-length left side lets right's left anchors through
+
+        rightAnchor = right.rightAnchor;
+        if (rightLength == 0) rightAnchor |= left.rightAnchor; // and symmetrically for a zero-length right side
+    }
+
+    /** True if any bit of {@code anchor} is set on either side. */
+    boolean isSet(int anchor) {
+        return ((leftAnchor | rightAnchor) & anchor) != 0;
+    }
+
+    void add(int anchor) { // sets the bit on the side chosen by isLeftAnchor
+        if (isLeftAnchor(anchor)) {
+            leftAnchor |= anchor;
+        } else {
+            rightAnchor |= anchor;
+        }
+    }
+
+    void remove(int anchor) { // clears the bit on the side chosen by isLeftAnchor
+        if (isLeftAnchor(anchor)) {
+            leftAnchor &= ~anchor;
+        } else {
+            rightAnchor &= ~anchor;
+        }
+    }
+
+    void altMerge(OptAnchorInfo other) { // keeps only the anchors present in both
+        leftAnchor &= other.leftAnchor;
+        rightAnchor &= other.rightAnchor;
+    }
+
+    static boolean isLeftAnchor(int anchor) { // make a mask for it ?
+        return !(anchor == END_BUF || anchor == SEMI_END_BUF ||
+                 anchor == END_LINE || anchor == PREC_READ ||
+                 anchor == PREC_READ_NOT);
+    }
+
+    static String anchorToString(int anchor) { // debug rendering of an anchor bit set, e.g. "[begin-buf end-line ]"
+        StringBuilder s = new StringBuilder("[");
+        // The ANYCHAR_STAR_ML label is spelled "-ml" to match the constant (it was printed as "-pl").
+        if ((anchor & AnchorType.BEGIN_BUF) != 0) s.append("begin-buf ");
+        if ((anchor & AnchorType.BEGIN_LINE) != 0) s.append("begin-line ");
+        if ((anchor & AnchorType.BEGIN_POSITION) != 0) s.append("begin-pos ");
+        if ((anchor & AnchorType.END_BUF) != 0) s.append("end-buf ");
+        if ((anchor & AnchorType.SEMI_END_BUF) != 0) s.append("semi-end-buf ");
+        if ((anchor & AnchorType.END_LINE) != 0) s.append("end-line ");
+        if ((anchor & AnchorType.ANYCHAR_STAR) != 0) s.append("anychar-star ");
+        if ((anchor & AnchorType.ANYCHAR_STAR_ML) != 0) s.append("anychar-star-ml ");
+        s.append("]");
+
+        return s.toString();
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/OptEnvironment.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+// remove this one in future and pass mmd directly
+/**
+ * Mutable bundle of values handed around together: a {@link MinMaxLen},
+ * the option flags, the case-fold flags and the {@link ScanEnvironment}.
+ */
+final class OptEnvironment {
+    final MinMaxLen mmd = new MinMaxLen();
+    int options;
+    int caseFoldFlag;
+    ScanEnvironment scanEnv;
+
+    /**
+     * Makes this environment equal to {@code other}: the length info is copied
+     * into this object's own {@code mmd}, the remaining fields are assigned
+     * (so {@code scanEnv} ends up shared by reference).
+     *
+     * @param other the environment to copy from
+     */
+    void copy(OptEnvironment other) {
+        this.mmd.copy(other.mmd);
+        this.options = other.options;
+        this.caseFoldFlag = other.caseFoldFlag;
+        this.scanEnv = other.scanEnv;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/OptExactInfo.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,153 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * A literal character sequence of at most {@link #OPT_EXACT_MAXLEN} characters,
+ * kept together with its length info ({@code mmd}), the anchors around it and
+ * the {@code reachEnd} / {@code ignoreCase} flags.
+ */
+final class OptExactInfo {
+    /** Capacity of {@link #chars}; longer literals are truncated. */
+    static final int OPT_EXACT_MAXLEN = 24;
+
+    final MinMaxLen mmd = new MinMaxLen();
+    final OptAnchorInfo anchor = new OptAnchorInfo();
+
+    /** Cleared whenever a concatenation or merge had to drop trailing characters. */
+    boolean reachEnd;
+    boolean ignoreCase;
+
+    final char chars[] = new char[OPT_EXACT_MAXLEN];
+    /** Number of valid entries at the start of {@link #chars}. */
+    int length;
+
+    /** @return {@code true} once {@link #OPT_EXACT_MAXLEN} characters are stored */
+    boolean isFull() {
+        return length >= OPT_EXACT_MAXLEN;
+    }
+
+    /** Resets length info, anchors, flags and length; the char buffer content is left as is. */
+    void clear() {
+        mmd.clear();
+        anchor.clear();
+
+        reachEnd = false;
+        ignoreCase = false;
+        length = 0;
+    }
+
+    /**
+     * Makes this object a copy of {@code other}, including the whole char buffer.
+     *
+     * @param other the info to copy from
+     */
+    void copy(OptExactInfo other) {
+        mmd.copy(other.mmd);
+        anchor.copy(other.anchor);
+        reachEnd = other.reachEnd;
+        ignoreCase = other.ignoreCase;
+        length = other.length;
+
+        System.arraycopy(other.chars, 0, chars, 0, OPT_EXACT_MAXLEN);
+    }
+
+    /**
+     * Appends the characters of {@code other} to this literal, truncating at
+     * {@link #OPT_EXACT_MAXLEN}. Nothing is appended when a case-insensitive
+     * literal would be added to a case-sensitive one that is at least as long.
+     *
+     * @param other the literal to append
+     */
+    void concat(OptExactInfo other) {
+        if (!ignoreCase && other.ignoreCase) {
+            if (length >= other.length) return;  /* avoid */
+            ignoreCase = true;
+        }
+
+        int p = 0;  // add->s;
+        final int end = other.length;
+
+        int i = length;
+        while (p < end && i < OPT_EXACT_MAXLEN) {
+            chars[i++] = other.chars[p++];
+        }
+
+        length = i;
+        // false when other's characters did not all fit
+        reachEnd = (p == end) && other.reachEnd;
+
+        OptAnchorInfo tmp = new OptAnchorInfo();
+        tmp.concat(anchor, other.anchor, 1, 1);
+        // Test the flag of the combined literal, as altMerge() does: a truncated
+        // literal no longer ends where other's right anchors apply.
+        if (!reachEnd) tmp.rightAnchor = 0;
+        anchor.copy(tmp);
+    }
+
+    /**
+     * Appends {@code lchars[p..end)} to this literal, silently stopping once
+     * {@link #OPT_EXACT_MAXLEN} characters are stored.
+     *
+     * @param lchars source characters
+     * @param p      index of the first character to append
+     * @param end    index one past the last character to append
+     * @param raw    not used by this method
+     */
+    // ?? raw is not used here
+    void concatStr(char[] lchars, int p, int end, boolean raw) {
+        int i = length;
+        while (p < end && i < OPT_EXACT_MAXLEN) {
+            chars[i++] = lchars[p++];
+        }
+
+        length = i;
+    }
+
+    /**
+     * Merges with the literal of an alternative branch, keeping only the common
+     * prefix. The result is cleared when either literal is empty or when the
+     * two length infos differ.
+     *
+     * @param other literal of the other alternative
+     * @param env   not used by this method
+     */
+    void altMerge(OptExactInfo other, OptEnvironment env) {
+        if (other.length == 0 || length == 0) {
+            clear();
+            return;
+        }
+
+        if (!mmd.equal(other.mmd)) {
+            clear();
+            return;
+        }
+
+        // length of the common prefix
+        int i;
+        for (i = 0; i < length && i < other.length; i++) {
+            if (chars[i] != other.chars[i]) break;
+        }
+
+        if (!other.reachEnd || i < other.length || i < length) reachEnd = false;
+
+        length = i;
+        ignoreCase |= other.ignoreCase;
+
+        anchor.altMerge(other.anchor);
+
+        if (!reachEnd) anchor.rightAnchor = 0;
+    }
+
+
+    /**
+     * Keeps the better of this literal and {@code alt}: {@code alt} is copied
+     * over this object when this one is empty or when
+     * {@code mmd.compareDistanceValue} ranks the weighted values in its favour.
+     *
+     * @param alt the competing literal
+     */
+    void select(OptExactInfo alt) {
+        int v1 = length;
+        int v2 = alt.length;
+
+        if (v2 == 0) {
+            return;
+        } else if (v1 == 0) {
+            copy(alt);
+            return;
+        } else if (v1 <= 2 && v2 <= 2) {
+            /* ByteValTable[x] is big value --> low price */
+            // NOTE(review): v2 is derived from this literal and v1 from alt; per the
+            // comment above the crossing looks intentional - confirm.
+            v2 = OptMapInfo.positionValue(chars[0] & 0xff);
+            v1 = OptMapInfo.positionValue(alt.chars[0] & 0xff);
+
+            if (length > 1) v1 += 5;
+            if (alt.length > 1) v2 += 5;
+        }
+
+        if (!ignoreCase) v1 *= 2;
+        if (!alt.ignoreCase) v2 *= 2;
+
+        if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt);
+    }
+
+    // comp_opt_exact_or_map_info
+    private static final int COMP_EM_BASE = 20;
+
+    /**
+     * Compares this literal with a character map.
+     *
+     * @param m the map info to compare against
+     * @return {@code -1} when {@code m.value} is not positive, otherwise the result
+     *         of {@code mmd.compareDistanceValue} on the two weighted values
+     */
+    int compare(OptMapInfo m) {
+        if (m.value <= 0) return -1;
+
+        int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2);
+        int vm = COMP_EM_BASE * 5 * 2 / m.value;
+
+        return mmd.compareDistanceValue(m.mmd, ve, vm);
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/OptMapInfo.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,120 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * A table flagging character values (indexed by {@code c & 0xff}) together
+ * with a weighted {@code value} summed from {@link #positionValue(int)},
+ * length info and anchors.
+ */
+final class OptMapInfo {
+
+    final MinMaxLen mmd = new MinMaxLen();              /* info position */
+    final OptAnchorInfo anchor = new OptAnchorInfo();
+
+    int value;                                          /* weighted value */
+    final byte[] map = new byte[Config.CHAR_TABLE_SIZE];
+
+    /** Resets length info, anchors and weight, and zeroes every map entry. */
+    void clear() {
+        mmd.clear();
+        anchor.clear();
+        value = 0;
+        int idx = 0;
+        while (idx < map.length) {
+            map[idx++] = 0;
+        }
+    }
+
+    /**
+     * Makes this object a copy of {@code other}, map entries included.
+     *
+     * @param other the map info to copy from
+     */
+    void copy(OptMapInfo other) {
+        mmd.copy(other.mmd);
+        anchor.copy(other.anchor);
+        value = other.value;
+        System.arraycopy(other.map, 0, map, 0, other.map.length);
+    }
+
+    /**
+     * Flags the low eight bits of {@code c} in the map; the weight grows only
+     * the first time a given entry is flagged.
+     *
+     * @param c character value to add
+     */
+    void addChar(int c) {
+        final int slot = c & 0xff;
+        if (map[slot] != 0) {
+            return;
+        }
+        map[slot] = 1;
+        value += positionValue(slot);
+    }
+
+    /**
+     * Adds {@code chars[p]} and every code returned for it by
+     * {@code EncodingHelper.caseFoldCodesByString}, with the multi-char fold
+     * bit removed from the flags.
+     *
+     * @param chars        source characters
+     * @param p            index of the character to add
+     * @param end          not used by this method
+     * @param caseFoldFlag case-fold flags
+     */
+    void addCharAmb(char[] chars, int p, int end, int caseFoldFlag) {
+        addChar(chars[p]);
+
+        final int singleCharFlags = caseFoldFlag & ~Config.INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
+        for (char folded : EncodingHelper.caseFoldCodesByString(singleCharFlags, chars[p])) {
+            addChar(folded);
+        }
+    }
+
+    // select_opt_map_info
+    private static final int z = 1<<15; /* 32768: something big value */
+
+    /**
+     * Keeps the better of this map and {@code alt}: {@code alt} is copied over
+     * this object when this one has no weight, or when
+     * {@code mmd.compareDistanceValue} ranks {@code z / value} against
+     * {@code z / alt.value} in its favour.
+     *
+     * @param alt the competing map info
+     */
+    void select(OptMapInfo alt) {
+        if (alt.value == 0) {
+            return;
+        }
+        if (value == 0) {
+            copy(alt);
+            return;
+        }
+
+        final int mine = z / value;
+        final int theirs = z / alt.value;
+
+        if (mmd.compareDistanceValue(alt.mmd, mine, theirs) > 0) {
+            copy(alt);
+        }
+    }
+
+    // alt_merge_opt_map_info
+    /**
+     * Merges with the map of an alternative branch: the flagged entries are
+     * united and the weight is recomputed from scratch. The map is cleared
+     * when {@code other} has no weight or a larger {@code mmd.max}.
+     *
+     * @param other map info of the other alternative
+     */
+    void altMerge(OptMapInfo other) {
+        /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+        if (value == 0) {
+            return;
+        }
+        if (other.value == 0 || mmd.max < other.mmd.max) {
+            clear();
+            return;
+        }
+
+        mmd.altMerge(other.mmd);
+
+        int weight = 0;
+        for (int slot = 0; slot < Config.CHAR_TABLE_SIZE; slot++) {
+            if (other.map[slot] != 0) {
+                map[slot] = 1;
+            }
+            if (map[slot] != 0) {
+                weight += positionValue(slot);
+            }
+        }
+
+        value = weight;
+        anchor.altMerge(other.anchor);
+    }
+
+    /** Weights for character values 0..127, used by {@link #positionValue(int)}. */
+    static final short[] ByteValTable = {
+        5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
+        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+       12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
+        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
+        5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
+        5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
+    };
+
+    // map_position_value
+    /**
+     * Looks up the weight of a character value.
+     *
+     * @param i character value; must not be negative
+     * @return the table entry for {@code i}, or 4 for values beyond the table
+     */
+    static int positionValue(int i) {
+        return i < ByteValTable.length ? ByteValTable[i] : 4; /* Take it easy. */
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Option.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,122 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * Bit flags controlling compilation and search, plus helpers to test them
+ * and to print a flag set.
+ */
+public class Option {
+
+    /* options */
+    public static final int NONE                = 0;
+    public static final int IGNORECASE          = 1 << 0;
+    public static final int EXTEND              = 1 << 1;
+    public static final int MULTILINE           = 1 << 2;
+    public static final int SINGLELINE          = 1 << 3;
+    public static final int FIND_LONGEST        = 1 << 4;
+    public static final int FIND_NOT_EMPTY      = 1 << 5;
+    public static final int NEGATE_SINGLELINE   = 1 << 6;
+    public static final int DONT_CAPTURE_GROUP  = 1 << 7;
+    public static final int CAPTURE_GROUP       = 1 << 8;
+
+    /* options (search time) */
+    public static final int NOTBOL              = 1 << 9;
+    public static final int NOTEOL              = 1 << 10;
+    public static final int POSIX_REGION        = 1 << 11;
+    public static final int MAXBIT              = 1 << 12; /* limit */
+
+    public static final int DEFAULT             = NONE;
+
+    /** Printable names of bits 0..11, indexed by bit position. */
+    private static final String[] BIT_NAMES = {
+        "IGNORECASE", "EXTEND", "MULTILINE", "SINGLELINE",
+        "FIND_LONGEST", "FIND_NOT_EMPTY", "NEGATE_SINGLELINE",
+        "DONT_CAPTURE_GROUP", "CAPTURE_GROUP",
+        "NOTBOL", "NOTEOL", "POSIX_REGION"
+    };
+
+    /**
+     * Lists the names of the flags set in {@code option}, in ascending bit
+     * order, each followed by one space. {@link #MAXBIT} is not reported.
+     *
+     * @param option flag set to describe
+     * @return the names, or an empty string when no reported flag is set
+     */
+    public static String toString(int option) {
+        final StringBuilder text = new StringBuilder();
+        for (int bit = 0; bit < BIT_NAMES.length; bit++) {
+            if (anySet(option, 1 << bit)) {
+                text.append(BIT_NAMES[bit]).append(' ');
+            }
+        }
+        return text.toString();
+    }
+
+    /** @return {@code true} if {@code option} shares at least one bit with {@code mask} */
+    private static boolean anySet(int option, int mask) {
+        return (option & mask) != 0;
+    }
+
+    public static boolean isIgnoreCase(int option) {
+        return anySet(option, IGNORECASE);
+    }
+
+    public static boolean isExtend(int option) {
+        return anySet(option, EXTEND);
+    }
+
+    public static boolean isSingleline(int option) {
+        return anySet(option, SINGLELINE);
+    }
+
+    public static boolean isMultiline(int option) {
+        return anySet(option, MULTILINE);
+    }
+
+    public static boolean isFindLongest(int option) {
+        return anySet(option, FIND_LONGEST);
+    }
+
+    public static boolean isFindNotEmpty(int option) {
+        return anySet(option, FIND_NOT_EMPTY);
+    }
+
+    /** @return {@code true} if FIND_LONGEST or FIND_NOT_EMPTY is set */
+    public static boolean isFindCondition(int option) {
+        return anySet(option, FIND_LONGEST | FIND_NOT_EMPTY);
+    }
+
+    public static boolean isNegateSingleline(int option) {
+        return anySet(option, NEGATE_SINGLELINE);
+    }
+
+    public static boolean isDontCaptureGroup(int option) {
+        return anySet(option, DONT_CAPTURE_GROUP);
+    }
+
+    public static boolean isCaptureGroup(int option) {
+        return anySet(option, CAPTURE_GROUP);
+    }
+
+    public static boolean isNotBol(int option) {
+        return anySet(option, NOTBOL);
+    }
+
+    public static boolean isNotEol(int option) {
+        return anySet(option, NOTEOL);
+    }
+
+    public static boolean isPosixRegion(int option) {
+        return anySet(option, POSIX_REGION);
+    }
+
+    /* OP_SET_OPTION is required for these options. ??? */
+    // A previous form tested (option & (MULTILINE | IGNORECASE)) != 0;
+    // this version reports no option as dynamic.
+    public static boolean isDynamic(int option) {
+        return false;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,953 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAtSimple;
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
+
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.PosixBracket;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.Ptr;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CTypeNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CallNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
+
+class Parser extends Lexer {
+
+ protected final Regex regex;
+ protected Node root;
+
+ protected int returnCode; // return code set by parser methods (the methods themselves return the parsed nodes);
+ // this approach does not affect recursive calls
+
+    /**
+     * Creates a parser. All arguments are handed unchanged to the
+     * {@code Lexer} constructor; the regex being built is taken from {@code env.reg}.
+     *
+     * @param env   scan environment, also the source of {@link #regex}
+     * @param chars pattern characters
+     * @param p     start index passed to the lexer
+     * @param end   end index passed to the lexer
+     */
+    protected Parser(ScanEnvironment env, char[] chars, int p, int end) {
+        super(env, chars, p, end);
+        this.regex = env.reg;
+    }
+
+ // onig_parse_make_tree
+    /**
+     * Parses the pattern into a node tree, remembers it in {@link #root} and
+     * copies the environment's memory count into the regex.
+     *
+     * @return the root of the parsed tree
+     */
+    protected final Node parse() {
+        final Node tree = parseRegexp();
+        this.root = tree;
+        regex.numMem = env.numMem;
+        return tree;
+    }
+
+ private static final int POSIX_BRACKET_NAME_MIN_LEN = 4;
+ private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20;
+ private static final char BRACKET_END[] = ":]".toCharArray();
+    /**
+     * Tries to read a POSIX bracket name (optionally preceded by {@code ^})
+     * terminated by {@code ":]"} at the current position and, on success, adds
+     * the corresponding character type to {@code cc}.
+     *
+     * @param cc character class receiving the character type
+     * @return {@code false} when a POSIX bracket was consumed; {@code true} when
+     *         the text is not a POSIX bracket, in which case the position saved
+     *         by {@code mark()} is restored
+     */
+    private boolean parsePosixBracket(CClassNode cc) {
+        mark();
+
+        final boolean not = peekIs('^');
+        if (not) {
+            inc();
+        }
+
+        if (stop - p >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket
+            final char[][] names = PosixBracket.PBSNamesLower;
+            for (int idx = 0; idx < names.length; idx++) {
+                final char[] name = names[idx];
+                // hash lookup here ?
+                if (EncodingHelper.strNCmp(chars, p, stop, name, 0, name.length) != 0) {
+                    continue;
+                }
+
+                p += name.length;
+                if (EncodingHelper.strNCmp(chars, p, stop, BRACKET_END, 0, BRACKET_END.length) != 0) {
+                    newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+                }
+                cc.addCType(PosixBracket.PBSValues[idx], not, env, this);
+                // step over ":]"
+                inc();
+                inc();
+                return false;
+            }
+        }
+
+        // not_posix_bracket: scan ahead (bounded) for ':' or ']'
+        c = 0;
+        int scanned = 0;
+        while (left() && ((c = peek()) != ':') && c != ']') {
+            inc();
+            if (++scanned > POSIX_BRACKET_CHECK_LIMIT_LENGTH) {
+                break;
+            }
+        }
+
+        // an unknown name that is still closed by ":]" is reported as an error
+        if (c == ':' && left()) {
+            inc();
+            if (left()) {
+                fetch();
+                if (c == ']') {
+                    newSyntaxException(ERR_INVALID_POSIX_BRACKET_TYPE);
+                }
+            }
+        }
+        restore();
+        return true; /* 1: is not POSIX bracket, but no error. */
+    }
+
+    /**
+     * Builds a character class for the character property at the current
+     * position; the class is negated when the current token says so.
+     *
+     * @return the new character class node
+     */
+    private CClassNode parseCharProperty() {
+        final int ctype = fetchCharPropertyToCType();
+        final CClassNode propertyClass = new CClassNode();
+        propertyClass.addCType(ctype, false, env, this);
+        if (token.getPropNot()) {
+            propertyClass.setNot();
+        }
+        return propertyClass;
+    }
+
+    /**
+     * Looks ahead, without moving the current position, for {@code code} in the
+     * rest of the input.
+     * With {@code ignoreEscaped}, the iteration after an escape character only
+     * clears the escape flag and fetches nothing.
+     *
+     * @param code          character code to look for
+     * @param ignoreEscaped whether the escape flag is honoured as described above
+     * @return {@code true} if {@code code} was fetched before the input ended
+     */
+    private boolean codeExistCheck(int code, boolean ignoreEscaped) {
+        mark();
+
+        boolean found = false;
+        boolean afterEscape = false;
+        while (left()) {
+            if (ignoreEscaped && afterEscape) {
+                afterEscape = false;
+                continue;
+            }
+            fetch();
+            if (c == code) {
+                found = true;
+                break;
+            }
+            if (c == syntax.metaCharTable.esc) {
+                afterEscape = true;
+            }
+        }
+
+        restore();
+        return found;
+    }
+
+ /**
+ * Parses the body of a bracketed character class up to its closing token and
+ * returns the resulting node. Tokens are read with fetchTokenInCC(); nested
+ * classes are parsed recursively and OR-ed in, and the parts separated by
+ * CC_AND tokens are AND-ed together.
+ *
+ * @return the character class node, negated when the class started with an unescaped '^'
+ */
+ private CClassNode parseCharClass() {
+ fetchTokenInCC();
+
+ // an unescaped leading '^' negates the whole class
+ final boolean neg;
+ if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
+ neg = true;
+ fetchTokenInCC();
+ } else {
+ neg = false;
+ }
+
+ // a ']' in first position is taken literally, provided another ']' follows later
+ if (token.type == TokenType.CC_CLOSE) {
+ if (!codeExistCheck(']', true)) newSyntaxException(ERR_EMPTY_CHAR_CLASS);
+ env.ccEscWarn("]");
+ token.type = TokenType.CHAR; /* allow []...] */
+ }
+
+ CClassNode cc = new CClassNode();
+ // prevCC accumulates the intersection once a CC_AND token was seen; workCC is the scratch class used after it
+ CClassNode prevCC = null;
+ CClassNode workCC = null;
+
+ CCStateArg arg = new CCStateArg();
+
+ boolean andStart = false;
+ arg.state = CCSTATE.START;
+
+ while (token.type != TokenType.CC_CLOSE) {
+ // set when a case has already read the next token itself
+ boolean fetched = false;
+
+ switch (token.type) {
+
+ case CHAR:
+ if (token.getC() > 0xff) {
+ arg.inType = CCVALTYPE.CODE_POINT;
+ } else {
+ arg.inType = CCVALTYPE.SB; // sb_char:
+ }
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ parseCharClassValEntry2(cc, arg); // goto val_entry2
+ break;
+
+ case RAW_BYTE:
+ if (token.base != 0) { /* tok->base != 0 : octal or hexadec. */
+ // collect up to four consecutive raw bytes written in the same base
+ byte[] buf = new byte[4];
+ // NOTE(review): psave is assigned but never read in this method
+ int psave = p;
+ int base = token.base;
+ buf[0] = (byte)token.getC();
+ int i;
+ for (i=1; i<4; i++) {
+ fetchTokenInCC();
+ if (token.type != TokenType.RAW_BYTE || token.base != base) {
+ fetched = true;
+ break;
+ }
+ buf[i] = (byte)token.getC();
+ }
+
+ if (i == 1) {
+ arg.v = buf[0] & 0xff;
+ arg.inType = CCVALTYPE.SB; // goto raw_single
+ } else {
+ arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length);
+ arg.inType = CCVALTYPE.CODE_POINT;
+ }
+ } else {
+ arg.v = token.getC();
+ arg.inType = CCVALTYPE.SB; // raw_single:
+ }
+ arg.vIsRaw = true;
+ parseCharClassValEntry2(cc, arg); // goto val_entry2
+ break;
+
+ case CODE_POINT:
+ arg.v = token.getCode();
+ arg.vIsRaw = true;
+ parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
+ break;
+
+ case POSIX_BRACKET_OPEN:
+ if (parsePosixBracket(cc)) { /* true: is not POSIX bracket */
+ // rewind to the token start and treat its character as a plain value
+ env.ccEscWarn("[");
+ p = token.backP;
+ arg.v = token.getC();
+ arg.vIsRaw = false;
+ parseCharClassValEntry(cc, arg); // goto val_entry
+ break;
+ }
+ cc.nextStateClass(arg, env); // goto next_class
+ break;
+
+ case CHAR_TYPE:
+ cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
+ cc.nextStateClass(arg, env); // next_class:
+ break;
+
+ case CHAR_PROPERTY:
+ int ctype = fetchCharPropertyToCType();
+ cc.addCType(ctype, token.getPropNot(), env, this);
+ cc.nextStateClass(arg, env); // goto next_class
+ break;
+
+ case CC_RANGE:
+ // the meaning of '-' depends on the state reached so far
+ if (arg.state == CCSTATE.VALUE) {
+ fetchTokenInCC();
+ fetched = true;
+ if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
+ parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
+ break;
+ } else if (token.type == TokenType.CC_AND) {
+ env.ccEscWarn("-");
+ parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+ break;
+ }
+ arg.state = CCSTATE.RANGE;
+ } else if (arg.state == CCSTATE.START) {
+ arg.v = token.getC(); /* [-xa] is allowed */
+ arg.vIsRaw = false;
+ fetchTokenInCC();
+ fetched = true;
+ if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
+ parseCharClassValEntry(cc, arg); // goto val_entry
+ break;
+ } else if (arg.state == CCSTATE.RANGE) {
+ env.ccEscWarn("-");
+ parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
+ break;
+ } else { /* CCS_COMPLETE */
+ fetchTokenInCC();
+ fetched = true;
+ if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
+ parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+ break;
+ } else if (token.type == TokenType.CC_AND) {
+ env.ccEscWarn("-");
+ parseCharClassRangeEndVal(cc, arg); // goto range_end_val
+ break;
+ }
+
+ if (syntax.allowDoubleRangeOpInCC()) {
+ env.ccEscWarn("-");
+ parseCharClassSbChar(cc, arg); // goto sb_char /* [0-9-a] is allowed as [0-9\-a] */
+ break;
+ }
+ newSyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
+ }
+ break;
+
+ case CC_CC_OPEN: /* [ */
+ // nested class: parse it recursively and unite it with the current one
+ CClassNode acc = parseCharClass();
+ cc.or(acc);
+ break;
+
+ case CC_AND: /* && */
+ // flush a pending single value before starting the next operand
+ if (arg.state == CCSTATE.VALUE) {
+ arg.v = 0; // ??? safe v ?
+ arg.vIsRaw = false;
+ cc.nextStateValue(arg, env);
+ }
+ /* initialize local variables */
+ andStart = true;
+ arg.state = CCSTATE.START;
+ if (prevCC != null) {
+ prevCC.and(cc);
+ } else {
+ // first '&&': keep what was parsed so far in prevCC and continue in the scratch class
+ prevCC = cc;
+ if (workCC == null) workCC = new CClassNode();
+ cc = workCC;
+ }
+ cc.clear();
+ break;
+
+ case EOT:
+ // no break: control reaches the default label only if the call below returns
+ newSyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } // switch
+
+ if (!fetched) fetchTokenInCC();
+
+ } // while
+
+ // flush a value still pending when the class closes
+ if (arg.state == CCSTATE.VALUE) {
+ arg.v = 0; // ??? safe v ?
+ arg.vIsRaw = false;
+ cc.nextStateValue(arg, env);
+ }
+
+ // complete the intersection started by '&&'
+ if (prevCC != null) {
+ prevCC.and(cc);
+ cc = prevCC;
+ }
+
+ if (neg) {
+ cc.setNot();
+ } else {
+ cc.clearNot();
+ }
+
+ // for syntaxes with notNewlineInNegativeCC(): set the 0x0a bit in a non-empty negated class
+ if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
+ if (!cc.isEmpty()) {
+ final int NEW_LINE = 0x0a;
+ if (EncodingHelper.isNewLine(NEW_LINE)) {
+ cc.bs.set(NEW_LINE);
+ }
+ }
+ }
+
+ return cc;
+ }
+
+    /**
+     * Feeds the current token's character to {@code cc} as a non-raw
+     * single-byte value (the "sb_char" entry point).
+     *
+     * @param cc  character class being built
+     * @param arg parser state shared with {@code cc}
+     */
+    private void parseCharClassSbChar(CClassNode cc, CCStateArg arg) {
+        arg.v = token.getC();
+        arg.vIsRaw = false;
+        arg.inType = CCVALTYPE.SB;
+        parseCharClassValEntry2(cc, arg); // goto val_entry2
+    }
+
+    /**
+     * Feeds a literal {@code '-'} to {@code cc} as a non-raw value
+     * (the "range_end_val" entry point).
+     *
+     * @param cc  character class being built
+     * @param arg parser state shared with {@code cc}
+     */
+    private void parseCharClassRangeEndVal(CClassNode cc, CCStateArg arg) {
+        arg.vIsRaw = false;
+        arg.v = '-';
+        parseCharClassValEntry(cc, arg); // goto val_entry
+    }
+
+    /**
+     * Classifies {@code arg.v} as single-byte (at most 0xff) or code point and
+     * then feeds it to {@code cc} (the "val_entry" entry point).
+     *
+     * @param cc  character class being built
+     * @param arg parser state holding the value in {@code arg.v}
+     */
+    private void parseCharClassValEntry(CClassNode cc, CCStateArg arg) {
+        if (arg.v <= 0xff) {
+            arg.inType = CCVALTYPE.SB;
+        } else {
+            arg.inType = CCVALTYPE.CODE_POINT;
+        }
+        parseCharClassValEntry2(cc, arg); // val_entry2:
+    }
+
+    /**
+     * Hands the fully prepared value in {@code arg} to {@code cc}
+     * (the "val_entry2" entry point).
+     *
+     * @param cc  character class being built
+     * @param arg parser state with value, type and raw flag already set
+     */
+    private void parseCharClassValEntry2(CClassNode cc, CCStateArg arg) {
+        cc.nextStateValue(arg, this.env);
+    }
+
+    /**
+     * Parses what follows an opening parenthesis: a plain or named capture
+     * group, a non-capturing group, a look-around, a stop-backtrack group, or
+     * an inline option setting.
+     * The kind of result is reported through {@link #returnCode}:
+     * 1 for a grouping-only construct (the returned node is the sub-expression
+     * itself), 2 for an option-only construct such as {@code (?i)} (the
+     * returned option node has no target yet), 0 otherwise.
+     *
+     * @param term token type that terminates the enclosed sub-expression
+     * @return the parsed node
+     */
+    private Node parseEnclose(TokenType term) {
+        Node node = null;
+
+        if (!left()) newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+
+        int option = env.option;
+
+        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
+            inc();
+            if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+
+            boolean listCapture = false;
+
+            fetch();
+            switch(c) {
+            case ':':  /* (?:...) grouping only */
+                fetchToken(); // group:
+                node = parseSubExp(term);
+                returnCode = 1; /* group */
+                return node;
+            case '=':
+                node = new AnchorNode(AnchorType.PREC_READ);
+                break;
+            case '!':  /*         preceding read */
+                node = new AnchorNode(AnchorType.PREC_READ_NOT);
+                break;
+            case '>':  /* (?>...) stop backtrack */
+                node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+                break;
+            case '\'':
+                if (Config.USE_NAMED_GROUP) {
+                    if (syntax.op2QMarkLtNamedGroup()) {
+                        listCapture = false; // goto named_group1
+                        node = parseEncloseNamedGroup2(listCapture);
+                        break;
+                    } else {
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    }
+                } else { // USE_NAMED_GROUP
+                    // same handling as the '<' case below; otherwise node would stay null
+                    newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                } // USE_NAMED_GROUP
+                break;
+            case '<':  /* look behind (?<=...), (?<!...) */
+                fetch();
+                if (c == '=') {
+                    node = new AnchorNode(AnchorType.LOOK_BEHIND);
+                } else if (c == '!') {
+                    node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
+                } else {
+                    if (Config.USE_NAMED_GROUP) {
+                        if (syntax.op2QMarkLtNamedGroup()) {
+                            unfetch();
+                            c = '<';
+
+                            listCapture = false; // named_group1:
+                            node = parseEncloseNamedGroup2(listCapture); // named_group2:
+                            break;
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+
+                    } else { // USE_NAMED_GROUP
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    } // USE_NAMED_GROUP
+                }
+                break;
+            case '@':
+                if (syntax.op2AtMarkCaptureHistory()) {
+                    if (Config.USE_NAMED_GROUP) {
+                        if (syntax.op2QMarkLtNamedGroup()) {
+                            fetch();
+                            if (c == '<' || c == '\'') {
+                                listCapture = true;
+                                node = parseEncloseNamedGroup2(listCapture); // goto named_group2 /* (?@<name>...) */
+                                // The named group is complete: leave the switch so that it is
+                                // not un-fetched and replaced by a second, unnamed memory entry.
+                                break;
+                            }
+                            unfetch();
+                        }
+                    } // USE_NAMED_GROUP
+                    EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+                    int num = env.addMemEntry();
+                    if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+                    en.regNum = num;
+                    // record the group in the capture history, as the named path does
+                    env.captureHistory = bsOnAtSimple(env.captureHistory, num);
+                    node = en;
+                } else {
+                    newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                }
+                break;
+
+            // case 'p': #ifdef USE_POSIXLINE_OPTION
+            case '-':
+            case 'i':
+            case 'm':
+            case 's':
+            case 'x':
+                // inline options: read letters until ')' (option only) or ':' (option group)
+                boolean neg = false;
+                while (true) {
+                    switch(c) {
+                    case ':':
+                    case ')':
+                        break;
+                    case '-':
+                        neg = true;
+                        break;
+                    case 'x':
+                        option = bsOnOff(option, Option.EXTEND, neg);
+                        break;
+                    case 'i':
+                        option = bsOnOff(option, Option.IGNORECASE, neg);
+                        break;
+                    case 's':
+                        if (syntax.op2OptionPerl()) {
+                            option = bsOnOff(option, Option.MULTILINE, neg);
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+                        break;
+                    case 'm':
+                        if (syntax.op2OptionPerl()) {
+                            option = bsOnOff(option, Option.SINGLELINE, !neg);
+                        } else if (syntax.op2OptionRuby()) {
+                            option = bsOnOff(option, Option.MULTILINE, neg);
+                        } else {
+                            newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                        }
+                        break;
+                    // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
+                    //     option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
+                    //     break;
+
+                    default:
+                        newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+                    } // switch
+
+                    if (c == ')') {
+                        EncloseNode en = new EncloseNode(option, 0); // node_new_option
+                        node = en;
+                        returnCode = 2; /* option only */
+                        return node;
+                    } else if (c == ':') {
+                        // parse the group body with the new options, then restore the old ones
+                        int prev = env.option;
+                        env.option = option;
+                        fetchToken();
+                        Node target = parseSubExp(term);
+                        env.option = prev;
+                        EncloseNode en = new EncloseNode(option, 0); // node_new_option
+                        en.setTarget(target);
+                        node = en;
+                        returnCode = 0;
+                        return node;
+                    }
+                    if (!left()) newSyntaxException(ERR_END_PATTERN_IN_GROUP);
+                    fetch();
+                } // while
+
+            default:
+                newSyntaxException(ERR_UNDEFINED_GROUP_OPTION);
+            } // switch
+
+        } else {
+            if (isDontCaptureGroup(env.option)) {
+                fetchToken(); // goto group
+                node = parseSubExp(term);
+                returnCode = 1; /* group */
+                return node;
+            }
+            EncloseNode en = new EncloseNode(env.option, false); // node_new_enclose_memory
+            int num = env.addMemEntry();
+            en.regNum = num;
+            node = en;
+        }
+
+        fetchToken();
+        Node target = parseSubExp(term);
+
+        if (node.getType() == NodeType.ANCHOR) {
+            AnchorNode an = (AnchorNode) node;
+            an.setTarget(target);
+        } else {
+            EncloseNode en = (EncloseNode)node;
+            en.setTarget(target);
+            if (en.type == EncloseType.MEMORY) {
+                /* Don't move this to previous of parse_subexp() */
+                env.setMemNode(en.regNum, node);
+            }
+        }
+        returnCode = 0;
+        return node; // ??
+    }
+
+    /**
+     * Reads a group name starting at the current position, allocates a memory
+     * entry for it, registers the name with the regex and returns the new
+     * capture node.
+     *
+     * @param listCapture whether the group is also recorded in the capture history
+     * @return the enclose node of the named group (its target is not set here)
+     */
+    private Node parseEncloseNamedGroup2(boolean listCapture) {
+        final int nameStart = p;
+        fetchName(c, false);        // called for its effect on the lexer state; the result is not used
+        final int nameEnd = value;
+
+        final int groupNum = env.addMemEntry();
+        if (listCapture && groupNum >= BitStatus.BIT_STATUS_BITS_NUM) {
+            newValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
+        }
+
+        regex.nameAdd(chars, nameStart, nameEnd, groupNum, syntax);
+        final EncloseNode group = new EncloseNode(env.option, true); // node_new_enclose_memory
+        group.regNum = groupNum;
+
+        if (listCapture) {
+            env.captureHistory = bsOnAtSimple(env.captureHistory, groupNum);
+        }
+        env.numNamed++;
+        return group;
+    }
+
+    /**
+     * Searches {@code chars[from..to)} for the first occurrence of the first
+     * {@code n} codes of {@code s}.
+     *
+     * @param s        codes to look for
+     * @param n        number of codes of {@code s} to match
+     * @param from     index at which the search starts
+     * @param to       index one past the last character searched
+     * @param nextChar if not {@code null}, receives in {@code p} the index just
+     *                 past the match; left untouched when nothing is found
+     * @return the index at which the match starts, or -1 if there is none
+     */
+    private int findStrPosition(int[]s, int n, int from, int to, Ptr nextChar) {
+        int p = from;
+        while (p < to) {
+            int q = p + 1;
+            if (chars[p] == s[0]) {
+                int i;
+                for (i = 1; i < n && q < to; i++) {
+                    if (chars[q] != s[i]) break;
+                    q++;
+                }
+                if (i >= n) {
+                    // Report the end of the match whenever the caller supplied a holder.
+                    // The old test looked at chars[nextChar.p], an unrelated pattern
+                    // character, and could skip this assignment.
+                    if (nextChar != null) nextChar.p = q;
+                    return p;
+                }
+            }
+            p = q;
+        }
+        return -1;
+    }
+
+ private Node parseExp(TokenType term) { // parses one expression at the current token, then any repeat operators after it
+ if (token.type == term) return StringNode.EMPTY; // goto end_of_token
+ // node receives the atom built by the switch below; group is set only for returnCode == 1 from parseEnclose
+ Node node = null;
+ boolean group = false;
+
+ switch(token.type) {
+ case ALT:
+ case EOT:
+ return StringNode.EMPTY; // end_of_token:, node_new_empty
+
+ case SUBEXP_OPEN:
+ node = parseEnclose(TokenType.SUBEXP_CLOSE);
+ if (returnCode == 1) {
+ group = true;
+ } else if (returnCode == 2) { /* option only */
+ int prev = env.option; // restored below once the target has been parsed
+ EncloseNode en = (EncloseNode)node;
+ env.option = en.option;
+ fetchToken();
+ Node target = parseSubExp(term);
+ env.option = prev;
+ en.setTarget(target);
+ return node;
+ }
+ break;
+ case SUBEXP_CLOSE:
+ if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
+ if (token.escaped) {
+ return parseExpTkRawByte(group); // goto tk_raw_byte
+ } else {
+ return parseExpTkByte(group); // goto tk_byte
+ }
+ case STRING:
+ return parseExpTkByte(group); // tk_byte:
+
+ case RAW_BYTE:
+ return parseExpTkRawByte(group); // tk_raw_byte:
+ case CODE_POINT:
+ char[] buf = new char[] {(char)token.getCode()};
+ // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
+ node = new StringNode(buf, 0, 1);
+ break;
+ // literal run: ends at the escape character followed by 'E', or at stop when that pair is absent
+ case QUOTE_OPEN:
+ int[] endOp = new int[] {syntax.metaCharTable.esc, 'E'};
+ int qstart = p;
+ Ptr nextChar = new Ptr();
+ int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar);
+ if (qend == -1) nextChar.p = qend = stop;
+ node = new StringNode(chars, qstart, qend);
+ p = nextChar.p;
+ break;
+
+ case CHAR_TYPE:
+ switch(token.getPropCType()) {
+ case CharacterType.D:
+ case CharacterType.S:
+ case CharacterType.W:
+ if (Config.NON_UNICODE_SDW) { // NOTE(review): node stays null when this flag is false -- confirm intended
+ CClassNode cc = new CClassNode();
+ cc.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) cc.setNot();
+ node = cc;
+ }
+ break;
+
+ case CharacterType.WORD:
+ node = new CTypeNode(token.getPropCType(), token.getPropNot());
+ break;
+
+ case CharacterType.SPACE:
+ case CharacterType.DIGIT:
+ case CharacterType.XDIGIT:
+ // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
+ CClassNode ccn = new CClassNode();
+ ccn.addCType(token.getPropCType(), false, env, this);
+ if (token.getPropNot()) ccn.setNot();
+ node = ccn;
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+
+ } // inner switch
+ break;
+
+ case CHAR_PROPERTY:
+ node = parseCharProperty();
+ break;
+
+ case CC_CC_OPEN:
+ CClassNode cc = parseCharClass();
+ node = cc;
+ if (isIgnoreCase(env.option)) {
+ ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
+ EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
+
+ if (arg.altRoot != null) {
+ node = ConsAltNode.newAltNode(node, arg.altRoot);
+ }
+ }
+ break;
+
+ case ANYCHAR:
+ node = new AnyCharNode();
+ break;
+
+ case ANYCHAR_ANYTIME:
+ node = new AnyCharNode();
+ QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
+ qn.setTarget(node);
+ node = qn;
+ break;
+
+ case BACKREF:
+ int[]backRefs = token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()};
+ node = new BackRefNode(token.getBackrefNum(),
+ backRefs,
+ token.getBackrefByName(),
+ token.getBackrefExistLevel(), // #ifdef USE_BACKREF_AT_LEVEL
+ token.getBackrefLevel(), // ...
+ env);
+
+ break;
+
+ case CALL:
+ if (Config.USE_SUBEXP_CALL) {
+ int gNum = token.getCallGNum();
+
+ if (gNum < 0) {
+ gNum = backrefRelToAbs(gNum);
+ if (gNum <= 0) newValueException(ERR_INVALID_BACKREF);
+ }
+ node = new CallNode(chars, token.getCallNameP(), token.getCallNameEnd(), gNum);
+ env.numCall++;
+ } // USE_SUBEXP_CALL
+ break;
+
+ case ANCHOR:
+ node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
+ break;
+
+ case OP_REPEAT:
+ case INTERVAL:
+ if (syntax.contextIndepRepeatOps()) {
+ if (syntax.contextInvalidRepeatOps()) {
+ newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
+ } else {
+ node = StringNode.EMPTY; // node_new_empty
+ }
+ } else {
+ return parseExpTkByte(group); // goto tk_byte
+ }
+ break;
+
+ default:
+ newInternalException(ERR_PARSER_BUG);
+ } //switch
+ // only the cases that 'break' get here; the others returned above
+ //targetp = node;
+
+ fetchToken(); // re_entry:
+
+ return parseExpRepeat(node, group); // repeat:
+ }
+
+    /**
+     * tk_byte: gathers the current token and every STRING token that follows
+     * it into one string node, then parses trailing repeat operators.
+     */
+    private Node parseExpTkByte(boolean group) {
+        final StringNode literal = new StringNode(chars, token.backP, p);
+        for (fetchToken(); token.type == TokenType.STRING; fetchToken()) {
+            if (token.backP != literal.end) {
+                literal.cat(chars, token.backP, p); // non continuous string stream, need to COW
+            } else {
+                literal.end = p; // non escaped character, remain shared, just increase shared range
+            }
+        }
+        return parseExpRepeat(literal, group); // string_end:, goto repeat
+    }
+
+    /**
+     * tk_raw_byte: builds a one-character string node from the current token
+     * and then parses any repeat operators that follow it.
+     *
+     * This used to be written as a loop that counted collected characters in
+     * a local len. Because len started at 1 and the first iteration returned
+     * as soon as it saw len == 1, nothing after that test could ever run: not
+     * the fetch of further RAW_BYTE tokens, not the
+     * ERR_TOO_SHORT_MULTI_BYTE_STRING check, not the append to the node. That
+     * dead code is gone; the statements below are the path that always ran,
+     * in the same order.
+     *
+     * @param group passed through unchanged to parseExpRepeat
+     * @return whatever parseExpRepeat returns for the new node
+     */
+    private Node parseExpTkRawByte(boolean group) {
+        // tk_raw_byte:
+
+        // important: we don't use 0xff mask here neither in the compiler
+        // (in the template string) so we won't have to mask target
+        // strings when comparing against them in the matcher
+        StringNode node = new StringNode((char)token.getC());
+
+        // Mark the node raw, advance to the next token, then clear the mark.
+        // NOTE(review): setRaw() followed by clearRaw() with only fetchToken()
+        // in between looks like a no-op on the node; both calls are kept so the
+        // call sequence is unchanged -- confirm before removing either one.
+        node.setRaw();
+        fetchToken();
+        node.clearRaw();
+
+        // string_end:
+        return parseExpRepeat(node, group);
+    }
+
+
+    /**
+     * repeat: applies every OP_REPEAT/INTERVAL token that follows target and
+     * returns the resulting node once a different token is reached.
+     */
+    private Node parseExpRepeat(Node target, boolean group) {
+        for (;;) {
+            final boolean interval = token.type == TokenType.INTERVAL;
+            if (!interval && token.type != TokenType.OP_REPEAT) return target;
+            if (target.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+            final QuantifierNode quantifier = new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), interval);
+            quantifier.greedy = token.getRepeatGreedy();
+            final int status = quantifier.setQuantifier(target, group, env, chars, getBegin(), getEnd());
+
+            Node wrapped = quantifier;
+            if (token.getRepeatPossessive()) {
+                final EncloseNode atomic = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+                atomic.setTarget(wrapped);
+                wrapped = atomic;
+            }
+
+            if (status == 2) { /* split case: /abc+/ */
+                final ConsAltNode head = ConsAltNode.newListNode(target, null);
+                final ConsAltNode tail = head.setCdr(ConsAltNode.newListNode(wrapped, null));
+                fetchToken();
+                return parseExpRepeatForCar(head, tail, group);
+            }
+            if (status == 0) target = wrapped;
+            fetchToken(); // goto re_entry
+        }
+    }
+
+    /**
+     * repeat: same loop as parseExpRepeat, but the operand is target.car and a
+     * quantifier accepted with status 0 replaces that car; always returns top.
+     */
+    private Node parseExpRepeatForCar(Node top, ConsAltNode target, boolean group) {
+        for (;;) {
+            final boolean interval = token.type == TokenType.INTERVAL;
+            if (!interval && token.type != TokenType.OP_REPEAT) return top;
+            if (target.car.isInvalidQuantifier()) newSyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
+
+            final QuantifierNode quantifier = new QuantifierNode(token.getRepeatLower(), token.getRepeatUpper(), interval);
+            quantifier.greedy = token.getRepeatGreedy();
+            final int status = quantifier.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());
+
+            Node wrapped = quantifier;
+            if (token.getRepeatPossessive()) {
+                final EncloseNode atomic = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
+                atomic.setTarget(wrapped);
+                wrapped = atomic;
+            }
+
+            if (status == 0) target.setCar(wrapped);
+            assert status != 2; /* split case: /abc+/ */
+
+            fetchToken(); // goto re_entry
+        }
+    }
+
+    /**
+     * Parses consecutive expressions until EOT, term or ALT is the current
+     * token. One expression is returned as is; several are chained into a list.
+     */
+    private Node parseBranch(TokenType term) {
+        final Node first = parseExp(term);
+        if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) return first;
+        final ConsAltNode head = ConsAltNode.newListNode(first, null);
+        ConsAltNode last = head;
+        do {
+            final Node next = parseExp(term);
+            if (next.getType() != NodeType.LIST) {
+                last.setCdr(ConsAltNode.newListNode(next, null));
+                last = last.cdr;
+            } else {
+                // parseExp handed back a list: link it in and continue from its final cell
+                ConsAltNode cell = (ConsAltNode)next;
+                last.setCdr(cell);
+                while (cell.cdr != null) cell = cell.cdr;
+                last = cell;
+            }
+        } while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT);
+        return head;
+    }
+
+    /**
+     * Parses one or more ALT-separated branches up to the terminator token.
+     *
+     * @param term expected terminator (term_tok: TK_EOT or TK_SUBEXP_CLOSE)
+     * @return the only branch, or the head of the chain built with newAltNode
+     */
+    private Node parseSubExp(TokenType term) {
+        final Node first = parseBranch(term);
+        if (token.type == term) return first;
+        if (token.type != TokenType.ALT) {
+            parseSubExpError(term);
+            return null; //not reached
+        }
+
+        final ConsAltNode head = ConsAltNode.newAltNode(first, null);
+        ConsAltNode last = head;
+        do {
+            fetchToken();
+            last.setCdr(ConsAltNode.newAltNode(parseBranch(term), null));
+            last = last.cdr;
+        } while (token.type == TokenType.ALT);
+        if (token.type != term) parseSubExpError(term);
+        return head;
+    }
+
+    /**
+     * Raises the error for a sub-expression that ended without its terminator.
+     */
+    private void parseSubExpError(TokenType term) {
+        if (term != TokenType.SUBEXP_CLOSE) newInternalException(ERR_PARSER_BUG);
+        else newSyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
+    }
+
+ private Node parseRegexp() { // parses from the current scanner position up to EOT
+ fetchToken(); // load the first token before descending
+ return parseSubExp(TokenType.EOT); // the outermost sub-expression is terminated by EOT
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Regex.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,413 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isCaptureGroup;
+import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
+
+import java.util.HashMap;
+import java.util.Iterator;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
+
+public final class Regex implements RegexState {
+
+    int[] code; /* compiled pattern */
+    int codeLength;
+    boolean stackNeeded;
+    Object[]operands; /* e.g. shared CClassNode */
+    int operandLength;
+
+    int state; /* normal, searching, compiling */ // remove
+    int numMem; /* used memory(...) num counted from 1 */
+    int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+    int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
+    int numCombExpCheck; /* combination explosion check */
+    int numCall; /* number of subexp call */
+    int captureHistory; /* (?@...) flag (1-31) */
+    int btMemStart; /* need backtrack flag */
+    int btMemEnd; /* need backtrack flag */
+
+    int stackPopLevel;
+
+    int[]repeatRangeLo;
+    int[]repeatRangeHi;
+
+    public WarnCallback warnings; // set only while the constructor compiles; reset to null afterwards
+    public MatcherFactory factory;
+
+    int options;
+    int userOptions;
+    Object userObject;
+    //final Syntax syntax;
+    final int caseFoldFlag;
+
+    HashMap<String,NameEntry> nameTable; // named entries
+
+    /* optimization info (string search, char-map and anchors) */
+    SearchAlgorithm searchAlgorithm; /* optimize flag */
+    int thresholdLength; /* search str-length for apply optimize */
+    int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+    int anchorDmin; /* (SEMI_)END_BUF anchor distance */
+    int anchorDmax; /* (SEMI_)END_BUF anchor distance */
+    int subAnchor; /* start-anchor for exact or map */
+
+    char[] exact;
+    int exactP;
+    int exactEnd;
+
+    byte[] map; /* used as BM skip or char-map */
+    int[] intMap; /* BM skip for exact_len > 255 */
+    int[] intMapBackward; /* BM skip for backward search */
+    int dMin; /* min-distance of exact or map */
+    int dMax; /* max-distance of exact or map */
+
+    char[][] templates;
+    int templateNum;
+    /* Convenience constructors: each one fills in defaults (option 0, Syntax.RUBY, WarnCallback.DEFAULT) and delegates. */
+    public Regex(CharSequence cs) {
+        this(cs.toString());
+    }
+
+    public Regex(String str) {
+        this(str.toCharArray(), 0, str.length(), 0);
+    }
+
+    public Regex(char[] chars) {
+        this(chars, 0, chars.length, 0);
+    }
+
+    public Regex(char[] chars, int p, int end) {
+        this(chars, p, end, 0);
+    }
+
+    public Regex(char[] chars, int p, int end, int option) {
+        this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
+    }
+
+    // onig_new
+    public Regex(char[] chars, int p, int end, int option, Syntax syntax) {
+        this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
+    }
+
+    public Regex(char[]chars, int p, int end, int option, WarnCallback warnings) {
+        this(chars, p, end, option, Syntax.RUBY, warnings);
+    }
+
+    // onig_new
+    public Regex(char[] chars, int p, int end, int option, Syntax syntax, WarnCallback warnings) {
+        this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
+    }
+
+    // onig_alloc_init
+    public Regex(char[] chars, int p, int end, int option, int caseFoldFlag, Syntax syntax, WarnCallback warnings) {
+        // CAPTURE_GROUP and DONT_CAPTURE_GROUP contradict each other
+        if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
+            (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
+            throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
+        }
+        // merge in the syntax's options; NEGATE_SINGLELINE from the caller then strips SINGLELINE again
+        if ((option & Option.NEGATE_SINGLELINE) != 0) {
+            option |= syntax.options;
+            option &= ~Option.SINGLELINE;
+        } else {
+            option |= syntax.options;
+        }
+
+        this.options = option;
+        this.caseFoldFlag = caseFoldFlag;
+        this.warnings = warnings;
+        // compile the pattern chars[p..end) into this object
+        new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile();
+
+        this.warnings = null;
+    }
+    /** Creates a matcher over the whole array. */
+    public Matcher matcher(char[] chars) {
+        return matcher(chars, 0, chars.length);
+    }
+    /** Creates a matcher for the given range by delegating to factory. */
+    public Matcher matcher(char[] chars, int p, int end) {
+        return factory.create(this, chars, p, end);
+    }
+
+    public int numberOfCaptures() {
+        return numMem;
+    }
+    /** Counts the bits set in captureHistory for indexes 0..MAX_CAPTURE_HISTORY_GROUP; 0 when the feature is off. */
+    public int numberOfCaptureHistories() {
+        if (Config.USE_CAPTURE_HISTORY) {
+            int n = 0;
+            for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
+                if (bsAt(captureHistory, i)) n++;
+            }
+            return n;
+        } else {
+            return 0;
+        }
+    }
+    /** Debug dump of the name table; empty string when no names were defined. */
+    String nameTableToString() {
+        StringBuilder sb = new StringBuilder();
+
+        if (nameTable != null) {
+            sb.append("name table\n");
+            for (NameEntry ne : nameTable.values()) {
+                sb.append(" ").append(ne).append("\n");
+            }
+            sb.append("\n");
+        }
+        return sb.toString();
+    }
+    /** Looks up name[nameP..nameEnd) in the name table; null when absent or when no table exists. */
+    NameEntry nameFind(char[] name, int nameP, int nameEnd) {
+        if (nameTable != null) return nameTable.get(new String(name, nameP, nameEnd - nameP));
+        return null;
+    }
+    /** Rewrites every stored back-reference number n to map[n]. */
+    void renumberNameTable(int[]map) {
+        if (nameTable != null) {
+            for (NameEntry e : nameTable.values()) {
+                if (e.backNum > 1) {
+                    for (int i=0; i<e.backNum; i++) {
+                        e.backRefs[i] = map[e.backRefs[i]];
+                    }
+                } else if (e.backNum == 1) {
+                    e.backRef1 = map[e.backRef1];
+                }
+            }
+        }
+    }
+
+    public int numberOfNames() {
+        return nameTable == null ? 0 : nameTable.size();
+    }
+    /** Registers backRef under name[nameP..nameEnd), creating the table and the entry on first use. */
+    void nameAdd(char[] name, int nameP, int nameEnd, int backRef, Syntax syntax) {
+        if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME);
+
+        NameEntry e = null;
+        if (nameTable == null) {
+            nameTable = new HashMap<String,NameEntry>(); // 13, oni defaults to 5
+        } else {
+            e = nameFind(name, nameP, nameEnd);
+        }
+
+        if (e == null) {
+            // dup the name here as oni does ?, what for ? (it has to manage it, we don't)
+            e = new NameEntry(name, nameP, nameEnd);
+            nameTable.put(new String(name, nameP, nameEnd - nameP), e);
+        } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
+            throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
+        }
+
+        e.addBackref(backRef);
+    }
+
+    NameEntry nameToGroupNumbers(char[] name, int nameP, int nameEnd) {
+        return nameFind(name, nameP, nameEnd);
+    }
+    /** Resolves a name to one group number; with several candidates, prefers the last one whose region.beg is not REGION_NOTPOS. */
+    public int nameToBackrefNumber(char[] name, int nameP, int nameEnd, Region region) {
+        NameEntry e = nameToGroupNumbers(name, nameP, nameEnd);
+        if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE,
+                                                new String(name, nameP, nameEnd - nameP));
+
+        switch(e.backNum) {
+        case 0:
+            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+        case 1:
+            return e.backRef1;
+        default:
+            if (region != null) {
+                for (int i = e.backNum - 1; i >= 0; i--) {
+                    if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i];
+                }
+            }
+            return e.backRefs[e.backNum - 1];
+        }
+    }
+    /** Iterates over the name entries; an empty iterator (not a NullPointerException) when no names were defined. */
+    public Iterator<NameEntry> namedBackrefIterator() {
+        return nameTable == null ? java.util.Collections.<NameEntry>emptyList().iterator() : nameTable.values().iterator();
+    }
+
+    public boolean noNameGroupIsActive(Syntax syntax) {
+        if (isDontCaptureGroup(options)) return false;
+
+        if (Config.USE_NAMED_GROUP) {
+            if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false;
+        }
+        return true;
+    }
+
+    /* set skip map for Boyer-Moore search */
+    void setupBMSkipMap() {
+        char[] chars = exact;
+        int p = exactP;
+        int end = exactEnd;
+        int len = end - p;
+
+        if (len < Config.CHAR_TABLE_SIZE) {
+            // map/skip
+            if (map == null) map = new byte[Config.CHAR_TABLE_SIZE];
+            // NOTE(review): a len above 127 wraps negative when narrowed to byte -- confirm readers mask with & 0xff
+            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
+            for (int i=0; i<len-1; i++) map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
+        } else {
+            if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];
+            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) intMap[i] = len; // default skip, same as the byte-map branch
+            for (int i=0; i<len-1; i++) intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
+        }
+    }
+    /** Installs the exact-string optimization described by e and selects the search algorithm for it. */
+    void setExactInfo(OptExactInfo e) {
+        if (e.length == 0) return;
+
+        // shall we copy that ?
+        exact = e.chars;
+        exactP = 0;
+        exactEnd = e.length;
+
+        if (e.ignoreCase) {
+            searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
+        } else {
+            if (e.length >= 2) {
+                setupBMSkipMap();
+                searchAlgorithm = SearchAlgorithm.BM;
+            } else {
+                searchAlgorithm = SearchAlgorithm.SLOW;
+            }
+        }
+
+        dMin = e.mmd.min;
+        dMax = e.mmd.max;
+
+        if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+            thresholdLength = dMin + (exactEnd - exactP);
+        }
+    }
+    /** Installs the char-map optimization described by m and selects SearchAlgorithm.MAP. */
+    void setOptimizeMapInfo(OptMapInfo m) {
+        map = m.map;
+
+        searchAlgorithm = SearchAlgorithm.MAP;
+        dMin = m.mmd.min;
+        dMax = m.mmd.max;
+
+        if (dMin != MinMaxLen.INFINITE_DISTANCE) {
+            thresholdLength = dMin + 1;
+        }
+    }
+    /** Adds the BEGIN_LINE bit of anc.leftAnchor and the END_LINE bit of anc.rightAnchor to subAnchor. */
+    void setSubAnchor(OptAnchorInfo anc) {
+        subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
+        subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
+    }
+    /** Resets the search algorithm, the anchor fields and the exact-string fields; map, dMin/dMax and thresholdLength are not touched. */
+    void clearOptimizeInfo() {
+        searchAlgorithm = SearchAlgorithm.NONE;
+        anchor = 0;
+        anchorDmax = 0;
+        anchorDmin = 0;
+        subAnchor = 0;
+
+        exact = null;
+        exactP = exactEnd = 0;
+    }
+    /** Formats bytes[p..end) as "\nPATTERN: /.../", decoding one byte at a time with the platform default charset. */
+    public String encStringToString(byte[]bytes, int p, int end) {
+        StringBuilder sb = new StringBuilder("\nPATTERN: /");
+
+        while (p < end) {
+            sb.append(new String(new byte[]{bytes[p]}));
+            p++;
+        }
+        return sb.append("/").toString();
+    }
+    /** Debug dump of the optimization fields (search algorithm, anchors, distances, exact string or char map). */
+    public String optimizeInfoToString() {
+        StringBuilder s = new StringBuilder();
+        s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
+        s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
+
+        if ((anchor & AnchorType.END_BUF_MASK) != 0) {
+            s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
+        }
+
+        s.append("\n");
+
+        if (searchAlgorithm != SearchAlgorithm.NONE) {
+            s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
+        }
+
+        s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
+        s.append("threshold length: ").append(thresholdLength).append("\n");
+
+        if (exact != null) {
+            s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
+        } else if (searchAlgorithm == SearchAlgorithm.MAP) {
+            int n=0;
+            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
+
+            s.append("map: n = ").append(n).append("\n");
+            if (n > 0) {
+                int c=0;
+                s.append("[");
+                for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
+                    if (map[i] != 0) {
+                        if (c > 0) s.append(", ");
+                        c++;
+                        // TODO if (enc.isPrint(i)
+                        s.append((char)i);
+                    }
+                }
+                s.append("]\n");
+            }
+        }
+
+        return s.toString();
+    }
+
+    public int getOptions() {
+        return options;
+    }
+
+    public void setUserOptions(int options) {
+        this.userOptions = options;
+    }
+
+    public int getUserOptions() {
+        return userOptions;
+    }
+
+    public void setUserObject(Object object) {
+        this.userObject = object;
+    }
+
+    public Object getUserObject() {
+        return userObject;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Region.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/** Begin/end offset pairs for a fixed number of registers, plus an optional capture tree. */
+public final class Region {
+    static final int REGION_NOTPOS = -1;
+
+    public final int numRegs;
+    public final int[]beg;
+    public final int[]end;
+    public CaptureTreeNode historyRoot;
+
+    public Region(int num) {
+        numRegs = num;
+        beg = new int[num];
+        end = new int[num];
+    }
+
+    public Region(int begin, int end) {
+        this(1);
+        this.beg[0] = begin;
+        this.end[0] = end;
+    }
+
+    public Region clone() {
+        final Region copy = new Region(numRegs);
+        for (int i = 0; i < beg.length; i++) copy.beg[i] = beg[i];
+        for (int i = 0; i < end.length; i++) copy.end[i] = end[i];
+        copy.historyRoot = historyRoot == null ? null : historyRoot.cloneTree();
+        return copy;
+    }
+
+    public String toString() {
+        final StringBuilder text = new StringBuilder("Region: \n");
+        for (int i = 0; i < beg.length; i++) {
+            text.append(" ").append(i).append(": (").append(beg[i]).append("-").append(end[i]).append(")");
+        }
+        return text.toString();
+    }
+
+    CaptureTreeNode getCaptureTree() { return historyRoot; }
+
+    void clear() {
+        for (int i = beg.length - 1; i >= 0; i--) {
+            end[i] = beg[i] = REGION_NOTPOS;
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ScanEnvironment.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,137 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsClear;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+
+public final class ScanEnvironment {
+
+    private static final int SCANENV_MEMNODES_SIZE = 8;
+
+    int option;
+    final int caseFoldFlag;
+    public final Syntax syntax;
+    int captureHistory;
+    int btMemStart;
+    int btMemEnd;
+    int backrefedMem;
+
+    public final Regex reg;
+
+    int numCall;
+    UnsetAddrList unsetAddrList; // USE_SUBEXP_CALL
+    public int numMem;
+
+    int numNamed; // USE_NAMED_GROUP
+
+    public Node[] memNodes;
+
+    // USE_COMBINATION_EXPLOSION_CHECK
+    int numCombExpCheck;
+    int combExpMaxRegNum;
+    int currMaxRegNum;
+    boolean hasRecursion;
+
+    public ScanEnvironment(Regex regex, Syntax syntax) {
+        this.syntax = syntax;
+        this.reg = regex;
+        this.caseFoldFlag = regex.caseFoldFlag;
+        this.option = regex.options;
+    }
+
+    public void clear() {
+        // Resets the per-compile state; option, syntax, reg and unsetAddrList are left as they are.
+        // bit-status words
+        captureHistory = bsClear();
+        btMemStart = bsClear();
+        btMemEnd = bsClear();
+        backrefedMem = bsClear();
+        // counters
+        numCall = 0;
+        numMem = 0;
+        numNamed = 0;
+        numCombExpCheck = 0;
+        combExpMaxRegNum = 0;
+        currMaxRegNum = 0;
+        // node table and recursion flag
+        memNodes = null;
+        hasRecursion = false;
+    }
+
+    public int addMemEntry() {
+        // Bumps numMem and makes sure memNodes can be indexed with the new value.
+        final int index = ++numMem;
+        if (index == 1) {
+            memNodes = new Node[SCANENV_MEMNODES_SIZE];
+        } else if (index >= memNodes.length) {
+            final Node[] grown = new Node[memNodes.length << 1];
+            System.arraycopy(memNodes, 0, grown, 0, memNodes.length);
+            memNodes = grown;
+        }
+        return index;
+    }
+
+    public void setMemNode(int num, Node node) {
+        // Only numbers already handed out by addMemEntry() are accepted.
+        if (num > numMem) {
+            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+        }
+        memNodes[num] = node;
+    }
+
+    /**
+     * Maps the character after a backslash to the control character it
+     * names, when the syntax enables control-character escapes.
+     * Anything else, including 'v' without op2EscVVtab, comes back unchanged.
+     */
+    public int convertBackslashValue(int c) {
+        if (!syntax.opEscControlChars()) return c;
+        switch (c) {
+        case 'n': return '\n';
+        case 't': return '\t';
+        case 'r': return '\r';
+        case 'f': return '\f';
+        case 'a': return '\007';
+        case 'b': return '\010';
+        case 'e': return '\033';
+        case 'v': return syntax.op2EscVVtab() ? 11 : c; // ???
+        default: return c;
+        }
+    }
+
+    void ccEscWarn(String s) {
+        // Warns only when warnings are compiled in and the syntax asks for
+        // both the not-escaped warning and backslash escapes in classes.
+        if (!Config.USE_WARN) return;
+        if (!syntax.warnCCOpNotEscaped() || !syntax.backSlashEscapeInCC()) return;
+        reg.warnings.warn("character class has '" + s + "' without escape");
+    }
+
+    void closeBracketWithoutEscapeWarn(String s) {
+        // Warns only when warnings are compiled in and the syntax asks
+        // for the not-escaped warning.
+        if (Config.USE_WARN && syntax.warnCCOpNotEscaped()) {
+            reg.warnings.warn("regular expression has '" + s + "' without escape");
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ScannerSupport.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,178 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
+
+abstract class ScannerSupport extends IntHolder implements ErrorMessages {
+
+    protected final char[] chars; // pattern
+    protected int p; // current scanner position
+    protected int stop; // pattern end (mutable)
+    private int lastFetched; // position before the latest inc()/fetch()/fetchTo(); unfetch() returns to it
+    protected int c; // current code point
+
+    private final int begin; // pattern begin position for reset() support
+    private final int end; // pattern end position for reset() support
+    protected int _p; // used by mark()/restore() to mark positions
+    /** Scans chars[p..end); the scanner starts positioned at p. */
+    protected ScannerSupport(char[] chars, int p, int end) {
+        this.chars = chars;
+        this.begin = p;
+        this.end = end;
+
+        reset();
+    }
+
+    protected int getBegin() {
+        return begin;
+    }
+
+    protected int getEnd() {
+        return end;
+    }
+    /**
+     * Scans decimal digits at the current position; returns their value, or -1 when it would exceed Integer.MAX_VALUE.
+     */
+    protected final int scanUnsignedNumber() {
+        int last = c;
+        int num = 0; // long ???
+        while(left()) {
+            fetch();
+            if (Character.isDigit(c)) {
+                int val = EncodingHelper.digitVal(c);
+                if (num > (Integer.MAX_VALUE - val) / 10) return -1; // num * 10 + val would overflow
+                num = num * 10 + val;
+            } else {
+                unfetch();
+                break;
+            }
+        }
+        c = last; // the scan must not change the current code point
+        return num;
+    }
+    /** Scans up to maxLength hex digits (no limit when maxLength is negative); -1 when the value would exceed Integer.MAX_VALUE. */
+    protected final int scanUnsignedHexadecimalNumber(int maxLength) {
+        int last = c;
+        int num = 0;
+        while(left() && maxLength-- != 0) {
+            fetch();
+            if (EncodingHelper.isXDigit(c)) {
+                int val = EncodingHelper.xdigitVal(c);
+                // reject before shifting: (num << 4) + val must stay within int range
+                if (num > ((Integer.MAX_VALUE - val) >> 4)) return -1;
+                num = (num << 4) + val;
+            } else {
+                unfetch();
+                break;
+            }
+        }
+        c = last;
+        return num;
+    }
+    /** Scans up to maxLength octal digits (no limit when maxLength is negative); -1 when the value would exceed Integer.MAX_VALUE. */
+    protected final int scanUnsignedOctalNumber(int maxLength) {
+        int last = c;
+        int num = 0;
+        while(left() && maxLength-- != 0) {
+            fetch();
+            if (Character.isDigit(c) && c < '8') {
+                int val = EncodingHelper.odigitVal(c);
+                // reject before shifting: (num << 3) + val must stay within int range
+                if (num > ((Integer.MAX_VALUE - val) >> 3)) return -1;
+                num = (num << 3) + val;
+            } else {
+                unfetch();
+                break;
+            }
+        }
+        c = last;
+        return num;
+    }
+    /** Rewinds the scanner to the range given to the constructor. */
+    protected final void reset() {
+        p = begin;
+        stop = end;
+    }
+
+    protected final void mark() {
+        _p = p;
+    }
+
+    protected final void restore() {
+        p = _p;
+    }
+    /** Advances one position without reading it into c. */
+    protected final void inc() {
+        lastFetched = p;
+        p++;
+    }
+    /** Reads chars[p] into c and advances; does not check p against stop. */
+    protected final void fetch() {
+        lastFetched = p;
+        c = chars[p++];
+    }
+    /** Like fetch(), but returns the character instead of storing it in c. */
+    protected int fetchTo() {
+        lastFetched = p;
+        return chars[p++];
+    }
+    /** Moves back to the position recorded by the latest inc()/fetch()/fetchTo(). */
+    protected final void unfetch() {
+        p = lastFetched;
+    }
+    /** Returns the character at p without consuming it, or 0 at the end of input. */
+    protected final int peek() {
+        return p < stop ? chars[p] : 0;
+    }
+
+    protected final boolean peekIs(int c) {
+        return peek() == c;
+    }
+
+    protected final boolean left() {
+        return p < stop;
+    }
+    /* The new*Exception helpers below throw; they never return normally. */
+    protected void newSyntaxException(String message) {
+        throw new SyntaxException(message);
+    }
+
+    protected void newValueException(String message) {
+        throw new ValueException(message);
+    }
+
+    protected void newValueException(String message, String str) {
+        throw new ValueException(message, str);
+    }
+
+    protected void newValueException(String message, int p, int end) {
+        throw new ValueException(message, new String(chars, p, end - p));
+    }
+
+    protected void newInternalException(String message) {
+        throw new InternalException(message);
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/SearchAlgorithm.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,294 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * Strategy for locating a candidate position in the subject text. All implementations
+ * except {@link #NONE} and {@link #MAP} look for the literal stored in
+ * {@code regex.exact[regex.exactP .. regex.exactEnd)}; {@link #MAP} looks for any
+ * character flagged in {@code regex.map}. Every search reports an index into
+ * {@code text}, or -1 when nothing was found.
+ */
+public abstract class SearchAlgorithm {
+
+    /** @return the name identifying this algorithm */
+    public abstract String getName();
+
+    /**
+     * Searches forward, starting at {@code textP}.
+     *
+     * @param regex     supplies the literal or character map to look for
+     * @param text      characters being searched
+     * @param textP     first index to try
+     * @param textEnd   end (exclusive) of the searchable text
+     * @param textRange exclusive upper bound on the index that may be reported
+     * @return index of the first hit, or -1 if there is none
+     */
+    public abstract int search(Regex regex, char[] text, int textP, int textEnd, int textRange);
+
+    /**
+     * Searches backward, from high indices towards {@code textP}.
+     *
+     * @param regex      supplies the literal or character map to look for
+     * @param text       characters being searched
+     * @param textP      lowest index that is tried
+     * @param adjustText only read by {@link SLOW_IC}, which forwards it to
+     *                   {@code EncodingHelper.prevCharHead}
+     * @param textEnd    end (exclusive) of the searchable text
+     * @param textStart  highest index that is tried
+     * @param s_         only read by {@link #BM}, see {@code range_}
+     * @param range_     only read by {@link #BM}: when {@code s_ - range_} is small and no
+     *                   backward skip table exists yet, BM delegates to {@link #SLOW}
+     * @return index of the first hit found, or -1 if there is none
+     */
+    public abstract int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_);
+
+
+    /** No pre-filtering: both searches simply report {@code textP}. */
+    public static final SearchAlgorithm NONE = new SearchAlgorithm() {
+
+        @Override
+        public final String getName() {
+            return "NONE";
+        }
+
+        @Override
+        public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) {
+            return textP;
+        }
+
+        @Override
+        public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+            return textP;
+        }
+
+    };
+
+    /** Straightforward character-by-character comparison of the literal at every position. */
+    public static final SearchAlgorithm SLOW = new SearchAlgorithm() {
+
+        @Override
+        public final String getName() {
+            return "EXACT";
+        }
+
+        @Override
+        public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            // last start index (exclusive) at which the whole literal still fits in the text
+            int end = textEnd;
+            end -= targetEnd - targetP - 1;
+
+            if (end > textRange) end = textRange;
+
+            int s = textP;
+
+            while (s < end) {
+                if (text[s] == target[targetP]) {
+                    int p = s + 1;
+                    int t = targetP + 1;
+                    while (t < targetEnd) {
+                        if (target[t] != text[p++]) break;
+                        t++;
+                    }
+
+                    if (t == targetEnd) return s;
+                }
+                s++;
+            }
+
+            return -1;
+        }
+
+        @Override
+        public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            // highest start index at which the whole literal still fits in the text
+            int s = textEnd;
+            s -= targetEnd - targetP;
+
+            if (s > textStart) {
+                s = textStart;
+            }
+
+            while (s >= textP) {
+                if (text[s] == target[targetP]) {
+                    int p = s + 1;
+                    int t = targetP + 1;
+                    while (t < targetEnd) {
+                        if (target[t] != text[p++]) break;
+                        t++;
+                    }
+                    if (t == targetEnd) return s;
+                }
+                // s = enc.prevCharHead or s = s <= adjustText ? -1 : s - 1;
+                s--;
+            }
+            return -1;
+        }
+    };
+
+    /**
+     * Case-insensitive variant of {@link #SLOW}: each text character is passed through
+     * {@link Character#toLowerCase(char)} before being compared with the literal.
+     * NOTE(review): this presumably relies on the literal already being lower-cased;
+     * confirm against the code that fills {@code regex.exact}.
+     */
+    public static final class SLOW_IC extends SearchAlgorithm {
+        private final int caseFoldFlag;
+
+        public SLOW_IC(Regex regex) {
+            this.caseFoldFlag = regex.caseFoldFlag;
+        }
+
+        @Override
+        public final String getName() {
+            return "EXACT_IC";
+        }
+
+        @Override
+        public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            // last start index (exclusive) at which the whole literal still fits in the text
+            int end = textEnd;
+            end -= targetEnd - targetP - 1;
+
+            if (end > textRange) end = textRange;
+            int s = textP;
+
+            while (s < end) {
+                if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s;
+                s++;
+            }
+            return -1;
+        }
+
+        @Override
+        public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            // highest start index at which the whole literal still fits in the text
+            int s = textEnd;
+            s -= targetEnd - targetP;
+
+            if (s > textStart) {
+                s = textStart;
+            }
+
+            while (s >= textP) {
+                if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd)) return s;
+                s = EncodingHelper.prevCharHead(adjustText, s);
+            }
+            return -1;
+        }
+
+        /**
+         * Compares {@code t[tP .. tEnd)} with the lower-cased characters of {@code chars}
+         * starting at {@code p}. The {@code end} argument is not consulted; both callers
+         * only pass positions where the whole literal fits before {@code textEnd}.
+         */
+        private static boolean lowerCaseMatch(char[] t, int tP, int tEnd,
+                                              char[] chars, int p, int end) {
+
+            while (tP < tEnd) {
+                if (t[tP++] != Character.toLowerCase(chars[p++])) return false;
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Skip-table search. Tables are indexed by the low 8 bits of a character
+     * ({@code ch & 0xff}). The forward tables ({@code regex.map} / {@code regex.intMap})
+     * are supplied by the regex; the backward table is built here on demand.
+     */
+    public static final SearchAlgorithm BM = new SearchAlgorithm() {
+
+        @Override
+        public final String getName() {
+            return "EXACT_BM";
+        }
+
+        @Override
+        public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            // s and end refer to the position of the literal's LAST character
+            int end = textRange + (targetEnd - targetP) - 1;
+            if (end > textEnd) end = textEnd;
+
+            int tail = targetEnd - 1;
+            int s = textP + (targetEnd - targetP) - 1;
+
+            if (regex.intMap == null) {
+                while (s < end) {
+                    int p = s;
+                    int t = tail;
+
+                    // compare right-to-left; p ends on the start index of the hit
+                    while (text[p] == target[t]) {
+                        if (t == targetP) return p;
+                        p--; t--;
+                    }
+
+                    // regex.map is a (signed) byte[]: mask the entry so that a stored
+                    // distance above 127 is not read back as a negative skip
+                    s += regex.map[text[s] & 0xff] & 0xff;
+                }
+            } else { /* see int_map[] */
+                while (s < end) {
+                    int p = s;
+                    int t = tail;
+
+                    while (text[p] == target[t]) {
+                        if (t == targetP) return p;
+                        p--; t--;
+                    }
+
+                    s += regex.intMap[text[s] & 0xff];
+                }
+            }
+            return -1;
+        }
+
+        /** Below this {@code s_ - range_} distance, building a backward skip table is not attempted. */
+        private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100;
+
+        @Override
+        public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+            char[] target = regex.exact;
+            int targetP = regex.exactP;
+            int targetEnd = regex.exactEnd;
+
+            if (regex.intMapBackward == null) {
+                if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) {
+                    // goto exact_method;
+                    return SLOW.searchBackward(regex, text, textP, adjustText, textEnd, textStart, s_, range_);
+                }
+                setBmBackwardSkip(regex, target, targetP, targetEnd);
+            }
+
+            // highest start index at which the whole literal still fits in the text
+            int s = textEnd - (targetEnd - targetP);
+
+            if (textStart < s) {
+                s = textStart;
+            }
+
+            while (s >= textP) {
+                int p = s;
+                int t = targetP;
+                while (t < targetEnd && text[p] == target[t]) {
+                    p++; t++;
+                }
+                if (t == targetEnd) return s;
+
+                s -= regex.intMapBackward[text[s] & 0xff];
+            }
+            return -1;
+        }
+
+
+        /**
+         * Fills {@code regex.intMapBackward} (allocating it if necessary) for the literal
+         * {@code chars[p .. end)}: every entry defaults to the literal's length, and each
+         * character at literal offset {@code i >= 1} maps to the smallest such offset.
+         */
+        private void setBmBackwardSkip(Regex regex, char[] chars, int p, int end) {
+            int[] skip;
+            if (regex.intMapBackward == null) {
+                skip = new int[Config.CHAR_TABLE_SIZE];
+                regex.intMapBackward = skip;
+            } else {
+                skip = regex.intMapBackward;
+            }
+
+            int len = end - p;
+
+            for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) skip[i] = len;
+            // i is an offset inside the literal, so it must be rebased on p before
+            // indexing chars (the literal does not necessarily start at index 0)
+            for (int i=len-1; i>0; i--) skip[chars[p + i] & 0xff] = i;
+        }
+    };
+
+    /**
+     * Looks for the first character that could begin a match: any character above 0xff,
+     * or one whose {@code regex.map} entry is non-zero.
+     */
+    public static final SearchAlgorithm MAP = new SearchAlgorithm() {
+
+        @Override
+        public final String getName() {
+            return "MAP";
+        }
+
+        @Override
+        public final int search(Regex regex, char[] text, int textP, int textEnd, int textRange) {
+            byte[] map = regex.map;
+            int s = textP;
+
+            while (s < textRange) {
+                if (text[s] > 0xff || map[text[s]] != 0) return s;
+                s++;
+            }
+            return -1;
+        }
+
+        @Override
+        public final int searchBackward(Regex regex, char[] text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) {
+            byte[] map = regex.map;
+            int s = textStart;
+
+            if (s >= textEnd) s = textEnd - 1;
+            while (s >= textP) {
+                if (text[s] > 0xff || map[text[s]] != 0) return s;
+                s--;
+            }
+            return -1;
+        }
+    };
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/StackEntry.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,164 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * One record on the matcher's backtracking stack. Besides {@link #type}, an entry has
+ * four int slots that are reused under different names depending on what kind of entry
+ * it is (a Java rendering of a C union). Accessors belonging to different views read and
+ * write the same slot, so only the view that matches {@code type} is meaningful.
+ */
+final class StackEntry {
+    int type;
+    private int slot1, slot2, slot3, slot4;
+
+    // ---- view 1: saved matcher state ----
+
+    /** Byte code position (slot 1). */
+    void setStatePCode(int pcode) {
+        this.slot1 = pcode;
+    }
+    int getStatePCode() {
+        return this.slot1;
+    }
+
+    /** String position (slot 2). */
+    void setStatePStr(int pstr) {
+        this.slot2 = pstr;
+    }
+    int getStatePStr() {
+        return this.slot2;
+    }
+
+    /** Position of the character preceding the string position (slot 3). */
+    void setStatePStrPrev(int pstrPrev) {
+        this.slot3 = pstrPrev;
+    }
+    int getStatePStrPrev() {
+        return this.slot3;
+    }
+
+    /** State-check number (slot 4). */
+    void setStateCheck(int check) {
+        this.slot4 = check;
+    }
+    int getStateCheck() {
+        return this.slot4;
+    }
+
+    // ---- view 2: repeat bookkeeping (for OP_REPEAT_INC, OP_REPEAT_INC_NG) ----
+
+    /** Iteration counter (slot 1). */
+    void setRepeatCount(int count) {
+        this.slot1 = count;
+    }
+    int getRepeatCount() {
+        return this.slot1;
+    }
+    void decreaseRepeatCount() {
+        this.slot1 -= 1;
+    }
+    void increaseRepeatCount() {
+        this.slot1 += 1;
+    }
+
+    /** Byte code position of the head of the repeated target (slot 2). */
+    void setRepeatPCode(int pcode) {
+        this.slot2 = pcode;
+    }
+    int getRepeatPCode() {
+        return this.slot2;
+    }
+
+    /** Repeat id (slot 3). */
+    void setRepeatNum(int num) {
+        this.slot3 = num;
+    }
+    int getRepeatNum() {
+        return this.slot3;
+    }
+
+    // ---- view 3: repeat increment ----
+
+    /** Stack index of the associated repeat entry (slot 1). */
+    void setSi(int si) {
+        this.slot1 = si;
+    }
+    int getSi() {
+        return this.slot1;
+    }
+
+    // ---- view 4: capture group (memory) ----
+
+    /** Memory (group) number (slot 1). */
+    void setMemNum(int num) {
+        this.slot1 = num;
+    }
+    int getMemNum() {
+        return this.slot1;
+    }
+
+    /** Start or end position of the group (slot 2). */
+    void setMemPstr(int pstr) {
+        this.slot2 = pstr;
+    }
+    int getMemPStr() {
+        return this.slot2;
+    }
+
+    /* The next two values are set if this stack type is MEM-START. */
+
+    /** Previous start info, kept for backtracking "(...)*" (slot 3). */
+    void setMemStart(int start) {
+        this.slot3 = start;
+    }
+    int getMemStart() {
+        return this.slot3;
+    }
+
+    /** Previous end info, kept for backtracking "(...)*" (slot 4). */
+    void setMemEnd(int end) {
+        this.slot4 = end;
+    }
+    int getMemEnd() {
+        return this.slot4;
+    }
+
+    // ---- view 5: null (empty-loop) check ----
+
+    /** Null check id (slot 1). */
+    void setNullCheckNum(int num) {
+        this.slot1 = num;
+    }
+    int getNullCheckNum() {
+        return this.slot1;
+    }
+
+    /** Start position of the checked region (slot 2). */
+    void setNullCheckPStr(int pstr) {
+        this.slot2 = pstr;
+    }
+    int getNullCheckPStr() {
+        return this.slot2;
+    }
+
+    // ---- view 6: call frame ----
+
+    /** Byte code position to return to (slot 1). */
+    void setCallFrameRetAddr(int addr) {
+        this.slot1 = addr;
+    }
+    int getCallFrameRetAddr() {
+        return this.slot1;
+    }
+
+    /** Call frame number (slot 2). */
+    void setCallFrameNum(int num) {
+        this.slot2 = num;
+    }
+    int getCallFrameNum() {
+        return this.slot2;
+    }
+
+    /** String position (slot 3). */
+    void setCallFramePStr(int pstr) {
+        this.slot3 = pstr;
+    }
+    int getCallFramePStr() {
+        return this.slot3;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/StackMachine.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,621 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
+
+import java.lang.ref.WeakReference;
+import java.util.Arrays;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.StackType;
+
+abstract class StackMachine extends Matcher implements StackType {
+ protected static final int INVALID_INDEX = -1;
+
+ protected StackEntry[]stack;
+ protected int stk; // stkEnd
+
+ protected final int[]repeatStk;
+ protected final int memStartStk, memEndStk;
+
+ // CEC
+ protected byte[] stateCheckBuff; // move to int[] ?
+ int stateCheckBuffSize;
+
+ protected StackMachine(Regex regex, char[] chars, int p , int end) {
+ super(regex, chars, p, end);
+
+ this.stack = regex.stackNeeded ? fetchStack() : null;
+ int n = regex.numRepeat + (regex.numMem << 1);
+ this.repeatStk = n > 0 ? new int[n] : null;
+
+ memStartStk = regex.numRepeat - 1;
+ memEndStk = memStartStk + regex.numMem;
+ /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */
+ /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */
+ }
+
+ private static StackEntry[] allocateStack() {
+ StackEntry[]stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE];
+ stack[0] = new StackEntry();
+ return stack;
+ }
+
+ private void doubleStack() {
+ StackEntry[] newStack = new StackEntry[stack.length << 1];
+ System.arraycopy(stack, 0, newStack, 0, stack.length);
+ stack = newStack;
+ }
+
+ static final ThreadLocal<WeakReference<StackEntry[]>> stacks
+ = new ThreadLocal<WeakReference<StackEntry[]>>() {
+ @Override
+ protected WeakReference<StackEntry[]> initialValue() {
+ return new WeakReference<StackEntry[]>(allocateStack());
+ }
+ };
+
+ private static StackEntry[] fetchStack() {
+ WeakReference<StackEntry[]> ref = stacks.get();
+ StackEntry[] stack = ref.get();
+ if (stack == null) {
+ ref = new WeakReference<StackEntry[]>(stack = allocateStack());
+ stacks.set(ref);
+ }
+ return stack;
+ }
+
+ protected final void init() {
+ if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */
+ if (repeatStk != null) {
+ for (int i=1; i<=regex.numMem; i++) {
+ repeatStk[i + memStartStk] = repeatStk[i + memEndStk] = INVALID_INDEX;
+ }
+ }
+ }
+
+ protected final StackEntry ensure1() {
+ if (stk >= stack.length) doubleStack();
+ StackEntry e = stack[stk];
+ if (e == null) stack[stk] = e = new StackEntry();
+ return e;
+ }
+
+ protected final void pushType(int type) {
+ ensure1().type = type;
+ stk++;
+ }
+
+ // CEC
+
+ // STATE_CHECK_POS
+ private int stateCheckPos(int s, int snum) {
+ return (s - str) * regex.numCombExpCheck + (snum - 1);
+ }
+
+ // STATE_CHECK_VAL
+ protected final boolean stateCheckVal(int s, int snum) {
+ if (stateCheckBuff != null) {
+ int x = stateCheckPos(s, snum);
+ return (stateCheckBuff[x / 8] & (1 << (x % 8))) != 0;
+ }
+ return false;
+ }
+
+ // ELSE_IF_STATE_CHECK_MARK
+ private void stateCheckMark() {
+ StackEntry e = stack[stk];
+ int x = stateCheckPos(e.getStatePStr(), e.getStateCheck());
+ stateCheckBuff[x / 8] |= (1 << (x % 8));
+ }
+
+ // STATE_CHECK_BUFF_INIT
+ private static final int STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE = 16;
+ protected final void stateCheckBuffInit(int strLength, int offset, int stateNum) {
+ if (stateNum > 0 && strLength >= Config.CHECK_STRING_THRESHOLD_LEN) {
+ int size = ((strLength + 1) * stateNum + 7) >>> 3;
+ offset = (offset * stateNum) >>> 3;
+
+ if (size > 0 && offset < size && size < Config.CHECK_BUFF_MAX_SIZE) {
+ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {
+ stateCheckBuff = new byte[size];
+ } else {
+ // same impl, reduce...
+ stateCheckBuff = new byte[size];
+ }
+ Arrays.fill(stateCheckBuff, offset, (size - offset), (byte)0);
+ stateCheckBuffSize = size;
+ } else {
+ stateCheckBuff = null; // reduce
+ stateCheckBuffSize = 0;
+ }
+ } else {
+ stateCheckBuff = null; // reduce
+ stateCheckBuffSize = 0;
+ }
+ }
+
+ protected final void stateCheckBuffClear() {
+ stateCheckBuff = null;
+ stateCheckBuffSize = 0;
+ }
+
+ private void push(int type, int pat, int s, int prev) {
+ StackEntry e = ensure1();
+ e.type = type;
+ e.setStatePCode(pat);
+ e.setStatePStr(s);
+ e.setStatePStrPrev(prev);
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+ stk++;
+ }
+
+ protected final void pushEnsured(int type, int pat) {
+ StackEntry e = stack[stk];
+ e.type = type;
+ e.setStatePCode(pat);
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(0);
+ stk++;
+ }
+
+ protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum) {
+ StackEntry e = ensure1();
+ e.type = ALT;
+ e.setStatePCode(pat);
+ e.setStatePStr(s);
+ e.setStatePStrPrev(sprev);
+ if (Config.USE_COMBINATION_EXPLOSION_CHECK) e.setStateCheck(stateCheckBuff != null ? snum : 0);
+ stk++;
+ }
+
+ protected final void pushStateCheck(int s, int snum) {
+ if (stateCheckBuff != null) {
+ StackEntry e = ensure1();
+ e.type = STATE_CHECK_MARK;
+ e.setStatePStr(s);
+ e.setStateCheck(snum);
+ stk++;
+ }
+ }
+
+ protected final void pushAlt(int pat, int s, int prev) {
+ push(ALT, pat, s, prev);
+ }
+
+ protected final void pushPos(int s, int prev) {
+ push(POS, -1 /*NULL_UCHARP*/, s, prev);
+ }
+
+ protected final void pushPosNot(int pat, int s, int prev) {
+ push(POS_NOT, pat, s, prev);
+ }
+
+ protected final void pushStopBT() {
+ pushType(STOP_BT);
+ }
+
+ protected final void pushLookBehindNot(int pat, int s, int sprev) {
+ push(LOOK_BEHIND_NOT, pat, s, sprev);
+ }
+
+ protected final void pushRepeat(int id, int pat) {
+ StackEntry e = ensure1();
+ e.type = REPEAT;
+ e.setRepeatNum(id);
+ e.setRepeatPCode(pat);
+ e.setRepeatCount(0);
+ stk++;
+ }
+
+ protected final void pushRepeatInc(int sindex) {
+ StackEntry e = ensure1();
+ e.type = REPEAT_INC;
+ e.setSi(sindex);
+ stk++;
+ }
+
+ protected final void pushMemStart(int mnum, int s) {
+ StackEntry e = ensure1();
+ e.type = MEM_START;
+ e.setMemNum(mnum);
+ e.setMemPstr(s);
+ e.setMemStart(repeatStk[memStartStk + mnum]);
+ e.setMemEnd(repeatStk[memEndStk + mnum]);
+ repeatStk[memStartStk + mnum] = stk;
+ repeatStk[memEndStk + mnum] = INVALID_INDEX;
+ stk++;
+ }
+
+ protected final void pushMemEnd(int mnum, int s) {
+ StackEntry e = ensure1();
+ e.type = MEM_END;
+ e.setMemNum(mnum);
+ e.setMemPstr(s);
+ e.setMemStart(repeatStk[memStartStk + mnum]);
+ e.setMemEnd(repeatStk[memEndStk + mnum]);
+ repeatStk[memEndStk + mnum] = stk;
+ stk++;
+ }
+
+ protected final void pushMemEndMark(int mnum) {
+ StackEntry e = ensure1();
+ e.type = MEM_END_MARK;
+ e.setMemNum(mnum);
+ stk++;
+ }
+
+ protected final int getMemStart(int mnum) {
+ int level = 0;
+ int stkp = stk;
+
+ while (stkp > 0) {
+ stkp--;
+ StackEntry e = stack[stkp];
+ if ((e.type & MASK_MEM_END_OR_MARK) != 0 && e.getMemNum() == mnum) {
+ level++;
+ } else if (e.type == MEM_START && e.getMemNum() == mnum) {
+ if (level == 0) break;
+ level--;
+ }
+ }
+ return stkp;
+ }
+
+ protected final void pushNullCheckStart(int cnum, int s) {
+ StackEntry e = ensure1();
+ e.type = NULL_CHECK_START;
+ e.setNullCheckNum(cnum);
+ e.setNullCheckPStr(s);
+ stk++;
+ }
+
+ protected final void pushNullCheckEnd(int cnum) {
+ StackEntry e = ensure1();
+ e.type = NULL_CHECK_END;
+ e.setNullCheckNum(cnum);
+ stk++;
+ }
+
+ protected final void pushCallFrame(int pat) {
+ StackEntry e = ensure1();
+ e.type = CALL_FRAME;
+ e.setCallFrameRetAddr(pat);
+ stk++;
+ }
+
+ protected final void pushReturn() {
+ StackEntry e = ensure1();
+ e.type = RETURN;
+ stk++;
+ }
+
+ // stack debug routines here
+ // ...
+
+ protected final void popOne() {
+ stk--;
+ }
+
+ protected final StackEntry pop() {
+ switch (regex.stackPopLevel) {
+ case StackPopLevel.FREE:
+ return popFree();
+ case StackPopLevel.MEM_START:
+ return popMemStart();
+ default:
+ return popDefault();
+ }
+ }
+
+ private StackEntry popFree() {
+ while (true) {
+ StackEntry e = stack[--stk];
+
+ if ((e.type & MASK_POP_USED) != 0) {
+ return e;
+ } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.type == STATE_CHECK_MARK) stateCheckMark();
+ }
+ }
+ }
+
+ private StackEntry popMemStart() {
+ while (true) {
+ StackEntry e = stack[--stk];
+
+ if ((e.type & MASK_POP_USED) != 0) {
+ return e;
+ } else if (e.type == MEM_START) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.type == STATE_CHECK_MARK) stateCheckMark();
+ }
+ }
+ }
+
+ private StackEntry popDefault() {
+ while (true) {
+ StackEntry e = stack[--stk];
+
+ if ((e.type & MASK_POP_USED) != 0) {
+ return e;
+ } else if (e.type == MEM_START) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ } else if (e.type == REPEAT_INC) {
+ //int si = stack[stk + IREPEAT_INC_SI];
+ //stack[si + IREPEAT_COUNT]--;
+ stack[e.getSi()].decreaseRepeatCount();
+ } else if (e.type == MEM_END) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.type == STATE_CHECK_MARK) stateCheckMark();
+ }
+ }
+ }
+
+ protected final void popTilPosNot() {
+ while (true) {
+ stk--;
+ StackEntry e = stack[stk];
+
+ if (e.type == POS_NOT) {
+ break;
+ } else if (e.type == MEM_START) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemStart();
+ } else if (e.type == REPEAT_INC) {
+ //int si = stack[stk + IREPEAT_INC_SI];
+ //stack[si + IREPEAT_COUNT]--;
+ stack[e.getSi()].decreaseRepeatCount();
+ } else if (e.type == MEM_END){
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemStart();
+ } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.type == STATE_CHECK_MARK) stateCheckMark();
+ }
+ }
+ }
+
+ protected final void popTilLookBehindNot() {
+ while (true) {
+ stk--;
+ StackEntry e = stack[stk];
+
+ if (e.type == LOOK_BEHIND_NOT) {
+ break;
+ } else if (e.type == MEM_START) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ } else if (e.type == REPEAT_INC) {
+ //int si = stack[stk + IREPEAT_INC_SI];
+ //stack[si + IREPEAT_COUNT]--;
+ stack[e.getSi()].decreaseRepeatCount();
+ } else if (e.type == MEM_END) {
+ repeatStk[memStartStk + e.getMemNum()] = e.getMemStart();
+ repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd();
+ } else if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
+ if (e.type == STATE_CHECK_MARK) stateCheckMark();
+ }
+ }
+ }
+
+ protected final int posEnd() {
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+ if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+ e.type = VOID;
+ } else if (e.type == POS) {
+ e.type = VOID;
+ break;
+ }
+ }
+ return k;
+ }
+
+ protected final void stopBtEnd() {
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if ((e.type & MASK_TO_VOID_TARGET) != 0) {
+ e.type = VOID;
+ } else if (e.type == STOP_BT) {
+ e.type = VOID;
+ break;
+ }
+ }
+ }
+
+ // int for consistency with other null check routines
+ protected final int nullCheck(int id, int s) {
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == NULL_CHECK_START) {
+ if (e.getNullCheckNum() == id) {
+ return e.getNullCheckPStr() == s ? 1 : 0;
+ }
+ }
+ }
+ }
+
+ protected final int nullCheckRec(int id, int s) {
+ int level = 0;
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == NULL_CHECK_START) {
+ if (e.getNullCheckNum() == id) {
+ if (level == 0) {
+ return e.getNullCheckPStr() == s ? 1 : 0;
+ } else {
+ level--;
+ }
+ }
+ } else if (e.type == NULL_CHECK_END) {
+ level++;
+ }
+ }
+ }
+
+ protected final int nullCheckMemSt(int id, int s) {
+ int k = stk;
+ int isNull;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == NULL_CHECK_START) {
+ if (e.getNullCheckNum() == id) {
+ if (e.getNullCheckPStr() != s) {
+ isNull = 0;
+ break;
+ } else {
+ int endp;
+ isNull = 1;
+ while (k < stk) {
+ if (e.type == MEM_START) {
+ if (e.getMemEnd() == INVALID_INDEX) {
+ isNull = 0;
+ break;
+ }
+ if (bsAt(regex.btMemEnd, e.getMemNum())) {
+ endp = stack[e.getMemEnd()].getMemPStr();
+ } else {
+ endp = e.getMemEnd();
+ }
+ if (stack[e.getMemStart()].getMemPStr() != endp) {
+ isNull = 0;
+ break;
+ } else if (endp != s) {
+ isNull = -1; /* empty, but position changed */
+ }
+ }
+ k++;
+ e = stack[k]; // !!
+ }
+ break;
+ }
+ }
+ }
+ }
+ return isNull;
+ }
+
+ protected final int nullCheckMemStRec(int id, int s) {
+ int level = 0;
+ int k = stk;
+ int isNull;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == NULL_CHECK_START) {
+ if (e.getNullCheckNum() == id) {
+ if (level == 0) {
+ if (e.getNullCheckPStr() != s) {
+ isNull = 0;
+ break;
+ } else {
+ int endp;
+ isNull = 1;
+ while (k < stk) {
+ if (e.type == MEM_START) {
+ if (e.getMemEnd() == INVALID_INDEX) {
+ isNull = 0;
+ break;
+ }
+ if (bsAt(regex.btMemEnd, e.getMemNum())) {
+ endp = stack[e.getMemEnd()].getMemPStr();
+ } else {
+ endp = e.getMemEnd();
+ }
+ if (stack[e.getMemStart()].getMemPStr() != endp) {
+ isNull = 0;
+ break;
+ } else if (endp != s) {
+ isNull = -1;; /* empty, but position changed */
+ }
+ }
+ k++;
+ e = stack[k];
+ }
+ break;
+ }
+ } else {
+ level--;
+ }
+ }
+ } else if (e.type == NULL_CHECK_END) {
+ if (e.getNullCheckNum() == id) level++;
+ }
+ }
+ return isNull;
+ }
+
+ protected final int getRepeat(int id) {
+ int level = 0;
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == REPEAT) {
+ if (level == 0) {
+ if (e.getRepeatNum() == id) return k;
+ }
+ } else if (e.type == CALL_FRAME) {
+ level--;
+ } else if (e.type == RETURN) {
+ level++;
+ }
+ }
+ }
+
+ protected final int sreturn() {
+ int level = 0;
+ int k = stk;
+ while (true) {
+ k--;
+ StackEntry e = stack[k];
+
+ if (e.type == CALL_FRAME) {
+ if (level == 0) {
+ return e.getCallFrameRetAddr();
+ } else {
+ level--;
+ }
+ } else if (e.type == RETURN) {
+ level++;
+ }
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Syntax.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,628 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar.INEFFECTIVE_META_CHAR;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.SyntaxProperties;
+
+public final class Syntax implements SyntaxProperties{ // immutable bundle of three flag masks, an options value and a MetaCharTable, plus predefined instances
+ private final int op; // bit mask tested by isOp()
+ private final int op2; // bit mask tested by isOp2()
+ private final int behavior; // bit mask tested by isBehavior()
+ public final int options; // stored as given; not read inside this class
+ public final MetaCharTable metaCharTable; // stored as given; not read inside this class
+
+ public Syntax(int op, int op2, int behavior, int options, MetaCharTable metaCharTable) { // plain field-by-field initialisation, no validation
+ this.op = op;
+ this.op2 = op2;
+ this.behavior = behavior;
+ this.options = options;
+ this.metaCharTable = metaCharTable;
+ }
+
+ public static class MetaCharTable { // six meta-character codes, fixed at construction
+ public final int esc;
+ public final int anyChar;
+ public final int anyTime;
+ public final int zeroOrOneTime;
+ public final int oneOrMoreTime;
+ public final int anyCharAnyTime;
+
+ public MetaCharTable(int esc, int anyChar, int anyTime,
+ int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) {
+ this.esc = esc;
+ this.anyChar = anyChar;
+ this.anyTime = anyTime;
+ this.zeroOrOneTime = zeroOrOneTime;
+ this.oneOrMoreTime = oneOrMoreTime;
+ this.anyCharAnyTime = anyCharAnyTime;
+ }
+ }
+
+ /**
+ * OP: true if any bit of {@code opm} is set in the {@code op} mask.
+ * The op*() predicates below each test one OP_* constant.
+ */
+ protected boolean isOp(int opm) {
+ return (op & opm) != 0;
+ }
+
+ public boolean opVariableMetaCharacters() {
+ return isOp(OP_VARIABLE_META_CHARACTERS);
+ }
+
+ public boolean opDotAnyChar() {
+ return isOp(OP_DOT_ANYCHAR);
+ }
+
+ public boolean opAsteriskZeroInf() {
+ return isOp(OP_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opEscAsteriskZeroInf() {
+ return isOp(OP_ESC_ASTERISK_ZERO_INF);
+ }
+
+ public boolean opPlusOneInf() {
+ return isOp(OP_PLUS_ONE_INF);
+ }
+
+ public boolean opEscPlusOneInf() {
+ return isOp(OP_ESC_PLUS_ONE_INF);
+ }
+
+ public boolean opQMarkZeroOne() {
+ return isOp(OP_QMARK_ZERO_ONE);
+ }
+
+ public boolean opEscQMarkZeroOne() {
+ return isOp(OP_ESC_QMARK_ZERO_ONE);
+ }
+
+ public boolean opBraceInterval() {
+ return isOp(OP_BRACE_INTERVAL);
+ }
+
+ public boolean opEscBraceInterval() {
+ return isOp(OP_ESC_BRACE_INTERVAL);
+ }
+
+ public boolean opVBarAlt() {
+ return isOp(OP_VBAR_ALT);
+ }
+
+ public boolean opEscVBarAlt() {
+ return isOp(OP_ESC_VBAR_ALT);
+ }
+
+ public boolean opLParenSubexp() {
+ return isOp(OP_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscLParenSubexp() {
+ return isOp(OP_ESC_LPAREN_SUBEXP);
+ }
+
+ public boolean opEscAZBufAnchor() {
+ return isOp(OP_ESC_AZ_BUF_ANCHOR);
+ }
+
+ public boolean opEscCapitalGBeginAnchor() {
+ return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR);
+ }
+
+ public boolean opDecimalBackref() {
+ return isOp(OP_DECIMAL_BACKREF);
+ }
+
+ public boolean opBracketCC() {
+ return isOp(OP_BRACKET_CC);
+ }
+
+ public boolean opEscWWord() {
+ return isOp(OP_ESC_W_WORD);
+ }
+
+ public boolean opEscLtGtWordBeginEnd() {
+ return isOp(OP_ESC_LTGT_WORD_BEGIN_END);
+ }
+
+ public boolean opEscBWordBound() {
+ return isOp(OP_ESC_B_WORD_BOUND);
+ }
+
+ public boolean opEscSWhiteSpace() {
+ return isOp(OP_ESC_S_WHITE_SPACE);
+ }
+
+ public boolean opEscDDigit() {
+ return isOp(OP_ESC_D_DIGIT);
+ }
+
+ public boolean opLineAnchor() {
+ return isOp(OP_LINE_ANCHOR);
+ }
+
+ public boolean opPosixBracket() {
+ return isOp(OP_POSIX_BRACKET);
+ }
+
+ public boolean opQMarkNonGreedy() {
+ return isOp(OP_QMARK_NON_GREEDY);
+ }
+
+ public boolean opEscControlChars() {
+ return isOp(OP_ESC_CONTROL_CHARS);
+ }
+
+ public boolean opEscCControl() {
+ return isOp(OP_ESC_C_CONTROL);
+ }
+
+ public boolean opEscOctal3() {
+ return isOp(OP_ESC_OCTAL3);
+ }
+
+ public boolean opEscXHex2() {
+ return isOp(OP_ESC_X_HEX2);
+ }
+
+ public boolean opEscXBraceHex8() {
+ return isOp(OP_ESC_X_BRACE_HEX8);
+ }
+
+
+ /**
+ * OP2: true if any bit of {@code opm} is set in the {@code op2} mask.
+ * The op2*() predicates below each test one OP2_* constant.
+ */
+ protected boolean isOp2(int opm) {
+ return (op2 & opm) != 0;
+ }
+
+ public boolean op2EscCapitalQQuote() {
+ return isOp2(OP2_ESC_CAPITAL_Q_QUOTE);
+ }
+
+ public boolean op2QMarkGroupEffect() {
+ return isOp2(OP2_QMARK_GROUP_EFFECT);
+ }
+
+ public boolean op2OptionPerl() {
+ return isOp2(OP2_OPTION_PERL);
+ }
+
+ public boolean op2OptionRuby() {
+ return isOp2(OP2_OPTION_RUBY);
+ }
+
+ public boolean op2PlusPossessiveRepeat() {
+ return isOp2(OP2_PLUS_POSSESSIVE_REPEAT);
+ }
+
+ public boolean op2PlusPossessiveInterval() {
+ return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL);
+ }
+
+ public boolean op2CClassSetOp() {
+ return isOp2(OP2_CCLASS_SET_OP);
+ }
+
+ public boolean op2QMarkLtNamedGroup() {
+ return isOp2(OP2_QMARK_LT_NAMED_GROUP);
+ }
+
+ public boolean op2EscKNamedBackref() {
+ return isOp2(OP2_ESC_K_NAMED_BACKREF);
+ }
+
+ public boolean op2EscGSubexpCall() {
+ return isOp2(OP2_ESC_G_SUBEXP_CALL);
+ }
+
+ public boolean op2AtMarkCaptureHistory() {
+ return isOp2(OP2_ATMARK_CAPTURE_HISTORY);
+ }
+
+ public boolean op2EscCapitalCBarControl() {
+ return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL);
+ }
+
+ public boolean op2EscCapitalMBarMeta() {
+ return isOp2(OP2_ESC_CAPITAL_M_BAR_META);
+ }
+
+ public boolean op2EscVVtab() {
+ return isOp2(OP2_ESC_V_VTAB);
+ }
+
+ public boolean op2EscUHex4() {
+ return isOp2(OP2_ESC_U_HEX4);
+ }
+
+ public boolean op2EscGnuBufAnchor() {
+ return isOp2(OP2_ESC_GNU_BUF_ANCHOR);
+ }
+
+ public boolean op2EscPBraceCharProperty() {
+ return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY);
+ }
+
+ public boolean op2EscPBraceCircumflexNot() {
+ return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT);
+ }
+
+ public boolean op2EscHXDigit() {
+ return isOp2(OP2_ESC_H_XDIGIT);
+ }
+
+ public boolean op2IneffectiveEscape() {
+ return isOp2(OP2_INEFFECTIVE_ESCAPE);
+ }
+
+ /**
+ * BEHAVIOR: true if any bit of {@code bvm} is set in the {@code behavior} mask.
+ * The predicates below each test one behavior constant.
+ */
+ protected boolean isBehavior(int bvm) {
+ return (behavior & bvm) != 0;
+ }
+
+ public boolean contextIndepRepeatOps() {
+ return isBehavior(CONTEXT_INDEP_REPEAT_OPS);
+ }
+
+ public boolean contextInvalidRepeatOps() {
+ return isBehavior(CONTEXT_INVALID_REPEAT_OPS);
+ }
+
+ public boolean allowUnmatchedCloseSubexp() {
+ return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP);
+ }
+
+ public boolean allowInvalidInterval() {
+ return isBehavior(ALLOW_INVALID_INTERVAL);
+ }
+
+ public boolean allowIntervalLowAbbrev() {
+ return isBehavior(ALLOW_INTERVAL_LOW_ABBREV);
+ }
+
+ public boolean strictCheckBackref() {
+ return isBehavior(STRICT_CHECK_BACKREF);
+ }
+
+ public boolean differentLengthAltLookBehind() {
+ return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND);
+ }
+
+ public boolean captureOnlyNamedGroup() {
+ return isBehavior(CAPTURE_ONLY_NAMED_GROUP);
+ }
+
+ public boolean allowMultiplexDefinitionName() {
+ return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME);
+ }
+
+ public boolean fixedIntervalIsGreedyOnly() {
+ return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY);
+ }
+
+
+ public boolean notNewlineInNegativeCC() {
+ return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC);
+ }
+
+ public boolean backSlashEscapeInCC() {
+ return isBehavior(BACKSLASH_ESCAPE_IN_CC);
+ }
+
+ public boolean allowEmptyRangeInCC() {
+ return isBehavior(ALLOW_EMPTY_RANGE_IN_CC);
+ }
+
+ public boolean allowDoubleRangeOpInCC() {
+ return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC);
+ }
+
+ public boolean warnCCOpNotEscaped() {
+ return isBehavior(WARN_CC_OP_NOT_ESCAPED);
+ }
+
+ public boolean warnReduntantNestedRepeat() { // NOTE: "Reduntant" is a misspelling of "Redundant"; the public name is kept as is
+ return isBehavior(WARN_REDUNDANT_NESTED_REPEAT);
+ }
+
+ public static final Syntax RUBY = new Syntax( // arguments in order: op, op2, behavior, options, metaCharTable
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears this one flag from the OR-ed mask
+
+ ( OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_RUBY |
+ OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB |
+ OP2_ESC_H_XDIGIT ),
+
+ ( GNU_REGEX_BV |
+ ALLOW_INTERVAL_LOW_ABBREV |
+ DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME |
+ FIXED_INTERVAL_IS_GREEDY_ONLY |
+ WARN_CC_OP_NOT_ESCAPED |
+ WARN_REDUNDANT_NESTED_REPEAT ),
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax DEFAULT = RUBY; // alias: the very same instance as RUBY
+
+ public static final Syntax ASIS = new Syntax( // op and behavior masks are 0; only OP2_INEFFECTIVE_ESCAPE is set
+ 0,
+
+ OP2_INEFFECTIVE_ESCAPE,
+
+ 0,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax PosixBasic = new Syntax( // op2 and behavior masks are 0
+ (POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_BRACE_INTERVAL ),
+
+ 0,
+
+ 0,
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax PosixExtended = new Syntax( // op2 mask is 0
+ ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP |
+ OP_BRACE_INTERVAL |
+ OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE |OP_VBAR_ALT ),
+
+ 0,
+
+ ( CONTEXT_INDEP_ANCHORS |
+ CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS |
+ ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ALLOW_DOUBLE_RANGE_OP_IN_CC ),
+
+ ( Option.SINGLELINE | Option.MULTILINE ),
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax Emacs = new Syntax(
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC |
+ OP_ESC_BRACE_INTERVAL |
+ OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
+ OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF |
+ OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ),
+
+ OP2_ESC_GNU_BUF_ANCHOR,
+
+ ALLOW_EMPTY_RANGE_IN_CC,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax Grep = new Syntax( // op2 mask is 0
+ ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET |
+ OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP |
+ OP_ESC_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF |
+ OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR |
+ OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND |
+ OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ),
+
+ 0,
+
+ ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ),
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax GnuRegex = new Syntax( // the GNU_REGEX_OP / GNU_REGEX_BV masks unmodified; op2 mask is 0
+ GNU_REGEX_OP,
+ 0,
+ GNU_REGEX_BV,
+
+ Option.NONE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax Java = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears this one flag from the OR-ed mask
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT |
+ OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
+ OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
+ OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY ),
+
+ ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax Perl = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears this one flag from the OR-ed mask
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ),
+
+ GNU_REGEX_BV,
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax PerlNG = new Syntax( // same op mask expression as Perl; adds named-group flags to op2 and behavior
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_OCTAL3 | OP_ESC_X_HEX2 |
+ OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS |
+ OP_ESC_C_CONTROL )
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears this one flag from the OR-ed mask
+
+ ( OP2_ESC_CAPITAL_Q_QUOTE |
+ OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL |
+ OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ OP2_QMARK_LT_NAMED_GROUP |
+ OP2_ESC_K_NAMED_BACKREF |
+ OP2_ESC_G_SUBEXP_CALL ),
+
+ ( GNU_REGEX_BV |
+ CAPTURE_ONLY_NAMED_GROUP |
+ ALLOW_MULTIPLEX_DEFINITION_NAME ),
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+
+ public static final Syntax JAVASCRIPT = new Syntax(
+ (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY |
+ OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL | OP_ESC_X_HEX2)
+ & ~OP_ESC_LTGT_WORD_BEGIN_END ), // clears this one flag from the OR-ed mask
+
+ ( OP2_QMARK_GROUP_EFFECT | OP2_CCLASS_SET_OP |
+ OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 ),
+
+ ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
+
+ Option.SINGLELINE,
+
+ new MetaCharTable(
+ '\\', /* esc */
+ INEFFECTIVE_META_CHAR, /* anychar '.' */
+ INEFFECTIVE_META_CHAR, /* anytime '*' */
+ INEFFECTIVE_META_CHAR, /* zero or one time '?' */
+ INEFFECTIVE_META_CHAR, /* one or more time '+' */
+ INEFFECTIVE_META_CHAR /* anychar anytime */
+ )
+ );
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Token.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,172 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
+
+final class Token { // mutable token record: a type, two plain fields and a payload kept in shared slots
+ TokenType type;
+ boolean escaped;
+ int base; /* is number: 8, 16 (used in [....]) */
+ int backP;
+
+ // union fields: every accessor group below reads and writes these same slots, so groups overwrite each other
+ private int INT1, INT2, INT3, INT4, INT5;
+ private int []INTA1;
+
+ // union accessors: c, code, anchor and subtype all live in INT1
+ int getC() {
+ return INT1;
+ }
+ void setC(int c) {
+ INT1 = c;
+ }
+
+ int getCode() {
+ return INT1;
+ }
+ void setCode(int code) {
+ INT1 = code;
+ }
+
+ int getAnchor() {
+ return INT1;
+ }
+ void setAnchor(int anchor) {
+ INT1 = anchor;
+ }
+
+ int getSubtype() {
+ return INT1;
+ }
+ void setSubtype(int subtype) {
+ INT1 = subtype;
+ }
+
+ // repeat union member: lower=INT1, upper=INT2, greedy=INT3, possessive=INT4
+ int getRepeatLower() {
+ return INT1;
+ }
+ void setRepeatLower(int lower) {
+ INT1 = lower;
+ }
+
+ int getRepeatUpper() {
+ return INT2;
+ }
+ void setRepeatUpper(int upper) {
+ INT2 = upper;
+ }
+
+ boolean getRepeatGreedy() { // booleans are stored as 1 / 0 and read back as "non-zero"
+ return INT3 != 0;
+ }
+ void setRepeatGreedy(boolean greedy) {
+ INT3 = greedy ? 1 : 0;
+ }
+
+ boolean getRepeatPossessive() {
+ return INT4 != 0;
+ }
+ void setRepeatPossessive(boolean possessive) {
+ INT4 = possessive ? 1 : 0;
+ }
+
+ // backref union member: num=INT1, ref1=INT2, byName=INT3, existLevel=INT4, level=INT5, refs=INTA1
+ int getBackrefNum() {
+ return INT1;
+ }
+ void setBackrefNum(int num) {
+ INT1 = num;
+ }
+
+ int getBackrefRef1() {
+ return INT2;
+ }
+ void setBackrefRef1(int ref1) {
+ INT2 = ref1;
+ }
+
+ int[]getBackrefRefs() { // returns the stored array itself, not a copy
+ return INTA1;
+ }
+ void setBackrefRefs(int[]refs) { // keeps the caller's array, not a copy
+ INTA1 = refs;
+ }
+
+ boolean getBackrefByName() {
+ return INT3 != 0;
+ }
+ void setBackrefByName(boolean byName) {
+ INT3 = byName ? 1 : 0;
+ }
+
+ // USE_BACKREF_AT_LEVEL
+ boolean getBackrefExistLevel() {
+ return INT4 != 0;
+ }
+ void setBackrefExistLevel(boolean existLevel) {
+ INT4 = existLevel ? 1 : 0;
+ }
+
+ int getBackrefLevel() {
+ return INT5;
+ }
+ void setBackrefLevel(int level) {
+ INT5 = level;
+ }
+
+ // call union member: nameP=INT1, nameEnd=INT2, gnum=INT3
+ int getCallNameP() {
+ return INT1;
+ }
+ void setCallNameP(int nameP) {
+ INT1 = nameP;
+ }
+
+ int getCallNameEnd() {
+ return INT2;
+ }
+ void setCallNameEnd(int nameEnd) {
+ INT2 = nameEnd;
+ }
+
+ int getCallGNum() {
+ return INT3;
+ }
+ void setCallGNum(int gnum) {
+ INT3 = gnum;
+ }
+
+ // prop union member: ctype=INT1, not=INT2
+ int getPropCType() {
+ return INT1;
+ }
+ void setPropCType(int ctype) {
+ INT1 = ctype;
+ }
+
+ boolean getPropNot() {
+ return INT2 != 0;
+ }
+ void setPropNot(boolean not) {
+ INT2 = not ? 1 : 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/UnsetAddrList.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,69 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
+import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+
+public final class UnsetAddrList { // growable list of (code offset, target node) pairs; fix() patches regex.code from them
+    int num;        // number of pairs recorded so far
+    Node[] targets; // targets[i] is the node whose callAddr fix() writes
+    int[] offsets;  // offsets[i] is the index into regex.code that fix() overwrites
+
+    public UnsetAddrList(int size) { // size: initial capacity; 0 is accepted because add() grows on demand
+        targets = new Node[size];
+        offsets = new int[size];
+    }
+
+    public void add(int offset, Node node) { // appends one pair, growing both arrays when they are full
+        if (num >= offsets.length) {
+            final int grown = Math.max(offsets.length << 1, num + 1); // 0 << 1 == 0: always make room for one more
+            final Node[] ttmp = new Node[grown];
+            System.arraycopy(targets, 0, ttmp, 0, num);
+            targets = ttmp;
+            final int[] otmp = new int[grown];
+            System.arraycopy(offsets, 0, otmp, 0, num);
+            offsets = otmp;
+        }
+        targets[num] = node;
+        offsets[num] = offset;
+        num++;
+    }
+
+    public void fix(Regex regex) { // writes each target's callAddr into regex.code; throws InternalException on an unfixed address
+        for (int i = 0; i < num; i++) {
+            EncloseNode en = (EncloseNode) targets[i];
+            // the exception was previously constructed and discarded, so an unfixed address was written silently
+            if (!en.isAddrFixed()) throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+            regex.code[offsets[i]] = en.callAddr;
+        }
+    }
+
+    @Override
+    public String toString() { // concatenates one "offset + N target: NAME" fragment per pair, without separators
+        StringBuilder value = new StringBuilder();
+        for (int i = 0; i < num; i++) {
+            value.append("offset + " + offsets[i] + " target: " + targets[i].getAddressName());
+        }
+        return value.toString();
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/WarnCallback.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,33 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+/**
+ * @author <a href="mailto:ola.bini@gmail.com">Ola Bini</a>
+ */
+public interface WarnCallback {
+    /** Receives one warning message. */
+    void warn(String message);
+
+    /** Default callback: prints each message to {@code System.err}. */
+    WarnCallback DEFAULT = new WarnCallback() {
+        @Override public void warn(final String text) { System.err.println(text); }
+    };
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Warnings.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni;
+
+public interface Warnings { // warning message texts; interface fields are implicitly public static final
+    String INVALID_BACKREFERENCE = "invalid back reference";
+    String INVALID_SUBEXP_CALL = "invalid subexp call";
+    String INVALID_UNICODE_PROPERTY = "invalid Unicode Property \\<%n>"; // runtime text ends in \<%n>
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/AnchorNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,92 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
+
+public final class AnchorNode extends Node implements AnchorType {
+    public int type;        // bit mask of anchor flags, decoded by typeToString()
+    public Node target;     // child node; also exposed through getChild()/setChild()
+    public int charLength;  // starts at -1
+
+    public AnchorNode(int type) {
+        this.charLength = -1;
+        this.type = type;
+    }
+
+    @Override
+    public int getType() {
+        return ANCHOR;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        this.target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return this.target;
+    }
+
+    public void setTarget(Node tgt) {
+        this.target = tgt;
+        tgt.parent = this; // unlike setChild(), also links the child back to this node
+    }
+
+    @Override
+    public String getName() {
+        return "Anchor";
+    }
+
+    @Override
+    public String toString(int level) {
+        final String typeLine = "\n type: " + typeToString();
+        final String targetLine = "\n target: " + pad(target, level + 1);
+        // type line first, then the target line
+        return typeLine + targetLine;
+    }
+
+    public String typeToString() {
+        // flag masks and their labels, listed in the order they are reported
+        final int[] masks = {
+            BEGIN_BUF, BEGIN_LINE, BEGIN_POSITION, END_BUF, SEMI_END_BUF, END_LINE,
+            WORD_BOUND, NOT_WORD_BOUND, WORD_BEGIN, WORD_END, PREC_READ, PREC_READ_NOT,
+            LOOK_BEHIND, LOOK_BEHIND_NOT, ANYCHAR_STAR, ANYCHAR_STAR_ML
+        };
+        final String[] labels = {
+            "BEGIN_BUF ", "BEGIN_LINE ", "BEGIN_POSITION ", "END_BUF ", "SEMI_END_BUF ", "END_LINE ",
+            "WORD_BOUND ", "NOT_WORD_BOUND ", "WORD_BEGIN ", "WORD_END ", "PREC_READ ", "PREC_READ_NOT ",
+            "LOOK_BEHIND ", "LOOK_BEHIND_NOT ", "ANYCHAR_STAR ", "ANYCHAR_STAR_ML "
+        };
+        final StringBuilder names = new StringBuilder();
+        for (int i = 0; i < masks.length; i++) {
+            if (isType(masks[i])) {
+                names.append(labels[i]);
+            }
+        }
+        return names.toString();
+    }
+
+    private boolean isType(int mask) {
+        return 0 != (mask & this.type);
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/AnyCharNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,40 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+public final class AnyCharNode extends Node {
+    public AnyCharNode() { /* nothing to initialise */ }
+
+    @Override
+    public int getType() {
+        return CANY;
+    }
+
+    @Override
+    public String getName() {
+        return "Any Char";
+    }
+
+    @Override
+    public String toString(int level) {
+        // this class declares no fields, so the detail text is always empty
+        return "";
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/BackRefNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
+
+public final class BackRefNode extends StateNode {
+    /** Number of leading entries of {@link #back} in use; {@link #renumber} may lower it. */
+    public int backNum;
+    /** Referenced group numbers, copied from the constructor argument. */
+    public int back[];
+    /** Nest level stored by the level-aware constructor. */
+    public int nestLevel;
+
+    /** Creates a back reference to the first {@code backNum} entries of {@code backRefs}; flags the node
+     *  as a name reference when {@code byName}, and as recursive when a referenced group number is within
+     *  {@code env.numMem} but {@code env.memNodes} holds no node for it. */
+    public BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) {
+        this.backNum = backNum;
+        if (byName) setNameRef();
+
+        for (int i=0; i<backNum; i++) {
+            if (backRefs[i] <= env.numMem && env.memNodes[backRefs[i]] == null) {
+                setRecursion(); /* /...(\1).../ */
+                break;
+            }
+        }
+        back = new int[backNum];
+        System.arraycopy(backRefs, 0, back, 0, backNum); // own copy: renumber() rewrites it in place
+    }
+
+    /** Level-aware variant (USE_BACKREF_AT_LEVEL): additionally records {@code nestLevel} when {@code existLevel}. */
+    public BackRefNode(int backNum, int[]backRefs, boolean byName, boolean existLevel, int nestLevel, ScanEnvironment env) {
+        this(backNum, backRefs, byName, env);
+        if (existLevel) {
+            setNestLevel();
+            this.nestLevel = nestLevel;
+        }
+    }
+
+    @Override
+    public int getType() {
+        return BREF;
+    }
+
+    @Override
+    public String getName() {
+        return "Back Ref";
+    }
+
+    /** Debug dump: the superclass text followed by backNum, every entry of back, and nestLevel. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n backNum: ").append(backNum);
+        value.append("\n back: ");
+        for (int i=0; i<back.length; i++) value.append(back[i]).append(", ");
+        value.append("\n nestLevel: ").append(nestLevel); // label now matches the field (it read "nextLevel")
+        return value.toString();
+    }
+
+    /** Rewrites each used entry of {@link #back} to {@code map[entry]}, keeps only results greater than
+     *  zero and shrinks {@link #backNum} to the number kept.
+     *  @throws ValueException if this node is not a name reference */
+    public void renumber(int[]map) {
+        if (!isNameRef()) throw new ValueException(ErrorMessages.ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED);
+
+        int oldNum = backNum;
+        int pos = 0;
+        for (int i=0; i<oldNum; i++) {
+            int n = map[back[i]];
+            if (n > 0) {
+                back[pos] = n;
+                pos++;
+            }
+        }
+        backNum = pos;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CClassNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,545 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.*;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.AsciiTables;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
+import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
+
+public final class CClassNode extends Node {
+ private static final int FLAG_NCCLASS_NOT = 1<<0; // flags bit: the class is negated
+ private static final int FLAG_NCCLASS_SHARE = 1<<1; // flags bit toggled by setShare()/clearShare()
+
+ int flags; // OR of the FLAG_NCCLASS_* bits
+ public final BitSet bs = new BitSet(); // conditional creation ?
+ public CodeRangeBuffer mbuf; /* multi-byte info or NULL */
+
+ private int ctype; // for hashing purposes; also compared by equals()
+
+ // node_new_cclass
+ public CClassNode() {}
+ // same as the (not, sbOut, ranges) constructor below, additionally recording ctype
+ public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) {
+ this(not, sbOut, ranges);
+ this.ctype = ctype;
+ }
+ // resets bs, flags and mbuf; ctype is left untouched
+ public void clear() {
+ bs.clear();
+ flags = 0;
+ mbuf = null;
+ }
+
+ // node_new_cclass_by_codepoint_range, only used by shared Char Classes
+ public CClassNode(boolean not, int sbOut, int[]ranges) {
+ if (not) setNot();
+ // bs.clear();
+
+ if (sbOut > 0 && ranges != null) {
+ int n = ranges[0]; // ranges[0] = number of (from, to) pairs stored after it
+ for (int i=0; i<n; i++) {
+ int from = ranges[i * 2 + 1];
+ int to = ranges[i * 2 + 2];
+ for (int j=from; j<=to; j++) {
+ if (j >= sbOut) { // first code point at or past sbOut: stop filling bs
+ setupBuffer(ranges);
+ return;
+ }
+ bs.set(j);
+ }
+ }
+ }
+ setupBuffer(ranges);
+ }
+
+ @Override
+ public int getType() {
+ return CCLASS;
+ }
+
+ @Override
+ public String getName() {
+ return "Character Class";
+ }
+ // compares only ctype and the NOT flag; bs and mbuf are not examined
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof CClassNode)) return false;
+ CClassNode cc = (CClassNode)other;
+ return ctype == cc.ctype && isNot() == cc.isNot();
+ }
+ // derived from ctype and the NOT flag when Config.USE_SHARED_CCLASS_TABLE, else the superclass hash
+ @Override
+ public int hashCode() {
+ if (Config.USE_SHARED_CCLASS_TABLE) {
+ int hash = 0;
+ hash += ctype;
+ if (isNot()) hash++;
+ return hash + (hash >> 5);
+ } else {
+ return super.hashCode();
+ }
+ }
+ // debug dump of flags, bs and mbuf (the superclass text is not included)
+ @Override
+ public String toString(int level) {
+ StringBuilder value = new StringBuilder();
+ value.append("\n flags: " + flagsToString());
+ value.append("\n bs: " + pad(bs, level + 1));
+ value.append("\n mbuf: " + pad(mbuf, level + 1));
+
+ return value.toString();
+ }
+ // names of the flags that are set, each followed by a space
+ public String flagsToString() {
+ StringBuilder flags = new StringBuilder();
+ if (isNot()) flags.append("NOT ");
+ if (isShare()) flags.append("SHARE ");
+ return flags.toString();
+ }
+ // creates mbuf from ranges unless ranges is null or declares zero pairs (ranges[0] == 0)
+ private void setupBuffer(int[]ranges) {
+ if (ranges != null) {
+ if (ranges[0] == 0) return;
+ mbuf = new CodeRangeBuffer(ranges);
+ }
+ }
+ // true when there is no mbuf and bs reports itself empty
+ public boolean isEmpty() {
+ return mbuf == null && bs.isEmpty();
+ }
+ // the three add* methods below replace mbuf with the buffer returned by the CodeRangeBuffer helper
+ public void addCodeRangeToBuf(int from, int to) {
+ mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
+ }
+
+ public void addCodeRange(ScanEnvironment env, int from, int to) {
+ mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
+ }
+
+ public void addAllMultiByteRange() {
+ mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
+ }
+ // if NOT is set: inverts bs, passes mbuf through notCodeRangeBuff and clears the NOT flag
+ public void clearNotFlag() {
+ if (isNot()) {
+ bs.invert();
+
+ mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
+ clearNot();
+ }
+ }
+
+ // and_cclass: combines other into this node; other itself is only read
+ public void and(CClassNode other) {
+ boolean not1 = isNot();
+ BitSet bsr1 = bs;
+ CodeRangeBuffer buf1 = mbuf;
+ boolean not2 = other.isNot();
+ BitSet bsr2 = other.bs;
+ CodeRangeBuffer buf2 = other.mbuf;
+
+ if (not1) { // negated operand: continue with a temporary filled by invertTo
+ BitSet bs1 = new BitSet();
+ bsr1.invertTo(bs1);
+ bsr1 = bs1;
+ }
+
+ if (not2) { // same for the other operand, so other.bs is not modified
+ BitSet bs2 = new BitSet();
+ bsr2.invertTo(bs2);
+ bsr2 = bs2;
+ }
+
+ bsr1.and(bsr2);
+
+ if (bsr1 != bs) { // result was computed in the temporary: copy it back into bs
+ bs.copy(bsr1);
+ bsr1 = bs;
+ }
+
+ if (not1) { // the NOT flag of this node is left set, so bs is stored inverted again
+ bs.invert();
+ }
+
+ CodeRangeBuffer pbuf = null;
+
+ if (not1 && not2) {
+ pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
+ } else {
+ pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
+
+ if (not1) {
+ pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
+ }
+ }
+ mbuf = pbuf;
+
+ }
+
+ // or_cclass: same structure as and(), with the bit-set and code-range operations exchanged
+ public void or(CClassNode other) {
+ boolean not1 = isNot();
+ BitSet bsr1 = bs;
+ CodeRangeBuffer buf1 = mbuf;
+ boolean not2 = other.isNot();
+ BitSet bsr2 = other.bs;
+ CodeRangeBuffer buf2 = other.mbuf;
+
+ if (not1) {
+ BitSet bs1 = new BitSet();
+ bsr1.invertTo(bs1);
+ bsr1 = bs1;
+ }
+
+ if (not2) {
+ BitSet bs2 = new BitSet();
+ bsr2.invertTo(bs2);
+ bsr2 = bs2;
+ }
+
+ bsr1.or(bsr2);
+
+ if (bsr1 != bs) {
+ bs.copy(bsr1);
+ bsr1 = bs;
+ }
+
+ if (not1) {
+ bs.invert();
+ }
+
+ CodeRangeBuffer pbuf = null;
+ if (not1 && not2) {
+ pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
+ } else {
+ pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
+ if (not1) {
+ pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
+ }
+ }
+ mbuf = pbuf;
+ }
+
+ // add_ctype_to_cc_by_range // Encoding out!
+ public void addCTypeByRange(int ctype, boolean not, int sbOut, int mbr[]) {
+ int n = mbr[0]; // mbr layout: pair count, then (from, to) pairs at [2*i+1] and [2*i+2]
+
+ if (!not) {
+ for (int i=0; i<n; i++) {
+ for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
+ if (j >= sbOut) { // from sbOut upward the ranges go to mbuf instead of bs
+ if (Config.VANILLA) {
+ if (j == mbr[i * 2 + 2]) {
+ i++;
+ } else if (j > mbr[i * 2 + 1]) {
+ addCodeRangeToBuf(j, mbr[i * 2 + 2]);
+ i++;
+ }
+ } else {
+ if (j >= mbr[i * 2 + 1]) {
+ addCodeRangeToBuf(j, mbr[i * 2 + 2]);
+ i++;
+ }
+ }
+ // !goto sb_end!, remove duplication!
+ for (; i<n; i++) {
+ addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+ }
+ return;
+ }
+ bs.set(j);
+ }
+ }
+ // !sb_end:!
+ for (int i=0; i<n; i++) {
+ addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
+ }
+
+ } else { // not: add what lies between and around the listed ranges
+ int prev = 0;
+
+ for (int i=0; i<n; i++) {
+ for (int j=prev; j < mbr[2 * i + 1]; j++) {
+ if (j >= sbOut) {
+ // !goto sb_end2!, remove duplication
+ prev = sbOut;
+ for (i=0; i<n; i++) {
+ if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
+ prev = mbr[i * 2 + 2] + 1;
+ }
+ if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
+ return;
+ }
+ bs.set(j);
+ }
+ prev = mbr[2 * i + 2] + 1;
+ }
+
+ for (int j=prev; j<sbOut; j++) {
+ bs.set(j);
+ }
+
+ // !sb_end2:!
+ prev = sbOut;
+ for (int i=0; i<n; i++) {
+ if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
+ prev = mbr[i * 2 + 2] + 1;
+ }
+ if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff);
+ }
+ }
+ // adds the characters of type ctype (or, when not is true, the characters outside it) to this class
+ public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
+ if (Config.NON_UNICODE_SDW) {
+ switch(ctype) {
+ case CharacterType.D:
+ case CharacterType.S:
+ case CharacterType.W:
+ ctype ^= CharacterType.SPECIAL_MASK;
+
+ if (env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE) {
+ // \s in JavaScript includes unicode characters.
+ break;
+ }
+
+ if (not) {
+ for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
+ // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
+ if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c);
+ }
+ addAllMultiByteRange();
+ } else {
+ for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
+ // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
+ if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c);
+ }
+ }
+ return;
+ }
+ }
+
+ int[] ranges = EncodingHelper.ctypeCodeRange(ctype, sbOut); // null falls through to the switch below
+ if (ranges != null) {
+ addCTypeByRange(ctype, not, sbOut.value, ranges);
+ return;
+ }
+
+ switch(ctype) {
+ case CharacterType.ALPHA:
+ case CharacterType.BLANK:
+ case CharacterType.CNTRL:
+ case CharacterType.DIGIT:
+ case CharacterType.LOWER:
+ case CharacterType.PUNCT:
+ case CharacterType.SPACE:
+ case CharacterType.UPPER:
+ case CharacterType.XDIGIT:
+ case CharacterType.ASCII:
+ case CharacterType.ALNUM:
+ if (not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
+ }
+ addAllMultiByteRange();
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
+ }
+ }
+ break;
+
+ case CharacterType.GRAPH:
+ case CharacterType.PRINT:
+ if (not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
+ }
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c);
+ }
+ addAllMultiByteRange();
+ }
+ break;
+
+ case CharacterType.WORD:
+ if (!not) {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (EncodingHelper.isWord(c)) bs.set(c);
+ }
+
+ addAllMultiByteRange();
+ } else {
+ for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
+ if (!EncodingHelper.isWord(c)) bs.set(c);
+ }
+ }
+ break;
+
+ default:
+ throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+ } // switch
+ }
+ // mutable state handed to nextStateClass()/nextStateValue(): v is the incoming value, vs the pending one
+ public static final class CCStateArg {
+ public int v;
+ public int vs;
+ public boolean vsIsRaw;
+ public boolean vIsRaw;
+ public CCVALTYPE inType;
+ public CCVALTYPE type;
+ public CCSTATE state;
+ }
+ // flushes a pending single value (arg.vs) into the class, then sets state VALUE / type CLASS
+ public void nextStateClass(CCStateArg arg, ScanEnvironment env) {
+ if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
+
+ if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
+ if (arg.type == CCVALTYPE.SB) {
+ bs.set(arg.vs);
+ } else if (arg.type == CCVALTYPE.CODE_POINT) {
+ addCodeRange(env, arg.vs, arg.vs);
+ }
+ }
+ arg.state = CCSTATE.VALUE;
+ arg.type = CCVALTYPE.CLASS;
+ }
+ // takes the next value arg.v: flushes a pending value or completes the range vs..v, then moves v into vs
+ public void nextStateValue(CCStateArg arg, ScanEnvironment env) {
+
+ switch(arg.state) {
+ case VALUE:
+ if (arg.type == CCVALTYPE.SB) {
+ if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+ bs.set(arg.vs);
+ } else if (arg.type == CCVALTYPE.CODE_POINT) {
+ addCodeRange(env, arg.vs, arg.vs);
+ }
+ break;
+
+ case RANGE:
+ if (arg.inType == arg.type) {
+ if (arg.inType == CCVALTYPE.SB) {
+ if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
+
+ if (arg.vs > arg.v) {
+ if (env.syntax.allowEmptyRangeInCC()) {
+ // goto ccs_range_end
+ arg.state = CCSTATE.COMPLETE;
+ break;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+ }
+ bs.setRange(arg.vs, arg.v);
+ } else {
+ addCodeRange(env, arg.vs, arg.v);
+ }
+ } else { // range ends have different value types: fill bs up to 0xff and also record the code range
+ if (arg.vs > arg.v) {
+ if (env.syntax.allowEmptyRangeInCC()) {
+ // goto ccs_range_end
+ arg.state = CCSTATE.COMPLETE;
+ break;
+ } else {
+ throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
+ }
+ }
+ bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
+ addCodeRange(env, arg.vs, arg.v);
+ }
+ // ccs_range_end:
+ arg.state = CCSTATE.COMPLETE;
+ break;
+
+ case COMPLETE:
+ case START:
+ arg.state = CCSTATE.VALUE;
+ break;
+
+ default:
+ break;
+
+ } // switch
+
+ arg.vsIsRaw = arg.vIsRaw;
+ arg.vs = arg.v;
+ arg.type = arg.inType;
+ }
+
+ // onig_is_code_in_cc_len
+ public boolean isCodeInCCLength(int code) {
+ boolean found;
+
+ if (code > 0xff) { // codes above 0xff are looked up in mbuf, all others in bs
+ if (mbuf == null) {
+ found = false;
+ } else {
+ found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code);
+ }
+ } else {
+ found = bs.at(code);
+ }
+
+ if (isNot()) { // a negated class reports the opposite of the lookup
+ return !found;
+ } else {
+ return found;
+ }
+ }
+
+ // onig_is_code_in_cc
+ public boolean isCodeInCC(int code) {
+ return isCodeInCCLength(code);
+ }
+ // accessors for the NOT bit of flags
+ public void setNot() {
+ flags |= FLAG_NCCLASS_NOT;
+ }
+
+ public void clearNot() {
+ flags &= ~FLAG_NCCLASS_NOT;
+ }
+
+ public boolean isNot() {
+ return (flags & FLAG_NCCLASS_NOT) != 0;
+ }
+ // accessors for the SHARE bit of flags
+ public void setShare() {
+ flags |= FLAG_NCCLASS_SHARE;
+ }
+
+ public void clearShare() {
+ flags &= ~FLAG_NCCLASS_SHARE;
+ }
+
+ public boolean isShare() {
+ return (flags & FLAG_NCCLASS_SHARE) != 0;
+ }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CTypeNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,50 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+public final class CTypeNode extends Node {
+    public int ctype;   // character-type code handed to the constructor
+    public boolean not; // negation flag handed to the constructor
+
+    /** Stores {@code type} in {@link #ctype} and {@code not} in {@link #not}. */
+    public CTypeNode(int type, boolean not) {
+        this.not = not;
+        ctype = type;
+    }
+
+    /** @return the display name of this node kind */
+    @Override
+    public String getName() {
+        return "Character Type";
+    }
+
+    /** @return the {@code CTYPE} node-type constant */
+    @Override
+    public int getType() {
+        return CTYPE;
+    }
+
+    /** Debug dump of both fields; {@code level} is not used. */
+    @Override
+    public String toString(int level) {
+        return "\n ctype: " + ctype + "\n not: " + not;
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/CallNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,86 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import java.util.Set;
+
+import jdk.nashorn.internal.runtime.regexp.joni.UnsetAddrList;
+import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;
+
+public final class CallNode extends StateNode {
+    public char[] name;
+    public int nameP;   // offset of the first name character inside name
+    public int nameEnd; // offset just past the last name character inside name
+    public int groupNum;
+    public Node target; // is it an EncloseNode always ?
+    public UnsetAddrList unsetAddrList;
+
+    public CallNode(char[] name, int nameP, int nameEnd, int gnum) {
+        this.name = name;
+        this.nameP = nameP;
+        this.nameEnd = nameEnd;
+        this.groupNum = gnum; /* call by number if gnum != 0 */
+    }
+
+    @Override
+    public int getType() {
+        return CALL;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /** Points this call at {@code tgt} and makes this node {@code tgt}'s parent; {@code tgt} must not be null. */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    @Override
+    public String getName() {
+        return "Call";
+    }
+
+    /** Warns when the target is missing or has this node as parent. NOTE(review): setTarget() itself sets that parent link - confirm the intended condition. */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (target == null || target.parent == this)
+            warnings.warn(this.getAddressName() + " doesn't point to a target or the target has been stolen");
+        // do not recurse here
+    }
+
+    /** Debug dump; a missing target is printed as "NULL" instead of raising a NullPointerException. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n name: " + new String(name, nameP, nameEnd - nameP));
+        value.append("\n groupNum: " + groupNum);
+        // the constructor leaves target null, so it cannot be dereferenced unconditionally
+        value.append("\n target: " + (target == null ? "NULL" : pad(target.getAddressName(), level + 1)));
+        value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1));
+        return value.toString();
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/ConsAltNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,152 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import java.util.Set;
+
+import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
+
+public final class ConsAltNode extends Node {
+    public Node car;
+    public ConsAltNode cdr;
+    private int type; // List or Alt
+    /** Links {@code car} and {@code cdr} (either may be null) under this cell and records the LIST/ALT type. */
+    private ConsAltNode(Node car, ConsAltNode cdr, int type) {
+        this.car = car;
+        if (car != null) car.parent = this;
+        this.cdr = cdr;
+        if (cdr != null) cdr.parent = this;
+
+        this.type = type;
+    }
+
+    public static ConsAltNode newAltNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, ALT);
+    }
+
+    public static ConsAltNode newListNode(Node left, ConsAltNode right) {
+        return new ConsAltNode(left, right, LIST);
+    }
+    /** Creates a LIST cell holding {@code x}, hangs it behind the last cell of {@code list} when that is non-null, and returns the new cell. */
+    public static ConsAltNode listAdd(ConsAltNode list, Node x) {
+        ConsAltNode n = newListNode(x, null);
+
+        if (list != null) {
+            while (list.cdr != null) {
+                list = list.cdr;
+            }
+            list.setCdr(n);
+        }
+        return n;
+    }
+
+    public void toListNode() {
+        type = LIST;
+    }
+
+    public void toAltNode() {
+        type = ALT;
+    }
+
+    @Override
+    public int getType() {
+        return type;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        car = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return car;
+    }
+    /** When this cell has a cdr it is re-parented to {@code with}, and if {@code with} is a ConsAltNode the two cdr links are exchanged, before the generic swap runs. */
+    @Override
+    public void swap(Node with) {
+        if (cdr != null) {
+            cdr.parent = with;
+            if (with instanceof ConsAltNode) {
+                ConsAltNode withCan = (ConsAltNode)with;
+                if (withCan.cdr != null) withCan.cdr.parent = this; // the partner may have no cdr; this used to throw a NullPointerException
+                ConsAltNode tmp = cdr;
+                cdr = withCan.cdr;
+                withCan.cdr = tmp;
+            }
+        }
+        super.swap(with);
+    }
+    /** Visits this cell once (tracked through {@code set}), warns about car/cdr whose parent is not this cell, and recurses into both. */
+    @Override
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (!set.contains(this)) {
+            set.add(this);
+            if (car != null) {
+                if (car.parent != this) {
+                    warnings.warn("broken list car: " + this.getAddressName() + " -> " + car.getAddressName());
+                }
+                car.verifyTree(set,warnings);
+            }
+            if (cdr != null) {
+                if (cdr.parent != this) {
+                    warnings.warn("broken list cdr: " + this.getAddressName() + " -> " + cdr.getAddressName());
+                }
+                cdr.verifyTree(set,warnings);
+            }
+        }
+    }
+    /** Installs {@code ca} as car and makes this cell its parent; {@code ca} must not be null. Returns the new car. */
+    public Node setCar(Node ca) {
+        car = ca;
+        ca.parent = this;
+        return car;
+    }
+    /** Installs {@code cd} as cdr and makes this cell its parent; {@code cd} must not be null. Returns the new cdr. */
+    public ConsAltNode setCdr(ConsAltNode cd) {
+        cdr = cd;
+        cd.parent = this;
+        return cdr;
+    }
+
+    @Override
+    public String getName() {
+        switch (type) {
+        case ALT:
+            return "Alt";
+        case LIST:
+            return "List";
+        default:
+            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
+        }
+    }
+    /** Debug dump of car (padded one level deeper) and cdr, where a null cdr prints as "NULL". */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n car: " + pad(car, level + 1));
+        value.append("\n cdr: " + (cdr == null ? "NULL" : cdr.toString()));
+
+        return value.toString();
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/EncloseNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,151 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Config;
+import jdk.nashorn.internal.runtime.regexp.joni.Option;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
+
+public final class EncloseNode extends StateNode implements EncloseType {
+    public int type; // enclose type: mask tested against MEMORY, OPTION and STOP_BACKTRACK
+    public int regNum;
+    public int option;
+    public Node target; /* EncloseNode : ENCLOSE_MEMORY */
+    public int callAddr; // AbsAddrType
+    public int minLength; // OnigDistance
+    public int maxLength; // OnigDistance
+    public int charLength;
+    public int optCount; // referenced count in optimize_node_left()
+
+    /** node_new_enclose / onig_node_new_enclose: starts with the given type mask and a callAddr of -1. */
+    public EncloseNode(int type) {
+        callAddr = -1;
+        this.type = type;
+    }
+
+    /** node_new_enclose_memory: a MEMORY enclosure; {@code option} is stored only when Config.USE_SUBEXP_CALL. */
+    public EncloseNode(int option, boolean isNamed) {
+        this(MEMORY);
+        if (isNamed) setNamedGroup();
+        if (Config.USE_SUBEXP_CALL) this.option = option;
+    }
+
+    /** node_new_option: an OPTION enclosure carrying {@code option}; {@code i} is never read. */
+    public EncloseNode(int option, int i) {
+        this(OPTION);
+        this.option = option;
+    }
+
+    @Override
+    public int getType() {
+        return ENCLOSE;
+    }
+
+    @Override
+    public String getName() {
+        return "Enclose";
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this; // the new target gets this node as its parent
+    }
+
+    /** Debug dump: the superclass text followed by one entry per field of this class. */
+    @Override
+    public String toString(int level) {
+        StringBuilder dump = new StringBuilder(super.toString(level));
+        dump.append("\n type: ").append(typeToString());
+        dump.append("\n regNum: ").append(regNum);
+        dump.append("\n option: " + Option.toString(option));
+        dump.append("\n target: " + pad(target, level + 1));
+        dump.append("\n callAddr: ").append(callAddr);
+        dump.append("\n minLength: ").append(minLength);
+        dump.append("\n maxLength: ").append(maxLength);
+        dump.append("\n charLength: ").append(charLength);
+        dump.append("\n optCount: ").append(optCount);
+        return dump.toString();
+    }
+
+    /** Lists the names of the flags set in {@link #type}, each followed by a space. */
+    public String typeToString() {
+        StringBuilder names = new StringBuilder();
+        if ((type & STOP_BACKTRACK) != 0) names.append("STOP_BACKTRACK ");
+        if ((type & MEMORY) != 0) names.append("MEMORY ");
+        if ((type & OPTION) != 0) names.append("OPTION ");
+        return names.toString();
+    }
+
+    /** Sets the bits of {@code flag} in the inherited {@code state} field. */
+    public void setEncloseStatus(int flag) {
+        state |= flag;
+    }
+
+    /** Clears the bits of {@code flag} in the inherited {@code state} field. */
+    public void clearEncloseStatus(int flag) {
+        state &= ~flag;
+    }
+
+    public boolean isMemory() {
+        return 0 != (type & MEMORY);
+    }
+
+    public void setMemory() {
+        type |= MEMORY;
+    }
+
+    public void clearMemory() {
+        type &= ~MEMORY;
+    }
+
+    public boolean isOption() {
+        return 0 != (type & OPTION);
+    }
+
+    public void setOption() {
+        type |= OPTION;
+    }
+
+    public void clearOption() {
+        type &= ~OPTION;
+    }
+
+    public boolean isStopBacktrack() {
+        return 0 != (type & STOP_BACKTRACK);
+    }
+
+    public void setStopBacktrack() {
+        type |= STOP_BACKTRACK;
+    }
+
+    public void clearStopBacktrack() {
+        type &= ~STOP_BACKTRACK;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/Node.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,135 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import java.util.Set;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Config;
+import jdk.nashorn.internal.runtime.regexp.joni.WarnCallback;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
+
+public abstract class Node implements NodeType {
+    /** Node that currently owns this one, or {@code null} when no parent link has been set. */
+    public Node parent;
+
+    /** Returns the node-type code of the concrete subclass. */
+    public abstract int getType();
+
+    /** Returns the type as a single-bit mask, i.e. {@code 1 << getType()}. */
+    public final int getType2Bit() {
+        final int typeCode = getType();
+        return 1 << typeCode;
+    }
+
+    /** Child setter hook; this base version ignores its argument. */
+    protected void setChild(Node tgt) {
+        // default definition: nothing to store
+    }
+
+    /** Child getter hook; this base version reports no child. */
+    protected Node getChild() {
+        return null; // default definition
+    }
+
+    /**
+     * Exchanges the tree positions of this node and {@code with}: each parent
+     * (when present) is told to adopt the other node through {@code setChild},
+     * then the two {@code parent} fields are swapped. Children are not exchanged.
+     *
+     * @param with the node to trade places with; dereferenced, so must not be {@code null}
+     */
+    public void swap(Node with) {
+        if (parent != null) {
+            parent.setChild(with);
+        }
+
+        if (with.parent != null) {
+            with.parent.setChild(this);
+        }
+
+        final Node previousParent = parent;
+        parent = with.parent;
+        with.parent = previousParent;
+    }
+
+    /**
+     * Checks the parent link of this node's child and then recurses into the child.
+     * Nodes already present in {@code set} and nodes without a child are skipped.
+     * Overridden by ConsAltNode and CallNode.
+     *
+     * @param set nodes visited so far; this node is added when it has a child
+     * @param warnings receives a message when the child's {@code parent} is not this node
+     */
+    public void verifyTree(Set<Node> set, WarnCallback warnings) {
+        if (set.contains(this)) {
+            return;
+        }
+        final Node child = getChild();
+        if (child == null) {
+            return;
+        }
+
+        set.add(this);
+        if (child.parent != this) {
+            warnings.warn("broken link to child: " + this.getAddressName() + " -> " + child.getAddressName());
+        }
+        child.verifyTree(set, warnings);
+    }
+
+    /** Returns the display name of the concrete node kind. */
+    public abstract String getName();
+
+    /** Renders the subclass-specific part of the debug dump for the given nesting level. */
+    protected abstract String toString(int level);
+
+    /** Returns the display name followed by {@code ":0x"} and the identity hash code in hex. */
+    public String getAddressName() {
+        final String identity = Integer.toHexString(System.identityHashCode(this));
+        return getName() + ":0x" + identity;
+    }
+
+    /**
+     * Returns a header of the form {@code <self (parent)>}, where the parent part is
+     * {@code NULL} for a node without parent, followed by {@code toString(0)}.
+     */
+    public final String toString() {
+        final String self = getAddressName();
+        final String owner = parent == null ? "NULL" : parent.getAddressName();
+        return "<" + self + " (" + owner + ")>" + toString(0);
+    }
+
+    /**
+     * Indents every line break in {@code value}'s string form.
+     * The indent starts from the initial string below and is doubled once per level.
+     *
+     * @param value object to render; {@code null} yields {@code "NULL"}
+     * @param level number of times the indent is doubled
+     * @return the rendered text with each {@code "\n"} followed by the indent
+     */
+    protected static String pad(Object value, int level) {
+        if (value == null) {
+            return "NULL";
+        }
+
+        String indent = " ";
+        for (int depth = 0; depth < level; depth++) {
+            indent = indent + indent; // doubles, it does not grow linearly
+        }
+
+        return value.toString().replace("\n", "\n" + indent);
+    }
+
+    /**
+     * Reports whether this node may not be the target of a quantifier.
+     * Always {@code false} unless {@code Config.VANILLA} is set. With it set:
+     * anchors are invalid, an alternation is invalid when any branch is, and the
+     * list branch returns {@code false} on every path as written.
+     */
+    public final boolean isInvalidQuantifier() {
+        if (!Config.VANILLA) {
+            return false;
+        }
+
+        switch (getType()) {
+
+        case ANCHOR:
+            return true;
+
+        case ENCLOSE:
+            /* allow enclosed elements */
+            /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
+            break;
+
+        case LIST:
+            for (ConsAltNode item = (ConsAltNode) this; item != null; item = item.cdr) {
+                if (!item.car.isInvalidQuantifier()) {
+                    return false;
+                }
+            }
+            return false;
+
+        case ALT:
+            for (ConsAltNode item = (ConsAltNode) this; item != null; item = item.cdr) {
+                if (item.car.isInvalidQuantifier()) {
+                    return true;
+                }
+            }
+            break;
+
+        default:
+            break;
+        }
+
+        return false;
+    }
+
+    /** Returns {@code true} when this node's type bit is contained in {@code ALLOWED_IN_LB}. */
+    public final boolean isAllowedInLookBehind() {
+        final int typeBit = getType2Bit();
+        return (typeBit & ALLOWED_IN_LB) != 0;
+    }
+
+    /** Returns {@code true} when this node's type bit is contained in {@code SIMPLE}. */
+    public final boolean isSimple() {
+        final int typeBit = getType2Bit();
+        return (typeBit & SIMPLE) != 0;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/QuantifierNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,272 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Config;
+import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
+
+public final class QuantifierNode extends StateNode {
+
+    /** Value of {@link #upper} that stands for "no upper bound". */
+    public static final int REPEAT_INFINITE = -1;
+
+    /** The quantified sub-expression. */
+    public Node target;
+    /** Minimum repeat count. */
+    public int lower;
+    /** Maximum repeat count, or {@link #REPEAT_INFINITE}. */
+    public int upper;
+    /** {@code true} for a greedy quantifier; new instances start greedy. */
+    public boolean greedy;
+
+    /** One of the {@code TargetInfo} codes; starts as {@code TargetInfo.ISNOT_EMPTY}. */
+    public int targetEmptyInfo;
+
+    public Node headExact;
+    public Node nextHeadExact;
+    public boolean isRefered;           /* include called node. don't eliminate even if {0} */
+
+    // USE_COMBINATION_EXPLOSION_CHECK
+    public int  combExpCheckNum;        /* 1,2,3...: check, 0: no check */
+
+    /**
+     * Creates a greedy quantifier with the given bounds.
+     *
+     * @param lower minimum repeat count
+     * @param upper maximum repeat count or {@link #REPEAT_INFINITE}
+     * @param byNumber when {@code true} the BY_NUMBER state flag is set
+     */
+    public QuantifierNode(int lower, int upper, boolean byNumber) {
+        this.lower = lower;
+        this.upper = upper;
+        greedy = true;
+        targetEmptyInfo = TargetInfo.ISNOT_EMPTY;
+
+        if (byNumber) setByNumber();
+    }
+
+    /** Returns {@code true} when {@code n} is the {@link #REPEAT_INFINITE} sentinel. */
+    public static boolean isRepeatInfinite(int n) {
+        return n == REPEAT_INFINITE;
+    }
+
+    @Override
+    public int getType() {
+        return QTFR;
+    }
+
+    /** Stores {@code newChild} as the target without touching its parent link. */
+    @Override
+    protected void setChild(Node newChild) {
+        target = newChild;
+    }
+
+    @Override
+    protected Node getChild() {
+        return target;
+    }
+
+    /**
+     * Stores {@code tgt} as the target and makes this node its parent.
+     *
+     * @param tgt the new target; dereferenced, so must not be {@code null}
+     */
+    public void setTarget(Node tgt) {
+        target = tgt;
+        tgt.parent = this;
+    }
+
+    /**
+     * Creates an empty string node carrying {@code flag} and swaps it into this
+     * node's position in the tree.
+     *
+     * @param flag value assigned to the new node's {@code flag} field
+     * @return the new string node
+     */
+    public StringNode convertToString(int flag) {
+        StringNode sn = new StringNode();
+        sn.flag = flag;
+        sn.swap(this);
+        return sn;
+    }
+
+    @Override
+    public String getName() {
+        return "Quantifier";
+    }
+
+    /** Debug dump: the state line from the superclass followed by one line per field. */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder(super.toString(level));
+        value.append("\n target: " + pad(target, level + 1));
+        value.append("\n lower: " + lower);
+        value.append("\n upper: " + upper);
+        value.append("\n greedy: " + greedy);
+        value.append("\n targetEmptyInfo: " + targetEmptyInfo);
+        value.append("\n headExact: " + pad(headExact, level + 1));
+        value.append("\n nextHeadExact: " + pad(nextHeadExact, level + 1));
+        value.append("\n isRefered: " + isRefered);
+        value.append("\n combExpCheckNum: " + combExpCheckNum);
+
+        return value.toString();
+    }
+
+    /** Returns {@code true} for a greedy, unbounded quantifier whose target has type {@code CANY}. */
+    public boolean isAnyCharStar() {
+        return greedy && isRepeatInfinite(upper) && target.getType() == CANY;
+    }
+
+    /**
+     * Classifies this quantifier as one of the six common forms.
+     *
+     * @return ?:0, *:1, +:2, ??:3, *?:4, +?:5, or -1 for any other bounds
+     */
+    protected int popularNum() {
+        final int base = greedy ? 0 : 3; // the lazy forms follow the greedy ones
+        if (lower == 0) {
+            if (upper == 1) return base;
+            if (isRepeatInfinite(upper)) return base + 1;
+        } else if (lower == 1 && isRepeatInfinite(upper)) {
+            return base + 2;
+        }
+        return -1;
+    }
+
+    /**
+     * Takes over target, bounds, greediness and bookkeeping fields from
+     * {@code other}; {@code other.target} is set to {@code null} afterwards.
+     */
+    protected void set(QuantifierNode other) {
+        setTarget(other.target);
+        other.target = null;
+        lower = other.lower;
+        upper = other.upper;
+        greedy = other.greedy;
+        targetEmptyInfo = other.targetEmptyInfo;
+
+        headExact = other.headExact;
+        nextHeadExact = other.nextHeadExact;
+        isRefered = other.isRefered;
+        combExpCheckNum = other.combExpCheckNum;
+    }
+
+    /**
+     * Merges this quantifier with the directly nested quantifier {@code other}
+     * according to {@code Reduce.REDUCE_TABLE}. Does nothing unless both are one
+     * of the forms recognised by {@link #popularNum()}.
+     * For the P_QQ, PQ_Q and ASIS entries {@code other} stays in the tree as this
+     * node's target; for all other entries {@code other.target} ends up {@code null}.
+     *
+     * @param other the quantifier nested directly inside this one
+     */
+    public void reduceNestedQuantifier(QuantifierNode other) {
+        int pnum = popularNum();
+        int cnum = other.popularNum();
+
+        if (pnum < 0 || cnum < 0) return;
+
+        switch(Reduce.REDUCE_TABLE[cnum][pnum]) {
+        case DEL:
+            // no need to set the parent here...
+            // swap ?
+            set(other); // *pnode = *cnode; ???
+            break;
+
+        case A:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = true;
+            break;
+
+        case AQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = REPEAT_INFINITE;
+            greedy = false;
+            break;
+
+        case QQ:
+            setTarget(other.target);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            break;
+
+        case P_QQ:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = false;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = true;
+            return;
+
+        case PQ_Q:
+            setTarget(other);
+            lower = 0;
+            upper = 1;
+            greedy = true;
+            other.lower = 1;
+            other.upper = REPEAT_INFINITE;
+            other.greedy = false;
+            return;
+
+        case ASIS:
+            setTarget(other);
+            return;
+        }
+        // ??? remove the parent from target ???
+        other.target = null; // remove target from reduced quantifier
+    }
+
+    /**
+     * Attaches {@code tgt} as the target of this quantifier, applying two rewrites:
+     * an ungrouped multi-character string is split so that only its last character
+     * is quantified, and a nested quantifier is merged where possible.
+     *
+     * @param tgt node to quantify
+     * @param group {@code true} when the target is a group, which disables string splitting
+     * @param env scan environment supplying the syntax flags and the warning callback
+     * @param chars pattern characters, used only in warning messages
+     * @param p index of the first pattern character in {@code chars}
+     * @param end index just past the last pattern character in {@code chars}
+     * @return 1 when this is {@code {1,1}} (nothing is attached), 2 when a string
+     *         was split and only its last character became the target, 0 otherwise
+     */
+    public int setQuantifier(Node tgt, boolean group, ScanEnvironment env, char[] chars, int p, int end) {
+        if (lower == 1 && upper == 1) return 1;
+
+        switch(tgt.getType()) {
+
+        case STR:
+            if (!group) {
+                StringNode sn = (StringNode)tgt;
+                if (sn.canBeSplit()) {
+                    StringNode n = sn.splitLastChar();
+                    if (n != null) {
+                        setTarget(n);
+                        return 2;
+                    }
+                }
+            }
+            break;
+
+        case QTFR: {
+            /* check redundant double repeat. */
+            /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+            QuantifierNode qnt = (QuantifierNode)tgt;
+            int nestQNum = popularNum();
+            int targetQNum = qnt.popularNum();
+
+            if (Config.USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR) {
+                // Both numbers index REDUCE_TABLE, so neither may be -1.
+                if (nestQNum >= 0 && targetQNum >= 0
+                        && !isByNumber() && !qnt.isByNumber() && env.syntax.warnReduntantNestedRepeat()) {
+                    warnNestedRepeat(env, chars, p, end, targetQNum, nestQNum);
+                }
+            } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+
+            if (targetQNum >= 0) {
+                if (nestQNum >= 0) {
+                    reduceNestedQuantifier(qnt);
+                    return 0;
+                } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */
+                    /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+                    if (!isRepeatInfinite(upper) && upper > 1 && greedy) {
+                        upper = lower == 0 ? 1 : lower;
+                    }
+                }
+            }
+            break;
+        }
+
+        default:
+            break;
+        }
+
+        setTarget(tgt);
+        return 0;
+    }
+
+    /**
+     * Emits the warning that belongs to the reduction of a nested quantifier pair.
+     * The pattern text is taken from {@code chars[p..end)}; the {@code String}
+     * constructor expects a character count, hence {@code end - p}.
+     */
+    private void warnNestedRepeat(ScanEnvironment env, char[] chars, int p, int end, int targetQNum, int nestQNum) {
+        switch(Reduce.REDUCE_TABLE[targetQNum][nestQNum]) {
+        case ASIS:
+            break;
+
+        case DEL:
+            env.reg.warnings.warn(new String(chars, p, end - p) +
+                    " redundant nested repeat operator");
+            break;
+
+        default:
+            env.reg.warnings.warn(new String(chars, p, end - p) +
+                    " nested repeat operator " + Reduce.PopularQStr[targetQNum] +
+                    " and " + Reduce.PopularQStr[nestQNum] + " was replaced with '" +
+                    Reduce.ReduceQStr[Reduce.REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "'");
+        }
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/StateNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,232 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeStatus;
+
+public abstract class StateNode extends Node implements NodeStatus {
+    /** Bit mask built from the {@code NST_*} constants. */
+    protected int state;
+
+    /** Debug dump: a single line listing the names of the state flags that are set. */
+    @Override
+    public String toString(int level) {
+        return "\n state: " + stateToString();
+    }
+
+    /**
+     * Lists the names of all state flags that are set, each followed by a space.
+     *
+     * @return the concatenated flag names, or an empty string when no flag is set
+     */
+    public String stateToString() {
+        StringBuilder states = new StringBuilder();
+        if (isMinFixed()) states.append("MIN_FIXED ");
+        if (isMaxFixed()) states.append("MAX_FIXED ");
+        if (isCLenFixed()) states.append("CLEN_FIXED ");
+        if (isMark1()) states.append("MARK1 ");
+        if (isMark2()) states.append("MARK2 ");
+        if (isMemBackrefed()) states.append("MEM_BACKREFED ");
+        if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT ");
+        if (isRecursion()) states.append("RECURSION ");
+        if (isCalled()) states.append("CALLED ");
+        if (isAddrFixed()) states.append("ADDR_FIXED ");
+        if (isNamedGroup()) states.append("NAMED_GROUP ");
+        if (isNameRef()) states.append("NAME_REF ");
+        if (isInRepeat()) states.append("IN_REPEAT ");
+        if (isNestLevel()) states.append("NEST_LEVEL ");
+        if (isByNumber()) states.append("BY_NUMBER ");
+
+        return states.toString();
+    }
+
+    /** Tests the {@code NST_MIN_FIXED} bit. */
+    public boolean isMinFixed() {
+        return (state & NST_MIN_FIXED) != 0;
+    }
+
+    /** Sets the {@code NST_MIN_FIXED} bit. */
+    public void setMinFixed() {
+        state |= NST_MIN_FIXED;
+    }
+
+    /** Clears the {@code NST_MIN_FIXED} bit. */
+    public void clearMinFixed() {
+        state &= ~NST_MIN_FIXED;
+    }
+
+    /** Tests the {@code NST_MAX_FIXED} bit. */
+    public boolean isMaxFixed() {
+        return (state & NST_MAX_FIXED) != 0;
+    }
+
+    /** Sets the {@code NST_MAX_FIXED} bit. */
+    public void setMaxFixed() {
+        state |= NST_MAX_FIXED;
+    }
+
+    /** Clears the {@code NST_MAX_FIXED} bit. */
+    public void clearMaxFixed() {
+        state &= ~NST_MAX_FIXED;
+    }
+
+    /** Tests the {@code NST_CLEN_FIXED} bit. */
+    public boolean isCLenFixed() {
+        return (state & NST_CLEN_FIXED) != 0;
+    }
+
+    /** Sets the {@code NST_CLEN_FIXED} bit. */
+    public void setCLenFixed() {
+        state |= NST_CLEN_FIXED;
+    }
+
+    /** Clears the {@code NST_CLEN_FIXED} bit. */
+    public void clearCLenFixed() {
+        state &= ~NST_CLEN_FIXED;
+    }
+
+    /** Tests the {@code NST_MARK1} bit. */
+    public boolean isMark1() {
+        return (state & NST_MARK1) != 0;
+    }
+
+    /** Sets the {@code NST_MARK1} bit. */
+    public void setMark1() {
+        state |= NST_MARK1;
+    }
+
+    /** Clears the {@code NST_MARK1} bit. */
+    public void clearMark1() {
+        state &= ~NST_MARK1;
+    }
+
+    /** Tests the {@code NST_MARK2} bit. */
+    public boolean isMark2() {
+        return (state & NST_MARK2) != 0;
+    }
+
+    /** Sets the {@code NST_MARK2} bit. */
+    public void setMark2() {
+        state |= NST_MARK2;
+    }
+
+    /** Clears the {@code NST_MARK2} bit. */
+    public void clearMark2() {
+        state &= ~NST_MARK2;
+    }
+
+    /** Tests the {@code NST_MEM_BACKREFED} bit. */
+    public boolean isMemBackrefed() {
+        return (state & NST_MEM_BACKREFED) != 0;
+    }
+
+    /** Sets the {@code NST_MEM_BACKREFED} bit. */
+    public void setMemBackrefed() {
+        state |= NST_MEM_BACKREFED;
+    }
+
+    /** Clears the {@code NST_MEM_BACKREFED} bit. */
+    public void clearMemBackrefed() {
+        state &= ~NST_MEM_BACKREFED;
+    }
+
+    /** Tests the {@code NST_STOP_BT_SIMPLE_REPEAT} bit. */
+    public boolean isStopBtSimpleRepeat() {
+        return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0;
+    }
+
+    /** Sets the {@code NST_STOP_BT_SIMPLE_REPEAT} bit. */
+    public void setStopBtSimpleRepeat() {
+        state |= NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    /** Clears the {@code NST_STOP_BT_SIMPLE_REPEAT} bit. */
+    public void clearStopBtSimpleRepeat() {
+        state &= ~NST_STOP_BT_SIMPLE_REPEAT;
+    }
+
+    /** Tests the {@code NST_RECURSION} bit. */
+    public boolean isRecursion() {
+        return (state & NST_RECURSION) != 0;
+    }
+
+    /** Sets the {@code NST_RECURSION} bit. */
+    public void setRecursion() {
+        state |= NST_RECURSION;
+    }
+
+    /** Clears the {@code NST_RECURSION} bit. */
+    public void clearRecursion() {
+        state &= ~NST_RECURSION;
+    }
+
+    /** Tests the {@code NST_CALLED} bit. */
+    public boolean isCalled() {
+        return (state & NST_CALLED) != 0;
+    }
+
+    /** Sets the {@code NST_CALLED} bit. */
+    public void setCalled() {
+        state |= NST_CALLED;
+    }
+
+    /** Clears the {@code NST_CALLED} bit. */
+    public void clearCalled() {
+        state &= ~NST_CALLED;
+    }
+
+    /** Misspelled original name of {@link #clearCalled()}, kept so existing callers still compile. */
+    public void clearCAlled() {
+        clearCalled();
+    }
+
+    /** Tests the {@code NST_ADDR_FIXED} bit. */
+    public boolean isAddrFixed() {
+        return (state & NST_ADDR_FIXED) != 0;
+    }
+
+    /** Sets the {@code NST_ADDR_FIXED} bit. */
+    public void setAddrFixed() {
+        state |= NST_ADDR_FIXED;
+    }
+
+    /** Clears the {@code NST_ADDR_FIXED} bit. */
+    public void clearAddrFixed() {
+        state &= ~NST_ADDR_FIXED;
+    }
+
+    /** Tests the {@code NST_NAMED_GROUP} bit. */
+    public boolean isNamedGroup() {
+        return (state & NST_NAMED_GROUP) != 0;
+    }
+
+    /** Sets the {@code NST_NAMED_GROUP} bit. */
+    public void setNamedGroup() {
+        state |= NST_NAMED_GROUP;
+    }
+
+    /** Clears the {@code NST_NAMED_GROUP} bit. */
+    public void clearNamedGroup() {
+        state &= ~NST_NAMED_GROUP;
+    }
+
+    /** Tests the {@code NST_NAME_REF} bit. */
+    public boolean isNameRef() {
+        return (state & NST_NAME_REF) != 0;
+    }
+
+    /** Sets the {@code NST_NAME_REF} bit. */
+    public void setNameRef() {
+        state |= NST_NAME_REF;
+    }
+
+    /** Clears the {@code NST_NAME_REF} bit. */
+    public void clearNameRef() {
+        state &= ~NST_NAME_REF;
+    }
+
+    /** Tests the {@code NST_IN_REPEAT} bit. */
+    public boolean isInRepeat() {
+        return (state & NST_IN_REPEAT) != 0;
+    }
+
+    /** Sets the {@code NST_IN_REPEAT} bit. */
+    public void setInRepeat() {
+        state |= NST_IN_REPEAT;
+    }
+
+    /** Clears the {@code NST_IN_REPEAT} bit. */
+    public void clearInRepeat() {
+        state &= ~NST_IN_REPEAT;
+    }
+
+    /** Tests the {@code NST_NEST_LEVEL} bit. */
+    public boolean isNestLevel() {
+        return (state & NST_NEST_LEVEL) != 0;
+    }
+
+    /** Sets the {@code NST_NEST_LEVEL} bit. */
+    public void setNestLevel() {
+        state |= NST_NEST_LEVEL;
+    }
+
+    /** Clears the {@code NST_NEST_LEVEL} bit. */
+    public void clearNestLevel() {
+        state &= ~NST_NEST_LEVEL;
+    }
+
+    /** Tests the {@code NST_BY_NUMBER} bit. */
+    public boolean isByNumber() {
+        return (state & NST_BY_NUMBER) != 0;
+    }
+
+    /** Sets the {@code NST_BY_NUMBER} bit. */
+    public void setByNumber() {
+        state |= NST_BY_NUMBER;
+    }
+
+    /** Clears the {@code NST_BY_NUMBER} bit. */
+    public void clearByNumber() {
+        state &= ~NST_BY_NUMBER;
+    }
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ast/StringNode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,207 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.ast;
+
+import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
+import jdk.nashorn.internal.runtime.regexp.joni.constants.StringType;
+
+public final class StringNode extends Node implements StringType {
+
+    /** Extra capacity added whenever the buffer is reallocated. */
+    private static final int NODE_STR_MARGIN = 16;
+    /** Capacity of the private buffer of a freshly created or cleared node. */
+    private static final int NODE_STR_BUF_SIZE = 24;
+    /** Shared sentinel; it has no backing array, so it must not be read from or appended to. */
+    public static final StringNode EMPTY = new StringNode(null, Integer.MAX_VALUE, Integer.MAX_VALUE);
+
+    /** Backing array; the node's text is {@code chars[p..end)}. */
+    public char[] chars;
+    /** Index of the first character of this node's text. */
+    public int p;
+    /** Index just past the last character of this node's text. */
+    public int end;
+
+    /** Bit mask of {@code NSTR_*} flags. */
+    public int flag;
+
+    /** Creates an empty node with its own buffer. */
+    public StringNode() {
+        this.chars = new char[NODE_STR_BUF_SIZE];
+    }
+
+    /**
+     * Creates a node that views {@code chars[p..end)} without copying and marks it
+     * shared, so the array is copied before the first append.
+     */
+    public StringNode(char[] chars, int p, int end) {
+        this.chars = chars;
+        this.p = p;
+        this.end = end;
+        setShared();
+    }
+
+    /** Creates a node with its own buffer holding the single character {@code c}. */
+    public StringNode(char c) {
+        this();
+        chars[end++] = c;
+    }
+
+    /**
+     * Ensure there are {@code ahead} chars available past {@code end} in the node's buffer
+     * (assumes that the node is not shared).
+     * When the buffer has to grow, the text is moved to the start of the new array
+     * and {@code p}/{@code end} are rebased accordingly.
+     *
+     * @param ahead number of characters about to be appended
+     */
+    public void ensure(int ahead) {
+        // Appends write at index end, so capacity is measured from there rather than from the text length.
+        if (end + ahead >= chars.length) {
+            int used = end - p;
+            char[] tmp = new char[used + ahead + NODE_STR_MARGIN];
+            System.arraycopy(chars, p, tmp, 0, used);
+            chars = tmp;
+            p = 0;
+            end = used;
+        }
+    }
+
+    /**
+     * Copies a shared buffer before the first write (clearing the shared flag),
+     * otherwise just ensures there are {@code ahead} chars available.
+     */
+    private void modifyEnsure(int ahead) {
+        if (isShared()) {
+            int len = (end - p) + ahead;
+            char[] tmp = new char[len + NODE_STR_MARGIN];
+            System.arraycopy(chars, p, tmp, 0, end - p);
+            chars = tmp;
+            end = end - p;
+            p = 0;
+            clearShared();
+        } else {
+            ensure(ahead);
+        }
+    }
+
+    @Override
+    public int getType() {
+        return STR;
+    }
+
+    @Override
+    public String getName() {
+        return "String";
+    }
+
+    /**
+     * Debug dump of the text: characters in the range 0x20..0x7e are shown as-is,
+     * everything else as {@code [0xNNNN]}.
+     */
+    @Override
+    public String toString(int level) {
+        StringBuilder value = new StringBuilder();
+        value.append("\n bytes: '");
+        for (int i=p; i<end; i++) {
+            if (chars[i] >= 0x20 && chars[i] < 0x7f) {
+                value.append(chars[i]);
+            } else {
+                // %x rejects a Character argument, so the code unit is widened to int first.
+                value.append(String.format("[0x%04x]", (int)chars[i]));
+            }
+        }
+        value.append("'");
+        return value.toString();
+    }
+
+    /** Returns the number of characters in this node's text. */
+    public int length() {
+        return end - p;
+    }
+
+    /**
+     * Cuts the last character off this node and returns it as a new node that
+     * views the same array.
+     *
+     * @return the new node, or {@code null} when {@code EncodingHelper.prevCharHead}
+     *         yields no split position after {@code p}
+     */
+    public StringNode splitLastChar() {
+        StringNode n = null;
+
+        if (end > p) {
+            int prev = EncodingHelper.prevCharHead(p, end);
+            if (prev != -1 && prev > p) { /* can be split. */
+                n = new StringNode(chars, prev, end);
+                if (isRaw()) n.setRaw();
+                end = prev;
+            }
+        }
+        return n;
+    }
+
+    /** Returns {@code true} when the text has more than one character. */
+    public boolean canBeSplit() {
+        return end > p && 1 < (end - p);
+    }
+
+    /** Re-points this node at {@code chars[p..end)} without copying and marks it shared. */
+    public void set(char[] chars, int p, int end) {
+        this.chars = chars;
+        this.p = p;
+        this.end = end;
+        setShared();
+    }
+
+    /** Appends {@code cat[catP..catEnd)} to the text, copying a shared buffer first. */
+    public void cat(char[] cat, int catP, int catEnd) {
+        int len = catEnd - catP;
+        modifyEnsure(len);
+        System.arraycopy(cat, catP, chars, end, len);
+        end += len;
+    }
+
+    /** Appends the single character {@code c}, copying a shared buffer first. */
+    public void cat(char c) {
+        modifyEnsure(1);
+        chars[end++] = c;
+    }
+
+    /** Appends {@code code} narrowed to a {@code char}. */
+    public void catCode(int code) {
+        cat((char)code);
+    }
+
+    /**
+     * Resets the node to an empty text with no flags.
+     * A missing, shared or oversized buffer is replaced by a fresh private one,
+     * because the shared flag is dropped here and later appends must not write
+     * into an array owned by someone else.
+     */
+    public void clear() {
+        if (chars == null || isShared() || chars.length > NODE_STR_BUF_SIZE) {
+            chars = new char[NODE_STR_BUF_SIZE];
+        }
+        flag = 0;
+        p = end = 0;
+    }
+
+    /** Sets the {@code NSTR_RAW} bit. */
+    public void setRaw() {
+        flag |= NSTR_RAW;
+    }
+
+    /** Clears the {@code NSTR_RAW} bit. */
+    public void clearRaw() {
+        flag &= ~NSTR_RAW;
+    }
+
+    /** Tests the {@code NSTR_RAW} bit. */
+    public boolean isRaw() {
+        return (flag & NSTR_RAW) != 0;
+    }
+
+    /** Sets the {@code NSTR_AMBIG} bit. */
+    public void setAmbig() {
+        flag |= NSTR_AMBIG;
+    }
+
+    /** Clears the {@code NSTR_AMBIG} bit. */
+    public void clearAmbig() {
+        flag &= ~NSTR_AMBIG;
+    }
+
+    /** Tests the {@code NSTR_AMBIG} bit. */
+    public boolean isAmbig() {
+        return (flag & NSTR_AMBIG) != 0;
+    }
+
+    /** Sets the {@code NSTR_DONT_GET_OPT_INFO} bit. */
+    public void setDontGetOptInfo() {
+        flag |= NSTR_DONT_GET_OPT_INFO;
+    }
+
+    /** Clears the {@code NSTR_DONT_GET_OPT_INFO} bit. */
+    public void clearDontGetOptInfo() {
+        flag &= ~NSTR_DONT_GET_OPT_INFO;
+    }
+
+    /** Tests the {@code NSTR_DONT_GET_OPT_INFO} bit. */
+    public boolean isDontGetOptInfo() {
+        return (flag & NSTR_DONT_GET_OPT_INFO) != 0;
+    }
+
+    /** Sets the {@code NSTR_SHARED} bit. */
+    public void setShared() {
+        flag |= NSTR_SHARED;
+    }
+
+    /** Clears the {@code NSTR_SHARED} bit. */
+    public void clearShared() {
+        flag &= ~NSTR_SHARED;
+    }
+
+    /** Tests the {@code NSTR_SHARED} bit. */
+    public boolean isShared() {
+        return (flag & NSTR_SHARED) != 0;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/bench/AbstractBench.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,49 @@
+package jdk.nashorn.internal.runtime.regexp.joni.bench;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Option;
+import jdk.nashorn.internal.runtime.regexp.joni.Regex;
+import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
+
+public abstract class AbstractBench {
+    /**
+     * Compiles {@code _reg}, then runs {@code warmup} rounds of {@code times}
+     * searches over {@code _str}, printing the elapsed milliseconds of every round
+     * to {@code System.err}.
+     */
+    protected void bench(String _reg, String _str, int warmup, int times) throws Exception {
+        final char[] pattern = _reg.toCharArray();
+        final char[] subject = _str.toCharArray();
+
+        final Regex regex = new Regex(pattern, 0, pattern.length, Option.DEFAULT, Syntax.DEFAULT);
+
+        System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");
+
+        int round = 0;
+        while (round < warmup) {
+            final long elapsed = timeRound(regex, subject, times);
+            System.err.println(": " + elapsed + "ms");
+            round++;
+        }
+    }
+
+    /**
+     * Same rounds as {@link #bench}, but prints one dot per round and only the
+     * fastest round's milliseconds at the end.
+     */
+    protected void benchBestOf(String _reg, String _str, int warmup, int times) throws Exception {
+        final char[] pattern = _reg.toCharArray();
+        final char[] subject = _str.toCharArray();
+
+        final Regex regex = new Regex(pattern, 0, pattern.length, Option.DEFAULT, Syntax.DEFAULT);
+
+        System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times");
+
+        long fastest = Long.MAX_VALUE;
+
+        int round = 0;
+        while (round < warmup) {
+            fastest = Math.min(fastest, timeRound(regex, subject, times));
+            System.err.print(".");
+            round++;
+        }
+        System.err.println(": " + fastest + "ms");
+    }
+
+    /** Runs {@code times} searches over the whole subject and returns the wall-clock milliseconds spent. */
+    private static long timeRound(Regex regex, char[] subject, int times) throws Exception {
+        final long started = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            regex.matcher(subject, 0, subject.length).search(0, subject.length, Option.NONE);
+        }
+        return System.currentTimeMillis() - started;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchGreedyBacktrack.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,7 @@
+package jdk.nashorn.internal.runtime.regexp.joni.bench;
+
+/** Times a greedy {@code .*} pattern that has to backtrack to the start of the subject. */
+public class BenchGreedyBacktrack extends AbstractBench {
+    public static void main(String[] args) throws Exception {
+        final String pattern = ".*_p";
+        final String subject = "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/";
+
+        final BenchGreedyBacktrack runner = new BenchGreedyBacktrack();
+        runner.bench(pattern, subject, 10, 1000000);
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchRailsRegs.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,31 @@
+package jdk.nashorn.internal.runtime.regexp.joni.bench;
+
+/** Runs the best-of benchmark over a fixed table of pattern/subject pairs. */
+public class BenchRailsRegs extends AbstractBench {
+    /** Each row is {pattern, subject}. */
+    private static final String[][] CASES = {
+        {"a.*?[b-z]{2,4}aaaaaa","afdgdsgderaabxxaaaaaaaaaaaaaaaaaaaaaaaa"},
+        {"://","/shop/viewCategory.shtml?category=DOGS"},
+        {"^\\w+\\://[^/]+(/.*|$)$","/shop/viewCategory.shtml?category=DOGS"},
+        {"\\A/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/signonForm\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/newAccountForm\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/newAccount\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/viewCart\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/index\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A/shop/viewCategory\\.shtml/?\\Z","/shop/viewCategory.shtml"},
+        {"\\A(?:::)?([A-Z]\\w*(?:::[A-Z]\\w*)*)\\z","CategoriesController"},
+        {"\\Ainsert","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+        {"\\A\\(?\\s*(select|show)","SELECT * FROM sessions WHERE (session_id = '1b341ffe23b5298676d535fcabd3d0d7') LIMIT 1"},
+        {".*?\n","1b341ffe23b5298676d535fcabd3d0d7"},
+        {"^find_(all_by|by)_([_a-zA-Z]\\w*)$","find_by_string_id"},
+        {"\\.rjs$","categories/show.rhtml"},
+        {"^[-a-z]+://","petstore.css"},
+        {"^get$",""},
+        {"^post$",""},
+        {"^[^:]+","www.example.com"},
+        {"(=|\\?|_before_type_cast)$", "updated_on"},
+        {"^(.*?)=(.*?);","_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/"},
+    };
+
+    public static void main(String[] args) throws Exception {
+        for (int row = 0; row < CASES.length; row++) {
+            final String pattern = CASES[row][0];
+            final String subject = CASES[row][1];
+            new BenchRailsRegs().benchBestOf(pattern, subject, 10, 1000000);
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/bench/BenchSeveralRegexps.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,17 @@
+package jdk.nashorn.internal.runtime.regexp.joni.bench;
+
+/** Runs the best-of benchmark for five pattern/subject pairs with differing iteration counts. */
+public class BenchSeveralRegexps extends AbstractBench {
+    public static void main(String[] args) throws Exception {
+        final int base = 1000000;
+        final int heavy = 4 * base;
+        final String cookie = "_petstore_session_id=1b341ffe23b5298676d535fcabd3d0d7; path=/";
+
+        new BenchSeveralRegexps().benchBestOf("a", " a", 10, heavy);
+
+        new BenchSeveralRegexps().benchBestOf(".*?=", cookie, 10, base);
+
+        new BenchSeveralRegexps().benchBestOf("^(.*?)=(.*?);", cookie, 10, base);
+
+        new BenchSeveralRegexps().benchBestOf(".*_p", cookie, 10, heavy);
+
+        new BenchSeveralRegexps().benchBestOf(".*=", cookie, 10, heavy);
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/AnchorType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,58 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+/** Bit flags for anchor kinds; every constant occupies its own bit so they can be OR-ed into masks. */
+public interface AnchorType {
+    int BEGIN_BUF       = 1 << 0;
+    int BEGIN_LINE      = 1 << 1;
+    int BEGIN_POSITION  = 1 << 2;
+    int END_BUF         = 1 << 3;
+    int SEMI_END_BUF    = 1 << 4;
+    int END_LINE        = 1 << 5;
+
+    int WORD_BOUND      = 1 << 6;
+    int NOT_WORD_BOUND  = 1 << 7;
+    int WORD_BEGIN      = 1 << 8;
+    int WORD_END        = 1 << 9;
+    int PREC_READ       = 1 << 10;
+    int PREC_READ_NOT   = 1 << 11;
+    int LOOK_BEHIND     = 1 << 12;
+    int LOOK_BEHIND_NOT = 1 << 13;
+
+    /** ".*" optimize info */
+    int ANYCHAR_STAR    = 1 << 14;
+    /** ".*" optimize info (multi-line) */
+    int ANYCHAR_STAR_ML = 1 << 15;
+
+    /** Either of the two ".*" optimize bits. */
+    int ANYCHAR_STAR_MASK = ANYCHAR_STAR | ANYCHAR_STAR_ML;
+    /** Either of the two end-of-buffer bits. */
+    int END_BUF_MASK      = END_BUF | SEMI_END_BUF;
+
+    int ALLOWED_IN_LB = LOOK_BEHIND | BEGIN_LINE | END_LINE | BEGIN_BUF | BEGIN_POSITION;
+
+    /** Same set as {@link #ALLOWED_IN_LB} plus {@link #LOOK_BEHIND_NOT}. */
+    int ALLOWED_IN_LB_NOT = ALLOWED_IN_LB | LOOK_BEHIND_NOT;
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/Arguments.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+/** Integer codes for argument kinds: {@code SPECIAL} is -1, the rest count up from 0. */
+public interface Arguments {
+    int SPECIAL     = -1;
+    int NON         = 0;
+    int RELADDR     = NON + 1;
+    int ABSADDR     = RELADDR + 1;
+    int LENGTH      = ABSADDR + 1;
+    int MEMNUM      = LENGTH + 1;
+    int OPTION      = MEMNUM + 1;
+    int STATE_CHECK = OPTION + 1;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/AsmConstants.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,49 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface AsmConstants { // slot indexes plus field-name strings; NOTE(review): presumably consumed by a bytecode-generating matcher - confirm
+ final int THIS = 0;
+
+ // argument indexes
+ final int RANGE = 1;
+ final int SSTART = 2;
+ final int SPREV = 3;
+
+ // local var indexes
+ final int S = 4; // current index
+ final int BYTES = 5; // string
+ final int LAST_INDEX = BYTES + 1; // evaluates to 6, one past the highest index declared above
+
+ // frequently used field names (all ints)
+ final String STR = "str";
+ final String END = "end";
+ final String MSA_START = "msaStart";
+ final String MSA_OPTONS = "msaOptions"; // NOTE: the constant name is misspelled (OPTONS); the string value is spelled correctly
+ final String MSA_BEST_LEN = "msaBestLen";
+ final String MSA_BEST_S = "msaBestS";
+ final String MSA_BEGIN = "msaBegin";
+ final String MSA_END = "msaEnd";
+
+ // generated field names
+ final String BITSET = "bitset";
+ final String CODERANGE = "range"; // NOTE: the value is "range", not "coderange"
+ final String TEMPLATE = "template";
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/CCSTATE.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public enum CCSTATE { // four-valued state tag; NOTE(review): presumably the states of the character-class ("CC") parser - confirm against its users
+ VALUE,
+ RANGE,
+ COMPLETE,
+ START
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/CCVALTYPE.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public enum CCVALTYPE { // three-valued kind tag; NOTE(review): presumably classifies a value met while parsing a character class - confirm
+ SB, // NOTE(review): presumably "single byte", matching the _SB suffix used in OPCode - confirm
+ CODE_POINT,
+ CLASS
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/EncloseType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,29 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface EncloseType { // bit flags, one distinct bit per enclose kind, so values can be OR-ed and tested with a mask
+ final int MEMORY = 1<<0; // bit 0
+ final int OPTION = 1<<1; // bit 1
+ final int STOP_BACKTRACK = 1<<2; // bit 2
+ // masks over the flags above; NOTE(review): presumably the enclose kinds accepted inside look-behind (LB) and negative look-behind (LB_NOT) - confirm
+ final int ALLOWED_IN_LB = MEMORY; // only the MEMORY bit is set
+ final int ALLOWED_IN_LB_NOT = 0; // no bit is set
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/MetaChar.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,31 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface MetaChar { // consecutive identifiers 0..5; NOTE(review): presumably the configurable meta-character slots of a syntax - confirm
+ final int ESCAPE = 0;
+ final int ANYCHAR = 1;
+ final int ANYTIME = 2;
+ final int ZERO_OR_ONE_TIME = 3;
+ final int ONE_OR_MORE_TIME = 4;
+ final int ANYCHAR_ANYTIME = 5;
+ // NOTE: the constant below shares the value 0 with ESCAPE, so the two cannot be told apart by value
+ final int INEFFECTIVE_META_CHAR = 0; // NOTE(review): presumably a "no character assigned" marker rather than a seventh identifier - confirm
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/NodeStatus.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,39 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface NodeStatus { // single-bit flags occupying bits 0..14, so any combination fits in one int
+ /* status bits */
+ final int NST_MIN_FIXED = (1<<0);
+ final int NST_MAX_FIXED = (1<<1);
+ final int NST_CLEN_FIXED = (1<<2);
+ final int NST_MARK1 = (1<<3);
+ final int NST_MARK2 = (1<<4);
+ final int NST_MEM_BACKREFED = (1<<5);
+ final int NST_STOP_BT_SIMPLE_REPEAT= (1<<6);
+ final int NST_RECURSION = (1<<7);
+ final int NST_CALLED = (1<<8);
+ final int NST_ADDR_FIXED = (1<<9);
+ final int NST_NAMED_GROUP = (1<<10);
+ final int NST_NAME_REF = (1<<11);
+ final int NST_IN_REPEAT = (1<<12); /* STK_REPEAT is nested in stack. */
+ final int NST_NEST_LEVEL = (1<<13);
+ final int NST_BY_NUMBER = (1<<14); /* {n,m} */
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/NodeType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,66 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface NodeType { // node type ids plus one-hot masks derived from them
+ /* node type ids, consecutive from 0 to 10 */
+ final int STR = 0;
+ final int CCLASS = 1;
+ final int CTYPE = 2;
+ final int CANY = 3;
+ final int BREF = 4;
+ final int QTFR = 5;
+ final int ENCLOSE = 6;
+ final int ANCHOR = 7;
+ final int LIST = 8;
+ final int ALT = 9;
+ final int CALL = 10;
+ // one-hot masks: BIT_X == 1 << X for every node type id X declared above
+ final int BIT_STR = 1 << STR;
+ final int BIT_CCLASS = 1 << CCLASS;
+ final int BIT_CTYPE = 1 << CTYPE;
+ final int BIT_CANY = 1 << CANY;
+ final int BIT_BREF = 1 << BREF;
+ final int BIT_QTFR = 1 << QTFR;
+ final int BIT_ENCLOSE = 1 << ENCLOSE;
+ final int BIT_ANCHOR = 1 << ANCHOR;
+ final int BIT_LIST = 1 << LIST;
+ final int BIT_ALT = 1 << ALT;
+ final int BIT_CALL = 1 << CALL;
+
+ /* allowed node types in look-behind: every BIT_* mask above except BIT_BREF */
+ final int ALLOWED_IN_LB = ( BIT_LIST |
+ BIT_ALT |
+ BIT_STR |
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_ANCHOR |
+ BIT_ENCLOSE |
+ BIT_QTFR |
+ BIT_CALL );
+ // mask covering the five lowest type ids: STR, CCLASS, CTYPE, CANY and BREF
+ final int SIMPLE = ( BIT_STR |
+ BIT_CCLASS |
+ BIT_CTYPE |
+ BIT_CANY |
+ BIT_BREF);
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/OPCode.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,387 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Config;
+
+public interface OPCode {
+ final int FINISH = 0; /* matching process terminator (no more alternative) */
+ final int END = 1; /* pattern code terminator (success end) */
+ // NOTE: opcode values run consecutively from 0 (FINISH) to 106 (EXACTN_IC_SB); OpCodeNames and OpCodeArgTypes below list one entry per opcode in this same order
+ final int EXACT1 = 2; /* single byte, N = 1 */
+ final int EXACT2 = 3; /* single byte, N = 2 */
+ final int EXACT3 = 4; /* single byte, N = 3 */
+ final int EXACT4 = 5; /* single byte, N = 4 */
+ final int EXACT5 = 6; /* single byte, N = 5 */
+ final int EXACTN = 7; /* single byte */
+ final int EXACTMB2N1 = 8; /* mb-length = 2 N = 1 */
+ final int EXACTMB2N2 = 9; /* mb-length = 2 N = 2 */
+ final int EXACTMB2N3 = 10; /* mb-length = 2 N = 3 */
+ final int EXACTMB2N = 11; /* mb-length = 2 */
+ final int EXACTMB3N = 12; /* mb-length = 3 */
+ final int EXACTMBN = 13; /* other length */
+
+ final int EXACT1_IC = 14; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC = 15; /* single byte, ignore case */
+
+ final int CCLASS = 16;
+ final int CCLASS_MB = 17;
+ final int CCLASS_MIX = 18;
+ final int CCLASS_NOT = 19;
+ final int CCLASS_MB_NOT = 20;
+ final int CCLASS_MIX_NOT = 21;
+ final int CCLASS_NODE = 22; /* pointer to CClassNode node */
+
+ final int ANYCHAR = 23; /* "." */
+ final int ANYCHAR_ML = 24; /* "." multi-line */
+ final int ANYCHAR_STAR = 25; /* ".*" */
+ final int ANYCHAR_ML_STAR = 26; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT = 27;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT = 28;
+
+ final int WORD = 29;
+ final int NOT_WORD = 30;
+ final int WORD_BOUND = 31;
+ final int NOT_WORD_BOUND = 32;
+ final int WORD_BEGIN = 33;
+ final int WORD_END = 34;
+
+ final int BEGIN_BUF = 35;
+ final int END_BUF = 36;
+ final int BEGIN_LINE = 37;
+ final int END_LINE = 38;
+ final int SEMI_END_BUF = 39;
+ final int BEGIN_POSITION = 40;
+
+ final int BACKREF1 = 41;
+ final int BACKREF2 = 42;
+ final int BACKREFN = 43;
+ final int BACKREFN_IC = 44;
+ final int BACKREF_MULTI = 45;
+ final int BACKREF_MULTI_IC = 46;
+ final int BACKREF_WITH_LEVEL = 47; /* \k<xxx+n>, \k<xxx-n> */
+
+ final int MEMORY_START = 48;
+ final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */
+ final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */
+ final int MEMORY_END = 52;
+ final int MEMORY_END_REC = 53; /* push marker to stack */
+
+ final int FAIL = 54; /* pop stack and move */
+ final int JUMP = 55;
+ final int PUSH = 56;
+ final int POP = 57;
+ final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */
+ final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */
+
+ final int REPEAT = 60; /* {n,m} */
+ final int REPEAT_NG = 61; /* {n,m}? (non greedy) */
+ final int REPEAT_INC = 62;
+ final int REPEAT_INC_NG = 63; /* non greedy */
+ final int REPEAT_INC_SG = 64; /* search and get in stack */
+ final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */
+
+ final int NULL_CHECK_START = 66; /* null loop checker start */
+ final int NULL_CHECK_END = 67; /* null loop checker end */
+ final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */
+ final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */
+
+ final int PUSH_POS = 70; /* (?=...) start */
+ final int POP_POS = 71; /* (?=...) end */
+ final int PUSH_POS_NOT = 72; /* (?!...) start */
+ final int FAIL_POS = 73; /* (?!...) end */
+ final int PUSH_STOP_BT = 74; /* (?>...) start */
+ final int POP_STOP_BT = 75; /* (?>...) end */
+ final int LOOK_BEHIND = 76; /* (?<=...) start (no needs end opcode) */
+ final int PUSH_LOOK_BEHIND_NOT = 77; /* (?<!...) start */
+ final int FAIL_LOOK_BEHIND_NOT = 78; /* (?<!...) end */
+
+ final int CALL = 79; /* \g<name> */
+ final int RETURN = 80;
+
+ final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */
+ final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */
+ final int STATE_CHECK = 83; /* check only */
+ final int STATE_CHECK_ANYCHAR_STAR = 84;
+ final int STATE_CHECK_ANYCHAR_ML_STAR = 85;
+
+ /* no need: IS_DYNAMIC_OPTION() == 0 */
+ final int SET_OPTION_PUSH = 86; /* set option and push recover option */
+ final int SET_OPTION = 87; /* set option */
+
+ // single byte versions: each name below is an opcode declared above with an _SB suffix
+ final int ANYCHAR_SB = 88; /* "." */
+ final int ANYCHAR_ML_SB = 89; /* "." multi-line */
+ final int ANYCHAR_STAR_SB = 90; /* ".*" */
+ final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */
+ final int ANYCHAR_STAR_PEEK_NEXT_SB = 92;
+ final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93;
+ final int STATE_CHECK_ANYCHAR_STAR_SB = 94;
+ final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95;
+
+ final int CCLASS_SB = 96;
+ final int CCLASS_NOT_SB = 97;
+ final int WORD_SB = 98;
+ final int NOT_WORD_SB = 99;
+ final int WORD_BOUND_SB = 100;
+ final int NOT_WORD_BOUND_SB = 101;
+ final int WORD_BEGIN_SB = 102;
+ final int WORD_END_SB = 103;
+
+ final int LOOK_BEHIND_SB = 104;
+
+ final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */
+ final int EXACTN_IC_SB = 106; /* single byte, ignore case */
+
+ public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] { // one display name per opcode, in opcode-value order; null when Config.DEBUG_COMPILE is false
+ "finish", /*OP_FINISH*/
+ "end", /*OP_END*/
+ "exact1", /*OP_EXACT1*/
+ "exact2", /*OP_EXACT2*/
+ "exact3", /*OP_EXACT3*/
+ "exact4", /*OP_EXACT4*/
+ "exact5", /*OP_EXACT5*/
+ "exactn", /*OP_EXACTN*/
+ "exactmb2-n1", /*OP_EXACTMB2N1*/
+ "exactmb2-n2", /*OP_EXACTMB2N2*/
+ "exactmb2-n3", /*OP_EXACTMB2N3*/
+ "exactmb2-n", /*OP_EXACTMB2N*/
+ "exactmb3n", /*OP_EXACTMB3N*/
+ "exactmbn", /*OP_EXACTMBN*/
+ "exact1-ic", /*OP_EXACT1_IC*/
+ "exactn-ic", /*OP_EXACTN_IC*/
+ "cclass", /*OP_CCLASS*/
+ "cclass-mb", /*OP_CCLASS_MB*/
+ "cclass-mix", /*OP_CCLASS_MIX*/
+ "cclass-not", /*OP_CCLASS_NOT*/
+ "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
+ "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
+ "cclass-node", /*OP_CCLASS_NODE*/
+ "anychar", /*OP_ANYCHAR*/
+ "anychar-ml", /*OP_ANYCHAR_ML*/
+ "anychar*", /*OP_ANYCHAR_STAR*/
+ "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
+ "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ "word", /*OP_WORD*/
+ "not-word", /*OP_NOT_WORD*/
+ "word-bound", /*OP_WORD_BOUND*/
+ "not-word-bound", /*OP_NOT_WORD_BOUND*/
+ "word-begin", /*OP_WORD_BEGIN*/
+ "word-end", /*OP_WORD_END*/
+ "begin-buf", /*OP_BEGIN_BUF*/
+ "end-buf", /*OP_END_BUF*/
+ "begin-line", /*OP_BEGIN_LINE*/
+ "end-line", /*OP_END_LINE*/
+ "semi-end-buf", /*OP_SEMI_END_BUF*/
+ "begin-position", /*OP_BEGIN_POSITION*/
+ "backref1", /*OP_BACKREF1*/
+ "backref2", /*OP_BACKREF2*/
+ "backrefn", /*OP_BACKREFN*/
+ "backrefn-ic", /*OP_BACKREFN_IC*/
+ "backref_multi", /*OP_BACKREF_MULTI*/
+ "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
+ "backref_at_level", /*OP_BACKREF_WITH_LEVEL*/
+ "mem-start", /*OP_MEMORY_START*/
+ "mem-start-push", /*OP_MEMORY_START_PUSH*/
+ "mem-end-push", /*OP_MEMORY_END_PUSH*/
+ "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
+ "mem-end", /*OP_MEMORY_END*/
+ "mem-end-rec", /*OP_MEMORY_END_REC*/
+ "fail", /*OP_FAIL*/
+ "jump", /*OP_JUMP*/
+ "push", /*OP_PUSH*/
+ "pop", /*OP_POP*/
+ "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
+ "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
+ "repeat", /*OP_REPEAT*/
+ "repeat-ng", /*OP_REPEAT_NG*/
+ "repeat-inc", /*OP_REPEAT_INC*/
+ "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
+ "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
+ "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
+ "null-check-start", /*OP_NULL_CHECK_START*/
+ "null-check-end", /*OP_NULL_CHECK_END*/
+ "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
+ "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ "push-pos", /*OP_PUSH_POS*/
+ "pop-pos", /*OP_POP_POS*/
+ "push-pos-not", /*OP_PUSH_POS_NOT*/
+ "fail-pos", /*OP_FAIL_POS*/
+ "push-stop-bt", /*OP_PUSH_STOP_BT*/
+ "pop-stop-bt", /*OP_POP_STOP_BT*/
+ "look-behind", /*OP_LOOK_BEHIND*/
+ "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
+ "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
+ "call", /*OP_CALL*/
+ "return", /*OP_RETURN*/
+ "state-check-push", /*OP_STATE_CHECK_PUSH*/
+ "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ "state-check", /*OP_STATE_CHECK*/
+ "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ "set-option-push", /*OP_SET_OPTION_PUSH*/
+ "set-option", /*OP_SET_OPTION*/
+
+ // single byte versions (entries 88..106, matching the _SB opcode constants)
+ "anychar-sb", /*OP_ANYCHAR_SB*/
+ "anychar-ml-sb", /*OP_ANYCHAR_ML_SB*/
+ "anychar*-sb", /*OP_ANYCHAR_STAR_SB*/
+ "anychar-ml*-sb", /*OP_ANYCHAR_ML_STAR_SB*/
+ "anychar*-peek-next-sb", /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ "anychar-ml*-peek-next-sb", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ "state-check-anychar*-sb", /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ "state-check-anychar-ml*-sb", /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ "cclass-sb", /*OP_CCLASS_SB*/
+ "cclass-not-sb", /*OP_CCLASS_NOT_SB*/
+
+ "word-sb", /*OP_WORD_SB*/
+ "not-word-sb", /*OP_NOT_WORD_SB*/
+ "word-bound-sb", /*OP_WORD_BOUND_SB*/
+ "not-word-bound-sb", /*OP_NOT_WORD_BOUND_SB*/
+ "word-begin-sb", /*OP_WORD_BEGIN_SB*/
+ "word-end-sb", /*OP_WORD_END_SB*/
+
+ "look-behind-sb", /*OP_LOOK_BEHIND_SB*/
+
+ "exact1-ic-sb", /*OP_EXACT1_IC_SB*/
+ "exactn-ic-sb", /*OP_EXACTN_IC_SB*/
+
+ } : null;
+
+ public final int OpCodeArgTypes[] = Config.DEBUG_COMPILE ? new int[] { // one Arguments tag per opcode, in opcode-value order; null when Config.DEBUG_COMPILE is false
+ Arguments.NON, /*OP_FINISH*/
+ Arguments.NON, /*OP_END*/
+ Arguments.SPECIAL, /*OP_EXACT1*/
+ Arguments.SPECIAL, /*OP_EXACT2*/
+ Arguments.SPECIAL, /*OP_EXACT3*/
+ Arguments.SPECIAL, /*OP_EXACT4*/
+ Arguments.SPECIAL, /*OP_EXACT5*/
+ Arguments.SPECIAL, /*OP_EXACTN*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N1*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N2*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N3*/
+ Arguments.SPECIAL, /*OP_EXACTMB2N*/
+ Arguments.SPECIAL, /*OP_EXACTMB3N*/
+ Arguments.SPECIAL, /*OP_EXACTMBN*/
+ Arguments.SPECIAL, /*OP_EXACT1_IC*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC*/
+ Arguments.SPECIAL, /*OP_CCLASS*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
+ Arguments.SPECIAL, /*OP_CCLASS_NODE*/
+ Arguments.NON, /*OP_ANYCHAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML*/
+ Arguments.NON, /*OP_ANYCHAR_STAR*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
+ Arguments.NON, /*OP_WORD*/
+ Arguments.NON, /*OP_NOT_WORD*/
+ Arguments.NON, /*OP_WORD_BOUND*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND*/
+ Arguments.NON, /*OP_WORD_BEGIN*/
+ Arguments.NON, /*OP_WORD_END*/
+ Arguments.NON, /*OP_BEGIN_BUF*/
+ Arguments.NON, /*OP_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_LINE*/
+ Arguments.NON, /*OP_END_LINE*/
+ Arguments.NON, /*OP_SEMI_END_BUF*/
+ Arguments.NON, /*OP_BEGIN_POSITION*/
+ Arguments.NON, /*OP_BACKREF1*/
+ Arguments.NON, /*OP_BACKREF2*/
+ Arguments.MEMNUM, /*OP_BACKREFN*/
+ Arguments.SPECIAL, /*OP_BACKREFN_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
+ Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
+ Arguments.SPECIAL, /*OP_BACKREF_WITH_LEVEL*/
+ Arguments.MEMNUM, /*OP_MEMORY_START*/
+ Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
+ Arguments.MEMNUM, /*OP_MEMORY_END*/
+ Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
+ Arguments.NON, /*OP_FAIL*/
+ Arguments.RELADDR, /*OP_JUMP*/
+ Arguments.RELADDR, /*OP_PUSH*/
+ Arguments.NON, /*OP_POP*/
+ Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
+ Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
+ Arguments.SPECIAL, /*OP_REPEAT*/
+ Arguments.SPECIAL, /*OP_REPEAT_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
+ Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
+ Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
+ Arguments.NON, /*OP_PUSH_POS*/
+ Arguments.NON, /*OP_POP_POS*/
+ Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
+ Arguments.NON, /*OP_FAIL_POS*/
+ Arguments.NON, /*OP_PUSH_STOP_BT*/
+ Arguments.NON, /*OP_POP_STOP_BT*/
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
+ Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
+ Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
+ Arguments.ABSADDR, /*OP_CALL*/
+ Arguments.NON, /*OP_RETURN*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
+ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
+ Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
+ Arguments.OPTION, /*OP_SET_OPTION*/
+
+ // single byte versions (entries 88..106, matching the _SB opcode constants)
+ Arguments.NON, /*OP_ANYCHAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_SB*/
+ Arguments.NON, /*OP_ANYCHAR_STAR_SB*/
+ Arguments.NON, /*OP_ANYCHAR_ML_STAR_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT_SB*/
+ Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR_SB*/
+ Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR_SB*/
+
+ Arguments.SPECIAL, /*OP_CCLASS_SB*/
+ Arguments.SPECIAL, /*OP_CCLASS_NOT_SB*/
+
+ Arguments.NON, /*OP_WORD_SB*/
+ Arguments.NON, /*OP_NOT_WORD_SB*/
+ Arguments.NON, /*OP_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_NOT_WORD_BOUND_SB*/
+ Arguments.NON, /*OP_WORD_BEGIN_SB*/
+ Arguments.NON, /*OP_WORD_END_SB*/
+
+ Arguments.SPECIAL, /*OP_LOOK_BEHIND_SB*/
+
+ Arguments.SPECIAL, /*OP_EXACT1_IC_SB*/
+ Arguments.SPECIAL, /*OP_EXACTN_IC_SB*/
+ } : null;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/OPSize.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,76 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+public interface OPSize { // unit sizes of an opcode and of each operand kind, plus per-instruction totals built from them
+ // every unit size below is 1, so each composite size further down equals the number of terms in its sum
+ // this might be helpful for potential byte[] migration
+ final int OPCODE = 1;
+ final int RELADDR = 1;
+ final int ABSADDR = 1;
+ final int LENGTH = 1;
+ final int MEMNUM = 1;
+ final int STATE_CHECK_NUM = 1;
+ final int REPEATNUM = 1;
+ final int OPTION = 1;
+ final int CODE_POINT = 1;
+ final int POINTER = 1;
+ final int INDEX = 1;
+
+ /* op-code + arg size */
+
+ final int ANYCHAR_STAR = OPCODE;
+ final int ANYCHAR_STAR_PEEK_NEXT = (OPCODE + 1); // NOTE(review): the literal 1 presumably covers the peeked character operand - confirm
+ final int JUMP = (OPCODE + RELADDR);
+ final int PUSH = (OPCODE + RELADDR);
+ final int POP = OPCODE;
+ final int PUSH_OR_JUMP_EXACT1 = (OPCODE + RELADDR + 1); // NOTE(review): the literal 1 presumably covers the character operand - confirm
+ final int PUSH_IF_PEEK_NEXT = (OPCODE + RELADDR + 1); // NOTE(review): the literal 1 presumably covers the character operand - confirm
+ final int REPEAT_INC = (OPCODE + MEMNUM);
+ final int REPEAT_INC_NG = (OPCODE + MEMNUM);
+ final int PUSH_POS = OPCODE;
+ final int PUSH_POS_NOT = (OPCODE + RELADDR);
+ final int POP_POS = OPCODE;
+ final int FAIL_POS = OPCODE;
+ final int SET_OPTION = (OPCODE + OPTION);
+ final int SET_OPTION_PUSH = (OPCODE + OPTION);
+ final int FAIL = OPCODE;
+ final int MEMORY_START = (OPCODE + MEMNUM);
+ final int MEMORY_START_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH = (OPCODE + MEMNUM);
+ final int MEMORY_END_PUSH_REC = (OPCODE + MEMNUM);
+ final int MEMORY_END = (OPCODE + MEMNUM);
+ final int MEMORY_END_REC = (OPCODE + MEMNUM);
+ final int PUSH_STOP_BT = OPCODE;
+ final int POP_STOP_BT = OPCODE;
+ final int NULL_CHECK_START = (OPCODE + MEMNUM);
+ final int NULL_CHECK_END = (OPCODE + MEMNUM);
+ final int LOOK_BEHIND = (OPCODE + LENGTH);
+ final int PUSH_LOOK_BEHIND_NOT = (OPCODE + RELADDR + LENGTH);
+ final int FAIL_LOOK_BEHIND_NOT = OPCODE;
+ final int CALL = (OPCODE + ABSADDR);
+ final int RETURN = OPCODE;
+
+ // #ifdef USE_COMBINATION_EXPLOSION_CHECK
+ final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM);
+ final int STATE_CHECK_PUSH = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_PUSH_OR_JUMP = (OPCODE + STATE_CHECK_NUM + RELADDR);
+ final int STATE_CHECK_ANYCHAR_STAR = (OPCODE + STATE_CHECK_NUM);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/Reduce.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,61 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.A;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.AQ;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.ASIS;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.DEL;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.PQ_Q;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.P_QQ;
+import static jdk.nashorn.internal.runtime.regexp.joni.constants.Reduce.ReduceType.QQ;
+
+public interface Reduce {
+
+ enum ReduceType {
+ ASIS, /* as is */
+ DEL, /* delete parent */
+ A, /* to '*' */
+ AQ, /* to '*?' */
+ QQ, /* to '??' */
+ P_QQ, /* to '+)??' */
+ PQ_Q, /* to '+?)?' */
+ }
+
+ final ReduceType[][]REDUCE_TABLE = {
+ {DEL, A, A, QQ, AQ, ASIS}, /* '?' */
+ {DEL, DEL, DEL, P_QQ, P_QQ, DEL}, /* '*' */
+ {A, A, DEL, ASIS, P_QQ, DEL}, /* '+' */
+ {DEL, AQ, AQ, DEL, AQ, AQ}, /* '??' */
+ {DEL, DEL, DEL, DEL, DEL, DEL}, /* '*?' */
+ {ASIS, PQ_Q, DEL, AQ, AQ, DEL} /* '+?' */
+ };
+
+
+ final String PopularQStr[] = new String[] {
+ "?", "*", "+", "??", "*?", "+?"
+ };
+
+ String ReduceQStr[]= new String[] {
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+ };
+
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/RegexState.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+/** State codes NORMAL, SEARCHING, COMPILING and MODIFY; the port notes they are not needed at the moment. */
+public interface RegexState {
+    int NORMAL = 0;
+    int SEARCHING = 1;
+    int COMPILING = -1;
+    int MODIFY = -2;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/StackPopLevel.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+/** Stack pop level codes: FREE (0), MEM_START (1) and ALL (2). */
+public interface StackPopLevel {
+    int FREE = 0;
+    int MEM_START = 1;
+    int ALL = 2;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/StackType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,51 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+
+/** Stack type codes and the masks used to test them. */
+public interface StackType {
+    // Marker value for "no stack index".
+    int INVALID_STACK_INDEX = -1;
+
+    // Stack types used by normal-POP; all three fit inside MASK_POP_USED.
+    int ALT = 0x0001;
+    int LOOK_BEHIND_NOT = 0x0002;
+    int POS_NOT = 0x0003;
+    // Stack types handled by normal-POP.
+    int MEM_START = 0x0100;
+    int MEM_END = 0x8200;
+    int REPEAT_INC = 0x0300;
+    int STATE_CHECK_MARK = 0x1000;
+    // Stack types avoided by normal-POP.
+    int NULL_CHECK_START = 0x3000;
+    int NULL_CHECK_END = 0x5000;        // for recursive call
+    int MEM_END_MARK = 0x8400;
+    int POS = 0x0500;                   // used when POP-POS
+    int STOP_BT = 0x0600;               // mark for "(?>...)"
+    int REPEAT = 0x0700;
+    int CALL_FRAME = 0x0800;
+    int RETURN = 0x0900;
+    int VOID = 0x0a00;                  // for fill a blank
+
+    // Stack type check masks.
+    int MASK_POP_USED = 0x00ff;
+    int MASK_TO_VOID_TARGET = 0x10ff;
+    int MASK_MEM_END_OR_MARK = 0x8000;  // bit shared by MEM_END and MEM_END_MARK
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/StringType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+/** NSTR_* flags, one bit each (bits 0 through 3), so distinct flags can be OR-ed together. */
+public interface StringType {
+    int NSTR_RAW = 1 << 0;
+    int NSTR_AMBIG = 1 << 1;
+    int NSTR_DONT_GET_OPT_INFO = 1 << 2;
+    int NSTR_SHARED = 1 << 3;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/SyntaxProperties.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,124 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+/** Bit-flag constants for regex syntax options: OP_* and OP2_* operator flags, behavior flags, and preset masks. */
+public interface SyntaxProperties {
+ /* syntax (operators) */
+ final int OP_VARIABLE_META_CHARACTERS = (1<<0);
+ final int OP_DOT_ANYCHAR = (1<<1); /* . */
+ final int OP_ASTERISK_ZERO_INF = (1<<2); /* * */
+ final int OP_ESC_ASTERISK_ZERO_INF = (1<<3);
+ final int OP_PLUS_ONE_INF = (1<<4); /* + */
+ final int OP_ESC_PLUS_ONE_INF = (1<<5);
+ final int OP_QMARK_ZERO_ONE = (1<<6); /* ? */
+ final int OP_ESC_QMARK_ZERO_ONE = (1<<7);
+ final int OP_BRACE_INTERVAL = (1<<8); /* {lower,upper} */
+ final int OP_ESC_BRACE_INTERVAL = (1<<9); /* \{lower,upper\} */
+ final int OP_VBAR_ALT = (1<<10); /* | */
+ final int OP_ESC_VBAR_ALT = (1<<11); /* \| */
+ final int OP_LPAREN_SUBEXP = (1<<12); /* (...) */
+ final int OP_ESC_LPAREN_SUBEXP = (1<<13); /* \(...\) */
+ final int OP_ESC_AZ_BUF_ANCHOR = (1<<14); /* \A, \Z, \z */
+ final int OP_ESC_CAPITAL_G_BEGIN_ANCHOR = (1<<15); /* \G */
+ final int OP_DECIMAL_BACKREF = (1<<16); /* \num */
+ final int OP_BRACKET_CC = (1<<17); /* [...] */
+ final int OP_ESC_W_WORD = (1<<18); /* \w, \W */
+ final int OP_ESC_LTGT_WORD_BEGIN_END = (1<<19); /* \<, \> */
+ final int OP_ESC_B_WORD_BOUND = (1<<20); /* \b, \B */
+ final int OP_ESC_S_WHITE_SPACE = (1<<21); /* \s, \S */
+ final int OP_ESC_D_DIGIT = (1<<22); /* \d, \D */
+ final int OP_LINE_ANCHOR = (1<<23); /* ^, $ */
+ final int OP_POSIX_BRACKET = (1<<24); /* [:xxxx:] */
+ final int OP_QMARK_NON_GREEDY = (1<<25); /* ??,*?,+?,{n,m}? */
+ final int OP_ESC_CONTROL_CHARS = (1<<26); /* \n,\r,\t,\a ... */
+ final int OP_ESC_C_CONTROL = (1<<27); /* \cx */
+ final int OP_ESC_OCTAL3 = (1<<28); /* \OOO */
+ final int OP_ESC_X_HEX2 = (1<<29); /* \xHH */
+ final int OP_ESC_X_BRACE_HEX8 = (1<<30); /* \x{7HHHHHHH} */
+ /* syntax (operators), OP2_* group: bit positions restart at 0, so these share values with the OP_* group above */
+ final int OP2_ESC_CAPITAL_Q_QUOTE = (1<<0); /* \Q...\E */
+ final int OP2_QMARK_GROUP_EFFECT = (1<<1); /* (?...) */
+ final int OP2_OPTION_PERL = (1<<2); /* (?imsx), (?-imsx) */
+ final int OP2_OPTION_RUBY = (1<<3); /* (?imx), (?-imx) */
+ final int OP2_PLUS_POSSESSIVE_REPEAT = (1<<4); /* ?+,*+,++ */
+ final int OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5); /* {n,m}+ */
+ final int OP2_CCLASS_SET_OP = (1<<6); /* [...&&..[..]..] */
+ final int OP2_QMARK_LT_NAMED_GROUP = (1<<7); /* (?<name>...) */
+ final int OP2_ESC_K_NAMED_BACKREF = (1<<8); /* \k<name> */
+ final int OP2_ESC_G_SUBEXP_CALL = (1<<9); /* \g<name>, \g<n> */
+ final int OP2_ATMARK_CAPTURE_HISTORY = (1<<10); /* (?@..), (?@<x>..) */
+ final int OP2_ESC_CAPITAL_C_BAR_CONTROL = (1<<11); /* \C-x */
+ final int OP2_ESC_CAPITAL_M_BAR_META = (1<<12); /* \M-x */
+ final int OP2_ESC_V_VTAB = (1<<13); /* \v as VTAB */
+ final int OP2_ESC_U_HEX4 = (1<<14); /* \\uHHHH (backslash doubled on purpose: a lone backslash-u is parsed as a Unicode escape even in a comment) */
+ final int OP2_ESC_GNU_BUF_ANCHOR = (1<<15); /* \`, \' */
+ final int OP2_ESC_P_BRACE_CHAR_PROPERTY = (1<<16); /* \p{...}, \P{...} */
+ final int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = (1<<17); /* \p{^..}, \P{^..} */
+ /* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */
+ final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */
+ final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */
+
+ /* syntax (behavior) */
+ final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */
+ final int CONTEXT_INDEP_REPEAT_OPS = (1<<0); /* ?, *, +, {n,m} */
+ final int CONTEXT_INVALID_REPEAT_OPS = (1<<1); /* error or ignore */
+ final int ALLOW_UNMATCHED_CLOSE_SUBEXP = (1<<2); /* ...)... */
+ final int ALLOW_INVALID_INTERVAL = (1<<3); /* {??? */
+ final int ALLOW_INTERVAL_LOW_ABBREV = (1<<4); /* {,n} => {0,n} */
+ final int STRICT_CHECK_BACKREF = (1<<5); /* /(\1)/, /\1()/ ... */
+ final int DIFFERENT_LEN_ALT_LOOK_BEHIND = (1<<6); /* (?<=a|bc) */
+ final int CAPTURE_ONLY_NAMED_GROUP = (1<<7); /* see doc/RE */
+ final int ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8); /* (?<x>)(?<x>) */
+ final int FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9); /* a{n}? = (?:a{n})? */
+
+ /* syntax (behavior) in char class [...] */
+ final int NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20); /* [^...] */
+ final int BACKSLASH_ESCAPE_IN_CC = (1<<21); /* [..\w..] etc.. */
+ final int ALLOW_EMPTY_RANGE_IN_CC = (1<<22);
+ final int ALLOW_DOUBLE_RANGE_OP_IN_CC = (1<<23); /* [0-9-a]=[0-9\-a] */
+ /* syntax (behavior) warning */
+ final int WARN_CC_OP_NOT_ESCAPED = (1<<24); /* [,-,] */
+ final int WARN_REDUNDANT_NESTED_REPEAT = (1<<25); /* (?:a*)+ */
+ /* Preset masks: the two ..._OP masks OR together OP_ operator flags; GNU_REGEX_BV ORs together behavior flags. */
+ final int POSIX_COMMON_OP =
+ OP_DOT_ANYCHAR | OP_POSIX_BRACKET |
+ OP_DECIMAL_BACKREF |
+ OP_BRACKET_CC | OP_ASTERISK_ZERO_INF |
+ OP_LINE_ANCHOR |
+ OP_ESC_CONTROL_CHARS;
+
+ final int GNU_REGEX_OP =
+ OP_DOT_ANYCHAR | OP_BRACKET_CC |
+ OP_POSIX_BRACKET | OP_DECIMAL_BACKREF |
+ OP_BRACE_INTERVAL | OP_LPAREN_SUBEXP |
+ OP_VBAR_ALT |
+ OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF |
+ OP_QMARK_ZERO_ONE |
+ OP_ESC_AZ_BUF_ANCHOR | OP_ESC_CAPITAL_G_BEGIN_ANCHOR |
+ OP_ESC_W_WORD |
+ OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END |
+ OP_ESC_S_WHITE_SPACE | OP_ESC_D_DIGIT |
+ OP_LINE_ANCHOR;
+
+ final int GNU_REGEX_BV =
+ CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS |
+ CONTEXT_INVALID_REPEAT_OPS | ALLOW_INVALID_INTERVAL |
+ BACKSLASH_ESCAPE_IN_CC | ALLOW_DOUBLE_RANGE_OP_IN_CC;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/TargetInfo.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,27 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+/** Emptiness codes: ISNOT_EMPTY is 0 and the three IS_EMPTY variants are 1 through 3. */
+public interface TargetInfo {
+    int ISNOT_EMPTY = 0;
+    int IS_EMPTY = 1;
+    int IS_EMPTY_MEM = 2;
+    int IS_EMPTY_REC = 3;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/TokenType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,48 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+/** Token kinds. The trailing group, from CC_CLOSE onward, is marked as occurring inside a character class. */
+public enum TokenType {
+    EOT,                    // end of token
+    RAW_BYTE,
+    CHAR,
+    STRING,
+    CODE_POINT,
+    ANYCHAR,
+    CHAR_TYPE,
+    BACKREF,
+    CALL,
+    ANCHOR,
+    OP_REPEAT,
+    INTERVAL,
+    ANYCHAR_ANYTIME,        // SQL '%' == .*
+    ALT,
+    SUBEXP_OPEN,
+    SUBEXP_CLOSE,
+    CC_OPEN,
+    QUOTE_OPEN,
+    CHAR_PROPERTY,          // \p{...}, \P{...}
+    // in cc (character class)
+    CC_CLOSE,
+    CC_RANGE,
+    POSIX_BRACKET_OPEN,
+    CC_AND,                 // &&
+    CC_CC_OPEN              // [
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/constants/Traverse.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,26 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.constants;
+/** Traversal callback flags; AT_BOTH is the bitwise OR of AT_FIRST and AT_LAST. */
+public interface Traverse {
+    int TRAVERSE_CALLBACK_AT_FIRST = 1;
+    int TRAVERSE_CALLBACK_AT_LAST = 2;
+    int TRAVERSE_CALLBACK_AT_BOTH = TRAVERSE_CALLBACK_AT_FIRST | TRAVERSE_CALLBACK_AT_LAST;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/AsciiTables.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,157 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+/** Static ASCII lookup tables: a ctype bit table, lower/upper case byte maps, and a list of upper-to-lower pairs. */
+public class AsciiTables {
+ /** 256 entries; 0x00-0x7f carry flag bits and 0x80-0xff are 0. Bit i appears to match CharacterType constant i (e.g. entry 0x0a is 0x4209: bits 0, 3, 9, 14) - confirm at the lookup site. */
+ public static final short AsciiCtypeTable[] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+ };
+ /** 256 entries indexed by byte value: 0x41-0x5a ('A'-'Z') hold 0x61-0x7a ('a'-'z'); every other entry holds its own index (as a signed byte from 0x80 up). */
+ public static final byte ToLowerCaseTable[] = {
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\141', (byte)'\142', (byte)'\143', (byte)'\144', (byte)'\145', (byte)'\146', (byte)'\147',
+ (byte)'\150', (byte)'\151', (byte)'\152', (byte)'\153', (byte)'\154', (byte)'\155', (byte)'\156', (byte)'\157',
+ (byte)'\160', (byte)'\161', (byte)'\162', (byte)'\163', (byte)'\164', (byte)'\165', (byte)'\166', (byte)'\167',
+ (byte)'\170', (byte)'\171', (byte)'\172', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377',
+ };
+ /** 256 entries indexed by byte value: 0x61-0x7a ('a'-'z') hold 0x41-0x5a ('A'-'Z'); every other entry holds its own index (as a signed byte from 0x80 up). */
+ public static final byte ToUpperCaseTable[] = {
+ (byte)'\000', (byte)'\001', (byte)'\002', (byte)'\003', (byte)'\004', (byte)'\005', (byte)'\006', (byte)'\007',
+ (byte)'\010', (byte)'\011', (byte)'\012', (byte)'\013', (byte)'\014', (byte)'\015', (byte)'\016', (byte)'\017',
+ (byte)'\020', (byte)'\021', (byte)'\022', (byte)'\023', (byte)'\024', (byte)'\025', (byte)'\026', (byte)'\027',
+ (byte)'\030', (byte)'\031', (byte)'\032', (byte)'\033', (byte)'\034', (byte)'\035', (byte)'\036', (byte)'\037',
+ (byte)'\040', (byte)'\041', (byte)'\042', (byte)'\043', (byte)'\044', (byte)'\045', (byte)'\046', (byte)'\047',
+ (byte)'\050', (byte)'\051', (byte)'\052', (byte)'\053', (byte)'\054', (byte)'\055', (byte)'\056', (byte)'\057',
+ (byte)'\060', (byte)'\061', (byte)'\062', (byte)'\063', (byte)'\064', (byte)'\065', (byte)'\066', (byte)'\067',
+ (byte)'\070', (byte)'\071', (byte)'\072', (byte)'\073', (byte)'\074', (byte)'\075', (byte)'\076', (byte)'\077',
+ (byte)'\100', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\133', (byte)'\134', (byte)'\135', (byte)'\136', (byte)'\137',
+ (byte)'\140', (byte)'\101', (byte)'\102', (byte)'\103', (byte)'\104', (byte)'\105', (byte)'\106', (byte)'\107',
+ (byte)'\110', (byte)'\111', (byte)'\112', (byte)'\113', (byte)'\114', (byte)'\115', (byte)'\116', (byte)'\117',
+ (byte)'\120', (byte)'\121', (byte)'\122', (byte)'\123', (byte)'\124', (byte)'\125', (byte)'\126', (byte)'\127',
+ (byte)'\130', (byte)'\131', (byte)'\132', (byte)'\173', (byte)'\174', (byte)'\175', (byte)'\176', (byte)'\177',
+ (byte)'\200', (byte)'\201', (byte)'\202', (byte)'\203', (byte)'\204', (byte)'\205', (byte)'\206', (byte)'\207',
+ (byte)'\210', (byte)'\211', (byte)'\212', (byte)'\213', (byte)'\214', (byte)'\215', (byte)'\216', (byte)'\217',
+ (byte)'\220', (byte)'\221', (byte)'\222', (byte)'\223', (byte)'\224', (byte)'\225', (byte)'\226', (byte)'\227',
+ (byte)'\230', (byte)'\231', (byte)'\232', (byte)'\233', (byte)'\234', (byte)'\235', (byte)'\236', (byte)'\237',
+ (byte)'\240', (byte)'\241', (byte)'\242', (byte)'\243', (byte)'\244', (byte)'\245', (byte)'\246', (byte)'\247',
+ (byte)'\250', (byte)'\251', (byte)'\252', (byte)'\253', (byte)'\254', (byte)'\255', (byte)'\256', (byte)'\257',
+ (byte)'\260', (byte)'\261', (byte)'\262', (byte)'\263', (byte)'\264', (byte)'\265', (byte)'\266', (byte)'\267',
+ (byte)'\270', (byte)'\271', (byte)'\272', (byte)'\273', (byte)'\274', (byte)'\275', (byte)'\276', (byte)'\277',
+ (byte)'\300', (byte)'\301', (byte)'\302', (byte)'\303', (byte)'\304', (byte)'\305', (byte)'\306', (byte)'\307',
+ (byte)'\310', (byte)'\311', (byte)'\312', (byte)'\313', (byte)'\314', (byte)'\315', (byte)'\316', (byte)'\317',
+ (byte)'\320', (byte)'\321', (byte)'\322', (byte)'\323', (byte)'\324', (byte)'\325', (byte)'\326', (byte)'\327',
+ (byte)'\330', (byte)'\331', (byte)'\332', (byte)'\333', (byte)'\334', (byte)'\335', (byte)'\336', (byte)'\337',
+ (byte)'\340', (byte)'\341', (byte)'\342', (byte)'\343', (byte)'\344', (byte)'\345', (byte)'\346', (byte)'\347',
+ (byte)'\350', (byte)'\351', (byte)'\352', (byte)'\353', (byte)'\354', (byte)'\355', (byte)'\356', (byte)'\357',
+ (byte)'\360', (byte)'\361', (byte)'\362', (byte)'\363', (byte)'\364', (byte)'\365', (byte)'\366', (byte)'\367',
+ (byte)'\370', (byte)'\371', (byte)'\372', (byte)'\373', (byte)'\374', (byte)'\375', (byte)'\376', (byte)'\377',
+ };
+ /** 26 pairs, one per letter: {upper-case code, lower-case code} for 'A' (0x41) through 'Z' (0x5a). */
+ public static final int LowerMap[][] = {
+ {0x41, 0x61},
+ {0x42, 0x62},
+ {0x43, 0x63},
+ {0x44, 0x64},
+ {0x45, 0x65},
+ {0x46, 0x66},
+ {0x47, 0x67},
+ {0x48, 0x68},
+ {0x49, 0x69},
+ {0x4a, 0x6a},
+ {0x4b, 0x6b},
+ {0x4c, 0x6c},
+ {0x4d, 0x6d},
+ {0x4e, 0x6e},
+ {0x4f, 0x6f},
+ {0x50, 0x70},
+ {0x51, 0x71},
+ {0x52, 0x72},
+ {0x53, 0x73},
+ {0x54, 0x74},
+ {0x55, 0x75},
+ {0x56, 0x76},
+ {0x57, 0x77},
+ {0x58, 0x78},
+ {0x59, 0x79},
+ {0x5a, 0x7a}
+ };
+}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/CharacterType.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,79 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+
+public interface CharacterType {
+    // Numeric identifiers of the individual character classes.
+    int NEWLINE = 0;
+    int ALPHA = 1;
+    int BLANK = 2;
+    int CNTRL = 3;
+    int DIGIT = 4;
+    int GRAPH = 5;
+    int LOWER = 6;
+    int PRINT = 7;
+    int PUNCT = 8;
+    int SPACE = 9;
+    int UPPER = 10;
+    int XDIGIT = 11;
+    int WORD = 12;
+    int ALNUM = 13; /* alpha || digit */
+    int ASCII = 14;
+    // SPACE, DIGIT and WORD combined with the SPECIAL_MASK flag bit (presumably the \s, \d, \w shorthands -- confirm).
+    int SPECIAL_MASK = 1 << 8;
+    int S = SPACE | SPECIAL_MASK;
+    int D = DIGIT | SPECIAL_MASK;
+    int W = WORD | SPECIAL_MASK;
+    // Category bit sets: bit n is set for the java.lang.Character general-category constant whose value is n.
+    int LETTER_MASK = (1 << Character.UPPERCASE_LETTER) |
+                      (1 << Character.LOWERCASE_LETTER) |
+                      (1 << Character.TITLECASE_LETTER) |
+                      (1 << Character.MODIFIER_LETTER) |
+                      (1 << Character.OTHER_LETTER);
+    int ALPHA_MASK = LETTER_MASK |
+                     (1 << Character.COMBINING_SPACING_MARK) |
+                     (1 << Character.NON_SPACING_MARK) |
+                     (1 << Character.ENCLOSING_MARK);
+    int ALNUM_MASK = ALPHA_MASK |
+                     (1 << Character.DECIMAL_DIGIT_NUMBER);
+    int WORD_MASK = ALNUM_MASK |
+                    (1 << Character.CONNECTOR_PUNCTUATION);
+    int PUNCT_MASK = (1 << Character.CONNECTOR_PUNCTUATION) |
+                     (1 << Character.DASH_PUNCTUATION) |
+                     (1 << Character.END_PUNCTUATION) |
+                     (1 << Character.FINAL_QUOTE_PUNCTUATION) |
+                     (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
+                     (1 << Character.OTHER_PUNCTUATION) |
+                     (1 << Character.START_PUNCTUATION);
+    int CNTRL_MASK = (1 << Character.CONTROL) |
+                     (1 << Character.FORMAT) |
+                     (1 << Character.PRIVATE_USE) |
+                     (1 << Character.SURROGATE);
+    int SPACE_MASK = (1 << Character.SPACE_SEPARATOR) |
+                     (1 << Character.LINE_SEPARATOR) |      // 0x2028
+                     (1 << Character.PARAGRAPH_SEPARATOR);  // 0x2029
+    // NOTE(review): PRINT_MASK and GRAPH_MASK hold only control/surrogate (plus space) categories, so they
+    // look like exclusion sets rather than inclusion sets -- confirm against the code that tests them.
+    int PRINT_MASK = (1 << Character.CONTROL) |
+                     (1 << Character.SURROGATE);
+    int GRAPH_MASK = SPACE_MASK | PRINT_MASK;
+
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/IntHolder.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,24 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+
+public class IntHolder { // mutable container for a single int
+ public int value; // read and written directly; the class declares no accessors
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/ObjPtr.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+
+public final class ObjPtr<T> {
+    /** Package-visible shared holder, created with a null referent. */
+    static final ObjPtr<Void> NULL = new ObjPtr<Void>();
+    /** The referenced object; public, may be null and may be reassigned. */
+    public T p;
+
+    /** Creates a holder that initially refers to nothing. */
+    public ObjPtr() {
+        p = null;
+    }
+    /** Creates a holder that initially refers to {@code p}. */
+    public ObjPtr(T p) { this.p = p; }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/PosixBracket.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,77 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+
+import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
+import jdk.nashorn.internal.runtime.regexp.joni.exception.JOniException;
+
+import java.util.HashMap;
+
+public class PosixBracket {
+    /** Lower-case POSIX bracket names; the entry at index i is paired with {@link #PBSValues}[i]. */
+    public static final char[][] PBSNamesLower = {
+        "alnum".toCharArray(),
+        "alpha".toCharArray(),
+        "blank".toCharArray(),
+        "cntrl".toCharArray(),
+        "digit".toCharArray(),
+        "graph".toCharArray(),
+        "lower".toCharArray(),
+        "print".toCharArray(),
+        "punct".toCharArray(),
+        "space".toCharArray(),
+        "upper".toCharArray(),
+        "xdigit".toCharArray(),
+        "ascii".toCharArray(),
+        "word".toCharArray()
+    };
+    /** {@link CharacterType} code for the name stored at the same index of {@link #PBSNamesLower}. */
+    public static final int PBSValues[] = {
+        CharacterType.ALNUM,
+        CharacterType.ALPHA,
+        CharacterType.BLANK,
+        CharacterType.CNTRL,
+        CharacterType.DIGIT,
+        CharacterType.GRAPH,
+        CharacterType.LOWER,
+        CharacterType.PRINT,
+        CharacterType.PUNCT,
+        CharacterType.SPACE,
+        CharacterType.UPPER,
+        CharacterType.XDIGIT,
+        CharacterType.ASCII,
+        CharacterType.WORD,
+    };
+    /** Returns the CharacterType code for a POSIX bracket name, ignoring case; throws JOniException if the name is unknown. */
+    public static int propertyNameToCType(String name) {
+        final String key = name.toLowerCase(java.util.Locale.ROOT); // locale-independent: 'I' must map to 'i' even under a Turkish default locale
+        final Integer ctype = PBSTableUpper.get(key);
+        if (ctype == null) { // replace(), not replaceAll(): the name is inserted literally even if it contains '$' or a backslash
+            throw new JOniException(ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", key));
+        }
+        return ctype;
+    }
+    private static final HashMap<String,Integer> PBSTableUpper = new HashMap<String,Integer>(); // name -> code; keys are lower case despite the field name
+    static {
+        for (int i = 0; i < PBSValues.length; i++) {
+            PBSTableUpper.put(new String(PBSNamesLower[i]), PBSValues[i]);
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/encoding/Ptr.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,35 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.encoding;
+
+public final class Ptr {
+    /** Shared instance created with the value 0. */
+    public static final Ptr NULL = new Ptr(0);
+    /** The wrapped int; public and reassignable. */
+    public int p;
+
+    /** Creates a holder whose value is 0. */
+    public Ptr() {
+        p = 0;
+    }
+    /** Creates a holder whose value is {@code p}. */
+    public Ptr(int p) { this.p = p; }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/exception/ErrorMessages.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.exception;
+
+import jdk.nashorn.internal.runtime.regexp.joni.Config;
+
+public interface ErrorMessages { // message strings for Joni exceptions (e.g. PosixBracket builds a JOniException from ERR_INVALID_CHAR_PROPERTY_NAME)
+ final String MISMATCH = "mismatch";
+ final String NO_SUPPORT_CONFIG = "no support in this configuration";
+
+ /* from jcodings */
+ final String ERR_INVALID_CHAR_PROPERTY_NAME = "invalid character property name <%n>"; // "%n" is a placeholder the caller replaces with the offending name
+ final String ERR_INVALID_CODE_POINT_VALUE = "invalid code point value";
+ final String ERR_TOO_BIG_WIDE_CHAR_VALUE = "too big wide-char value";
+ final String ERR_TOO_LONG_WIDE_CHAR_VALUE = "too long wide-char value";
+
+ /* internal error */
+ final String ERR_MEMORY = "fail to memory allocation";
+ final String ERR_MATCH_STACK_LIMIT_OVER = "match-stack limit over";
+ final String ERR_TYPE_BUG = "undefined type (bug)";
+ final String ERR_PARSER_BUG = "internal parser error (bug)";
+ final String ERR_STACK_BUG = "stack error (bug)";
+ final String ERR_UNDEFINED_BYTECODE = "undefined bytecode (bug)";
+ final String ERR_UNEXPECTED_BYTECODE = "unexpected bytecode (bug)";
+ final String ERR_DEFAULT_ENCODING_IS_NOT_SETTED = "default multibyte-encoding is not setted";
+ final String ERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR = "can't convert to wide-char on specified multibyte-encoding";
+
+ /* general error */
+ final String ERR_INVALID_ARGUMENT = "invalid argument";
+
+ /* syntax error */
+ final String ERR_END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace";
+ final String ERR_END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket";
+ final String ERR_EMPTY_CHAR_CLASS = "empty char-class";
+ final String ERR_PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class";
+ final String ERR_END_PATTERN_AT_ESCAPE = "end pattern at escape";
+ final String ERR_END_PATTERN_AT_META = "end pattern at meta";
+ final String ERR_END_PATTERN_AT_CONTROL = "end pattern at control";
+ final String ERR_META_CODE_SYNTAX = "invalid meta-code syntax";
+ final String ERR_CONTROL_CODE_SYNTAX = "invalid control-code syntax";
+ final String ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range";
+ final String ERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range";
+ final String ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class";
+ final String ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified";
+ final String ERR_TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid";
+ final String ERR_NESTED_REPEAT_OPERATOR = "nested repeat operator";
+ final String ERR_UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis";
+ final String ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis";
+ final String ERR_END_PATTERN_IN_GROUP = "end pattern in group";
+ final String ERR_UNDEFINED_GROUP_OPTION = "undefined group option";
+ final String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type";
+ final String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind";
+ final String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}";
+
+ /* values error (syntax error) */
+ final String ERR_TOO_BIG_NUMBER = "too big number";
+ final String ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range";
+ final String ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range";
+ final String ERR_EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class";
+ final String ERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range";
+ final String ERR_TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified";
+ final String ERR_TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string";
+ final String ERR_TOO_BIG_BACKREF_NUMBER = "too big backref number";
+ final String ERR_INVALID_BACKREF = Config.USE_NAMED_GROUP ? "invalid backref number/name" : "invalid backref number"; // wording selected by Config.USE_NAMED_GROUP
+ final String ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)";
+ final String ERR_INVALID_WIDE_CHAR_VALUE = "invalid wide-char value";
+ final String ERR_EMPTY_GROUP_NAME = "group name is empty";
+ final String ERR_INVALID_GROUP_NAME = "invalid group name <%n>"; // this and the following "<%n>" messages use the same placeholder; ValueException(message, str) substitutes it
+ final String ERR_INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP ? "invalid char in group name <%n>" : "invalid char in group number <%n>"; // wording selected by Config.USE_NAMED_GROUP
+ final String ERR_UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference";
+ final String ERR_UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference";
+ final String ERR_MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>";
+ final String ERR_MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call";
+ final String ERR_NEVER_ENDING_RECURSION = "never ending recursion";
+ final String ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history";
+ final String ERR_NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination";
+ final String ERR_INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options";
+ final String ERR_OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count";
+ final String ERR_TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value";
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/exception/InternalException.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.exception;
+
+public class InternalException extends JOniException{ // JOniException subtype that adds no state or behaviour of its own
+ private static final long serialVersionUID = -3871816465397927992L;
+ // Passes the message through to JOniException unchanged.
+ public InternalException(String message) {
+ super(message);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/exception/JOniException.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.exception;
+
+public class JOniException extends RuntimeException{ // unchecked base type; InternalException and SyntaxException in this package extend it
+ private static final long serialVersionUID = -6027192180014164667L;
+ // Passes the message through to RuntimeException unchanged.
+ public JOniException(String message) {
+ super(message);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/exception/SyntaxException.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,28 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.exception;
+
+public class SyntaxException extends JOniException{ // JOniException subtype; ValueException in this package extends it
+ private static final long serialVersionUID = 7862720128961874288L;
+ // Passes the message through to JOniException unchanged.
+ public SyntaxException(String message) {
+ super(message);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/exception/ValueException.java Fri Feb 22 17:00:22 2013 +0100
@@ -0,0 +1,37 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+package jdk.nashorn.internal.runtime.regexp.joni.exception;
+
+public class ValueException extends SyntaxException {
+    private static final long serialVersionUID = -196013852479929134L;
+    /** Creates an exception carrying {@code message} unchanged. */
+    public ValueException(String message) {
+        super(message);
+    }
+    /** Creates an exception whose text is {@code message} with every "%n" replaced literally by {@code str}. */
+    public ValueException(String message, String str) {
+        super(message.replace("%n", str)); // not replaceAll(): '$' or a backslash in str must not be read as a group reference/escape
+    }
+    /** Same as {@link #ValueException(String, String)}, with the text taken from bytes[p, end) decoded in the platform default charset. */
+    public ValueException(String message, byte[] bytes, int p, int end) {
+        this(message, new String(bytes, p, end - p));
+    }
+
+}