nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java
author hannesw
Wed, 10 Apr 2013 14:05:11 +0200
changeset 16939 9e3a9eda5775
parent 16938 1a8ffed97564
child 16940 d5c597aa3d47
permissions -rw-r--r--
8011749: Bugs with empty character class handling Reviewed-by: lagergren, attila
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     1
/*
16151
97c1e756ae1e 8005663: Update copyright year to 2013
jlaskey
parents: 16147
diff changeset
     2
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     4
 *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    10
 *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    15
 * accompanied this code).
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    16
 *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    20
 *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    23
 * questions.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    24
 */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    25
16258
0e25f785df4d 8008093: Make RegExp engine pluggable
hannesw
parents: 16241
diff changeset
    26
package jdk.nashorn.internal.runtime.regexp;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    27
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    28
import java.util.HashMap;
16781
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
    29
import java.util.Iterator;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    30
import java.util.LinkedList;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    31
import java.util.List;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    32
import java.util.Map;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    33
import java.util.regex.PatternSyntaxException;
16258
0e25f785df4d 8008093: Make RegExp engine pluggable
hannesw
parents: 16241
diff changeset
    34
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
    35
import jdk.nashorn.internal.parser.Lexer;
16226
0e4f37e6cc40 8007915: Nashorn IR, codegen, parser packages and Context instance should be inaccessible to user code
sundar
parents: 16151
diff changeset
    36
import jdk.nashorn.internal.parser.Scanner;
16258
0e25f785df4d 8008093: Make RegExp engine pluggable
hannesw
parents: 16241
diff changeset
    37
import jdk.nashorn.internal.runtime.BitVector;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    38
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    39
/**
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    40
 * Scan a JavaScript regexp, converting to Java regex if necessary.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    41
 *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    42
 */
16226
0e4f37e6cc40 8007915: Nashorn IR, codegen, parser packages and Context instance should be inaccessible to user code
sundar
parents: 16151
diff changeset
    43
final class RegExpScanner extends Scanner {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    44
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    45
    /**
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
    46
     * String builder used to rewrite the pattern for the currently used regexp factory.
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    47
     */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    48
    private final StringBuilder sb;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    49
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    50
    /** Expected token table */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    51
    private final Map<Character, Integer> expected = new HashMap<>();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    52
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    53
    /** Capturing parenthesis that have been found so far. */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    54
    private final List<Capture> caps = new LinkedList<>();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    55
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    56
    /** Forward references to capturing parenthesis to be resolved later.*/
16781
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
    57
    private final LinkedList<Integer> forwardReferences = new LinkedList<>();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    58
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    59
    /** Current level of zero-width negative lookahead assertions. */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    60
    private int negativeLookaheadLevel;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    61
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
    62
    /** Are we currently inside a character class? */
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
    63
    private boolean inCharClass = false;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
    64
16274
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
    65
    /** Are we currently inside a negated character class? */
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
    66
    private boolean inNegativeClass = false;
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
    67
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    68
    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    69
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    70
    private static class Capture {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    71
        /**
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    72
         * Zero-width negative lookaheads enclosing the capture.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    73
         */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    74
        private final int negativeLookaheadLevel;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    75
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    76
        Capture(final int negativeLookaheadLevel) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    77
            this.negativeLookaheadLevel = negativeLookaheadLevel;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    78
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    79
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    80
        public int getNegativeLookaheadLevel() {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    81
            return negativeLookaheadLevel;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    82
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    83
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    84
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    85
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    86
    /**
     * Creates a scanner over the given pattern source and positions it at the start.
     *
     * @param string the JavaScript regexp to parse
     */
    private RegExpScanner(final String string) {
        super(string);
        this.sb = new StringBuilder(limit);
        reset(0);
        // Start both closing-delimiter counters at zero so push/pop can read
        // them without having to deal with a missing map entry.
        expected.put('}', 0);
        expected.put(']', 0);
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    97
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    98
    private void processForwardReferences() {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
    99
16781
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   100
        Iterator<Integer> iterator = forwardReferences.descendingIterator();
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   101
        while (iterator.hasNext()) {
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   102
            final int pos = iterator.next();
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   103
            final int num = iterator.next();
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   104
            if (num > caps.size()) {
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   105
                // Non-existing backreference. If the number begins with a valid octal convert it to
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   106
                // Unicode escape and append the rest to a literal character sequence.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   107
                final StringBuilder buffer = new StringBuilder();
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   108
                octalOrLiteral(Integer.toString(num), buffer);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   109
                sb.insert(pos, buffer);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   110
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   111
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   112
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   113
        forwardReferences.clear();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   114
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   115
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   116
    /**
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   117
     * Scan a JavaScript regexp string returning a Java safe regex string.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   118
     *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   119
     * @param string
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   120
     *            JavaScript regexp string.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   121
     * @return Java safe regex string.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   122
     */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   123
    public static RegExpScanner scan(final String string) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   124
        final RegExpScanner scanner = new RegExpScanner(string);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   125
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   126
        try {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   127
            scanner.disjunction();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   128
        } catch (final Exception e) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   129
            throw new PatternSyntaxException(e.getMessage(), string, scanner.position);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   130
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   131
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   132
        scanner.processForwardReferences();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   133
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   134
        // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   135
        if (scanner.position != string.length()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   136
            final String p = scanner.getStringBuilder().toString();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   137
            throw new PatternSyntaxException(string, p, p.length() + 1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   138
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   139
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   140
        return scanner;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   141
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   142
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   143
    final StringBuilder getStringBuilder() {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   144
        return sb;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   145
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   146
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   147
    String getJavaPattern() {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   148
        return sb.toString();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   149
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   150
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   151
    BitVector getGroupsInNegativeLookahead() {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   152
        BitVector vec = null;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   153
        for (int i = 0; i < caps.size(); i++) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   154
            final Capture cap = caps.get(i);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   155
            if (cap.getNegativeLookaheadLevel() > 0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   156
                if (vec == null) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   157
                    vec = new BitVector(caps.size() + 1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   158
                }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   159
                vec.set(i + 1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   160
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   161
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   162
        return vec;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   163
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   164
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   165
    /**
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   166
     * Commit n characters to the builder and to a given token
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   167
     * @param n     Number of characters.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   168
     * @return Committed token
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   169
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   170
    private boolean commit(final int n) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   171
        switch (n) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   172
        case 1:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   173
            sb.append(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   174
            skip(1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   175
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   176
        case 2:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   177
            sb.append(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   178
            sb.append(ch1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   179
            skip(2);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   180
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   181
        case 3:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   182
            sb.append(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   183
            sb.append(ch1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   184
            sb.append(ch2);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   185
            skip(3);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   186
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   187
        default:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   188
            assert false : "Should not reach here";
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   189
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   190
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   191
        return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   192
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   193
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   194
    /**
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   195
     * Restart the buffers back at an earlier position.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   196
     *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   197
     * @param startIn
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   198
     *            Position in the input stream.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   199
     * @param startOut
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   200
     *            Position in the output stream.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   201
     */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   202
    private void restart(final int startIn, final int startOut) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   203
        reset(startIn);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   204
        sb.setLength(startOut);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   205
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   206
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   207
    private void push(final char ch) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   208
        expected.put(ch, expected.get(ch) + 1);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   209
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   210
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   211
    private void pop(final char ch) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   212
        expected.put(ch, Math.min(0, expected.get(ch) - 1));
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   213
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   214
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   215
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   216
     * Recursive descent tokenizer starts below.
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   217
     */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   218
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   219
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   220
     * Disjunction ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   221
     *      Alternative
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   222
     *      Alternative | Disjunction
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   223
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   224
    private void disjunction() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   225
        while (true) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   226
            alternative();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   227
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   228
            if (ch0 == '|') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   229
                commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   230
            } else {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   231
                break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   232
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   233
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   234
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   235
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   236
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   237
     * Alternative ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   238
     *      [empty]
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   239
     *      Alternative Term
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   240
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   241
    private void alternative() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   242
        while (term()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   243
            // do nothing
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   244
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   245
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   246
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   247
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   248
     * Term ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   249
     *      Assertion
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   250
     *      Atom
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   251
     *      Atom Quantifier
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   252
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   253
    private boolean term() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   254
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   255
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   256
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   257
        if (assertion()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   258
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   259
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   260
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   261
        if (atom()) {
16939
9e3a9eda5775 8011749: Bugs with empty character class handling
hannesw
parents: 16938
diff changeset
   262
            // Check for character classes that never or always match
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   263
            if (sb.toString().endsWith("[]")) {
16939
9e3a9eda5775 8011749: Bugs with empty character class handling
hannesw
parents: 16938
diff changeset
   264
                sb.setLength(sb.length() - 1);
9e3a9eda5775 8011749: Bugs with empty character class handling
hannesw
parents: 16938
diff changeset
   265
                sb.append("^\\s\\S]");
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   266
            } else if (sb.toString().endsWith("[^]")) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   267
                sb.setLength(sb.length() - 2);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   268
                sb.append("\\s\\S]");
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   269
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   270
16939
9e3a9eda5775 8011749: Bugs with empty character class handling
hannesw
parents: 16938
diff changeset
   271
            quantifier();
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   272
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   273
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   274
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   275
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   276
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   277
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   278
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   279
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   280
     * Assertion ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   281
     *      ^
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   282
     *      $
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   283
     *      \b
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   284
     *      \B
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   285
     *      ( ? = Disjunction )
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   286
     *      ( ? ! Disjunction )
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   287
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   288
    private boolean assertion() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   289
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   290
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   291
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   292
        switch (ch0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   293
        case '^':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   294
        case '$':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   295
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   296
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   297
        case '\\':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   298
            if (ch1 == 'b' || ch1 == 'B') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   299
                return commit(2);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   300
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   301
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   302
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   303
        case '(':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   304
            if (ch1 != '?') {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   305
                break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   306
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   307
            if (ch2 != '=' && ch2 != '!') {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   308
                break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   309
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   310
            final boolean isNegativeLookahead = (ch2 == '!');
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   311
            commit(3);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   312
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   313
            if (isNegativeLookahead) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   314
                negativeLookaheadLevel++;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   315
            }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   316
            disjunction();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   317
            if (isNegativeLookahead) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   318
                negativeLookaheadLevel--;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   319
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   320
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   321
            if (ch0 == ')') {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   322
                return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   323
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   324
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   325
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   326
        default:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   327
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   328
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   329
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   330
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   331
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   332
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   333
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   334
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   335
     * Quantifier ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   336
     *      QuantifierPrefix
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   337
     *      QuantifierPrefix ?
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   338
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   339
    private boolean quantifier() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   340
        if (quantifierPrefix()) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   341
            if (ch0 == '?') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   342
                commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   343
            }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   344
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   345
        }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   346
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   347
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   348
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   349
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   350
     * QuantifierPrefix ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   351
     *      *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   352
     *      +
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   353
     *      ?
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   354
     *      { DecimalDigits }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   355
     *      { DecimalDigits , }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   356
     *      { DecimalDigits , DecimalDigits }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   357
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   358
    private boolean quantifierPrefix() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   359
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   360
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   361
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   362
        switch (ch0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   363
        case '*':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   364
        case '+':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   365
        case '?':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   366
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   367
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   368
        case '{':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   369
            commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   370
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   371
            if (!decimalDigits()) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   372
                break; // not a quantifier - back out
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   373
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   374
            push('}');
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   375
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   376
            if (ch0 == ',') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   377
                commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   378
                decimalDigits();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   379
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   380
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   381
            if (ch0 == '}') {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   382
                pop('}');
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   383
                commit(1);
16781
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   384
            } else {
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   385
                // Bad quantifier should be rejected but is accepted by all major engines
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   386
                restart(startIn, startOut);
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   387
                return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   388
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   389
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   390
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   391
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   392
        default:
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   393
            break;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   394
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   395
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   396
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   397
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   398
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   399
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   400
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   401
     * Atom ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   402
     *      PatternCharacter
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   403
     *      .
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   404
     *      \ AtomEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   405
     *      CharacterClass
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   406
     *      ( Disjunction )
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   407
     *      ( ? : Disjunction )
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   408
     *
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   409
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   410
    private boolean atom() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   411
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   412
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   413
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   414
        if (patternCharacter()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   415
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   416
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   417
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   418
        if (ch0 == '.') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   419
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   420
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   421
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   422
        if (ch0 == '\\') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   423
            commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   424
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   425
            if (atomEscape()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   426
                return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   427
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   428
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   429
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   430
        if (characterClass()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   431
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   432
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   433
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   434
        if (ch0 == '(') {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   435
            boolean capturingParens = true;
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   436
            commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   437
            if (ch0 == '?' && ch1 == ':') {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   438
                capturingParens = false;
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   439
                commit(2);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   440
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   441
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   442
            disjunction();
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   443
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   444
            if (ch0 == ')') {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   445
                commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   446
                if (capturingParens) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   447
                    caps.add(new Capture(negativeLookaheadLevel));
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   448
                }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   449
                return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   450
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   451
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   452
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   453
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   454
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   455
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   456
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   457
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   458
     * PatternCharacter ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   459
     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   460
     */
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   461
    @SuppressWarnings("fallthrough")
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   462
    private boolean patternCharacter() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   463
        if (atEOF()) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   464
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   465
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   466
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   467
        switch (ch0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   468
        case '^':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   469
        case '$':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   470
        case '\\':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   471
        case '.':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   472
        case '*':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   473
        case '+':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   474
        case '?':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   475
        case '(':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   476
        case ')':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   477
        case '[':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   478
        case '|':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   479
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   480
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   481
        case '}':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   482
        case ']':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   483
            final int n = expected.get(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   484
            if (n != 0) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   485
                return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   486
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   487
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   488
       case '{':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   489
           // if not a valid quantifier escape curly brace to match itself
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   490
           // this ensures compatibility with other JS implementations
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   491
           if (!quantifierPrefix()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   492
               sb.append('\\');
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   493
               return commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   494
           }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   495
           return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   496
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   497
        default:
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   498
            return commit(1); // SOURCECHARACTER
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   499
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   500
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   501
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   502
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   503
     * AtomEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   504
     *      DecimalEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   505
     *      CharacterEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   506
     *      CharacterClassEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   507
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   508
    private boolean atomEscape() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   509
        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   510
        return decimalEscape() || characterClassEscape() || characterEscape() || identityEscape();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   511
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   512
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   513
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   514
     * CharacterEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   515
     *      ControlEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   516
     *      c ControlLetter
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   517
     *      HexEscapeSequence
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   518
     *      UnicodeEscapeSequence
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   519
     *      IdentityEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   520
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   521
    private boolean characterEscape() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   522
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   523
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   524
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   525
        if (controlEscape()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   526
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   527
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   528
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   529
        if (ch0 == 'c') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   530
            commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   531
            if (controlLetter()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   532
                return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   533
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   534
            restart(startIn, startOut);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   535
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   536
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   537
        if (hexEscapeSequence() || unicodeEscapeSequence()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   538
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   539
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   540
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   541
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   542
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   543
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   544
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   545
    private boolean scanEscapeSequence(final char leader, final int length) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   546
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   547
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   548
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   549
        if (ch0 != leader) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   550
            return false;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   551
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   552
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   553
        commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   554
        for (int i = 0; i < length; i++) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   555
            final char ch0l = Character.toLowerCase(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   556
            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   557
                commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   558
            } else {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   559
                restart(startIn, startOut);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   560
                return false;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   561
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   562
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   563
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   564
        return true;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   565
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   566
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   567
    private boolean hexEscapeSequence() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   568
        return scanEscapeSequence('x', 2);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   569
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   570
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   571
    private boolean unicodeEscapeSequence() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   572
        return scanEscapeSequence('u', 4);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   573
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   574
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   575
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   576
     * ControlEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   577
     *      one of fnrtv
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   578
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   579
    private boolean controlEscape() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   580
        switch (ch0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   581
        case 'f':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   582
        case 'n':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   583
        case 'r':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   584
        case 't':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   585
        case 'v':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   586
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   587
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   588
        default:
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   589
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   590
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   591
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   592
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   593
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   594
     * ControlLetter ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   595
     *      one of abcdefghijklmnopqrstuvwxyz
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   596
     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   597
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   598
    private boolean controlLetter() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   599
        final char c = Character.toUpperCase(ch0);
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   600
        if (c >= 'A' && c <= 'Z') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   601
            // for some reason java regexps don't like control characters on the
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   602
            // form "\\ca".match([string with ascii 1 at char0]). Translating
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   603
            // them to unicode does it though.
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   604
            sb.setLength(sb.length() - 1);
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   605
            unicode(c - 'A' + 1, sb);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   606
            skip(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   607
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   608
        }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   609
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   610
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   611
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   612
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   613
     * IdentityEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   614
     *      SourceCharacter but not IdentifierPart
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   615
     *      <ZWJ>  (200c)
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   616
     *      <ZWNJ> (200d)
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   617
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   618
    private boolean identityEscape() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   619
        if (atEOF()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   620
            throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   621
        }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   622
        // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
16781
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   623
        if (ch0 == 'c') {
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   624
            // Ignore invalid control letter escape if within a character class
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   625
            if (inCharClass && ch1 != ']') {
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   626
                sb.setLength(sb.length() - 1);
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   627
                skip(2);
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   628
                return true;
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   629
            } else {
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   630
                sb.append('\\'); // Treat invalid \c control sequence as \\c
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   631
            }
41eadf003eff 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
hannesw
parents: 16525
diff changeset
   632
        } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   633
            sb.setLength(sb.length() - 1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   634
        }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   635
        return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   636
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   637
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   638
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   639
     * DecimalEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   640
     *      DecimalIntegerLiteral [lookahead DecimalDigit]
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   641
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   642
    private boolean decimalEscape() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   643
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   644
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   645
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   646
        if (ch0 == '0' && !isOctalDigit(ch1)) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   647
            skip(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   648
            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   649
            sb.append("\u0000");
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   650
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   651
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   652
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   653
        if (isDecimalDigit(ch0)) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   654
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   655
            if (ch0 == '0') {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   656
                // We know this is an octal escape.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   657
                if (inCharClass) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   658
                    // Convert octal escape to unicode escape if inside character class.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   659
                    int octalValue = 0;
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   660
                    while (isOctalDigit(ch0)) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   661
                        octalValue = octalValue * 8 + ch0 - '0';
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   662
                        skip(1);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   663
                    }
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   664
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   665
                    unicode(octalValue, sb);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   666
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   667
                } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   668
                    // Copy decimal escape as-is
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   669
                    decimalDigits();
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   670
                }
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   671
            } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   672
                // This should be a backreference, but could also be an octal escape or even a literal string.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   673
                int decimalValue = 0;
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   674
                while (isDecimalDigit(ch0)) {
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   675
                    decimalValue = decimalValue * 10 + ch0 - '0';
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   676
                    skip(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   677
                }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   678
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   679
                if (inCharClass) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   680
                    // No backreferences in character classes. Encode as unicode escape or literal char sequence
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   681
                    sb.setLength(sb.length() - 1);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   682
                    octalOrLiteral(Integer.toString(decimalValue), sb);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   683
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   684
                } else if (decimalValue <= caps.size() && caps.get(decimalValue - 1).getNegativeLookaheadLevel() > 0) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   685
                    //  Captures that live inside a negative lookahead are dead after the
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   686
                    //  lookahead and will be undefined if referenced from outside.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   687
                    if (caps.get(decimalValue - 1).getNegativeLookaheadLevel() > negativeLookaheadLevel) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   688
                        sb.setLength(sb.length() - 1);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   689
                    } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   690
                        sb.append(decimalValue);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   691
                    }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   692
                } else if (decimalValue > caps.size()) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   693
                    // Forward reference to a capture group. Forward references are always undefined so we can omit
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   694
                    // it from the output buffer. However, if the target capture does not exist, we need to rewrite
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   695
                    // the reference as hex escape or literal string, so register the reference for later processing.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   696
                    sb.setLength(sb.length() - 1);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   697
                    forwardReferences.add(decimalValue);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   698
                    forwardReferences.add(sb.length());
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   699
                } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   700
                    // Append as backreference
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   701
                    sb.append(decimalValue);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   702
                }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   703
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   704
            }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   705
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   706
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   707
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   708
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   709
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   710
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   711
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   712
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   713
     * CharacterClassEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   714
     *  one of dDsSwW
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   715
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   716
    private boolean characterClassEscape() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   717
        switch (ch0) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   718
        // java.util.regex requires translation of \s and \S to explicit character list
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   719
        case 's':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   720
            if (RegExpFactory.usesJavaUtilRegex()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   721
                sb.setLength(sb.length() - 1);
16274
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   722
                // No nested class required if we already are inside a character class
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   723
                if (inCharClass) {
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   724
                    sb.append(Lexer.getWhitespaceRegExp());
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   725
                } else {
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   726
                    sb.append('[').append(Lexer.getWhitespaceRegExp()).append(']');
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   727
                }
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   728
                skip(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   729
                return true;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   730
            }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   731
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   732
        case 'S':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   733
            if (RegExpFactory.usesJavaUtilRegex()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   734
                sb.setLength(sb.length() - 1);
16274
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   735
                // In negative class we must use intersection to get double negation ("not anything else than space")
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   736
                sb.append(inNegativeClass ? "&&[" : "[^").append(Lexer.getWhitespaceRegExp()).append(']');
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   737
                skip(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   738
                return true;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   739
            }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   740
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   741
        case 'd':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   742
        case 'D':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   743
        case 'w':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   744
        case 'W':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   745
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   746
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   747
        default:
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   748
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   749
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   750
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   751
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   752
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   753
     * CharacterClass ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   754
     *      [ [lookahead {^}] ClassRanges ]
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   755
     *      [ ^ ClassRanges ]
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   756
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   757
    private boolean characterClass() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   758
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   759
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   760
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   761
        if (ch0 == '[') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   762
            try {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   763
                inCharClass = true;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   764
                push(']');
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   765
                commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   766
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   767
                if (ch0 == '^') {
16274
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   768
                    inNegativeClass = true;
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   769
                    commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   770
                }
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   771
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   772
                if (classRanges() && ch0 == ']') {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   773
                    pop(']');
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   774
                    return commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   775
                }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   776
            } finally {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   777
                inCharClass = false;  // no nested character classes in JavaScript
16274
c3f35c5e0d1c 8008370: coffee script compiler doesn't work with Nashorn
hannesw
parents: 16271
diff changeset
   778
                inNegativeClass = false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   779
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   780
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   781
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   782
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   783
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   784
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   785
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   786
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   787
     * ClassRanges ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   788
     *      [empty]
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   789
     *      NonemptyClassRanges
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   790
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   791
    private boolean classRanges() {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   792
        nonemptyClassRanges();
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   793
        return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   794
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   795
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   796
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   797
     * NonemptyClassRanges ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   798
     *      ClassAtom
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   799
     *      ClassAtom NonemptyClassRangesNoDash
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   800
     *      ClassAtom - ClassAtom ClassRanges
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   801
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   802
    private boolean nonemptyClassRanges() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   803
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   804
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   805
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   806
        if (classAtom()) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   807
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   808
            if (ch0 == '-') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   809
                commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   810
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   811
                if (classAtom() && classRanges()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   812
                    return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   813
                }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   814
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   815
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   816
            nonemptyClassRangesNoDash();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   817
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   818
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   819
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   820
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   821
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   822
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   823
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   824
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   825
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   826
     * NonemptyClassRangesNoDash ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   827
     *      ClassAtom
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   828
     *      ClassAtomNoDash NonemptyClassRangesNoDash
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   829
     *      ClassAtomNoDash - ClassAtom ClassRanges
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   830
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   831
    private boolean nonemptyClassRangesNoDash() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   832
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   833
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   834
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   835
        if (classAtomNoDash()) {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   836
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   837
            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   838
            if (ch0 == '-') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   839
               commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   840
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   841
               if (classAtom() && classRanges()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   842
                   return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   843
               }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   844
               //fallthru
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   845
           }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   846
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   847
            nonemptyClassRangesNoDash();
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   848
            return true; // still a class atom
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   849
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   850
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   851
        if (classAtom()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   852
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   853
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   854
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   855
        restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   856
        return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   857
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   858
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   859
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   860
     * ClassAtom ::
     *      -
     *      ClassAtomNoDash
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   861
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   862
    private boolean classAtom() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   863
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   864
        if (ch0 == '-') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   865
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   866
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   867
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   868
        return classAtomNoDash();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   869
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   870
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   871
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   872
     * ClassAtomNoDash ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   873
     *      SourceCharacter but not one of \ or ] or -
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   874
     *      \ ClassEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   875
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   876
    private boolean classAtomNoDash() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   877
        final int startIn  = position;
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   878
        final int startOut = sb.length();
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   879
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   880
        switch (ch0) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   881
        case ']':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   882
        case '-':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   883
        case '\0':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   884
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   885
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   886
        case '[':
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   887
            // unescaped left square bracket - add escape
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   888
            sb.append('\\');
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   889
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   890
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   891
        case '\\':
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   892
            commit(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   893
            if (classEscape()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   894
                return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   895
            }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   896
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   897
            restart(startIn, startOut);
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   898
            return false;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   899
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   900
        default:
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   901
            return commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   902
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   903
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   904
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   905
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   906
     * ClassEscape ::
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   907
     *      DecimalEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   908
     *      b
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   909
     *      CharacterEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   910
     *      CharacterClassEscape
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   911
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   912
    private boolean classEscape() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   913
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   914
        if (decimalEscape()) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   915
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   916
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   917
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   918
        if (ch0 == 'b') {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   919
            sb.setLength(sb.length() - 1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   920
            sb.append('\b');
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   921
            skip(1);
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   922
            return true;
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   923
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   924
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   925
        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   926
        return characterEscape() || characterClassEscape() || identityEscape();
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   927
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   928
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   929
    /*
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   930
     * DecimalDigits
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   931
     */
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   932
    private boolean decimalDigits() {
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   933
        if (!isDecimalDigit(ch0)) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   934
            return false;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   935
        }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   936
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   937
        while (isDecimalDigit(ch0)) {
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   938
            commit(1);
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   939
        }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   940
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   941
        return true;
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   942
    }
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   943
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   944
    private void unicode(final int value, final StringBuilder buffer) {
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   945
        final String hex = Integer.toHexString(value);
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   946
        buffer.append('u');
16271
4817d7bb7434 8009240: RegExpScanner code is inefficient and too complex
hannesw
parents: 16258
diff changeset
   947
        for (int i = 0; i < 4 - hex.length(); i++) {
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   948
            buffer.append('0');
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   949
        }
16938
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   950
        buffer.append(hex);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   951
    }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   952
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   953
    // Convert what would have been a backreference into a unicode escape, or a number literal, or both.
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   954
    private void octalOrLiteral(final String numberLiteral, final StringBuilder buffer) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   955
        final int length = numberLiteral.length();
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   956
        int octalValue = 0;
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   957
        int pos = 0;
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   958
        // Maximum value for octal escape is 0377 (255) so we stop the loop at 32
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   959
        while (pos < length && octalValue < 0x20) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   960
            final char ch = numberLiteral.charAt(pos);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   961
            if (isOctalDigit(ch)) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   962
                octalValue = octalValue * 8 + ch - '0';
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   963
            } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   964
                break;
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   965
            }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   966
            pos++;
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   967
        }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   968
        if (octalValue > 0) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   969
            buffer.append('\\');
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   970
            unicode(octalValue, buffer);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   971
            buffer.append(numberLiteral.substring(pos));
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   972
        } else {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   973
            buffer.append(numberLiteral);
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   974
        }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   975
    }
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   976
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   977
    private static boolean isOctalDigit(final char ch) {
1a8ffed97564 8011714: Regexp decimal escape handling still not correct
hannesw
parents: 16781
diff changeset
   978
        return ch >= '0' && ch <= '7';
16147
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   979
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   980
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   981
    private static boolean isDecimalDigit(final char ch) {
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   982
        return ch >= '0' && ch <= '9';
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   983
    }
e63b63819133 8005403: Open-source Nashorn
jlaskey
parents:
diff changeset
   984
}