jaxp/src/com/sun/org/apache/regexp/internal/RECompiler.java
author ehelin
Mon, 31 Mar 2014 14:02:40 +0200
changeset 23544 e6362a5ba011
parent 12457 c348e06f0e82
permissions -rw-r--r--
8033251: Use DWARF debug symbols for Linux 32-bit as default Reviewed-by: dcubed, dholmes, coleenp
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6
7f561c08de6b Initial load
duke
parents:
diff changeset
     1
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     2
 * reserved comment block
7f561c08de6b Initial load
duke
parents:
diff changeset
     3
 * DO NOT REMOVE OR ALTER!
7f561c08de6b Initial load
duke
parents:
diff changeset
     4
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
     5
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     6
 * Copyright 1999-2004 The Apache Software Foundation.
7f561c08de6b Initial load
duke
parents:
diff changeset
     7
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
     8
 * Licensed under the Apache License, Version 2.0 (the "License");
7f561c08de6b Initial load
duke
parents:
diff changeset
     9
 * you may not use this file except in compliance with the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    10
 * You may obtain a copy of the License at
7f561c08de6b Initial load
duke
parents:
diff changeset
    11
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    12
 *     http://www.apache.org/licenses/LICENSE-2.0
7f561c08de6b Initial load
duke
parents:
diff changeset
    13
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    14
 * Unless required by applicable law or agreed to in writing, software
7f561c08de6b Initial load
duke
parents:
diff changeset
    15
 * distributed under the License is distributed on an "AS IS" BASIS,
7f561c08de6b Initial load
duke
parents:
diff changeset
    16
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
7f561c08de6b Initial load
duke
parents:
diff changeset
    17
 * See the License for the specific language governing permissions and
7f561c08de6b Initial load
duke
parents:
diff changeset
    18
 * limitations under the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    19
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    20
7f561c08de6b Initial load
duke
parents:
diff changeset
    21
package com.sun.org.apache.regexp.internal;
7f561c08de6b Initial load
duke
parents:
diff changeset
    22
7f561c08de6b Initial load
duke
parents:
diff changeset
    23
import com.sun.org.apache.regexp.internal.RE;
7f561c08de6b Initial load
duke
parents:
diff changeset
    24
import java.util.Hashtable;
7f561c08de6b Initial load
duke
parents:
diff changeset
    25
7f561c08de6b Initial load
duke
parents:
diff changeset
    26
/**
7f561c08de6b Initial load
duke
parents:
diff changeset
    27
 * A regular expression compiler class.  This class compiles a pattern string into a
7f561c08de6b Initial load
duke
parents:
diff changeset
    28
 * regular expression program interpretable by the RE evaluator class.  The 'recompile'
7f561c08de6b Initial load
duke
parents:
diff changeset
    29
 * command line tool uses this compiler to pre-compile regular expressions for use
7f561c08de6b Initial load
duke
parents:
diff changeset
    30
 * with RE.  For a description of the syntax accepted by RECompiler and what you can
7f561c08de6b Initial load
duke
parents:
diff changeset
    31
 * do with regular expressions, see the documentation for the RE matcher class.
7f561c08de6b Initial load
duke
parents:
diff changeset
    32
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    33
 * @see RE
7f561c08de6b Initial load
duke
parents:
diff changeset
    34
 * @see recompile
7f561c08de6b Initial load
duke
parents:
diff changeset
    35
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    36
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
    37
 * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
    38
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    39
public class RECompiler
7f561c08de6b Initial load
duke
parents:
diff changeset
    40
{
7f561c08de6b Initial load
duke
parents:
diff changeset
    41
    // The compiled program
7f561c08de6b Initial load
duke
parents:
diff changeset
    42
    char[] instruction;                                 // The compiled RE 'program' instruction buffer
7f561c08de6b Initial load
duke
parents:
diff changeset
    43
    int lenInstruction;                                 // The amount of the program buffer currently in use
7f561c08de6b Initial load
duke
parents:
diff changeset
    44
7f561c08de6b Initial load
duke
parents:
diff changeset
    45
    // Input state for compiling regular expression
7f561c08de6b Initial load
duke
parents:
diff changeset
    46
    String pattern;                                     // Input string
7f561c08de6b Initial load
duke
parents:
diff changeset
    47
    int len;                                            // Length of the pattern string
7f561c08de6b Initial load
duke
parents:
diff changeset
    48
    int idx;                                            // Current input index into ac
7f561c08de6b Initial load
duke
parents:
diff changeset
    49
    int parens;                                         // Total number of paren pairs
7f561c08de6b Initial load
duke
parents:
diff changeset
    50
7f561c08de6b Initial load
duke
parents:
diff changeset
    51
    // Node flags
7f561c08de6b Initial load
duke
parents:
diff changeset
    52
    static final int NODE_NORMAL   = 0;                 // No flags (nothing special)
7f561c08de6b Initial load
duke
parents:
diff changeset
    53
    static final int NODE_NULLABLE = 1;                 // True if node is potentially null
7f561c08de6b Initial load
duke
parents:
diff changeset
    54
    static final int NODE_TOPLEVEL = 2;                 // True if top level expr
7f561c08de6b Initial load
duke
parents:
diff changeset
    55
7f561c08de6b Initial load
duke
parents:
diff changeset
    56
    // Special types of 'escapes'
7f561c08de6b Initial load
duke
parents:
diff changeset
    57
    static final int ESC_MASK      = 0xffff0;           // Escape complexity mask
7f561c08de6b Initial load
duke
parents:
diff changeset
    58
    static final int ESC_BACKREF   = 0xfffff;           // Escape is really a backreference
7f561c08de6b Initial load
duke
parents:
diff changeset
    59
    static final int ESC_COMPLEX   = 0xffffe;           // Escape isn't really a true character
7f561c08de6b Initial load
duke
parents:
diff changeset
    60
    static final int ESC_CLASS     = 0xffffd;           // Escape represents a whole class of characters
7f561c08de6b Initial load
duke
parents:
diff changeset
    61
7f561c08de6b Initial load
duke
parents:
diff changeset
    62
    // {m,n} stacks
7f561c08de6b Initial load
duke
parents:
diff changeset
    63
    int maxBrackets = 10;                               // Maximum number of bracket pairs
7f561c08de6b Initial load
duke
parents:
diff changeset
    64
    static final int bracketUnbounded = -1;             // Unbounded value
7f561c08de6b Initial load
duke
parents:
diff changeset
    65
    int brackets = 0;                                   // Number of bracket sets
7f561c08de6b Initial load
duke
parents:
diff changeset
    66
    int[] bracketStart = null;                          // Starting point
7f561c08de6b Initial load
duke
parents:
diff changeset
    67
    int[] bracketEnd = null;                            // Ending point
7f561c08de6b Initial load
duke
parents:
diff changeset
    68
    int[] bracketMin = null;                            // Minimum number of matches
7f561c08de6b Initial load
duke
parents:
diff changeset
    69
    int[] bracketOpt = null;                            // Additional optional matches
7f561c08de6b Initial load
duke
parents:
diff changeset
    70
7f561c08de6b Initial load
duke
parents:
diff changeset
    71
    // Lookup table for POSIX character class names
7f561c08de6b Initial load
duke
parents:
diff changeset
    72
    static Hashtable hashPOSIX = new Hashtable();
7f561c08de6b Initial load
duke
parents:
diff changeset
    73
    static
7f561c08de6b Initial load
duke
parents:
diff changeset
    74
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    75
        hashPOSIX.put("alnum",     new Character(RE.POSIX_CLASS_ALNUM));
7f561c08de6b Initial load
duke
parents:
diff changeset
    76
        hashPOSIX.put("alpha",     new Character(RE.POSIX_CLASS_ALPHA));
7f561c08de6b Initial load
duke
parents:
diff changeset
    77
        hashPOSIX.put("blank",     new Character(RE.POSIX_CLASS_BLANK));
7f561c08de6b Initial load
duke
parents:
diff changeset
    78
        hashPOSIX.put("cntrl",     new Character(RE.POSIX_CLASS_CNTRL));
7f561c08de6b Initial load
duke
parents:
diff changeset
    79
        hashPOSIX.put("digit",     new Character(RE.POSIX_CLASS_DIGIT));
7f561c08de6b Initial load
duke
parents:
diff changeset
    80
        hashPOSIX.put("graph",     new Character(RE.POSIX_CLASS_GRAPH));
7f561c08de6b Initial load
duke
parents:
diff changeset
    81
        hashPOSIX.put("lower",     new Character(RE.POSIX_CLASS_LOWER));
7f561c08de6b Initial load
duke
parents:
diff changeset
    82
        hashPOSIX.put("print",     new Character(RE.POSIX_CLASS_PRINT));
7f561c08de6b Initial load
duke
parents:
diff changeset
    83
        hashPOSIX.put("punct",     new Character(RE.POSIX_CLASS_PUNCT));
7f561c08de6b Initial load
duke
parents:
diff changeset
    84
        hashPOSIX.put("space",     new Character(RE.POSIX_CLASS_SPACE));
7f561c08de6b Initial load
duke
parents:
diff changeset
    85
        hashPOSIX.put("upper",     new Character(RE.POSIX_CLASS_UPPER));
7f561c08de6b Initial load
duke
parents:
diff changeset
    86
        hashPOSIX.put("xdigit",    new Character(RE.POSIX_CLASS_XDIGIT));
7f561c08de6b Initial load
duke
parents:
diff changeset
    87
        hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));
7f561c08de6b Initial load
duke
parents:
diff changeset
    88
        hashPOSIX.put("javapart",  new Character(RE.POSIX_CLASS_JPART));
7f561c08de6b Initial load
duke
parents:
diff changeset
    89
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    90
7f561c08de6b Initial load
duke
parents:
diff changeset
    91
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    92
     * Constructor.  Creates (initially empty) storage for a regular expression program.
7f561c08de6b Initial load
duke
parents:
diff changeset
    93
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
    94
    public RECompiler()
7f561c08de6b Initial load
duke
parents:
diff changeset
    95
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    96
        // Start off with a generous, yet reasonable, initial size
7f561c08de6b Initial load
duke
parents:
diff changeset
    97
        instruction = new char[128];
7f561c08de6b Initial load
duke
parents:
diff changeset
    98
        lenInstruction = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
    99
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   100
7f561c08de6b Initial load
duke
parents:
diff changeset
   101
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   102
     * Ensures that n more characters can fit in the program buffer.
7f561c08de6b Initial load
duke
parents:
diff changeset
   103
     * If n more can't fit, then the size is doubled until it can.
7f561c08de6b Initial load
duke
parents:
diff changeset
   104
     * @param n Number of additional characters to ensure will fit.
7f561c08de6b Initial load
duke
parents:
diff changeset
   105
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   106
    void ensure(int n)
7f561c08de6b Initial load
duke
parents:
diff changeset
   107
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   108
        // Get current program length
7f561c08de6b Initial load
duke
parents:
diff changeset
   109
        int curlen = instruction.length;
7f561c08de6b Initial load
duke
parents:
diff changeset
   110
7f561c08de6b Initial load
duke
parents:
diff changeset
   111
        // If the current length + n more is too much
7f561c08de6b Initial load
duke
parents:
diff changeset
   112
        if (lenInstruction + n >= curlen)
7f561c08de6b Initial load
duke
parents:
diff changeset
   113
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   114
            // Double the size of the program array until n more will fit
7f561c08de6b Initial load
duke
parents:
diff changeset
   115
            while (lenInstruction + n >= curlen)
7f561c08de6b Initial load
duke
parents:
diff changeset
   116
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   117
                curlen *= 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   118
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   119
7f561c08de6b Initial load
duke
parents:
diff changeset
   120
            // Allocate new program array and move data into it
7f561c08de6b Initial load
duke
parents:
diff changeset
   121
            char[] newInstruction = new char[curlen];
7f561c08de6b Initial load
duke
parents:
diff changeset
   122
            System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);
7f561c08de6b Initial load
duke
parents:
diff changeset
   123
            instruction = newInstruction;
7f561c08de6b Initial load
duke
parents:
diff changeset
   124
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   125
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   126
7f561c08de6b Initial load
duke
parents:
diff changeset
   127
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   128
     * Emit a single character into the program stream.
7f561c08de6b Initial load
duke
parents:
diff changeset
   129
     * @param c Character to add
7f561c08de6b Initial load
duke
parents:
diff changeset
   130
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   131
    void emit(char c)
7f561c08de6b Initial load
duke
parents:
diff changeset
   132
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   133
        // Make room for character
7f561c08de6b Initial load
duke
parents:
diff changeset
   134
        ensure(1);
7f561c08de6b Initial load
duke
parents:
diff changeset
   135
7f561c08de6b Initial load
duke
parents:
diff changeset
   136
        // Add character
7f561c08de6b Initial load
duke
parents:
diff changeset
   137
        instruction[lenInstruction++] = c;
7f561c08de6b Initial load
duke
parents:
diff changeset
   138
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   139
7f561c08de6b Initial load
duke
parents:
diff changeset
   140
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   141
     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
7f561c08de6b Initial load
duke
parents:
diff changeset
   142
     * pointer is initialized to 0.
7f561c08de6b Initial load
duke
parents:
diff changeset
   143
     * @param opcode Opcode for new node
7f561c08de6b Initial load
duke
parents:
diff changeset
   144
     * @param opdata Opdata for new node (only the low 16 bits are currently used)
7f561c08de6b Initial load
duke
parents:
diff changeset
   145
     * @param insertAt Index at which to insert the new node in the program
7f561c08de6b Initial load
duke
parents:
diff changeset
   146
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   147
    void nodeInsert(char opcode, int opdata, int insertAt)
7f561c08de6b Initial load
duke
parents:
diff changeset
   148
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   149
        // Make room for a new node
7f561c08de6b Initial load
duke
parents:
diff changeset
   150
        ensure(RE.nodeSize);
7f561c08de6b Initial load
duke
parents:
diff changeset
   151
7f561c08de6b Initial load
duke
parents:
diff changeset
   152
        // Move everything from insertAt to the end down nodeSize elements
7f561c08de6b Initial load
duke
parents:
diff changeset
   153
        System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
7f561c08de6b Initial load
duke
parents:
diff changeset
   154
        instruction[insertAt + RE.offsetOpcode] = opcode;
7f561c08de6b Initial load
duke
parents:
diff changeset
   155
        instruction[insertAt + RE.offsetOpdata] = (char)opdata;
7f561c08de6b Initial load
duke
parents:
diff changeset
   156
        instruction[insertAt + RE.offsetNext] = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   157
        lenInstruction += RE.nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
   158
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   159
7f561c08de6b Initial load
duke
parents:
diff changeset
   160
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   161
     * Appends a node to the end of a node chain
7f561c08de6b Initial load
duke
parents:
diff changeset
   162
     * @param node Start of node chain to traverse
7f561c08de6b Initial load
duke
parents:
diff changeset
   163
     * @param pointTo Node to have the tail of the chain point to
7f561c08de6b Initial load
duke
parents:
diff changeset
   164
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   165
    void setNextOfEnd(int node, int pointTo)
7f561c08de6b Initial load
duke
parents:
diff changeset
   166
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   167
        // Traverse the chain until the next offset is 0
7f561c08de6b Initial load
duke
parents:
diff changeset
   168
        int next = instruction[node + RE.offsetNext];
7f561c08de6b Initial load
duke
parents:
diff changeset
   169
        // while the 'node' is not the last in the chain
7f561c08de6b Initial load
duke
parents:
diff changeset
   170
        // and the 'node' is not the last in the program.
7f561c08de6b Initial load
duke
parents:
diff changeset
   171
        while ( next != 0 && node < lenInstruction )
7f561c08de6b Initial load
duke
parents:
diff changeset
   172
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   173
            // if the node we are supposed to point to is in the chain then
7f561c08de6b Initial load
duke
parents:
diff changeset
   174
            // point to the end of the program instead.
7f561c08de6b Initial load
duke
parents:
diff changeset
   175
            // Michael McCallum <gholam@xtra.co.nz>
7f561c08de6b Initial load
duke
parents:
diff changeset
   176
            // FIXME: // This is a _hack_ to stop infinite programs.
7f561c08de6b Initial load
duke
parents:
diff changeset
   177
            // I believe that the implementation of the reluctant matches is wrong but
7f561c08de6b Initial load
duke
parents:
diff changeset
   178
            // have not worked out a better way yet.
7f561c08de6b Initial load
duke
parents:
diff changeset
   179
            if ( node == pointTo ) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   180
              pointTo = lenInstruction;
7f561c08de6b Initial load
duke
parents:
diff changeset
   181
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   182
            node += next;
7f561c08de6b Initial load
duke
parents:
diff changeset
   183
            next = instruction[node + RE.offsetNext];
7f561c08de6b Initial load
duke
parents:
diff changeset
   184
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   185
        // if we have reached the end of the program then dont set the pointTo.
7f561c08de6b Initial load
duke
parents:
diff changeset
   186
        // im not sure if this will break any thing but passes all the tests.
7f561c08de6b Initial load
duke
parents:
diff changeset
   187
        if ( node < lenInstruction ) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   188
            // Point the last node in the chain to pointTo.
7f561c08de6b Initial load
duke
parents:
diff changeset
   189
            instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
7f561c08de6b Initial load
duke
parents:
diff changeset
   190
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   191
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   192
7f561c08de6b Initial load
duke
parents:
diff changeset
   193
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   194
     * Adds a new node
7f561c08de6b Initial load
duke
parents:
diff changeset
   195
     * @param opcode Opcode for node
7f561c08de6b Initial load
duke
parents:
diff changeset
   196
     * @param opdata Opdata for node (only the low 16 bits are currently used)
7f561c08de6b Initial load
duke
parents:
diff changeset
   197
     * @return Index of new node in program
7f561c08de6b Initial load
duke
parents:
diff changeset
   198
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   199
    int node(char opcode, int opdata)
7f561c08de6b Initial load
duke
parents:
diff changeset
   200
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   201
        // Make room for a new node
7f561c08de6b Initial load
duke
parents:
diff changeset
   202
        ensure(RE.nodeSize);
7f561c08de6b Initial load
duke
parents:
diff changeset
   203
7f561c08de6b Initial load
duke
parents:
diff changeset
   204
        // Add new node at end
7f561c08de6b Initial load
duke
parents:
diff changeset
   205
        instruction[lenInstruction + RE.offsetOpcode] = opcode;
7f561c08de6b Initial load
duke
parents:
diff changeset
   206
        instruction[lenInstruction + RE.offsetOpdata] = (char)opdata;
7f561c08de6b Initial load
duke
parents:
diff changeset
   207
        instruction[lenInstruction + RE.offsetNext] = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   208
        lenInstruction += RE.nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
   209
7f561c08de6b Initial load
duke
parents:
diff changeset
   210
        // Return index of new node
7f561c08de6b Initial load
duke
parents:
diff changeset
   211
        return lenInstruction - RE.nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
   212
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   213
7f561c08de6b Initial load
duke
parents:
diff changeset
   214
7f561c08de6b Initial load
duke
parents:
diff changeset
   215
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   216
     * Throws a new internal error exception
7f561c08de6b Initial load
duke
parents:
diff changeset
   217
     * @exception Error Thrown in the event of an internal error.
7f561c08de6b Initial load
duke
parents:
diff changeset
   218
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   219
    void internalError() throws Error
7f561c08de6b Initial load
duke
parents:
diff changeset
   220
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   221
        throw new Error("Internal error!");
7f561c08de6b Initial load
duke
parents:
diff changeset
   222
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   223
7f561c08de6b Initial load
duke
parents:
diff changeset
   224
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   225
     * Throws a new syntax error exception
7f561c08de6b Initial load
duke
parents:
diff changeset
   226
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   227
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   228
    void syntaxError(String s) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   229
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   230
        throw new RESyntaxException(s);
7f561c08de6b Initial load
duke
parents:
diff changeset
   231
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   232
7f561c08de6b Initial load
duke
parents:
diff changeset
   233
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   234
     * Allocate storage for brackets only as needed
7f561c08de6b Initial load
duke
parents:
diff changeset
   235
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   236
    void allocBrackets()
7f561c08de6b Initial load
duke
parents:
diff changeset
   237
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   238
        // Allocate bracket stacks if not already done
7f561c08de6b Initial load
duke
parents:
diff changeset
   239
        if (bracketStart == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   240
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   241
            // Allocate storage
7f561c08de6b Initial load
duke
parents:
diff changeset
   242
            bracketStart = new int[maxBrackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
   243
            bracketEnd   = new int[maxBrackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
   244
            bracketMin   = new int[maxBrackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
   245
            bracketOpt   = new int[maxBrackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
   246
7f561c08de6b Initial load
duke
parents:
diff changeset
   247
            // Initialize to invalid values
7f561c08de6b Initial load
duke
parents:
diff changeset
   248
            for (int i = 0; i < maxBrackets; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   249
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   250
                bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   251
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   252
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   253
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   254
7f561c08de6b Initial load
duke
parents:
diff changeset
   255
    /** Enlarge storage for brackets only as needed. */
7f561c08de6b Initial load
duke
parents:
diff changeset
   256
    synchronized void reallocBrackets() {
7f561c08de6b Initial load
duke
parents:
diff changeset
   257
        // trick the tricky
7f561c08de6b Initial load
duke
parents:
diff changeset
   258
        if (bracketStart == null) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   259
            allocBrackets();
7f561c08de6b Initial load
duke
parents:
diff changeset
   260
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   261
7f561c08de6b Initial load
duke
parents:
diff changeset
   262
        int new_size = maxBrackets * 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   263
        int[] new_bS = new int[new_size];
7f561c08de6b Initial load
duke
parents:
diff changeset
   264
        int[] new_bE = new int[new_size];
7f561c08de6b Initial load
duke
parents:
diff changeset
   265
        int[] new_bM = new int[new_size];
7f561c08de6b Initial load
duke
parents:
diff changeset
   266
        int[] new_bO = new int[new_size];
7f561c08de6b Initial load
duke
parents:
diff changeset
   267
        // Initialize to invalid values
7f561c08de6b Initial load
duke
parents:
diff changeset
   268
        for (int i=brackets; i<new_size; i++) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   269
            new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   270
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   271
        System.arraycopy(bracketStart,0, new_bS,0, brackets);
7f561c08de6b Initial load
duke
parents:
diff changeset
   272
        System.arraycopy(bracketEnd,0,   new_bE,0, brackets);
7f561c08de6b Initial load
duke
parents:
diff changeset
   273
        System.arraycopy(bracketMin,0,   new_bM,0, brackets);
7f561c08de6b Initial load
duke
parents:
diff changeset
   274
        System.arraycopy(bracketOpt,0,   new_bO,0, brackets);
7f561c08de6b Initial load
duke
parents:
diff changeset
   275
        bracketStart = new_bS;
7f561c08de6b Initial load
duke
parents:
diff changeset
   276
        bracketEnd   = new_bE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   277
        bracketMin   = new_bM;
7f561c08de6b Initial load
duke
parents:
diff changeset
   278
        bracketOpt   = new_bO;
7f561c08de6b Initial load
duke
parents:
diff changeset
   279
        maxBrackets  = new_size;
7f561c08de6b Initial load
duke
parents:
diff changeset
   280
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   281
7f561c08de6b Initial load
duke
parents:
diff changeset
   282
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   283
     * Match bracket {m,n} expression put results in bracket member variables
7f561c08de6b Initial load
duke
parents:
diff changeset
   284
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   285
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   286
    void bracket() throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   287
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   288
        // Current character must be a '{'
7f561c08de6b Initial load
duke
parents:
diff changeset
   289
        if (idx >= len || pattern.charAt(idx++) != '{')
7f561c08de6b Initial load
duke
parents:
diff changeset
   290
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   291
            internalError();
7f561c08de6b Initial load
duke
parents:
diff changeset
   292
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   293
7f561c08de6b Initial load
duke
parents:
diff changeset
   294
        // Next char must be a digit
7f561c08de6b Initial load
duke
parents:
diff changeset
   295
        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   296
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   297
            syntaxError("Expected digit");
7f561c08de6b Initial load
duke
parents:
diff changeset
   298
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   299
7f561c08de6b Initial load
duke
parents:
diff changeset
   300
        // Get min ('m' of {m,n}) number
7f561c08de6b Initial load
duke
parents:
diff changeset
   301
        StringBuffer number = new StringBuffer();
7f561c08de6b Initial load
duke
parents:
diff changeset
   302
        while (idx < len && Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   303
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   304
            number.append(pattern.charAt(idx++));
7f561c08de6b Initial load
duke
parents:
diff changeset
   305
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   306
        try
7f561c08de6b Initial load
duke
parents:
diff changeset
   307
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   308
            bracketMin[brackets] = Integer.parseInt(number.toString());
7f561c08de6b Initial load
duke
parents:
diff changeset
   309
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   310
        catch (NumberFormatException e)
7f561c08de6b Initial load
duke
parents:
diff changeset
   311
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   312
            syntaxError("Expected valid number");
7f561c08de6b Initial load
duke
parents:
diff changeset
   313
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   314
7f561c08de6b Initial load
duke
parents:
diff changeset
   315
        // If out of input, fail
7f561c08de6b Initial load
duke
parents:
diff changeset
   316
        if (idx >= len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   317
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   318
            syntaxError("Expected comma or right bracket");
7f561c08de6b Initial load
duke
parents:
diff changeset
   319
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   320
7f561c08de6b Initial load
duke
parents:
diff changeset
   321
        // If end of expr, optional limit is 0
7f561c08de6b Initial load
duke
parents:
diff changeset
   322
        if (pattern.charAt(idx) == '}')
7f561c08de6b Initial load
duke
parents:
diff changeset
   323
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   324
            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   325
            bracketOpt[brackets] = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   326
            return;
7f561c08de6b Initial load
duke
parents:
diff changeset
   327
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   328
7f561c08de6b Initial load
duke
parents:
diff changeset
   329
        // Must have at least {m,} and maybe {m,n}.
7f561c08de6b Initial load
duke
parents:
diff changeset
   330
        if (idx >= len || pattern.charAt(idx++) != ',')
7f561c08de6b Initial load
duke
parents:
diff changeset
   331
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   332
            syntaxError("Expected comma");
7f561c08de6b Initial load
duke
parents:
diff changeset
   333
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   334
7f561c08de6b Initial load
duke
parents:
diff changeset
   335
        // If out of input, fail
7f561c08de6b Initial load
duke
parents:
diff changeset
   336
        if (idx >= len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   337
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   338
            syntaxError("Expected comma or right bracket");
7f561c08de6b Initial load
duke
parents:
diff changeset
   339
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   340
7f561c08de6b Initial load
duke
parents:
diff changeset
   341
        // If {m,} max is unlimited
7f561c08de6b Initial load
duke
parents:
diff changeset
   342
        if (pattern.charAt(idx) == '}')
7f561c08de6b Initial load
duke
parents:
diff changeset
   343
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   344
            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   345
            bracketOpt[brackets] = bracketUnbounded;
7f561c08de6b Initial load
duke
parents:
diff changeset
   346
            return;
7f561c08de6b Initial load
duke
parents:
diff changeset
   347
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   348
7f561c08de6b Initial load
duke
parents:
diff changeset
   349
        // Next char must be a digit
7f561c08de6b Initial load
duke
parents:
diff changeset
   350
        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   351
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   352
            syntaxError("Expected digit");
7f561c08de6b Initial load
duke
parents:
diff changeset
   353
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   354
7f561c08de6b Initial load
duke
parents:
diff changeset
   355
        // Get max number
7f561c08de6b Initial load
duke
parents:
diff changeset
   356
        number.setLength(0);
7f561c08de6b Initial load
duke
parents:
diff changeset
   357
        while (idx < len && Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   358
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   359
            number.append(pattern.charAt(idx++));
7f561c08de6b Initial load
duke
parents:
diff changeset
   360
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   361
        try
7f561c08de6b Initial load
duke
parents:
diff changeset
   362
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   363
            bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
   364
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   365
        catch (NumberFormatException e)
7f561c08de6b Initial load
duke
parents:
diff changeset
   366
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   367
            syntaxError("Expected valid number");
7f561c08de6b Initial load
duke
parents:
diff changeset
   368
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   369
7f561c08de6b Initial load
duke
parents:
diff changeset
   370
        // Optional repetitions must be >= 0
7f561c08de6b Initial load
duke
parents:
diff changeset
   371
        if (bracketOpt[brackets] < 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   372
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   373
            syntaxError("Bad range");
7f561c08de6b Initial load
duke
parents:
diff changeset
   374
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   375
7f561c08de6b Initial load
duke
parents:
diff changeset
   376
        // Must have close brace
7f561c08de6b Initial load
duke
parents:
diff changeset
   377
        if (idx >= len || pattern.charAt(idx++) != '}')
7f561c08de6b Initial load
duke
parents:
diff changeset
   378
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   379
            syntaxError("Missing close brace");
7f561c08de6b Initial load
duke
parents:
diff changeset
   380
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   381
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   382
7f561c08de6b Initial load
duke
parents:
diff changeset
   383
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   384
     * Match an escape sequence.  Handles quoted chars and octal escapes as well
7f561c08de6b Initial load
duke
parents:
diff changeset
   385
     * as normal escape characters.  Always advances the input stream by the
7f561c08de6b Initial load
duke
parents:
diff changeset
   386
     * right amount. This code "understands" the subtle difference between an
7f561c08de6b Initial load
duke
parents:
diff changeset
   387
     * octal escape and a backref.  You can access the type of ESC_CLASS or
7f561c08de6b Initial load
duke
parents:
diff changeset
   388
     * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].
7f561c08de6b Initial load
duke
parents:
diff changeset
   389
     * @return ESC_* code or character if simple escape
7f561c08de6b Initial load
duke
parents:
diff changeset
   390
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   391
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   392
    int escape() throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   393
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   394
        // "Shouldn't" happen
7f561c08de6b Initial load
duke
parents:
diff changeset
   395
        if (pattern.charAt(idx) != '\\')
7f561c08de6b Initial load
duke
parents:
diff changeset
   396
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   397
            internalError();
7f561c08de6b Initial load
duke
parents:
diff changeset
   398
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   399
7f561c08de6b Initial load
duke
parents:
diff changeset
   400
        // Escape shouldn't occur as last character in string!
7f561c08de6b Initial load
duke
parents:
diff changeset
   401
        if (idx + 1 == len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   402
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   403
            syntaxError("Escape terminates string");
7f561c08de6b Initial load
duke
parents:
diff changeset
   404
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   405
7f561c08de6b Initial load
duke
parents:
diff changeset
   406
        // Switch on character after backslash
7f561c08de6b Initial load
duke
parents:
diff changeset
   407
        idx += 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   408
        char escapeChar = pattern.charAt(idx - 1);
7f561c08de6b Initial load
duke
parents:
diff changeset
   409
        switch (escapeChar)
7f561c08de6b Initial load
duke
parents:
diff changeset
   410
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   411
            case RE.E_BOUND:
7f561c08de6b Initial load
duke
parents:
diff changeset
   412
            case RE.E_NBOUND:
7f561c08de6b Initial load
duke
parents:
diff changeset
   413
                return ESC_COMPLEX;
7f561c08de6b Initial load
duke
parents:
diff changeset
   414
7f561c08de6b Initial load
duke
parents:
diff changeset
   415
            case RE.E_ALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
   416
            case RE.E_NALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
   417
            case RE.E_SPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   418
            case RE.E_NSPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   419
            case RE.E_DIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
   420
            case RE.E_NDIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
   421
                return ESC_CLASS;
7f561c08de6b Initial load
duke
parents:
diff changeset
   422
7f561c08de6b Initial load
duke
parents:
diff changeset
   423
            case 'u':
7f561c08de6b Initial load
duke
parents:
diff changeset
   424
            case 'x':
7f561c08de6b Initial load
duke
parents:
diff changeset
   425
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   426
                    // Exact required hex digits for escape type
7f561c08de6b Initial load
duke
parents:
diff changeset
   427
                    int hexDigits = (escapeChar == 'u' ? 4 : 2);
7f561c08de6b Initial load
duke
parents:
diff changeset
   428
7f561c08de6b Initial load
duke
parents:
diff changeset
   429
                    // Parse up to hexDigits characters from input
7f561c08de6b Initial load
duke
parents:
diff changeset
   430
                    int val = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   431
                    for ( ; idx < len && hexDigits-- > 0; idx++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   432
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   433
                        // Get char
7f561c08de6b Initial load
duke
parents:
diff changeset
   434
                        char c = pattern.charAt(idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   435
7f561c08de6b Initial load
duke
parents:
diff changeset
   436
                        // If it's a hexadecimal digit (0-9)
7f561c08de6b Initial load
duke
parents:
diff changeset
   437
                        if (c >= '0' && c <= '9')
7f561c08de6b Initial load
duke
parents:
diff changeset
   438
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   439
                            // Compute new value
7f561c08de6b Initial load
duke
parents:
diff changeset
   440
                            val = (val << 4) + c - '0';
7f561c08de6b Initial load
duke
parents:
diff changeset
   441
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   442
                        else
7f561c08de6b Initial load
duke
parents:
diff changeset
   443
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   444
                            // If it's a hexadecimal letter (a-f)
7f561c08de6b Initial load
duke
parents:
diff changeset
   445
                            c = Character.toLowerCase(c);
7f561c08de6b Initial load
duke
parents:
diff changeset
   446
                            if (c >= 'a' && c <= 'f')
7f561c08de6b Initial load
duke
parents:
diff changeset
   447
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   448
                                // Compute new value
7f561c08de6b Initial load
duke
parents:
diff changeset
   449
                                val = (val << 4) + (c - 'a') + 10;
7f561c08de6b Initial load
duke
parents:
diff changeset
   450
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   451
                            else
7f561c08de6b Initial load
duke
parents:
diff changeset
   452
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   453
                                // If it's not a valid digit or hex letter, the escape must be invalid
7f561c08de6b Initial load
duke
parents:
diff changeset
   454
                                // because hexDigits of input have not been absorbed yet.
7f561c08de6b Initial load
duke
parents:
diff changeset
   455
                                syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar);
7f561c08de6b Initial load
duke
parents:
diff changeset
   456
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   457
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   458
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   459
                    return val;
7f561c08de6b Initial load
duke
parents:
diff changeset
   460
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   461
7f561c08de6b Initial load
duke
parents:
diff changeset
   462
            case 't':
7f561c08de6b Initial load
duke
parents:
diff changeset
   463
                return '\t';
7f561c08de6b Initial load
duke
parents:
diff changeset
   464
7f561c08de6b Initial load
duke
parents:
diff changeset
   465
            case 'n':
7f561c08de6b Initial load
duke
parents:
diff changeset
   466
                return '\n';
7f561c08de6b Initial load
duke
parents:
diff changeset
   467
7f561c08de6b Initial load
duke
parents:
diff changeset
   468
            case 'r':
7f561c08de6b Initial load
duke
parents:
diff changeset
   469
                return '\r';
7f561c08de6b Initial load
duke
parents:
diff changeset
   470
7f561c08de6b Initial load
duke
parents:
diff changeset
   471
            case 'f':
7f561c08de6b Initial load
duke
parents:
diff changeset
   472
                return '\f';
7f561c08de6b Initial load
duke
parents:
diff changeset
   473
7f561c08de6b Initial load
duke
parents:
diff changeset
   474
            case '0':
7f561c08de6b Initial load
duke
parents:
diff changeset
   475
            case '1':
7f561c08de6b Initial load
duke
parents:
diff changeset
   476
            case '2':
7f561c08de6b Initial load
duke
parents:
diff changeset
   477
            case '3':
7f561c08de6b Initial load
duke
parents:
diff changeset
   478
            case '4':
7f561c08de6b Initial load
duke
parents:
diff changeset
   479
            case '5':
7f561c08de6b Initial load
duke
parents:
diff changeset
   480
            case '6':
7f561c08de6b Initial load
duke
parents:
diff changeset
   481
            case '7':
7f561c08de6b Initial load
duke
parents:
diff changeset
   482
            case '8':
7f561c08de6b Initial load
duke
parents:
diff changeset
   483
            case '9':
7f561c08de6b Initial load
duke
parents:
diff changeset
   484
7f561c08de6b Initial load
duke
parents:
diff changeset
   485
                // An octal escape starts with a 0 or has two digits in a row
7f561c08de6b Initial load
duke
parents:
diff changeset
   486
                if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0')
7f561c08de6b Initial load
duke
parents:
diff changeset
   487
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   488
                    // Handle \nnn octal escapes
7f561c08de6b Initial load
duke
parents:
diff changeset
   489
                    int val = escapeChar - '0';
7f561c08de6b Initial load
duke
parents:
diff changeset
   490
                    if (idx < len && Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   491
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   492
                        val = ((val << 3) + (pattern.charAt(idx++) - '0'));
7f561c08de6b Initial load
duke
parents:
diff changeset
   493
                        if (idx < len && Character.isDigit(pattern.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
   494
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   495
                            val = ((val << 3) + (pattern.charAt(idx++) - '0'));
7f561c08de6b Initial load
duke
parents:
diff changeset
   496
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   497
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   498
                    return val;
7f561c08de6b Initial load
duke
parents:
diff changeset
   499
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   500
7f561c08de6b Initial load
duke
parents:
diff changeset
   501
                // It's actually a backreference (\[1-9]), not an escape
7f561c08de6b Initial load
duke
parents:
diff changeset
   502
                return ESC_BACKREF;
7f561c08de6b Initial load
duke
parents:
diff changeset
   503
7f561c08de6b Initial load
duke
parents:
diff changeset
   504
            default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   505
7f561c08de6b Initial load
duke
parents:
diff changeset
   506
                // Simple quoting of a character
7f561c08de6b Initial load
duke
parents:
diff changeset
   507
                return escapeChar;
7f561c08de6b Initial load
duke
parents:
diff changeset
   508
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   509
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   510
7f561c08de6b Initial load
duke
parents:
diff changeset
   511
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   512
     * Compile a character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   513
     * @return Index of class node
7f561c08de6b Initial load
duke
parents:
diff changeset
   514
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   515
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   516
    int characterClass() throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   517
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   518
        // Check for bad calling or empty class
7f561c08de6b Initial load
duke
parents:
diff changeset
   519
        if (pattern.charAt(idx) != '[')
7f561c08de6b Initial load
duke
parents:
diff changeset
   520
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   521
            internalError();
7f561c08de6b Initial load
duke
parents:
diff changeset
   522
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   523
7f561c08de6b Initial load
duke
parents:
diff changeset
   524
        // Check for unterminated or empty class
7f561c08de6b Initial load
duke
parents:
diff changeset
   525
        if ((idx + 1) >= len || pattern.charAt(++idx) == ']')
7f561c08de6b Initial load
duke
parents:
diff changeset
   526
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   527
            syntaxError("Empty or unterminated class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   528
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   529
7f561c08de6b Initial load
duke
parents:
diff changeset
   530
        // Check for POSIX character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   531
        if (idx < len && pattern.charAt(idx) == ':')
7f561c08de6b Initial load
duke
parents:
diff changeset
   532
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   533
            // Skip colon
7f561c08de6b Initial load
duke
parents:
diff changeset
   534
            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   535
7f561c08de6b Initial load
duke
parents:
diff changeset
   536
            // POSIX character classes are denoted with lowercase ASCII strings
7f561c08de6b Initial load
duke
parents:
diff changeset
   537
            int idxStart = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   538
            while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z')
7f561c08de6b Initial load
duke
parents:
diff changeset
   539
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   540
                idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   541
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   542
7f561c08de6b Initial load
duke
parents:
diff changeset
   543
            // Should be a ":]" to terminate the POSIX character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   544
            if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
7f561c08de6b Initial load
duke
parents:
diff changeset
   545
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   546
                // Get character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   547
                String charClass = pattern.substring(idxStart, idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   548
7f561c08de6b Initial load
duke
parents:
diff changeset
   549
                // Select the POSIX class id
7f561c08de6b Initial load
duke
parents:
diff changeset
   550
                Character i = (Character)hashPOSIX.get(charClass);
7f561c08de6b Initial load
duke
parents:
diff changeset
   551
                if (i != null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   552
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   553
                    // Move past colon and right bracket
7f561c08de6b Initial load
duke
parents:
diff changeset
   554
                    idx += 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   555
7f561c08de6b Initial load
duke
parents:
diff changeset
   556
                    // Return new POSIX character class node
7f561c08de6b Initial load
duke
parents:
diff changeset
   557
                    return node(RE.OP_POSIXCLASS, i.charValue());
7f561c08de6b Initial load
duke
parents:
diff changeset
   558
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   559
                syntaxError("Invalid POSIX character class '" + charClass + "'");
7f561c08de6b Initial load
duke
parents:
diff changeset
   560
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   561
            syntaxError("Invalid POSIX character class syntax");
7f561c08de6b Initial load
duke
parents:
diff changeset
   562
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   563
7f561c08de6b Initial load
duke
parents:
diff changeset
   564
        // Try to build a class.  Create OP_ANYOF node
7f561c08de6b Initial load
duke
parents:
diff changeset
   565
        int ret = node(RE.OP_ANYOF, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
   566
7f561c08de6b Initial load
duke
parents:
diff changeset
   567
        // Parse class declaration
7f561c08de6b Initial load
duke
parents:
diff changeset
   568
        char CHAR_INVALID = Character.MAX_VALUE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   569
        char last = CHAR_INVALID;
7f561c08de6b Initial load
duke
parents:
diff changeset
   570
        char simpleChar = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   571
        boolean include = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
   572
        boolean definingRange = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
   573
        int idxFirst = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   574
        char rangeStart = Character.MIN_VALUE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   575
        char rangeEnd;
7f561c08de6b Initial load
duke
parents:
diff changeset
   576
        RERange range = new RERange();
7f561c08de6b Initial load
duke
parents:
diff changeset
   577
        while (idx < len && pattern.charAt(idx) != ']')
7f561c08de6b Initial load
duke
parents:
diff changeset
   578
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   579
7f561c08de6b Initial load
duke
parents:
diff changeset
   580
            switchOnCharacter:
7f561c08de6b Initial load
duke
parents:
diff changeset
   581
7f561c08de6b Initial load
duke
parents:
diff changeset
   582
            // Switch on character
7f561c08de6b Initial load
duke
parents:
diff changeset
   583
            switch (pattern.charAt(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
   584
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   585
                case '^':
7f561c08de6b Initial load
duke
parents:
diff changeset
   586
                    include = !include;
7f561c08de6b Initial load
duke
parents:
diff changeset
   587
                    if (idx == idxFirst)
7f561c08de6b Initial load
duke
parents:
diff changeset
   588
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   589
                        range.include(Character.MIN_VALUE, Character.MAX_VALUE, true);
7f561c08de6b Initial load
duke
parents:
diff changeset
   590
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   591
                    idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   592
                    continue;
7f561c08de6b Initial load
duke
parents:
diff changeset
   593
7f561c08de6b Initial load
duke
parents:
diff changeset
   594
                case '\\':
7f561c08de6b Initial load
duke
parents:
diff changeset
   595
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   596
                    // Escape always advances the stream
7f561c08de6b Initial load
duke
parents:
diff changeset
   597
                    int c;
7f561c08de6b Initial load
duke
parents:
diff changeset
   598
                    switch (c = escape ())
7f561c08de6b Initial load
duke
parents:
diff changeset
   599
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   600
                        case ESC_COMPLEX:
7f561c08de6b Initial load
duke
parents:
diff changeset
   601
                        case ESC_BACKREF:
7f561c08de6b Initial load
duke
parents:
diff changeset
   602
7f561c08de6b Initial load
duke
parents:
diff changeset
   603
                            // Word boundaries and backrefs not allowed in a character class!
7f561c08de6b Initial load
duke
parents:
diff changeset
   604
                            syntaxError("Bad character class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   605
7f561c08de6b Initial load
duke
parents:
diff changeset
   606
                        case ESC_CLASS:
7f561c08de6b Initial load
duke
parents:
diff changeset
   607
7f561c08de6b Initial load
duke
parents:
diff changeset
   608
                            // Classes can't be an endpoint of a range
7f561c08de6b Initial load
duke
parents:
diff changeset
   609
                            if (definingRange)
7f561c08de6b Initial load
duke
parents:
diff changeset
   610
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   611
                                syntaxError("Bad character class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   612
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   613
7f561c08de6b Initial load
duke
parents:
diff changeset
   614
                            // Handle specific type of class (some are ok)
7f561c08de6b Initial load
duke
parents:
diff changeset
   615
                            switch (pattern.charAt(idx - 1))
7f561c08de6b Initial load
duke
parents:
diff changeset
   616
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   617
                                case RE.E_NSPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   618
                                case RE.E_NDIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
   619
                                case RE.E_NALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
   620
                                    syntaxError("Bad character class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   621
7f561c08de6b Initial load
duke
parents:
diff changeset
   622
                                case RE.E_SPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   623
                                    range.include('\t', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   624
                                    range.include('\r', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   625
                                    range.include('\f', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   626
                                    range.include('\n', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   627
                                    range.include('\b', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   628
                                    range.include(' ', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   629
                                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   630
7f561c08de6b Initial load
duke
parents:
diff changeset
   631
                                case RE.E_ALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
   632
                                    range.include('a', 'z', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   633
                                    range.include('A', 'Z', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   634
                                    range.include('_', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   635
7f561c08de6b Initial load
duke
parents:
diff changeset
   636
                                    // Fall through!
7f561c08de6b Initial load
duke
parents:
diff changeset
   637
7f561c08de6b Initial load
duke
parents:
diff changeset
   638
                                case RE.E_DIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
   639
                                    range.include('0', '9', include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   640
                                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   641
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   642
7f561c08de6b Initial load
duke
parents:
diff changeset
   643
                            // Make last char invalid (can't be a range start)
7f561c08de6b Initial load
duke
parents:
diff changeset
   644
                            last = CHAR_INVALID;
7f561c08de6b Initial load
duke
parents:
diff changeset
   645
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   646
7f561c08de6b Initial load
duke
parents:
diff changeset
   647
                        default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   648
7f561c08de6b Initial load
duke
parents:
diff changeset
   649
                            // Escape is simple so treat as a simple char
7f561c08de6b Initial load
duke
parents:
diff changeset
   650
                            simpleChar = (char) c;
7f561c08de6b Initial load
duke
parents:
diff changeset
   651
                            break switchOnCharacter;
7f561c08de6b Initial load
duke
parents:
diff changeset
   652
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   653
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   654
                continue;
7f561c08de6b Initial load
duke
parents:
diff changeset
   655
7f561c08de6b Initial load
duke
parents:
diff changeset
   656
                case '-':
7f561c08de6b Initial load
duke
parents:
diff changeset
   657
7f561c08de6b Initial load
duke
parents:
diff changeset
   658
                    // Start a range if one isn't already started
7f561c08de6b Initial load
duke
parents:
diff changeset
   659
                    if (definingRange)
7f561c08de6b Initial load
duke
parents:
diff changeset
   660
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   661
                        syntaxError("Bad class range");
7f561c08de6b Initial load
duke
parents:
diff changeset
   662
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   663
                    definingRange = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
   664
7f561c08de6b Initial load
duke
parents:
diff changeset
   665
                    // If no last character, start of range is 0
7f561c08de6b Initial load
duke
parents:
diff changeset
   666
                    rangeStart = (last == CHAR_INVALID ? 0 : last);
7f561c08de6b Initial load
duke
parents:
diff changeset
   667
7f561c08de6b Initial load
duke
parents:
diff changeset
   668
                    // Premature end of range. define up to Character.MAX_VALUE
7f561c08de6b Initial load
duke
parents:
diff changeset
   669
                    if ((idx + 1) < len && pattern.charAt(++idx) == ']')
7f561c08de6b Initial load
duke
parents:
diff changeset
   670
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   671
                        simpleChar = Character.MAX_VALUE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   672
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   673
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   674
                    continue;
7f561c08de6b Initial load
duke
parents:
diff changeset
   675
7f561c08de6b Initial load
duke
parents:
diff changeset
   676
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   677
                    simpleChar = pattern.charAt(idx++);
7f561c08de6b Initial load
duke
parents:
diff changeset
   678
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   679
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   680
7f561c08de6b Initial load
duke
parents:
diff changeset
   681
            // Handle simple character simpleChar
7f561c08de6b Initial load
duke
parents:
diff changeset
   682
            if (definingRange)
7f561c08de6b Initial load
duke
parents:
diff changeset
   683
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   684
                // if we are defining a range make it now
7f561c08de6b Initial load
duke
parents:
diff changeset
   685
                rangeEnd = simpleChar;
7f561c08de6b Initial load
duke
parents:
diff changeset
   686
7f561c08de6b Initial load
duke
parents:
diff changeset
   687
                // Actually create a range if the range is ok
7f561c08de6b Initial load
duke
parents:
diff changeset
   688
                if (rangeStart >= rangeEnd)
7f561c08de6b Initial load
duke
parents:
diff changeset
   689
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   690
                    syntaxError("Bad character class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   691
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   692
                range.include(rangeStart, rangeEnd, include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   693
7f561c08de6b Initial load
duke
parents:
diff changeset
   694
                // We are done defining the range
7f561c08de6b Initial load
duke
parents:
diff changeset
   695
                last = CHAR_INVALID;
7f561c08de6b Initial load
duke
parents:
diff changeset
   696
                definingRange = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
   697
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   698
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
   699
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   700
                // If simple character and not start of range, include it
7f561c08de6b Initial load
duke
parents:
diff changeset
   701
                if (idx >= len || pattern.charAt(idx) != '-')
7f561c08de6b Initial load
duke
parents:
diff changeset
   702
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   703
                    range.include(simpleChar, include);
7f561c08de6b Initial load
duke
parents:
diff changeset
   704
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   705
                last = simpleChar;
7f561c08de6b Initial load
duke
parents:
diff changeset
   706
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   707
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   708
7f561c08de6b Initial load
duke
parents:
diff changeset
   709
        // Shouldn't be out of input
7f561c08de6b Initial load
duke
parents:
diff changeset
   710
        if (idx == len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   711
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   712
            syntaxError("Unterminated character class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   713
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   714
7f561c08de6b Initial load
duke
parents:
diff changeset
   715
        // Absorb the ']' end of class marker
7f561c08de6b Initial load
duke
parents:
diff changeset
   716
        idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   717
7f561c08de6b Initial load
duke
parents:
diff changeset
   718
        // Emit character class definition
7f561c08de6b Initial load
duke
parents:
diff changeset
   719
        instruction[ret + RE.offsetOpdata] = (char)range.num;
7f561c08de6b Initial load
duke
parents:
diff changeset
   720
        for (int i = 0; i < range.num; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   721
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   722
            emit((char)range.minRange[i]);
7f561c08de6b Initial load
duke
parents:
diff changeset
   723
            emit((char)range.maxRange[i]);
7f561c08de6b Initial load
duke
parents:
diff changeset
   724
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   725
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
   726
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   727
7f561c08de6b Initial load
duke
parents:
diff changeset
   728
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   729
     * Absorb an atomic character string.  This method is a little tricky because
7f561c08de6b Initial load
duke
parents:
diff changeset
   730
     * it can un-include the last character of string if a closure operator follows.
7f561c08de6b Initial load
duke
parents:
diff changeset
   731
     * This is correct because *+? have higher precedence than concatentation (thus
7f561c08de6b Initial load
duke
parents:
diff changeset
   732
     * ABC* means AB(C*) and NOT (ABC)*).
7f561c08de6b Initial load
duke
parents:
diff changeset
   733
     * @return Index of new atom node
7f561c08de6b Initial load
duke
parents:
diff changeset
   734
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   735
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   736
    int atom() throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   737
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   738
        // Create a string node
7f561c08de6b Initial load
duke
parents:
diff changeset
   739
        int ret = node(RE.OP_ATOM, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
   740
7f561c08de6b Initial load
duke
parents:
diff changeset
   741
        // Length of atom
7f561c08de6b Initial load
duke
parents:
diff changeset
   742
        int lenAtom = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   743
7f561c08de6b Initial load
duke
parents:
diff changeset
   744
        // Loop while we've got input
7f561c08de6b Initial load
duke
parents:
diff changeset
   745
7f561c08de6b Initial load
duke
parents:
diff changeset
   746
        atomLoop:
7f561c08de6b Initial load
duke
parents:
diff changeset
   747
7f561c08de6b Initial load
duke
parents:
diff changeset
   748
        while (idx < len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   749
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   750
            // Is there a next char?
7f561c08de6b Initial load
duke
parents:
diff changeset
   751
            if ((idx + 1) < len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   752
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   753
                char c = pattern.charAt(idx + 1);
7f561c08de6b Initial load
duke
parents:
diff changeset
   754
7f561c08de6b Initial load
duke
parents:
diff changeset
   755
                // If the next 'char' is an escape, look past the whole escape
7f561c08de6b Initial load
duke
parents:
diff changeset
   756
                if (pattern.charAt(idx) == '\\')
7f561c08de6b Initial load
duke
parents:
diff changeset
   757
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   758
                    int idxEscape = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   759
                    escape();
7f561c08de6b Initial load
duke
parents:
diff changeset
   760
                    if (idx < len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   761
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   762
                        c = pattern.charAt(idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   763
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   764
                    idx = idxEscape;
7f561c08de6b Initial load
duke
parents:
diff changeset
   765
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   766
7f561c08de6b Initial load
duke
parents:
diff changeset
   767
                // Switch on next char
7f561c08de6b Initial load
duke
parents:
diff changeset
   768
                switch (c)
7f561c08de6b Initial load
duke
parents:
diff changeset
   769
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   770
                    case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   771
                    case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
   772
                    case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
   773
                    case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
   774
7f561c08de6b Initial load
duke
parents:
diff changeset
   775
                        // If the next character is a closure operator and our atom is non-empty, the
7f561c08de6b Initial load
duke
parents:
diff changeset
   776
                        // current character should bind to the closure operator rather than the atom
7f561c08de6b Initial load
duke
parents:
diff changeset
   777
                        if (lenAtom != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   778
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   779
                            break atomLoop;
7f561c08de6b Initial load
duke
parents:
diff changeset
   780
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   781
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   782
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   783
7f561c08de6b Initial load
duke
parents:
diff changeset
   784
            // Switch on current char
7f561c08de6b Initial load
duke
parents:
diff changeset
   785
            switch (pattern.charAt(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
   786
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   787
                case ']':
7f561c08de6b Initial load
duke
parents:
diff changeset
   788
                case '^':
7f561c08de6b Initial load
duke
parents:
diff changeset
   789
                case '$':
7f561c08de6b Initial load
duke
parents:
diff changeset
   790
                case '.':
7f561c08de6b Initial load
duke
parents:
diff changeset
   791
                case '[':
7f561c08de6b Initial load
duke
parents:
diff changeset
   792
                case '(':
7f561c08de6b Initial load
duke
parents:
diff changeset
   793
                case ')':
7f561c08de6b Initial load
duke
parents:
diff changeset
   794
                case '|':
7f561c08de6b Initial load
duke
parents:
diff changeset
   795
                    break atomLoop;
7f561c08de6b Initial load
duke
parents:
diff changeset
   796
7f561c08de6b Initial load
duke
parents:
diff changeset
   797
                case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   798
                case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
   799
                case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
   800
                case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
   801
7f561c08de6b Initial load
duke
parents:
diff changeset
   802
                    // We should have an atom by now
7f561c08de6b Initial load
duke
parents:
diff changeset
   803
                    if (lenAtom == 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   804
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   805
                        // No atom before closure
7f561c08de6b Initial load
duke
parents:
diff changeset
   806
                        syntaxError("Missing operand to closure");
7f561c08de6b Initial load
duke
parents:
diff changeset
   807
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   808
                    break atomLoop;
7f561c08de6b Initial load
duke
parents:
diff changeset
   809
7f561c08de6b Initial load
duke
parents:
diff changeset
   810
                case '\\':
7f561c08de6b Initial load
duke
parents:
diff changeset
   811
7f561c08de6b Initial load
duke
parents:
diff changeset
   812
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   813
                        // Get the escaped character (advances input automatically)
7f561c08de6b Initial load
duke
parents:
diff changeset
   814
                        int idxBeforeEscape = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   815
                        int c = escape();
7f561c08de6b Initial load
duke
parents:
diff changeset
   816
7f561c08de6b Initial load
duke
parents:
diff changeset
   817
                        // Check if it's a simple escape (as opposed to, say, a backreference)
7f561c08de6b Initial load
duke
parents:
diff changeset
   818
                        if ((c & ESC_MASK) == ESC_MASK)
7f561c08de6b Initial load
duke
parents:
diff changeset
   819
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   820
                            // Not a simple escape, so backup to where we were before the escape.
7f561c08de6b Initial load
duke
parents:
diff changeset
   821
                            idx = idxBeforeEscape;
7f561c08de6b Initial load
duke
parents:
diff changeset
   822
                            break atomLoop;
7f561c08de6b Initial load
duke
parents:
diff changeset
   823
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   824
7f561c08de6b Initial load
duke
parents:
diff changeset
   825
                        // Add escaped char to atom
7f561c08de6b Initial load
duke
parents:
diff changeset
   826
                        emit((char) c);
7f561c08de6b Initial load
duke
parents:
diff changeset
   827
                        lenAtom++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   828
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   829
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   830
7f561c08de6b Initial load
duke
parents:
diff changeset
   831
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   832
7f561c08de6b Initial load
duke
parents:
diff changeset
   833
                    // Add normal character to atom
7f561c08de6b Initial load
duke
parents:
diff changeset
   834
                    emit(pattern.charAt(idx++));
7f561c08de6b Initial load
duke
parents:
diff changeset
   835
                    lenAtom++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   836
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   837
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   838
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   839
7f561c08de6b Initial load
duke
parents:
diff changeset
   840
        // This "shouldn't" happen
7f561c08de6b Initial load
duke
parents:
diff changeset
   841
        if (lenAtom == 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   842
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   843
            internalError();
7f561c08de6b Initial load
duke
parents:
diff changeset
   844
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   845
7f561c08de6b Initial load
duke
parents:
diff changeset
   846
        // Emit the atom length into the program
7f561c08de6b Initial load
duke
parents:
diff changeset
   847
        instruction[ret + RE.offsetOpdata] = (char)lenAtom;
7f561c08de6b Initial load
duke
parents:
diff changeset
   848
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
   849
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   850
7f561c08de6b Initial load
duke
parents:
diff changeset
   851
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   852
     * Match a terminal node.
7f561c08de6b Initial load
duke
parents:
diff changeset
   853
     * @param flags Flags
7f561c08de6b Initial load
duke
parents:
diff changeset
   854
     * @return Index of terminal node (closeable)
7f561c08de6b Initial load
duke
parents:
diff changeset
   855
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   856
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   857
    int terminal(int[] flags) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   858
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   859
        switch (pattern.charAt(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
   860
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   861
        case RE.OP_EOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
   862
        case RE.OP_BOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
   863
        case RE.OP_ANY:
7f561c08de6b Initial load
duke
parents:
diff changeset
   864
            return node(pattern.charAt(idx++), 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
   865
7f561c08de6b Initial load
duke
parents:
diff changeset
   866
        case '[':
7f561c08de6b Initial load
duke
parents:
diff changeset
   867
            return characterClass();
7f561c08de6b Initial load
duke
parents:
diff changeset
   868
7f561c08de6b Initial load
duke
parents:
diff changeset
   869
        case '(':
7f561c08de6b Initial load
duke
parents:
diff changeset
   870
            return expr(flags);
7f561c08de6b Initial load
duke
parents:
diff changeset
   871
7f561c08de6b Initial load
duke
parents:
diff changeset
   872
        case ')':
7f561c08de6b Initial load
duke
parents:
diff changeset
   873
            syntaxError("Unexpected close paren");
7f561c08de6b Initial load
duke
parents:
diff changeset
   874
7f561c08de6b Initial load
duke
parents:
diff changeset
   875
        case '|':
7f561c08de6b Initial load
duke
parents:
diff changeset
   876
            internalError();
7f561c08de6b Initial load
duke
parents:
diff changeset
   877
7f561c08de6b Initial load
duke
parents:
diff changeset
   878
        case ']':
7f561c08de6b Initial load
duke
parents:
diff changeset
   879
            syntaxError("Mismatched class");
7f561c08de6b Initial load
duke
parents:
diff changeset
   880
7f561c08de6b Initial load
duke
parents:
diff changeset
   881
        case 0:
7f561c08de6b Initial load
duke
parents:
diff changeset
   882
            syntaxError("Unexpected end of input");
7f561c08de6b Initial load
duke
parents:
diff changeset
   883
7f561c08de6b Initial load
duke
parents:
diff changeset
   884
        case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
   885
        case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
   886
        case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   887
        case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
   888
            syntaxError("Missing operand to closure");
7f561c08de6b Initial load
duke
parents:
diff changeset
   889
7f561c08de6b Initial load
duke
parents:
diff changeset
   890
        case '\\':
7f561c08de6b Initial load
duke
parents:
diff changeset
   891
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   892
                // Don't forget, escape() advances the input stream!
7f561c08de6b Initial load
duke
parents:
diff changeset
   893
                int idxBeforeEscape = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   894
7f561c08de6b Initial load
duke
parents:
diff changeset
   895
                // Switch on escaped character
7f561c08de6b Initial load
duke
parents:
diff changeset
   896
                switch (escape())
7f561c08de6b Initial load
duke
parents:
diff changeset
   897
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   898
                    case ESC_CLASS:
7f561c08de6b Initial load
duke
parents:
diff changeset
   899
                    case ESC_COMPLEX:
7f561c08de6b Initial load
duke
parents:
diff changeset
   900
                        flags[0] &= ~NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   901
                        return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
7f561c08de6b Initial load
duke
parents:
diff changeset
   902
7f561c08de6b Initial load
duke
parents:
diff changeset
   903
                    case ESC_BACKREF:
7f561c08de6b Initial load
duke
parents:
diff changeset
   904
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   905
                            char backreference = (char)(pattern.charAt(idx - 1) - '0');
7f561c08de6b Initial load
duke
parents:
diff changeset
   906
                            if (parens <= backreference)
7f561c08de6b Initial load
duke
parents:
diff changeset
   907
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   908
                                syntaxError("Bad backreference");
7f561c08de6b Initial load
duke
parents:
diff changeset
   909
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   910
                            flags[0] |= NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   911
                            return node(RE.OP_BACKREF, backreference);
7f561c08de6b Initial load
duke
parents:
diff changeset
   912
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   913
7f561c08de6b Initial load
duke
parents:
diff changeset
   914
                    default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   915
7f561c08de6b Initial load
duke
parents:
diff changeset
   916
                        // We had a simple escape and we want to have it end up in
7f561c08de6b Initial load
duke
parents:
diff changeset
   917
                        // an atom, so we back up and fall though to the default handling
7f561c08de6b Initial load
duke
parents:
diff changeset
   918
                        idx = idxBeforeEscape;
7f561c08de6b Initial load
duke
parents:
diff changeset
   919
                        flags[0] &= ~NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   920
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   921
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   922
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   923
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   924
7f561c08de6b Initial load
duke
parents:
diff changeset
   925
        // Everything above either fails or returns.
7f561c08de6b Initial load
duke
parents:
diff changeset
   926
        // If it wasn't one of the above, it must be the start of an atom.
7f561c08de6b Initial load
duke
parents:
diff changeset
   927
        flags[0] &= ~NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   928
        return atom();
7f561c08de6b Initial load
duke
parents:
diff changeset
   929
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   930
7f561c08de6b Initial load
duke
parents:
diff changeset
   931
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   932
     * Compile a possibly closured terminal
7f561c08de6b Initial load
duke
parents:
diff changeset
   933
     * @param flags Flags passed by reference
7f561c08de6b Initial load
duke
parents:
diff changeset
   934
     * @return Index of closured node
7f561c08de6b Initial load
duke
parents:
diff changeset
   935
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
   936
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   937
    int closure(int[] flags) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
   938
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   939
        // Before terminal
7f561c08de6b Initial load
duke
parents:
diff changeset
   940
        int idxBeforeTerminal = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   941
7f561c08de6b Initial load
duke
parents:
diff changeset
   942
        // Values to pass by reference to terminal()
7f561c08de6b Initial load
duke
parents:
diff changeset
   943
        int[] terminalFlags = { NODE_NORMAL };
7f561c08de6b Initial load
duke
parents:
diff changeset
   944
7f561c08de6b Initial load
duke
parents:
diff changeset
   945
        // Get terminal symbol
7f561c08de6b Initial load
duke
parents:
diff changeset
   946
        int ret = terminal(terminalFlags);
7f561c08de6b Initial load
duke
parents:
diff changeset
   947
7f561c08de6b Initial load
duke
parents:
diff changeset
   948
        // Or in flags from terminal symbol
7f561c08de6b Initial load
duke
parents:
diff changeset
   949
        flags[0] |= terminalFlags[0];
7f561c08de6b Initial load
duke
parents:
diff changeset
   950
7f561c08de6b Initial load
duke
parents:
diff changeset
   951
        // Advance input, set NODE_NULLABLE flag and do sanity checks
7f561c08de6b Initial load
duke
parents:
diff changeset
   952
        if (idx >= len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   953
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   954
            return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
   955
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   956
        boolean greedy = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
   957
        char closureType = pattern.charAt(idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   958
        switch (closureType)
7f561c08de6b Initial load
duke
parents:
diff changeset
   959
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   960
            case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
   961
            case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
   962
7f561c08de6b Initial load
duke
parents:
diff changeset
   963
                // The current node can be null
7f561c08de6b Initial load
duke
parents:
diff changeset
   964
                flags[0] |= NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
   965
7f561c08de6b Initial load
duke
parents:
diff changeset
   966
            case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
   967
7f561c08de6b Initial load
duke
parents:
diff changeset
   968
                // Eat closure character
7f561c08de6b Initial load
duke
parents:
diff changeset
   969
                idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   970
7f561c08de6b Initial load
duke
parents:
diff changeset
   971
            case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   972
7f561c08de6b Initial load
duke
parents:
diff changeset
   973
                // Don't allow blantant stupidity
7f561c08de6b Initial load
duke
parents:
diff changeset
   974
                int opcode = instruction[ret + RE.offsetOpcode];
7f561c08de6b Initial load
duke
parents:
diff changeset
   975
                if (opcode == RE.OP_BOL || opcode == RE.OP_EOL)
7f561c08de6b Initial load
duke
parents:
diff changeset
   976
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   977
                    syntaxError("Bad closure operand");
7f561c08de6b Initial load
duke
parents:
diff changeset
   978
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   979
                if ((terminalFlags[0] & NODE_NULLABLE) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   980
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   981
                    syntaxError("Closure operand can't be nullable");
7f561c08de6b Initial load
duke
parents:
diff changeset
   982
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   983
                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   984
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   985
7f561c08de6b Initial load
duke
parents:
diff changeset
   986
        // If the next character is a '?', make the closure non-greedy (reluctant)
7f561c08de6b Initial load
duke
parents:
diff changeset
   987
        if (idx < len && pattern.charAt(idx) == '?')
7f561c08de6b Initial load
duke
parents:
diff changeset
   988
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   989
            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   990
            greedy = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
   991
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   992
7f561c08de6b Initial load
duke
parents:
diff changeset
   993
        if (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   994
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   995
            // Actually do the closure now
7f561c08de6b Initial load
duke
parents:
diff changeset
   996
            switch (closureType)
7f561c08de6b Initial load
duke
parents:
diff changeset
   997
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   998
                case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   999
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1000
                    // We look for our bracket in the list
7f561c08de6b Initial load
duke
parents:
diff changeset
  1001
                    boolean found = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1002
                    int i;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1003
                    allocBrackets();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1004
                    for (i = 0; i < brackets; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1005
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1006
                        if (bracketStart[i] == idx)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1007
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1008
                            found = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1009
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1010
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1011
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1012
7f561c08de6b Initial load
duke
parents:
diff changeset
  1013
                    // If its not in the list we parse the {m,n}
7f561c08de6b Initial load
duke
parents:
diff changeset
  1014
                    if (!found)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1015
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1016
                        if (brackets >= maxBrackets)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1017
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1018
                            reallocBrackets();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1019
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1020
                        bracketStart[brackets] = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1021
                        bracket();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1022
                        bracketEnd[brackets] = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1023
                        i = brackets++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1024
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1025
7f561c08de6b Initial load
duke
parents:
diff changeset
  1026
                    // Process min first
7f561c08de6b Initial load
duke
parents:
diff changeset
  1027
                    if (bracketMin[i]-- > 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1028
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1029
                        if (bracketMin[i] > 0 || bracketOpt[i] != 0) {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1030
                            // Rewind stream and run it through again - more matchers coming
7f561c08de6b Initial load
duke
parents:
diff changeset
  1031
                            for (int j = 0; j < brackets; j++) {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1032
                                if (j != i && bracketStart[j] < idx
7f561c08de6b Initial load
duke
parents:
diff changeset
  1033
                                    && bracketStart[j] >= idxBeforeTerminal)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1034
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1035
                                    brackets--;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1036
                                    bracketStart[j] = bracketStart[brackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1037
                                    bracketEnd[j] = bracketEnd[brackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1038
                                    bracketMin[j] = bracketMin[brackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1039
                                    bracketOpt[j] = bracketOpt[brackets];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1040
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1041
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1042
7f561c08de6b Initial load
duke
parents:
diff changeset
  1043
                            idx = idxBeforeTerminal;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1044
                        } else {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1045
                            // Bug #1030: No optinal matches - no need to rewind
7f561c08de6b Initial load
duke
parents:
diff changeset
  1046
                            idx = bracketEnd[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1047
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1048
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1049
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1050
7f561c08de6b Initial load
duke
parents:
diff changeset
  1051
                    // Do the right thing for maximum ({m,})
7f561c08de6b Initial load
duke
parents:
diff changeset
  1052
                    if (bracketOpt[i] == bracketUnbounded)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1053
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1054
                        // Drop through now and closure expression.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1055
                        // We are done with the {m,} expr, so skip rest
7f561c08de6b Initial load
duke
parents:
diff changeset
  1056
                        closureType = '*';
7f561c08de6b Initial load
duke
parents:
diff changeset
  1057
                        bracketOpt[i] = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1058
                        idx = bracketEnd[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1059
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1060
                    else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1061
                        if (bracketOpt[i]-- > 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1062
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1063
                            if (bracketOpt[i] > 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1064
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1065
                                // More optional matchers - 'play it again sam!'
7f561c08de6b Initial load
duke
parents:
diff changeset
  1066
                                idx = idxBeforeTerminal;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1067
                            } else {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1068
                                // Bug #1030: We are done - this one is last and optional
7f561c08de6b Initial load
duke
parents:
diff changeset
  1069
                                idx = bracketEnd[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1070
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1071
                            // Drop through to optionally close
7f561c08de6b Initial load
duke
parents:
diff changeset
  1072
                            closureType = '?';
7f561c08de6b Initial load
duke
parents:
diff changeset
  1073
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1074
                        else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1075
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1076
                            // Rollback terminal - neither min nor opt matchers present
7f561c08de6b Initial load
duke
parents:
diff changeset
  1077
                            lenInstruction = ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1078
                            node(RE.OP_NOTHING, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1079
7f561c08de6b Initial load
duke
parents:
diff changeset
  1080
                            // We are done. skip the rest of {m,n} expr
7f561c08de6b Initial load
duke
parents:
diff changeset
  1081
                            idx = bracketEnd[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1082
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1083
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1084
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1085
7f561c08de6b Initial load
duke
parents:
diff changeset
  1086
                // Fall through!
7f561c08de6b Initial load
duke
parents:
diff changeset
  1087
7f561c08de6b Initial load
duke
parents:
diff changeset
  1088
                case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1089
                case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1090
7f561c08de6b Initial load
duke
parents:
diff changeset
  1091
                    if (!greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1092
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1093
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1094
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1095
7f561c08de6b Initial load
duke
parents:
diff changeset
  1096
                    if (closureType == '?')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1097
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1098
                        // X? is compiled as (X|)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1099
                        nodeInsert(RE.OP_BRANCH, 0, ret);                 // branch before X
7f561c08de6b Initial load
duke
parents:
diff changeset
  1100
                        setNextOfEnd(ret, node (RE.OP_BRANCH, 0));        // inserted branch to option
7f561c08de6b Initial load
duke
parents:
diff changeset
  1101
                        int nothing = node (RE.OP_NOTHING, 0);            // which is OP_NOTHING
7f561c08de6b Initial load
duke
parents:
diff changeset
  1102
                        setNextOfEnd(ret, nothing);                       // point (second) branch to OP_NOTHING
7f561c08de6b Initial load
duke
parents:
diff changeset
  1103
                        setNextOfEnd(ret + RE.nodeSize, nothing);         // point the end of X to OP_NOTHING node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1104
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1105
7f561c08de6b Initial load
duke
parents:
diff changeset
  1106
                    if (closureType == '*')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1107
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1108
                        // X* is compiled as (X{gotoX}|)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1109
                        nodeInsert(RE.OP_BRANCH, 0, ret);                         // branch before X
7f561c08de6b Initial load
duke
parents:
diff changeset
  1110
                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0));   // end of X points to an option
7f561c08de6b Initial load
duke
parents:
diff changeset
  1111
                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0));     // to goto
7f561c08de6b Initial load
duke
parents:
diff changeset
  1112
                        setNextOfEnd(ret + RE.nodeSize, ret);                     // the start again
7f561c08de6b Initial load
duke
parents:
diff changeset
  1113
                        setNextOfEnd(ret, node(RE.OP_BRANCH, 0));                 // the other option is
7f561c08de6b Initial load
duke
parents:
diff changeset
  1114
                        setNextOfEnd(ret, node(RE.OP_NOTHING, 0));                // OP_NOTHING
7f561c08de6b Initial load
duke
parents:
diff changeset
  1115
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1116
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1117
7f561c08de6b Initial load
duke
parents:
diff changeset
  1118
                case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1119
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1120
                    // X+ is compiled as X({gotoX}|)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1121
                    int branch;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1122
                    branch = node(RE.OP_BRANCH, 0);                   // a new branch
7f561c08de6b Initial load
duke
parents:
diff changeset
  1123
                    setNextOfEnd(ret, branch);                        // is added to the end of X
7f561c08de6b Initial load
duke
parents:
diff changeset
  1124
                    setNextOfEnd(node(RE.OP_GOTO, 0), ret);           // one option is to go back to the start
7f561c08de6b Initial load
duke
parents:
diff changeset
  1125
                    setNextOfEnd(branch, node(RE.OP_BRANCH, 0));      // the other option
7f561c08de6b Initial load
duke
parents:
diff changeset
  1126
                    setNextOfEnd(ret, node(RE.OP_NOTHING, 0));        // is OP_NOTHING
7f561c08de6b Initial load
duke
parents:
diff changeset
  1127
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1128
                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1129
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1130
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1131
        else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1132
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1133
            // Add end after closured subexpr
7f561c08de6b Initial load
duke
parents:
diff changeset
  1134
            setNextOfEnd(ret, node(RE.OP_END, 0));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1135
7f561c08de6b Initial load
duke
parents:
diff changeset
  1136
            // Actually do the closure now
7f561c08de6b Initial load
duke
parents:
diff changeset
  1137
            switch (closureType)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1138
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1139
                case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1140
                    nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1141
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1142
7f561c08de6b Initial load
duke
parents:
diff changeset
  1143
                case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1144
                    nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1145
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1146
7f561c08de6b Initial load
duke
parents:
diff changeset
  1147
                case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
  1148
                    nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1149
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1150
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1151
7f561c08de6b Initial load
duke
parents:
diff changeset
  1152
            // Point to the expr after the closure
7f561c08de6b Initial load
duke
parents:
diff changeset
  1153
            setNextOfEnd(ret, lenInstruction);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1154
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1155
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1156
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1157
7f561c08de6b Initial load
duke
parents:
diff changeset
  1158
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1159
     * Compile one branch of an or operator (implements concatenation)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1160
     * @param flags Flags passed by reference
7f561c08de6b Initial load
duke
parents:
diff changeset
  1161
     * @return Pointer to branch node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1162
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1163
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1164
    int branch(int[] flags) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
  1165
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1166
        // Get each possibly closured piece and concat
7f561c08de6b Initial load
duke
parents:
diff changeset
  1167
        int node;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1168
        int ret = node(RE.OP_BRANCH, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1169
        int chain = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1170
        int[] closureFlags = new int[1];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1171
        boolean nullable = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1172
        while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1173
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1174
            // Get new node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1175
            closureFlags[0] = NODE_NORMAL;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1176
            node = closure(closureFlags);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1177
            if (closureFlags[0] == NODE_NORMAL)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1178
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1179
                nullable = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1180
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1181
7f561c08de6b Initial load
duke
parents:
diff changeset
  1182
            // If there's a chain, append to the end
7f561c08de6b Initial load
duke
parents:
diff changeset
  1183
            if (chain != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1184
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1185
                setNextOfEnd(chain, node);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1186
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1187
7f561c08de6b Initial load
duke
parents:
diff changeset
  1188
            // Chain starts at current
7f561c08de6b Initial load
duke
parents:
diff changeset
  1189
            chain = node;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1190
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1191
7f561c08de6b Initial load
duke
parents:
diff changeset
  1192
        // If we don't run loop, make a nothing node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1193
        if (chain == -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1194
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1195
            node(RE.OP_NOTHING, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1196
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1197
7f561c08de6b Initial load
duke
parents:
diff changeset
  1198
        // Set nullable flag for this branch
7f561c08de6b Initial load
duke
parents:
diff changeset
  1199
        if (nullable)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1200
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1201
            flags[0] |= NODE_NULLABLE;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1202
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1203
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1204
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1205
7f561c08de6b Initial load
duke
parents:
diff changeset
  1206
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1207
     * Compile an expression with possible parens around it.  Paren matching
7f561c08de6b Initial load
duke
parents:
diff changeset
  1208
     * is done at this level so we can tie the branch tails together.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1209
     * @param flags Flag value passed by reference
7f561c08de6b Initial load
duke
parents:
diff changeset
  1210
     * @return Node index of expression in instruction array
7f561c08de6b Initial load
duke
parents:
diff changeset
  1211
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1212
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1213
    int expr(int[] flags) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
  1214
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1215
        // Create open paren node unless we were called from the top level (which has no parens)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1216
        int paren = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1217
        int ret = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1218
        int closeParens = parens;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1219
        if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1220
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1221
            // if its a cluster ( rather than a proper subexpression ie with backrefs )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1222
            if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1223
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1224
                paren = 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1225
                idx += 3;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1226
                ret = node( RE.OP_OPEN_CLUSTER, 0 );
7f561c08de6b Initial load
duke
parents:
diff changeset
  1227
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1228
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1229
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1230
                paren = 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1231
                idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1232
                ret = node(RE.OP_OPEN, parens++);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1233
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1234
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1235
        flags[0] &= ~NODE_TOPLEVEL;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1236
7f561c08de6b Initial load
duke
parents:
diff changeset
  1237
        // Create a branch node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1238
        int branch = branch(flags);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1239
        if (ret == -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1240
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1241
            ret = branch;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1242
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1243
        else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1244
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1245
            setNextOfEnd(ret, branch);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1246
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1247
7f561c08de6b Initial load
duke
parents:
diff changeset
  1248
        // Loop through branches
7f561c08de6b Initial load
duke
parents:
diff changeset
  1249
        while (idx < len && pattern.charAt(idx) == '|')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1250
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1251
            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1252
            branch = branch(flags);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1253
            setNextOfEnd(ret, branch);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1254
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1255
7f561c08de6b Initial load
duke
parents:
diff changeset
  1256
        // Create an ending node (either a close paren or an OP_END)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1257
        int end;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1258
        if ( paren > 0 )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1259
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1260
            if (idx < len && pattern.charAt(idx) == ')')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1261
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1262
                idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1263
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1264
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1265
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1266
                syntaxError("Missing close paren");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1267
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1268
            if ( paren == 1 )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1269
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1270
                end = node(RE.OP_CLOSE, closeParens);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1271
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1272
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1273
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1274
                end = node( RE.OP_CLOSE_CLUSTER, 0 );
7f561c08de6b Initial load
duke
parents:
diff changeset
  1275
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1276
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1277
        else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1278
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1279
            end = node(RE.OP_END, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1280
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1281
7f561c08de6b Initial load
duke
parents:
diff changeset
  1282
        // Append the ending node to the ret nodelist
7f561c08de6b Initial load
duke
parents:
diff changeset
  1283
        setNextOfEnd(ret, end);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1284
7f561c08de6b Initial load
duke
parents:
diff changeset
  1285
        // Hook the ends of each branch to the end node
7f561c08de6b Initial load
duke
parents:
diff changeset
  1286
        int currentNode = ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1287
        int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1288
        // while the next node o
7f561c08de6b Initial load
duke
parents:
diff changeset
  1289
        while ( nextNodeOffset != 0 && currentNode < lenInstruction )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1290
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1291
            // If branch, make the end of the branch's operand chain point to the end node.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1292
            if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1293
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1294
                setNextOfEnd( currentNode + RE.nodeSize, end );
7f561c08de6b Initial load
duke
parents:
diff changeset
  1295
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1296
            nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1297
            currentNode += nextNodeOffset;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1298
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1299
7f561c08de6b Initial load
duke
parents:
diff changeset
  1300
        // Return the node list
7f561c08de6b Initial load
duke
parents:
diff changeset
  1301
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1302
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1303
7f561c08de6b Initial load
duke
parents:
diff changeset
  1304
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1305
     * Compiles a regular expression pattern into a program runnable by the pattern
7f561c08de6b Initial load
duke
parents:
diff changeset
  1306
     * matcher class 'RE'.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1307
     * @param pattern Regular expression pattern to compile (see RECompiler class
7f561c08de6b Initial load
duke
parents:
diff changeset
  1308
     * for details).
7f561c08de6b Initial load
duke
parents:
diff changeset
  1309
     * @return A compiled regular expression program.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1310
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1311
     * @see RECompiler
7f561c08de6b Initial load
duke
parents:
diff changeset
  1312
     * @see RE
7f561c08de6b Initial load
duke
parents:
diff changeset
  1313
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1314
    public REProgram compile(String pattern) throws RESyntaxException
7f561c08de6b Initial load
duke
parents:
diff changeset
  1315
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1316
        // Initialize variables for compilation
7f561c08de6b Initial load
duke
parents:
diff changeset
  1317
        this.pattern = pattern;                         // Save pattern in instance variable
7f561c08de6b Initial load
duke
parents:
diff changeset
  1318
        len = pattern.length();                         // Precompute pattern length for speed
7f561c08de6b Initial load
duke
parents:
diff changeset
  1319
        idx = 0;                                        // Set parsing index to the first character
7f561c08de6b Initial load
duke
parents:
diff changeset
  1320
        lenInstruction = 0;                             // Set emitted instruction count to zero
7f561c08de6b Initial load
duke
parents:
diff changeset
  1321
        parens = 1;                                     // Set paren level to 1 (the implicit outer parens)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1322
        brackets = 0;                                   // No bracketed closures yet
7f561c08de6b Initial load
duke
parents:
diff changeset
  1323
7f561c08de6b Initial load
duke
parents:
diff changeset
  1324
        // Initialize pass by reference flags value
7f561c08de6b Initial load
duke
parents:
diff changeset
  1325
        int[] flags = { NODE_TOPLEVEL };
7f561c08de6b Initial load
duke
parents:
diff changeset
  1326
7f561c08de6b Initial load
duke
parents:
diff changeset
  1327
        // Parse expression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1328
        expr(flags);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1329
7f561c08de6b Initial load
duke
parents:
diff changeset
  1330
        // Should be at end of input
7f561c08de6b Initial load
duke
parents:
diff changeset
  1331
        if (idx != len)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1332
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1333
            if (pattern.charAt(idx) == ')')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1334
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1335
                syntaxError("Unmatched close paren");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1336
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1337
            syntaxError("Unexpected input remains");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1338
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1339
7f561c08de6b Initial load
duke
parents:
diff changeset
  1340
        // Return the result
7f561c08de6b Initial load
duke
parents:
diff changeset
  1341
        char[] ins = new char[lenInstruction];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1342
        System.arraycopy(instruction, 0, ins, 0, lenInstruction);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1343
        return new REProgram(parens, ins);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1344
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1345
7f561c08de6b Initial load
duke
parents:
diff changeset
  1346
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1347
     * Local, nested class for maintaining character ranges for character classes.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1348
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1349
    class RERange
7f561c08de6b Initial load
duke
parents:
diff changeset
  1350
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1351
        int size = 16;                      // Capacity of current range arrays
7f561c08de6b Initial load
duke
parents:
diff changeset
  1352
        int[] minRange = new int[size];     // Range minima
7f561c08de6b Initial load
duke
parents:
diff changeset
  1353
        int[] maxRange = new int[size];     // Range maxima
7f561c08de6b Initial load
duke
parents:
diff changeset
  1354
        int num = 0;                        // Number of range array elements in use
7f561c08de6b Initial load
duke
parents:
diff changeset
  1355
7f561c08de6b Initial load
duke
parents:
diff changeset
  1356
        /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1357
         * Deletes the range at a given index from the range lists
7f561c08de6b Initial load
duke
parents:
diff changeset
  1358
         * @param index Index of range to delete from minRange and maxRange arrays.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1359
         */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1360
        void delete(int index)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1361
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1362
            // Return if no elements left or index is out of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1363
            if (num == 0 || index >= num)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1364
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1365
                return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1366
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1367
7f561c08de6b Initial load
duke
parents:
diff changeset
  1368
            // Move elements down
7f561c08de6b Initial load
duke
parents:
diff changeset
  1369
            while (++index < num)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1370
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1371
                if (index - 1 >= 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1372
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1373
                    minRange[index-1] = minRange[index];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1374
                    maxRange[index-1] = maxRange[index];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1375
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1376
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1377
7f561c08de6b Initial load
duke
parents:
diff changeset
  1378
            // One less element now
7f561c08de6b Initial load
duke
parents:
diff changeset
  1379
            num--;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1380
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1381
7f561c08de6b Initial load
duke
parents:
diff changeset
  1382
        /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1383
         * Merges a range into the range list, coalescing ranges if possible.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1384
         * @param min Minimum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1385
         * @param max Maximum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1386
         */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1387
        void merge(int min, int max)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1388
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1389
            // Loop through ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
  1390
            for (int i = 0; i < num; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1391
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1392
                // Min-max is subsumed by minRange[i]-maxRange[i]
7f561c08de6b Initial load
duke
parents:
diff changeset
  1393
                if (min >= minRange[i] && max <= maxRange[i])
7f561c08de6b Initial load
duke
parents:
diff changeset
  1394
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1395
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1396
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1397
7f561c08de6b Initial load
duke
parents:
diff changeset
  1398
                // Min-max subsumes minRange[i]-maxRange[i]
7f561c08de6b Initial load
duke
parents:
diff changeset
  1399
                else if (min <= minRange[i] && max >= maxRange[i])
7f561c08de6b Initial load
duke
parents:
diff changeset
  1400
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1401
                    delete(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1402
                    merge(min, max);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1403
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1404
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1405
7f561c08de6b Initial load
duke
parents:
diff changeset
  1406
                // Min is in the range, but max is outside
7f561c08de6b Initial load
duke
parents:
diff changeset
  1407
                else if (min >= minRange[i] && min <= maxRange[i])
7f561c08de6b Initial load
duke
parents:
diff changeset
  1408
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1409
                    delete(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1410
                    min = minRange[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1411
                    merge(min, max);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1412
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1413
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1414
7f561c08de6b Initial load
duke
parents:
diff changeset
  1415
                // Max is in the range, but min is outside
7f561c08de6b Initial load
duke
parents:
diff changeset
  1416
                else if (max >= minRange[i] && max <= maxRange[i])
7f561c08de6b Initial load
duke
parents:
diff changeset
  1417
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1418
                    delete(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1419
                    max = maxRange[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1420
                    merge(min, max);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1421
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1422
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1423
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1424
7f561c08de6b Initial load
duke
parents:
diff changeset
  1425
            // Must not overlap any other ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
  1426
            if (num >= size)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1427
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1428
                size *= 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1429
                int[] newMin = new int[size];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1430
                int[] newMax = new int[size];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1431
                System.arraycopy(minRange, 0, newMin, 0, num);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1432
                System.arraycopy(maxRange, 0, newMax, 0, num);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1433
                minRange = newMin;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1434
                maxRange = newMax;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1435
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1436
            minRange[num] = min;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1437
            maxRange[num] = max;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1438
            num++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1439
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1440
7f561c08de6b Initial load
duke
parents:
diff changeset
  1441
        /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1442
         * Removes a range by deleting or shrinking all other ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
  1443
         * @param min Minimum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1444
         * @param max Maximum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1445
         */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1446
        void remove(int min, int max)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1447
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1448
            // Loop through ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
  1449
            for (int i = 0; i < num; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1450
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1451
                // minRange[i]-maxRange[i] is subsumed by min-max
7f561c08de6b Initial load
duke
parents:
diff changeset
  1452
                if (minRange[i] >= min && maxRange[i] <= max)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1453
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1454
                    delete(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1455
                    i--;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1456
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1457
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1458
7f561c08de6b Initial load
duke
parents:
diff changeset
  1459
                // min-max is subsumed by minRange[i]-maxRange[i]
7f561c08de6b Initial load
duke
parents:
diff changeset
  1460
                else if (min >= minRange[i] && max <= maxRange[i])
7f561c08de6b Initial load
duke
parents:
diff changeset
  1461
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1462
                    int minr = minRange[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1463
                    int maxr = maxRange[i];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1464
                    delete(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1465
                    if (minr < min)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1466
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1467
                        merge(minr, min - 1);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1468
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1469
                    if (max < maxr)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1470
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1471
                        merge(max + 1, maxr);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1472
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1473
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1474
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1475
7f561c08de6b Initial load
duke
parents:
diff changeset
  1476
                // minRange is in the range, but maxRange is outside
7f561c08de6b Initial load
duke
parents:
diff changeset
  1477
                else if (minRange[i] >= min && minRange[i] <= max)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1478
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1479
                    minRange[i] = max + 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1480
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1481
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1482
7f561c08de6b Initial load
duke
parents:
diff changeset
  1483
                // maxRange is in the range, but minRange is outside
7f561c08de6b Initial load
duke
parents:
diff changeset
  1484
                else if (maxRange[i] >= min && maxRange[i] <= max)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1485
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1486
                    maxRange[i] = min - 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1487
                    return;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1488
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1489
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1490
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1491
7f561c08de6b Initial load
duke
parents:
diff changeset
  1492
        /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1493
         * Includes (or excludes) the range from min to max, inclusive.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1494
         * @param min Minimum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1495
         * @param max Maximum end of range
7f561c08de6b Initial load
duke
parents:
diff changeset
  1496
         * @param include True if range should be included.  False otherwise.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1497
         */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1498
        void include(int min, int max, boolean include)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1499
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1500
            if (include)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1501
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1502
                merge(min, max);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1503
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1504
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1505
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1506
                remove(min, max);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1507
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1508
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1509
7f561c08de6b Initial load
duke
parents:
diff changeset
  1510
        /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1511
         * Includes a range with the same min and max
7f561c08de6b Initial load
duke
parents:
diff changeset
  1512
         * @param minmax Minimum and maximum end of range (inclusive)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1513
         * @param include True if range should be included.  False otherwise.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1514
         */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1515
        void include(char minmax, boolean include)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1516
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1517
            include(minmax, minmax, include);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1518
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1519
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1520
}