jaxp/src/com/sun/org/apache/regexp/internal/REProgram.java
author ehelin
Mon, 31 Mar 2014 14:02:40 +0200
changeset 23544 e6362a5ba011
parent 12457 c348e06f0e82
permissions -rw-r--r--
8033251: Use DWARF debug symbols for Linux 32-bit as default Reviewed-by: dcubed, dholmes, coleenp
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6
7f561c08de6b Initial load
duke
parents:
diff changeset
     1
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     2
 * reserved comment block
7f561c08de6b Initial load
duke
parents:
diff changeset
     3
 * DO NOT REMOVE OR ALTER!
7f561c08de6b Initial load
duke
parents:
diff changeset
     4
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
     5
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     6
 * Copyright 1999-2004 The Apache Software Foundation.
7f561c08de6b Initial load
duke
parents:
diff changeset
     7
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
     8
 * Licensed under the Apache License, Version 2.0 (the "License");
7f561c08de6b Initial load
duke
parents:
diff changeset
     9
 * you may not use this file except in compliance with the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    10
 * You may obtain a copy of the License at
7f561c08de6b Initial load
duke
parents:
diff changeset
    11
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    12
 *     http://www.apache.org/licenses/LICENSE-2.0
7f561c08de6b Initial load
duke
parents:
diff changeset
    13
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    14
 * Unless required by applicable law or agreed to in writing, software
7f561c08de6b Initial load
duke
parents:
diff changeset
    15
 * distributed under the License is distributed on an "AS IS" BASIS,
7f561c08de6b Initial load
duke
parents:
diff changeset
    16
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
7f561c08de6b Initial load
duke
parents:
diff changeset
    17
 * See the License for the specific language governing permissions and
7f561c08de6b Initial load
duke
parents:
diff changeset
    18
 * limitations under the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    19
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    20
7f561c08de6b Initial load
duke
parents:
diff changeset
    21
package com.sun.org.apache.regexp.internal;
7f561c08de6b Initial load
duke
parents:
diff changeset
    22
7f561c08de6b Initial load
duke
parents:
diff changeset
    23
import java.io.Serializable;
7f561c08de6b Initial load
duke
parents:
diff changeset
    24
7f561c08de6b Initial load
duke
parents:
diff changeset
    25
/**
7f561c08de6b Initial load
duke
parents:
diff changeset
    26
 * A class that holds compiled regular expressions.  This is exposed mainly
7f561c08de6b Initial load
duke
parents:
diff changeset
    27
 * for use by the recompile utility (which helps you produce precompiled
7f561c08de6b Initial load
duke
parents:
diff changeset
    28
 * REProgram objects). You should not otherwise need to work directly with
7f561c08de6b Initial load
duke
parents:
diff changeset
    29
 * this class.
7f561c08de6b Initial load
duke
parents:
diff changeset
    30
*
7f561c08de6b Initial load
duke
parents:
diff changeset
    31
 * @see RE
7f561c08de6b Initial load
duke
parents:
diff changeset
    32
 * @see RECompiler
7f561c08de6b Initial load
duke
parents:
diff changeset
    33
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    34
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
    35
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    36
public class REProgram implements Serializable
7f561c08de6b Initial load
duke
parents:
diff changeset
    37
{
7f561c08de6b Initial load
duke
parents:
diff changeset
    38
    static final int OPT_HASBACKREFS = 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
    39
7f561c08de6b Initial load
duke
parents:
diff changeset
    40
    char[] instruction;         // The compiled regular expression 'program'
7f561c08de6b Initial load
duke
parents:
diff changeset
    41
    int lenInstruction;         // The amount of the instruction buffer in use
7f561c08de6b Initial load
duke
parents:
diff changeset
    42
    char[] prefix;              // Prefix string optimization
7f561c08de6b Initial load
duke
parents:
diff changeset
    43
    int flags;                  // Optimization flags (REProgram.OPT_*)
7f561c08de6b Initial load
duke
parents:
diff changeset
    44
    int maxParens = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
    45
7f561c08de6b Initial load
duke
parents:
diff changeset
    46
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    47
     * Constructs a program object from a character array
7f561c08de6b Initial load
duke
parents:
diff changeset
    48
     * @param instruction Character array with RE opcode instructions in it
7f561c08de6b Initial load
duke
parents:
diff changeset
    49
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
    50
    public REProgram(char[] instruction)
7f561c08de6b Initial load
duke
parents:
diff changeset
    51
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    52
        this(instruction, instruction.length);
7f561c08de6b Initial load
duke
parents:
diff changeset
    53
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    54
7f561c08de6b Initial load
duke
parents:
diff changeset
    55
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    56
     * Constructs a program object from a character array
7f561c08de6b Initial load
duke
parents:
diff changeset
    57
     * @param parens Count of parens in the program
7f561c08de6b Initial load
duke
parents:
diff changeset
    58
     * @param instruction Character array with RE opcode instructions in it
7f561c08de6b Initial load
duke
parents:
diff changeset
    59
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
    60
    public REProgram(int parens, char[] instruction)
7f561c08de6b Initial load
duke
parents:
diff changeset
    61
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    62
        this(instruction, instruction.length);
7f561c08de6b Initial load
duke
parents:
diff changeset
    63
        this.maxParens = parens;
7f561c08de6b Initial load
duke
parents:
diff changeset
    64
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    65
7f561c08de6b Initial load
duke
parents:
diff changeset
    66
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    67
     * Constructs a program object from a character array
7f561c08de6b Initial load
duke
parents:
diff changeset
    68
     * @param instruction Character array with RE opcode instructions in it
7f561c08de6b Initial load
duke
parents:
diff changeset
    69
     * @param lenInstruction Amount of instruction array in use
7f561c08de6b Initial load
duke
parents:
diff changeset
    70
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
    71
    public REProgram(char[] instruction, int lenInstruction)
7f561c08de6b Initial load
duke
parents:
diff changeset
    72
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    73
        setInstructions(instruction, lenInstruction);
7f561c08de6b Initial load
duke
parents:
diff changeset
    74
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    75
7f561c08de6b Initial load
duke
parents:
diff changeset
    76
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    77
     * Returns a copy of the current regular expression program in a character
7f561c08de6b Initial load
duke
parents:
diff changeset
    78
     * array that is exactly the right length to hold the program.  If there is
7f561c08de6b Initial load
duke
parents:
diff changeset
    79
     * no program compiled yet, getInstructions() will return null.
7f561c08de6b Initial load
duke
parents:
diff changeset
    80
     * @return A copy of the current compiled RE program
7f561c08de6b Initial load
duke
parents:
diff changeset
    81
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
    82
    public char[] getInstructions()
7f561c08de6b Initial load
duke
parents:
diff changeset
    83
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    84
        // Ensure program has been compiled!
7f561c08de6b Initial load
duke
parents:
diff changeset
    85
        if (lenInstruction != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
    86
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
    87
            // Return copy of program
7f561c08de6b Initial load
duke
parents:
diff changeset
    88
            char[] ret = new char[lenInstruction];
7f561c08de6b Initial load
duke
parents:
diff changeset
    89
            System.arraycopy(instruction, 0, ret, 0, lenInstruction);
7f561c08de6b Initial load
duke
parents:
diff changeset
    90
            return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
    91
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
    92
        return null;
7f561c08de6b Initial load
duke
parents:
diff changeset
    93
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    94
7f561c08de6b Initial load
duke
parents:
diff changeset
    95
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    96
     * Sets a new regular expression program to run.  It is this method which
7f561c08de6b Initial load
duke
parents:
diff changeset
    97
     * performs any special compile-time search optimizations.  Currently only
7f561c08de6b Initial load
duke
parents:
diff changeset
    98
     * two optimizations are in place - one which checks for backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
    99
     * (so that they can be lazily allocated) and another which attempts to
7f561c08de6b Initial load
duke
parents:
diff changeset
   100
     * find an prefix anchor string so that substantial amounts of input can
7f561c08de6b Initial load
duke
parents:
diff changeset
   101
     * potentially be skipped without running the actual program.
7f561c08de6b Initial load
duke
parents:
diff changeset
   102
     * @param instruction Program instruction buffer
7f561c08de6b Initial load
duke
parents:
diff changeset
   103
     * @param lenInstruction Length of instruction buffer in use
7f561c08de6b Initial load
duke
parents:
diff changeset
   104
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   105
    public void setInstructions(char[] instruction, int lenInstruction)
7f561c08de6b Initial load
duke
parents:
diff changeset
   106
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   107
        // Save reference to instruction array
7f561c08de6b Initial load
duke
parents:
diff changeset
   108
        this.instruction = instruction;
7f561c08de6b Initial load
duke
parents:
diff changeset
   109
        this.lenInstruction = lenInstruction;
7f561c08de6b Initial load
duke
parents:
diff changeset
   110
7f561c08de6b Initial load
duke
parents:
diff changeset
   111
        // Initialize other program-related variables
7f561c08de6b Initial load
duke
parents:
diff changeset
   112
        flags = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   113
        prefix = null;
7f561c08de6b Initial load
duke
parents:
diff changeset
   114
7f561c08de6b Initial load
duke
parents:
diff changeset
   115
        // Try various compile-time optimizations if there's a program
7f561c08de6b Initial load
duke
parents:
diff changeset
   116
        if (instruction != null && lenInstruction != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   117
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   118
            // If the first node is a branch
7f561c08de6b Initial load
duke
parents:
diff changeset
   119
            if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
7f561c08de6b Initial load
duke
parents:
diff changeset
   120
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   121
                // to the end node
7f561c08de6b Initial load
duke
parents:
diff changeset
   122
                int next = instruction[0 + RE.offsetNext];
7f561c08de6b Initial load
duke
parents:
diff changeset
   123
                if (instruction[next + RE.offsetOpcode] == RE.OP_END)
7f561c08de6b Initial load
duke
parents:
diff changeset
   124
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   125
                    // and the branch starts with an atom
7f561c08de6b Initial load
duke
parents:
diff changeset
   126
                    if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
7f561c08de6b Initial load
duke
parents:
diff changeset
   127
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   128
                        // then get that atom as an prefix because there's no other choice
7f561c08de6b Initial load
duke
parents:
diff changeset
   129
                        int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
7f561c08de6b Initial load
duke
parents:
diff changeset
   130
                        prefix = new char[lenAtom];
7f561c08de6b Initial load
duke
parents:
diff changeset
   131
                        System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
7f561c08de6b Initial load
duke
parents:
diff changeset
   132
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   133
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   134
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   135
7f561c08de6b Initial load
duke
parents:
diff changeset
   136
            BackrefScanLoop:
7f561c08de6b Initial load
duke
parents:
diff changeset
   137
7f561c08de6b Initial load
duke
parents:
diff changeset
   138
            // Check for backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
   139
            for (int i = 0; i < lenInstruction; i += RE.nodeSize)
7f561c08de6b Initial load
duke
parents:
diff changeset
   140
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   141
                switch (instruction[i + RE.offsetOpcode])
7f561c08de6b Initial load
duke
parents:
diff changeset
   142
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
   143
                    case RE.OP_ANYOF:
7f561c08de6b Initial load
duke
parents:
diff changeset
   144
                        i += (instruction[i + RE.offsetOpdata] * 2);
7f561c08de6b Initial load
duke
parents:
diff changeset
   145
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   146
7f561c08de6b Initial load
duke
parents:
diff changeset
   147
                    case RE.OP_ATOM:
7f561c08de6b Initial load
duke
parents:
diff changeset
   148
                        i += instruction[i + RE.offsetOpdata];
7f561c08de6b Initial load
duke
parents:
diff changeset
   149
                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   150
7f561c08de6b Initial load
duke
parents:
diff changeset
   151
                    case RE.OP_BACKREF:
7f561c08de6b Initial load
duke
parents:
diff changeset
   152
                        flags |= OPT_HASBACKREFS;
7f561c08de6b Initial load
duke
parents:
diff changeset
   153
                        break BackrefScanLoop;
7f561c08de6b Initial load
duke
parents:
diff changeset
   154
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
   155
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   156
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   157
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   158
}