jaxp/src/com/sun/org/apache/regexp/internal/REProgram.java
changeset 12457 c348e06f0e82
parent 6 7f561c08de6b
equal deleted inserted replaced
12324:1d7e6da6adc8 12457:c348e06f0e82
       
     1 /*
       
     2  * reserved comment block
       
     3  * DO NOT REMOVE OR ALTER!
       
     4  */
       
     5 /*
       
     6  * Copyright 1999-2004 The Apache Software Foundation.
       
     7  *
       
     8  * Licensed under the Apache License, Version 2.0 (the "License");
       
     9  * you may not use this file except in compliance with the License.
       
    10  * You may obtain a copy of the License at
       
    11  *
       
    12  *     http://www.apache.org/licenses/LICENSE-2.0
       
    13  *
       
    14  * Unless required by applicable law or agreed to in writing, software
       
    15  * distributed under the License is distributed on an "AS IS" BASIS,
       
    16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    17  * See the License for the specific language governing permissions and
       
    18  * limitations under the License.
       
    19  */
       
    20 
       
    21 package com.sun.org.apache.regexp.internal;
       
    22 
       
    23 import java.io.Serializable;
       
    24 
       
    25 /**
       
    26  * A class that holds compiled regular expressions.  This is exposed mainly
       
    27  * for use by the recompile utility (which helps you produce precompiled
       
    28  * REProgram objects). You should not otherwise need to work directly with
       
    29  * this class.
       
    30 *
       
    31  * @see RE
       
    32  * @see RECompiler
       
    33  *
       
    34  * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
       
    35  */
       
    36 public class REProgram implements Serializable
       
    37 {
       
    38     static final int OPT_HASBACKREFS = 1;
       
    39 
       
    40     char[] instruction;         // The compiled regular expression 'program'
       
    41     int lenInstruction;         // The amount of the instruction buffer in use
       
    42     char[] prefix;              // Prefix string optimization
       
    43     int flags;                  // Optimization flags (REProgram.OPT_*)
       
    44     int maxParens = -1;
       
    45 
       
    46     /**
       
    47      * Constructs a program object from a character array
       
    48      * @param instruction Character array with RE opcode instructions in it
       
    49      */
       
    50     public REProgram(char[] instruction)
       
    51     {
       
    52         this(instruction, instruction.length);
       
    53     }
       
    54 
       
    55     /**
       
    56      * Constructs a program object from a character array
       
    57      * @param parens Count of parens in the program
       
    58      * @param instruction Character array with RE opcode instructions in it
       
    59      */
       
    60     public REProgram(int parens, char[] instruction)
       
    61     {
       
    62         this(instruction, instruction.length);
       
    63         this.maxParens = parens;
       
    64     }
       
    65 
       
    66     /**
       
    67      * Constructs a program object from a character array
       
    68      * @param instruction Character array with RE opcode instructions in it
       
    69      * @param lenInstruction Amount of instruction array in use
       
    70      */
       
    71     public REProgram(char[] instruction, int lenInstruction)
       
    72     {
       
    73         setInstructions(instruction, lenInstruction);
       
    74     }
       
    75 
       
    76     /**
       
    77      * Returns a copy of the current regular expression program in a character
       
    78      * array that is exactly the right length to hold the program.  If there is
       
    79      * no program compiled yet, getInstructions() will return null.
       
    80      * @return A copy of the current compiled RE program
       
    81      */
       
    82     public char[] getInstructions()
       
    83     {
       
    84         // Ensure program has been compiled!
       
    85         if (lenInstruction != 0)
       
    86         {
       
    87             // Return copy of program
       
    88             char[] ret = new char[lenInstruction];
       
    89             System.arraycopy(instruction, 0, ret, 0, lenInstruction);
       
    90             return ret;
       
    91         }
       
    92         return null;
       
    93     }
       
    94 
       
    95     /**
       
    96      * Sets a new regular expression program to run.  It is this method which
       
    97      * performs any special compile-time search optimizations.  Currently only
       
    98      * two optimizations are in place - one which checks for backreferences
       
    99      * (so that they can be lazily allocated) and another which attempts to
       
   100      * find an prefix anchor string so that substantial amounts of input can
       
   101      * potentially be skipped without running the actual program.
       
   102      * @param instruction Program instruction buffer
       
   103      * @param lenInstruction Length of instruction buffer in use
       
   104      */
       
   105     public void setInstructions(char[] instruction, int lenInstruction)
       
   106     {
       
   107         // Save reference to instruction array
       
   108         this.instruction = instruction;
       
   109         this.lenInstruction = lenInstruction;
       
   110 
       
   111         // Initialize other program-related variables
       
   112         flags = 0;
       
   113         prefix = null;
       
   114 
       
   115         // Try various compile-time optimizations if there's a program
       
   116         if (instruction != null && lenInstruction != 0)
       
   117         {
       
   118             // If the first node is a branch
       
   119             if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
       
   120             {
       
   121                 // to the end node
       
   122                 int next = instruction[0 + RE.offsetNext];
       
   123                 if (instruction[next + RE.offsetOpcode] == RE.OP_END)
       
   124                 {
       
   125                     // and the branch starts with an atom
       
   126                     if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
       
   127                     {
       
   128                         // then get that atom as an prefix because there's no other choice
       
   129                         int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
       
   130                         prefix = new char[lenAtom];
       
   131                         System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
       
   132                     }
       
   133                 }
       
   134             }
       
   135 
       
   136             BackrefScanLoop:
       
   137 
       
   138             // Check for backreferences
       
   139             for (int i = 0; i < lenInstruction; i += RE.nodeSize)
       
   140             {
       
   141                 switch (instruction[i + RE.offsetOpcode])
       
   142                 {
       
   143                     case RE.OP_ANYOF:
       
   144                         i += (instruction[i + RE.offsetOpdata] * 2);
       
   145                         break;
       
   146 
       
   147                     case RE.OP_ATOM:
       
   148                         i += instruction[i + RE.offsetOpdata];
       
   149                         break;
       
   150 
       
   151                     case RE.OP_BACKREF:
       
   152                         flags |= OPT_HASBACKREFS;
       
   153                         break BackrefScanLoop;
       
   154                 }
       
   155             }
       
   156         }
       
   157     }
       
   158 }