jaxp/src/com/sun/org/apache/regexp/internal/RE.java
author mchung
Mon, 26 Nov 2012 22:49:06 -0800
changeset 16098 9001e536ab4e
parent 12457 c348e06f0e82
permissions -rw-r--r--
6664509: Add logging context 6664528: Find log level matching its name or value given at construction time Reviewed-by: alanb, ahgross, jgish, hawtin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6
7f561c08de6b Initial load
duke
parents:
diff changeset
     1
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     2
 * reserved comment block
7f561c08de6b Initial load
duke
parents:
diff changeset
     3
 * DO NOT REMOVE OR ALTER!
7f561c08de6b Initial load
duke
parents:
diff changeset
     4
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
     5
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     6
 * Copyright 1999-2004 The Apache Software Foundation.
7f561c08de6b Initial load
duke
parents:
diff changeset
     7
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
     8
 * Licensed under the Apache License, Version 2.0 (the "License");
7f561c08de6b Initial load
duke
parents:
diff changeset
     9
 * you may not use this file except in compliance with the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    10
 * You may obtain a copy of the License at
7f561c08de6b Initial load
duke
parents:
diff changeset
    11
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    12
 *     http://www.apache.org/licenses/LICENSE-2.0
7f561c08de6b Initial load
duke
parents:
diff changeset
    13
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    14
 * Unless required by applicable law or agreed to in writing, software
7f561c08de6b Initial load
duke
parents:
diff changeset
    15
 * distributed under the License is distributed on an "AS IS" BASIS,
7f561c08de6b Initial load
duke
parents:
diff changeset
    16
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
7f561c08de6b Initial load
duke
parents:
diff changeset
    17
 * See the License for the specific language governing permissions and
7f561c08de6b Initial load
duke
parents:
diff changeset
    18
 * limitations under the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    19
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    20
7f561c08de6b Initial load
duke
parents:
diff changeset
    21
package com.sun.org.apache.regexp.internal;
7f561c08de6b Initial load
duke
parents:
diff changeset
    22
7f561c08de6b Initial load
duke
parents:
diff changeset
    23
import java.io.Serializable;
7f561c08de6b Initial load
duke
parents:
diff changeset
    24
import java.util.Vector;
7f561c08de6b Initial load
duke
parents:
diff changeset
    25
7f561c08de6b Initial load
duke
parents:
diff changeset
    26
/**
7f561c08de6b Initial load
duke
parents:
diff changeset
    27
 * RE is an efficient, lightweight regular expression evaluator/matcher
7f561c08de6b Initial load
duke
parents:
diff changeset
    28
 * class. Regular expressions are pattern descriptions which enable
7f561c08de6b Initial load
duke
parents:
diff changeset
    29
 * sophisticated matching of strings.  In addition to being able to
7f561c08de6b Initial load
duke
parents:
diff changeset
    30
 * match a string against a pattern, you can also extract parts of the
7f561c08de6b Initial load
duke
parents:
diff changeset
    31
 * match.  This is especially useful in text parsing! Details on the
7f561c08de6b Initial load
duke
parents:
diff changeset
    32
 * syntax of regular expression patterns are given below.
7f561c08de6b Initial load
duke
parents:
diff changeset
    33
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    34
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    35
 * To compile a regular expression (RE), you can simply construct an RE
7f561c08de6b Initial load
duke
parents:
diff changeset
    36
 * matcher object from the string specification of the pattern, like this:
7f561c08de6b Initial load
duke
parents:
diff changeset
    37
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    38
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    39
 *  RE r = new RE("a*b");
7f561c08de6b Initial load
duke
parents:
diff changeset
    40
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    41
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    42
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    43
 * Once you have done this, you can call either of the RE.match methods to
7f561c08de6b Initial load
duke
parents:
diff changeset
    44
 * perform matching on a String.  For example:
7f561c08de6b Initial load
duke
parents:
diff changeset
    45
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    46
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    47
 *  boolean matched = r.match("aaaab");
7f561c08de6b Initial load
duke
parents:
diff changeset
    48
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    49
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    50
 * will cause the boolean matched to be set to true because the
7f561c08de6b Initial load
duke
parents:
diff changeset
    51
 * pattern "a*b" matches the string "aaaab".
7f561c08de6b Initial load
duke
parents:
diff changeset
    52
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    53
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    54
 * If you were interested in the <i>number</i> of a's which matched the
7f561c08de6b Initial load
duke
parents:
diff changeset
    55
 * first part of our example expression, you could change the expression to
7f561c08de6b Initial load
duke
parents:
diff changeset
    56
 * "(a*)b".  Then when you compiled the expression and matched it against
7f561c08de6b Initial load
duke
parents:
diff changeset
    57
 * something like "xaaaab", you would get results like this:
7f561c08de6b Initial load
duke
parents:
diff changeset
    58
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    59
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    60
 *  RE r = new RE("(a*)b");                  // Compile expression
7f561c08de6b Initial load
duke
parents:
diff changeset
    61
 *  boolean matched = r.match("xaaaab");     // Match against "xaaaab"
7f561c08de6b Initial load
duke
parents:
diff changeset
    62
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    63
 *  String wholeExpr = r.getParen(0);        // wholeExpr will be 'aaaab'
7f561c08de6b Initial load
duke
parents:
diff changeset
    64
 *  String insideParens = r.getParen(1);     // insideParens will be 'aaaa'
7f561c08de6b Initial load
duke
parents:
diff changeset
    65
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    66
 *  int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
7f561c08de6b Initial load
duke
parents:
diff changeset
    67
 *  int endWholeExpr = r.getParenEnd(0);     // endWholeExpr will be index 6
7f561c08de6b Initial load
duke
parents:
diff changeset
    68
 *  int lenWholeExpr = r.getParenLength(0);  // lenWholeExpr will be 5
7f561c08de6b Initial load
duke
parents:
diff changeset
    69
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    70
 *  int startInside = r.getParenStart(1);    // startInside will be index 1
7f561c08de6b Initial load
duke
parents:
diff changeset
    71
 *  int endInside = r.getParenEnd(1);        // endInside will be index 5
7f561c08de6b Initial load
duke
parents:
diff changeset
    72
 *  int lenInside = r.getParenLength(1);     // lenInside will be 4
7f561c08de6b Initial load
duke
parents:
diff changeset
    73
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    74
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    75
 * You can also refer to the contents of a parenthesized expression
7f561c08de6b Initial load
duke
parents:
diff changeset
    76
 * within a regular expression itself.  This is called a
7f561c08de6b Initial load
duke
parents:
diff changeset
    77
 * 'backreference'.  The first backreference in a regular expression is
7f561c08de6b Initial load
duke
parents:
diff changeset
    78
 * denoted by \1, the second by \2 and so on.  So the expression:
7f561c08de6b Initial load
duke
parents:
diff changeset
    79
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    80
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    81
 *  ([0-9]+)=\1
7f561c08de6b Initial load
duke
parents:
diff changeset
    82
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    83
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    84
 * will match any string of the form n=n (like 0=0 or 2=2).
7f561c08de6b Initial load
duke
parents:
diff changeset
    85
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    86
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    87
 * The full regular expression syntax accepted by RE is described here:
7f561c08de6b Initial load
duke
parents:
diff changeset
    88
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    89
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    90
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    91
 *  <b><font face=times roman>Characters</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
    92
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    93
 *    <i>unicodeChar</i>   Matches any identical unicode character
7f561c08de6b Initial load
duke
parents:
diff changeset
    94
 *    \                    Used to quote a meta-character (like '*')
7f561c08de6b Initial load
duke
parents:
diff changeset
    95
 *    \\                   Matches a single '\' character
7f561c08de6b Initial load
duke
parents:
diff changeset
    96
 *    \0nnn                Matches a given octal character
7f561c08de6b Initial load
duke
parents:
diff changeset
    97
 *    \xhh                 Matches a given 8-bit hexadecimal character
7f561c08de6b Initial load
duke
parents:
diff changeset
    98
 *    \\uhhhh              Matches a given 16-bit hexadecimal character
7f561c08de6b Initial load
duke
parents:
diff changeset
    99
 *    \t                   Matches an ASCII tab character
7f561c08de6b Initial load
duke
parents:
diff changeset
   100
 *    \n                   Matches an ASCII newline character
7f561c08de6b Initial load
duke
parents:
diff changeset
   101
 *    \r                   Matches an ASCII return character
7f561c08de6b Initial load
duke
parents:
diff changeset
   102
 *    \f                   Matches an ASCII form feed character
7f561c08de6b Initial load
duke
parents:
diff changeset
   103
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   104
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   105
 *  <b><font face=times roman>Character Classes</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   106
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   107
 *    [abc]                Simple character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   108
 *    [a-zA-Z]             Character class with ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
   109
 *    [^abc]               Negated character class
7f561c08de6b Initial load
duke
parents:
diff changeset
   110
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   111
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   112
 * <b>NOTE:</b> Incomplete ranges will be interpreted as &quot;starts
7f561c08de6b Initial load
duke
parents:
diff changeset
   113
 * from zero&quot; or &quot;ends with last character&quot;.
7f561c08de6b Initial load
duke
parents:
diff changeset
   114
 * <br>
7f561c08de6b Initial load
duke
parents:
diff changeset
   115
 * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
7f561c08de6b Initial load
duke
parents:
diff changeset
   116
 * [-] means &quot;all characters&quot;.
7f561c08de6b Initial load
duke
parents:
diff changeset
   117
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   118
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   119
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   120
 *  <b><font face=times roman>Standard POSIX Character Classes</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   121
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   122
 *    [:alnum:]            Alphanumeric characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   123
 *    [:alpha:]            Alphabetic characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   124
 *    [:blank:]            Space and tab characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   125
 *    [:cntrl:]            Control characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   126
 *    [:digit:]            Numeric characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   127
 *    [:graph:]            Characters that are printable and are also visible.
7f561c08de6b Initial load
duke
parents:
diff changeset
   128
 *                         (A space is printable, but not visible, while an
7f561c08de6b Initial load
duke
parents:
diff changeset
   129
 *                         `a' is both.)
7f561c08de6b Initial load
duke
parents:
diff changeset
   130
 *    [:lower:]            Lower-case alphabetic characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   131
 *    [:print:]            Printable characters (characters that are not
7f561c08de6b Initial load
duke
parents:
diff changeset
   132
 *                         control characters.)
7f561c08de6b Initial load
duke
parents:
diff changeset
   133
 *    [:punct:]            Punctuation characters (characters that are not letters,
7f561c08de6b Initial load
duke
parents:
diff changeset
   134
 *                         digits, control characters, or space characters).
7f561c08de6b Initial load
duke
parents:
diff changeset
   135
 *    [:space:]            Space characters (such as space, tab, and formfeed,
7f561c08de6b Initial load
duke
parents:
diff changeset
   136
 *                         to name a few).
7f561c08de6b Initial load
duke
parents:
diff changeset
   137
 *    [:upper:]            Upper-case alphabetic characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
   138
 *    [:xdigit:]           Characters that are hexadecimal digits.
7f561c08de6b Initial load
duke
parents:
diff changeset
   139
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   140
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   141
 *  <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   142
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   143
 *    [:javastart:]        Start of a Java identifier
7f561c08de6b Initial load
duke
parents:
diff changeset
   144
 *    [:javapart:]         Part of a Java identifier
7f561c08de6b Initial load
duke
parents:
diff changeset
   145
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   146
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   147
 *  <b><font face=times roman>Predefined Classes</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   148
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   149
 *    .         Matches any character other than newline
7f561c08de6b Initial load
duke
parents:
diff changeset
   150
 *    \w        Matches a "word" character (alphanumeric plus "_")
7f561c08de6b Initial load
duke
parents:
diff changeset
   151
 *    \W        Matches a non-word character
7f561c08de6b Initial load
duke
parents:
diff changeset
   152
 *    \s        Matches a whitespace character
7f561c08de6b Initial load
duke
parents:
diff changeset
   153
 *    \S        Matches a non-whitespace character
7f561c08de6b Initial load
duke
parents:
diff changeset
   154
 *    \d        Matches a digit character
7f561c08de6b Initial load
duke
parents:
diff changeset
   155
 *    \D        Matches a non-digit character
7f561c08de6b Initial load
duke
parents:
diff changeset
   156
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   157
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   158
 *  <b><font face=times roman>Boundary Matchers</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   159
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   160
 *    ^         Matches only at the beginning of a line
7f561c08de6b Initial load
duke
parents:
diff changeset
   161
 *    $         Matches only at the end of a line
7f561c08de6b Initial load
duke
parents:
diff changeset
   162
 *    \b        Matches only at a word boundary
7f561c08de6b Initial load
duke
parents:
diff changeset
   163
 *    \B        Matches only at a non-word boundary
7f561c08de6b Initial load
duke
parents:
diff changeset
   164
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   165
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   166
 *  <b><font face=times roman>Greedy Closures</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   167
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   168
 *    A*        Matches A 0 or more times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   169
 *    A+        Matches A 1 or more times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   170
 *    A?        Matches A 0 or 1 times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   171
 *    A{n}      Matches A exactly n times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   172
 *    A{n,}     Matches A at least n times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   173
 *    A{n,m}    Matches A at least n but not more than m times (greedy)
7f561c08de6b Initial load
duke
parents:
diff changeset
   174
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   175
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   176
 *  <b><font face=times roman>Reluctant Closures</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   177
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   178
 *    A*?       Matches A 0 or more times (reluctant)
7f561c08de6b Initial load
duke
parents:
diff changeset
   179
 *    A+?       Matches A 1 or more times (reluctant)
7f561c08de6b Initial load
duke
parents:
diff changeset
   180
 *    A??       Matches A 0 or 1 times (reluctant)
7f561c08de6b Initial load
duke
parents:
diff changeset
   181
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   182
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   183
 *  <b><font face=times roman>Logical Operators</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   184
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   185
 *    AB        Matches A followed by B
7f561c08de6b Initial load
duke
parents:
diff changeset
   186
 *    A|B       Matches either A or B
7f561c08de6b Initial load
duke
parents:
diff changeset
   187
 *    (A)       Used for subexpression grouping
7f561c08de6b Initial load
duke
parents:
diff changeset
   188
 *   (?:A)      Used for subexpression clustering (just like grouping but
7f561c08de6b Initial load
duke
parents:
diff changeset
   189
 *              no backrefs)
7f561c08de6b Initial load
duke
parents:
diff changeset
   190
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   191
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   192
 *  <b><font face=times roman>Backreferences</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   193
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   194
 *    \1    Backreference to 1st parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   195
 *    \2    Backreference to 2nd parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   196
 *    \3    Backreference to 3rd parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   197
 *    \4    Backreference to 4th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   198
 *    \5    Backreference to 5th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   199
 *    \6    Backreference to 6th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   200
 *    \7    Backreference to 7th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   201
 *    \8    Backreference to 8th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   202
 *    \9    Backreference to 9th parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   203
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   204
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   205
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   206
 * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
7f561c08de6b Initial load
duke
parents:
diff changeset
   207
 * that they match as many elements of the string as possible without
7f561c08de6b Initial load
duke
parents:
diff changeset
   208
 * causing the overall match to fail.  If you want a closure to be
7f561c08de6b Initial load
duke
parents:
diff changeset
   209
 * reluctant (non-greedy), you can simply follow it with a '?'.  A
7f561c08de6b Initial load
duke
parents:
diff changeset
   210
 * reluctant closure will match as few elements of the string as
7f561c08de6b Initial load
duke
parents:
diff changeset
   211
 * possible when finding matches.  {m,n} closures don't currently
7f561c08de6b Initial load
duke
parents:
diff changeset
   212
 * support reluctancy.
7f561c08de6b Initial load
duke
parents:
diff changeset
   213
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   214
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   215
 * <b><font face="times roman">Line terminators</font></b>
7f561c08de6b Initial load
duke
parents:
diff changeset
   216
 * <br>
7f561c08de6b Initial load
duke
parents:
diff changeset
   217
 * A line terminator is a one- or two-character sequence that marks
7f561c08de6b Initial load
duke
parents:
diff changeset
   218
 * the end of a line of the input character sequence. The following
7f561c08de6b Initial load
duke
parents:
diff changeset
   219
 * are recognized as line terminators:
7f561c08de6b Initial load
duke
parents:
diff changeset
   220
 * <ul>
7f561c08de6b Initial load
duke
parents:
diff changeset
   221
 * <li>A newline (line feed) character ('\n'),</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   222
 * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   223
 * <li>A standalone carriage-return character ('\r'),</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   224
 * <li>A next-line character ('\u0085'),</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   225
 * <li>A line-separator character ('\u2028'), or</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   226
 * <li>A paragraph-separator character ('\u2029').</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   227
 * </ul>
7f561c08de6b Initial load
duke
parents:
diff changeset
   228
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   229
 * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   230
 * RE runs programs compiled by the RECompiler class.  But the RE
7f561c08de6b Initial load
duke
parents:
diff changeset
   231
 * matcher class does not include the actual regular expression compiler
7f561c08de6b Initial load
duke
parents:
diff changeset
   232
 * for reasons of efficiency.  In fact, if you want to pre-compile one
7f561c08de6b Initial load
duke
parents:
diff changeset
   233
 * or more regular expressions, the 'recompile' class can be invoked
7f561c08de6b Initial load
duke
parents:
diff changeset
   234
 * from the command line to produce compiled output like this:
7f561c08de6b Initial load
duke
parents:
diff changeset
   235
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   236
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   237
 *    // Pre-compiled regular expression "a*b"
7f561c08de6b Initial load
duke
parents:
diff changeset
   238
 *    char[] re1Instructions =
7f561c08de6b Initial load
duke
parents:
diff changeset
   239
 *    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   240
 *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
7f561c08de6b Initial load
duke
parents:
diff changeset
   241
 *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
7f561c08de6b Initial load
duke
parents:
diff changeset
   242
 *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
7f561c08de6b Initial load
duke
parents:
diff changeset
   243
 *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
7f561c08de6b Initial load
duke
parents:
diff changeset
   244
 *        0x0000,
7f561c08de6b Initial load
duke
parents:
diff changeset
   245
 *    };
7f561c08de6b Initial load
duke
parents:
diff changeset
   246
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   247
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   248
 *    REProgram re1 = new REProgram(re1Instructions);
7f561c08de6b Initial load
duke
parents:
diff changeset
   249
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   250
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   251
 * You can then construct a regular expression matcher (RE) object from
7f561c08de6b Initial load
duke
parents:
diff changeset
   252
 * the pre-compiled expression re1 and thus avoid the overhead of
7f561c08de6b Initial load
duke
parents:
diff changeset
   253
 * compiling the expression at runtime. If you require more dynamic
7f561c08de6b Initial load
duke
parents:
diff changeset
   254
 * regular expressions, you can construct a single RECompiler object and
7f561c08de6b Initial load
duke
parents:
diff changeset
   255
 * re-use it to compile each expression. Similarly, you can change the
7f561c08de6b Initial load
duke
parents:
diff changeset
   256
 * program run by a given matcher object at any time. However, RE and
7f561c08de6b Initial load
duke
parents:
diff changeset
   257
 * RECompiler are not threadsafe (for efficiency reasons, and because
7f561c08de6b Initial load
duke
parents:
diff changeset
   258
 * requiring thread safety in this class is deemed to be a rare
7f561c08de6b Initial load
duke
parents:
diff changeset
   259
 * requirement), so you will need to construct a separate compiler or
7f561c08de6b Initial load
duke
parents:
diff changeset
   260
 * matcher object for each thread (unless you do thread synchronization
7f561c08de6b Initial load
duke
parents:
diff changeset
   261
 * yourself). Once an expression has been compiled into an REProgram object, that REProgram
7f561c08de6b Initial load
duke
parents:
diff changeset
   262
 * can be safely shared across multiple threads and RE objects.
7f561c08de6b Initial load
duke
parents:
diff changeset
   263
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   264
 * <br><p><br>
7f561c08de6b Initial load
duke
parents:
diff changeset
   265
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   266
 * <font color="red">
7f561c08de6b Initial load
duke
parents:
diff changeset
   267
 * <i>ISSUES:</i>
7f561c08de6b Initial load
duke
parents:
diff changeset
   268
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   269
 * <ul>
7f561c08de6b Initial load
duke
parents:
diff changeset
   270
 *  <li>com.weusours.util.re is not currently compatible with all
7f561c08de6b Initial load
duke
parents:
diff changeset
   271
 *      standard POSIX regcomp flags</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   272
 *  <li>com.weusours.util.re does not support POSIX equivalence classes
7f561c08de6b Initial load
duke
parents:
diff changeset
   273
 *      ([=foo=] syntax) (I18N/locale issue)</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   274
 *  <li>com.weusours.util.re does not support nested POSIX character
7f561c08de6b Initial load
duke
parents:
diff changeset
   275
 *      classes (definitely should, but not completely trivial)</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   276
 *  <li>com.weusours.util.re does not support POSIX character collation
7f561c08de6b Initial load
duke
parents:
diff changeset
   277
 *      concepts ([.foo.] syntax) (I18N/locale issue)</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   278
 *  <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   279
 *  <li>Should RE support character iterators (for backwards RE matching!)?</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   280
 *  <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   281
 *  <li>Not *all* possibilities are considered for greediness when backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
   282
 *      are involved (as POSIX suggests should be the case).  The POSIX RE
7f561c08de6b Initial load
duke
parents:
diff changeset
   283
 *      "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
7f561c08de6b Initial load
duke
parents:
diff changeset
   284
 *      of acdacaa where \1 is "a".  This is not the case in this RE package,
7f561c08de6b Initial load
duke
parents:
diff changeset
   285
 *      and actually Perl doesn't go to this extent either!  Until someone
7f561c08de6b Initial load
duke
parents:
diff changeset
   286
 *      actually complains about this, I'm not sure it's worth "fixing".
7f561c08de6b Initial load
duke
parents:
diff changeset
   287
 *      If it ever is fixed, test #137 in RETest.txt should be updated.</li>
7f561c08de6b Initial load
duke
parents:
diff changeset
   288
 * </ul>
7f561c08de6b Initial load
duke
parents:
diff changeset
   289
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   290
 * </font>
7f561c08de6b Initial load
duke
parents:
diff changeset
   291
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   292
 * @see recompile
7f561c08de6b Initial load
duke
parents:
diff changeset
   293
 * @see RECompiler
7f561c08de6b Initial load
duke
parents:
diff changeset
   294
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   295
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
   296
 * @author <a href="mailto:ts@sch-fer.de">Tobias Sch&auml;fer</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
   297
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
   298
public class RE implements Serializable
7f561c08de6b Initial load
duke
parents:
diff changeset
   299
{
7f561c08de6b Initial load
duke
parents:
diff changeset
   300
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   301
     * Specifies normal, case-sensitive matching behaviour.
7f561c08de6b Initial load
duke
parents:
diff changeset
   302
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   303
    public static final int MATCH_NORMAL          = 0x0000;
7f561c08de6b Initial load
duke
parents:
diff changeset
   304
7f561c08de6b Initial load
duke
parents:
diff changeset
   305
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   306
     * Flag to indicate that matching should be case-independent (folded)
7f561c08de6b Initial load
duke
parents:
diff changeset
   307
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   308
    public static final int MATCH_CASEINDEPENDENT = 0x0001;
7f561c08de6b Initial load
duke
parents:
diff changeset
   309
7f561c08de6b Initial load
duke
parents:
diff changeset
   310
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   311
     * Newlines should match as BOL/EOL (^ and $)
7f561c08de6b Initial load
duke
parents:
diff changeset
   312
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   313
    public static final int MATCH_MULTILINE       = 0x0002;
7f561c08de6b Initial load
duke
parents:
diff changeset
   314
7f561c08de6b Initial load
duke
parents:
diff changeset
   315
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   316
     * Consider all input a single body of text - newlines are matched by .
7f561c08de6b Initial load
duke
parents:
diff changeset
   317
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   318
    public static final int MATCH_SINGLELINE      = 0x0004;
7f561c08de6b Initial load
duke
parents:
diff changeset
   319
7f561c08de6b Initial load
duke
parents:
diff changeset
   320
    /************************************************
7f561c08de6b Initial load
duke
parents:
diff changeset
   321
     *                                              *
7f561c08de6b Initial load
duke
parents:
diff changeset
   322
     * The format of a node in a program is:        *
7f561c08de6b Initial load
duke
parents:
diff changeset
   323
     *                                              *
7f561c08de6b Initial load
duke
parents:
diff changeset
   324
     * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
7f561c08de6b Initial load
duke
parents:
diff changeset
   325
     *                                              *
7f561c08de6b Initial load
duke
parents:
diff changeset
   326
     * char OPCODE - instruction                    *
7f561c08de6b Initial load
duke
parents:
diff changeset
   327
     * char OPDATA - modifying data                 *
7f561c08de6b Initial load
duke
parents:
diff changeset
   328
     * char OPNEXT - next node (relative offset)    *
7f561c08de6b Initial load
duke
parents:
diff changeset
   329
     *                                              *
7f561c08de6b Initial load
duke
parents:
diff changeset
   330
     ************************************************/
7f561c08de6b Initial load
duke
parents:
diff changeset
   331
7f561c08de6b Initial load
duke
parents:
diff changeset
   332
                 //   Opcode              Char       Opdata/Operand  Meaning
7f561c08de6b Initial load
duke
parents:
diff changeset
   333
                 //   ----------          ---------- --------------- --------------------------------------------------
7f561c08de6b Initial load
duke
parents:
diff changeset
   334
    static final char OP_END              = 'E';  //                 end of program
7f561c08de6b Initial load
duke
parents:
diff changeset
   335
    static final char OP_BOL              = '^';  //                 match only if at beginning of line
7f561c08de6b Initial load
duke
parents:
diff changeset
   336
    static final char OP_EOL              = '$';  //                 match only if at end of line
7f561c08de6b Initial load
duke
parents:
diff changeset
   337
    static final char OP_ANY              = '.';  //                 match any single character except newline
7f561c08de6b Initial load
duke
parents:
diff changeset
   338
    static final char OP_ANYOF            = '[';  // count/ranges    match any char in the list of ranges
7f561c08de6b Initial load
duke
parents:
diff changeset
   339
    static final char OP_BRANCH           = '|';  // node            match this alternative or the next one
7f561c08de6b Initial load
duke
parents:
diff changeset
   340
    static final char OP_ATOM             = 'A';  // length/string   length of string followed by string itself
7f561c08de6b Initial load
duke
parents:
diff changeset
   341
    static final char OP_STAR             = '*';  // node            kleene closure
7f561c08de6b Initial load
duke
parents:
diff changeset
   342
    static final char OP_PLUS             = '+';  // node            positive closure
7f561c08de6b Initial load
duke
parents:
diff changeset
   343
    static final char OP_MAYBE            = '?';  // node            optional closure
7f561c08de6b Initial load
duke
parents:
diff changeset
   344
    static final char OP_ESCAPE           = '\\'; // escape          special escape code char class (escape is E_* code)
7f561c08de6b Initial load
duke
parents:
diff changeset
   345
    static final char OP_OPEN             = '(';  // number          nth opening paren
7f561c08de6b Initial load
duke
parents:
diff changeset
   346
    static final char OP_OPEN_CLUSTER     = '<';  //                 opening cluster
7f561c08de6b Initial load
duke
parents:
diff changeset
   347
    static final char OP_CLOSE            = ')';  // number          nth closing paren
7f561c08de6b Initial load
duke
parents:
diff changeset
   348
    static final char OP_CLOSE_CLUSTER    = '>';  //                 closing cluster
7f561c08de6b Initial load
duke
parents:
diff changeset
   349
    static final char OP_BACKREF          = '#';  // number          reference nth already matched parenthesized string
7f561c08de6b Initial load
duke
parents:
diff changeset
   350
    static final char OP_GOTO             = 'G';  //                 nothing but a (back-)pointer
7f561c08de6b Initial load
duke
parents:
diff changeset
   351
    static final char OP_NOTHING          = 'N';  //                 match null string such as in '(a|)'
7f561c08de6b Initial load
duke
parents:
diff changeset
   352
    static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant '*' (mnemonic for char is unshifted '*')
7f561c08de6b Initial load
duke
parents:
diff changeset
   353
    static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant '+' (mnemonic for char is unshifted '+')
7f561c08de6b Initial load
duke
parents:
diff changeset
   354
    static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant '?' (mnemonic for char is unshifted '?')
7f561c08de6b Initial load
duke
parents:
diff changeset
   355
    static final char OP_POSIXCLASS       = 'P';  // classid         one of the posix character classes
7f561c08de6b Initial load
duke
parents:
diff changeset
   356
7f561c08de6b Initial load
duke
parents:
diff changeset
   357
    // Escape codes
7f561c08de6b Initial load
duke
parents:
diff changeset
   358
    static final char E_ALNUM             = 'w';  // Alphanumeric
7f561c08de6b Initial load
duke
parents:
diff changeset
   359
    static final char E_NALNUM            = 'W';  // Non-alphanumeric
7f561c08de6b Initial load
duke
parents:
diff changeset
   360
    static final char E_BOUND             = 'b';  // Word boundary
7f561c08de6b Initial load
duke
parents:
diff changeset
   361
    static final char E_NBOUND            = 'B';  // Non-word boundary
7f561c08de6b Initial load
duke
parents:
diff changeset
   362
    static final char E_SPACE             = 's';  // Whitespace
7f561c08de6b Initial load
duke
parents:
diff changeset
   363
    static final char E_NSPACE            = 'S';  // Non-whitespace
7f561c08de6b Initial load
duke
parents:
diff changeset
   364
    static final char E_DIGIT             = 'd';  // Digit
7f561c08de6b Initial load
duke
parents:
diff changeset
   365
    static final char E_NDIGIT            = 'D';  // Non-digit
7f561c08de6b Initial load
duke
parents:
diff changeset
   366
7f561c08de6b Initial load
duke
parents:
diff changeset
   367
    // Posix character classes
7f561c08de6b Initial load
duke
parents:
diff changeset
   368
    static final char POSIX_CLASS_ALNUM   = 'w';  // Alphanumerics
7f561c08de6b Initial load
duke
parents:
diff changeset
   369
    static final char POSIX_CLASS_ALPHA   = 'a';  // Alphabetics
7f561c08de6b Initial load
duke
parents:
diff changeset
   370
    static final char POSIX_CLASS_BLANK   = 'b';  // Blanks
7f561c08de6b Initial load
duke
parents:
diff changeset
   371
    static final char POSIX_CLASS_CNTRL   = 'c';  // Control characters
7f561c08de6b Initial load
duke
parents:
diff changeset
   372
    static final char POSIX_CLASS_DIGIT   = 'd';  // Digits
7f561c08de6b Initial load
duke
parents:
diff changeset
   373
    static final char POSIX_CLASS_GRAPH   = 'g';  // Graphic characters
7f561c08de6b Initial load
duke
parents:
diff changeset
   374
    static final char POSIX_CLASS_LOWER   = 'l';  // Lowercase characters
7f561c08de6b Initial load
duke
parents:
diff changeset
   375
    static final char POSIX_CLASS_PRINT   = 'p';  // Printable characters
7f561c08de6b Initial load
duke
parents:
diff changeset
   376
    static final char POSIX_CLASS_PUNCT   = '!';  // Punctuation
7f561c08de6b Initial load
duke
parents:
diff changeset
   377
    static final char POSIX_CLASS_SPACE   = 's';  // Spaces
7f561c08de6b Initial load
duke
parents:
diff changeset
   378
    static final char POSIX_CLASS_UPPER   = 'u';  // Uppercase characters
7f561c08de6b Initial load
duke
parents:
diff changeset
   379
    static final char POSIX_CLASS_XDIGIT  = 'x';  // Hexadecimal digits
7f561c08de6b Initial load
duke
parents:
diff changeset
   380
    static final char POSIX_CLASS_JSTART  = 'j';  // Java identifier start
7f561c08de6b Initial load
duke
parents:
diff changeset
   381
    static final char POSIX_CLASS_JPART   = 'k';  // Java identifier part
7f561c08de6b Initial load
duke
parents:
diff changeset
   382
7f561c08de6b Initial load
duke
parents:
diff changeset
   383
    // Limits
7f561c08de6b Initial load
duke
parents:
diff changeset
   384
    static final int maxNode  = 65536;            // Maximum number of nodes in a program
7f561c08de6b Initial load
duke
parents:
diff changeset
   385
    static final int MAX_PAREN = 16;              // Number of paren pairs (only 9 can be backrefs)
7f561c08de6b Initial load
duke
parents:
diff changeset
   386
7f561c08de6b Initial load
duke
parents:
diff changeset
   387
    // Node layout constants
7f561c08de6b Initial load
duke
parents:
diff changeset
   388
    static final int offsetOpcode = 0;            // Opcode offset (first character)
7f561c08de6b Initial load
duke
parents:
diff changeset
   389
    static final int offsetOpdata = 1;            // Opdata offset (second char)
7f561c08de6b Initial load
duke
parents:
diff changeset
   390
    static final int offsetNext   = 2;            // Next index offset (third char)
7f561c08de6b Initial load
duke
parents:
diff changeset
   391
    static final int nodeSize     = 3;            // Node size (in chars)
7f561c08de6b Initial load
duke
parents:
diff changeset
   392
7f561c08de6b Initial load
duke
parents:
diff changeset
   393
    // State of current program
7f561c08de6b Initial load
duke
parents:
diff changeset
   394
    REProgram program;                            // Compiled regular expression 'program'
7f561c08de6b Initial load
duke
parents:
diff changeset
   395
    transient CharacterIterator search;           // The string being matched against
7f561c08de6b Initial load
duke
parents:
diff changeset
   396
    int matchFlags;                               // Match behaviour flags
7f561c08de6b Initial load
duke
parents:
diff changeset
   397
    int maxParen = MAX_PAREN;
7f561c08de6b Initial load
duke
parents:
diff changeset
   398
7f561c08de6b Initial load
duke
parents:
diff changeset
   399
    // Parenthesized subexpressions
7f561c08de6b Initial load
duke
parents:
diff changeset
   400
    transient int parenCount;                     // Number of subexpressions matched (num open parens + 1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   401
    transient int start0;                         // Cache of start[0]
7f561c08de6b Initial load
duke
parents:
diff changeset
   402
    transient int end0;                           // Cache of end[0]
7f561c08de6b Initial load
duke
parents:
diff changeset
   403
    transient int start1;                         // Cache of start[1]
7f561c08de6b Initial load
duke
parents:
diff changeset
   404
    transient int end1;                           // Cache of end[1]
7f561c08de6b Initial load
duke
parents:
diff changeset
   405
    transient int start2;                         // Cache of start[2]
7f561c08de6b Initial load
duke
parents:
diff changeset
   406
    transient int end2;                           // Cache of end[2]
7f561c08de6b Initial load
duke
parents:
diff changeset
   407
    transient int[] startn;                       // Lazy-alloced array of sub-expression starts
7f561c08de6b Initial load
duke
parents:
diff changeset
   408
    transient int[] endn;                         // Lazy-alloced array of sub-expression ends
7f561c08de6b Initial load
duke
parents:
diff changeset
   409
7f561c08de6b Initial load
duke
parents:
diff changeset
   410
    // Backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
   411
    transient int[] startBackref;                 // Lazy-alloced array of backref starts
7f561c08de6b Initial load
duke
parents:
diff changeset
   412
    transient int[] endBackref;                   // Lazy-alloced array of backref ends
7f561c08de6b Initial load
duke
parents:
diff changeset
   413
7f561c08de6b Initial load
duke
parents:
diff changeset
   414
    /**
     * Creates a matcher for the given pattern text using normal
     * ({@link #MATCH_NORMAL}) matching.  The pattern is compiled with a
     * freshly created RECompiler; callers compiling many patterns may
     * prefer to share one RECompiler and use the REProgram constructors.
     *
     * @param pattern The regular expression pattern to compile.
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     * @see RECompiler
     * @see recompile
     */
    public RE(String pattern) throws RESyntaxException
    {
        // Delegate to the two-argument form with the default flags.
        this(pattern, RE.MATCH_NORMAL);
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   428
7f561c08de6b Initial load
duke
parents:
diff changeset
   429
    /**
     * Constructs a regular expression matcher from a String by compiling it
     * using a new instance of RECompiler.  If you will be compiling many
     * expressions, you may prefer to use a single RECompiler object instead.
     *
     * @param pattern The regular expression pattern to compile.
     * @param matchFlags The matching style: one or more of the RE.MATCH_* flags.
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     * @see RECompiler
     * @see recompile
     */
    public RE(String pattern, int matchFlags) throws RESyntaxException
    {
        // Hand the compiled program and the requested flags to the
        // (REProgram, int) constructor in one step, so the flags are set
        // exactly once instead of first to MATCH_NORMAL and then again.
        this(new RECompiler().compile(pattern), matchFlags);
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   445
7f561c08de6b Initial load
duke
parents:
diff changeset
   446
    /**
     * Creates a matcher that runs an already compiled regular expression
     * program, with explicit match behaviour flags.
     *
     * @param program Compiled regular expression program (see RECompiler and/or recompile)
     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
     *
     * <pre>
     *   MATCH_NORMAL              // Normal (case-sensitive) matching
     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
     *   MATCH_SINGLELINE          // Input is a single body of text; . matches newlines
     * </pre>
     *
     * @see RECompiler
     * @see REProgram
     * @see recompile
     */
    public RE(REProgram program, int matchFlags)
    {
        // Install the program first (this also sets maxParen), then the flags.
        this.setProgram(program);
        this.setMatchFlags(matchFlags);
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   469
7f561c08de6b Initial load
duke
parents:
diff changeset
   470
    /**
     * Creates a matcher that runs an already compiled regular expression
     * program using normal ({@link #MATCH_NORMAL}) matching.
     *
     * @param program Compiled regular expression program
     * @see RECompiler
     * @see recompile
     */
    public RE(REProgram program)
    {
        // Same as the two-argument form with the default flags.
        this(program, RE.MATCH_NORMAL);
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   482
7f561c08de6b Initial load
duke
parents:
diff changeset
   483
    /**
     * Creates a matcher that has no program yet and uses normal
     * ({@link #MATCH_NORMAL}) matching.  A program can be supplied later
     * through {@link #setProgram}.  This is likely to be an uncommon
     * practice, but is still supported.
     */
    public RE()
    {
        // The cast selects the (REProgram, int) constructor; a bare null
        // would be ambiguous with the (String, int) constructor.
        this((REProgram)null, RE.MATCH_NORMAL);
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   491
7f561c08de6b Initial load
duke
parents:
diff changeset
   492
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   493
     * Converts a 'simplified' regular expression to a full regular expression
7f561c08de6b Initial load
duke
parents:
diff changeset
   494
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   495
     * @param pattern The pattern to convert
7f561c08de6b Initial load
duke
parents:
diff changeset
   496
     * @return The full regular expression
7f561c08de6b Initial load
duke
parents:
diff changeset
   497
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   498
    public static String simplePatternToFullRegularExpression(String pattern)
7f561c08de6b Initial load
duke
parents:
diff changeset
   499
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   500
        StringBuffer buf = new StringBuffer();
7f561c08de6b Initial load
duke
parents:
diff changeset
   501
        for (int i = 0; i < pattern.length(); i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   502
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   503
            char c = pattern.charAt(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
   504
            switch (c)
7f561c08de6b Initial load
duke
parents:
diff changeset
   505
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   506
                case '*':
7f561c08de6b Initial load
duke
parents:
diff changeset
   507
                    buf.append(".*");
7f561c08de6b Initial load
duke
parents:
diff changeset
   508
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   509
7f561c08de6b Initial load
duke
parents:
diff changeset
   510
                case '.':
7f561c08de6b Initial load
duke
parents:
diff changeset
   511
                case '[':
7f561c08de6b Initial load
duke
parents:
diff changeset
   512
                case ']':
7f561c08de6b Initial load
duke
parents:
diff changeset
   513
                case '\\':
7f561c08de6b Initial load
duke
parents:
diff changeset
   514
                case '+':
7f561c08de6b Initial load
duke
parents:
diff changeset
   515
                case '?':
7f561c08de6b Initial load
duke
parents:
diff changeset
   516
                case '{':
7f561c08de6b Initial load
duke
parents:
diff changeset
   517
                case '}':
7f561c08de6b Initial load
duke
parents:
diff changeset
   518
                case '$':
7f561c08de6b Initial load
duke
parents:
diff changeset
   519
                case '^':
7f561c08de6b Initial load
duke
parents:
diff changeset
   520
                case '|':
7f561c08de6b Initial load
duke
parents:
diff changeset
   521
                case '(':
7f561c08de6b Initial load
duke
parents:
diff changeset
   522
                case ')':
7f561c08de6b Initial load
duke
parents:
diff changeset
   523
                    buf.append('\\');
7f561c08de6b Initial load
duke
parents:
diff changeset
   524
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   525
                    buf.append(c);
7f561c08de6b Initial load
duke
parents:
diff changeset
   526
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   527
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   528
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   529
        return buf.toString();
7f561c08de6b Initial load
duke
parents:
diff changeset
   530
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   531
7f561c08de6b Initial load
duke
parents:
diff changeset
   532
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   533
     * Sets match behaviour flags which alter the way RE does matching.
7f561c08de6b Initial load
duke
parents:
diff changeset
   534
     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
7f561c08de6b Initial load
duke
parents:
diff changeset
   535
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   536
     * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   537
     *   MATCH_NORMAL              // Normal (case-sensitive) matching
7f561c08de6b Initial load
duke
parents:
diff changeset
   538
     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
7f561c08de6b Initial load
duke
parents:
diff changeset
   539
     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
7f561c08de6b Initial load
duke
parents:
diff changeset
   540
     * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   541
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   542
    public void setMatchFlags(int matchFlags)
7f561c08de6b Initial load
duke
parents:
diff changeset
   543
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   544
        this.matchFlags = matchFlags;
7f561c08de6b Initial load
duke
parents:
diff changeset
   545
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   546
7f561c08de6b Initial load
duke
parents:
diff changeset
   547
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   548
     * Returns the current match behaviour flags.
7f561c08de6b Initial load
duke
parents:
diff changeset
   549
     * @return Current match behaviour flags (RE.MATCH_*).
7f561c08de6b Initial load
duke
parents:
diff changeset
   550
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   551
     * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   552
     *   MATCH_NORMAL              // Normal (case-sensitive) matching
7f561c08de6b Initial load
duke
parents:
diff changeset
   553
     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
7f561c08de6b Initial load
duke
parents:
diff changeset
   554
     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
7f561c08de6b Initial load
duke
parents:
diff changeset
   555
     * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   556
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   557
     * @see #setMatchFlags
7f561c08de6b Initial load
duke
parents:
diff changeset
   558
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   559
    public int getMatchFlags()
7f561c08de6b Initial load
duke
parents:
diff changeset
   560
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   561
        return matchFlags;
7f561c08de6b Initial load
duke
parents:
diff changeset
   562
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   563
7f561c08de6b Initial load
duke
parents:
diff changeset
   564
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   565
     * Sets the current regular expression program used by this matcher object.
7f561c08de6b Initial load
duke
parents:
diff changeset
   566
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   567
     * @param program Regular expression program compiled by RECompiler.
7f561c08de6b Initial load
duke
parents:
diff changeset
   568
     * @see RECompiler
7f561c08de6b Initial load
duke
parents:
diff changeset
   569
     * @see REProgram
7f561c08de6b Initial load
duke
parents:
diff changeset
   570
     * @see recompile
7f561c08de6b Initial load
duke
parents:
diff changeset
   571
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   572
    public void setProgram(REProgram program)
7f561c08de6b Initial load
duke
parents:
diff changeset
   573
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   574
        this.program = program;
7f561c08de6b Initial load
duke
parents:
diff changeset
   575
        if (program != null && program.maxParens != -1) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   576
            this.maxParen = program.maxParens;
7f561c08de6b Initial load
duke
parents:
diff changeset
   577
        } else {
7f561c08de6b Initial load
duke
parents:
diff changeset
   578
            this.maxParen = MAX_PAREN;
7f561c08de6b Initial load
duke
parents:
diff changeset
   579
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   580
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   581
7f561c08de6b Initial load
duke
parents:
diff changeset
   582
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   583
     * Returns the current regular expression program in use by this matcher object.
7f561c08de6b Initial load
duke
parents:
diff changeset
   584
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   585
     * @return Regular expression program
7f561c08de6b Initial load
duke
parents:
diff changeset
   586
     * @see #setProgram
7f561c08de6b Initial load
duke
parents:
diff changeset
   587
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   588
    public REProgram getProgram()
7f561c08de6b Initial load
duke
parents:
diff changeset
   589
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   590
        return program;
7f561c08de6b Initial load
duke
parents:
diff changeset
   591
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   592
7f561c08de6b Initial load
duke
parents:
diff changeset
   593
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   594
     * Returns the number of parenthesized subexpressions available after a successful match.
7f561c08de6b Initial load
duke
parents:
diff changeset
   595
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   596
     * @return Number of available parenthesized subexpressions
7f561c08de6b Initial load
duke
parents:
diff changeset
   597
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   598
    public int getParenCount()
7f561c08de6b Initial load
duke
parents:
diff changeset
   599
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   600
        return parenCount;
7f561c08de6b Initial load
duke
parents:
diff changeset
   601
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   602
7f561c08de6b Initial load
duke
parents:
diff changeset
   603
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   604
     * Gets the contents of a parenthesized subexpression after a successful match.
7f561c08de6b Initial load
duke
parents:
diff changeset
   605
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   606
     * @param which Nesting level of subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   607
     * @return String
7f561c08de6b Initial load
duke
parents:
diff changeset
   608
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   609
    public String getParen(int which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   610
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   611
        int start;
7f561c08de6b Initial load
duke
parents:
diff changeset
   612
        if (which < parenCount && (start = getParenStart(which)) >= 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   613
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   614
            return search.substring(start, getParenEnd(which));
7f561c08de6b Initial load
duke
parents:
diff changeset
   615
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   616
        return null;
7f561c08de6b Initial load
duke
parents:
diff changeset
   617
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   618
7f561c08de6b Initial load
duke
parents:
diff changeset
   619
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   620
     * Returns the start index of a given paren level.
7f561c08de6b Initial load
duke
parents:
diff changeset
   621
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   622
     * @param which Nesting level of subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   623
     * @return String index
7f561c08de6b Initial load
duke
parents:
diff changeset
   624
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   625
    public final int getParenStart(int which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   626
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   627
        if (which < parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   628
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   629
            switch (which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   630
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   631
                case 0:
7f561c08de6b Initial load
duke
parents:
diff changeset
   632
                    return start0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   633
7f561c08de6b Initial load
duke
parents:
diff changeset
   634
                case 1:
7f561c08de6b Initial load
duke
parents:
diff changeset
   635
                    return start1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   636
7f561c08de6b Initial load
duke
parents:
diff changeset
   637
                case 2:
7f561c08de6b Initial load
duke
parents:
diff changeset
   638
                    return start2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   639
7f561c08de6b Initial load
duke
parents:
diff changeset
   640
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   641
                    if (startn == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   642
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   643
                        allocParens();
7f561c08de6b Initial load
duke
parents:
diff changeset
   644
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   645
                    return startn[which];
7f561c08de6b Initial load
duke
parents:
diff changeset
   646
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   647
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   648
        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   649
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   650
7f561c08de6b Initial load
duke
parents:
diff changeset
   651
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   652
     * Returns the end index of a given paren level.
7f561c08de6b Initial load
duke
parents:
diff changeset
   653
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   654
     * @param which Nesting level of subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   655
     * @return String index
7f561c08de6b Initial load
duke
parents:
diff changeset
   656
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   657
    public final int getParenEnd(int which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   658
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   659
        if (which < parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   660
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   661
            switch (which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   662
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   663
                case 0:
7f561c08de6b Initial load
duke
parents:
diff changeset
   664
                    return end0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   665
7f561c08de6b Initial load
duke
parents:
diff changeset
   666
                case 1:
7f561c08de6b Initial load
duke
parents:
diff changeset
   667
                    return end1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   668
7f561c08de6b Initial load
duke
parents:
diff changeset
   669
                case 2:
7f561c08de6b Initial load
duke
parents:
diff changeset
   670
                    return end2;
7f561c08de6b Initial load
duke
parents:
diff changeset
   671
7f561c08de6b Initial load
duke
parents:
diff changeset
   672
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   673
                    if (endn == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   674
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   675
                        allocParens();
7f561c08de6b Initial load
duke
parents:
diff changeset
   676
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   677
                    return endn[which];
7f561c08de6b Initial load
duke
parents:
diff changeset
   678
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   679
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   680
        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   681
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   682
7f561c08de6b Initial load
duke
parents:
diff changeset
   683
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   684
     * Returns the length of a given paren level.
7f561c08de6b Initial load
duke
parents:
diff changeset
   685
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   686
     * @param which Nesting level of subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   687
     * @return Number of characters in the parenthesized subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   688
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   689
    public final int getParenLength(int which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   690
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   691
        if (which < parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   692
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   693
            return getParenEnd(which) - getParenStart(which);
7f561c08de6b Initial load
duke
parents:
diff changeset
   694
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   695
        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   696
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   697
7f561c08de6b Initial load
duke
parents:
diff changeset
   698
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   699
     * Sets the start of a paren level
7f561c08de6b Initial load
duke
parents:
diff changeset
   700
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   701
     * @param which Which paren level
7f561c08de6b Initial load
duke
parents:
diff changeset
   702
     * @param i Index in input array
7f561c08de6b Initial load
duke
parents:
diff changeset
   703
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   704
    protected final void setParenStart(int which, int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
   705
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   706
        if (which < parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   707
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   708
            switch (which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   709
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   710
                case 0:
7f561c08de6b Initial load
duke
parents:
diff changeset
   711
                    start0 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   712
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   713
7f561c08de6b Initial load
duke
parents:
diff changeset
   714
                case 1:
7f561c08de6b Initial load
duke
parents:
diff changeset
   715
                    start1 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   716
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   717
7f561c08de6b Initial load
duke
parents:
diff changeset
   718
                case 2:
7f561c08de6b Initial load
duke
parents:
diff changeset
   719
                    start2 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   720
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   721
7f561c08de6b Initial load
duke
parents:
diff changeset
   722
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   723
                    if (startn == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   724
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   725
                        allocParens();
7f561c08de6b Initial load
duke
parents:
diff changeset
   726
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   727
                    startn[which] = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   728
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   729
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   730
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   731
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   732
7f561c08de6b Initial load
duke
parents:
diff changeset
   733
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   734
     * Sets the end of a paren level
7f561c08de6b Initial load
duke
parents:
diff changeset
   735
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   736
     * @param which Which paren level
7f561c08de6b Initial load
duke
parents:
diff changeset
   737
     * @param i Index in input array
7f561c08de6b Initial load
duke
parents:
diff changeset
   738
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   739
    protected final void setParenEnd(int which, int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
   740
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   741
        if (which < parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   742
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   743
            switch (which)
7f561c08de6b Initial load
duke
parents:
diff changeset
   744
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   745
                case 0:
7f561c08de6b Initial load
duke
parents:
diff changeset
   746
                    end0 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   747
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   748
7f561c08de6b Initial load
duke
parents:
diff changeset
   749
                case 1:
7f561c08de6b Initial load
duke
parents:
diff changeset
   750
                    end1 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   751
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   752
7f561c08de6b Initial load
duke
parents:
diff changeset
   753
                case 2:
7f561c08de6b Initial load
duke
parents:
diff changeset
   754
                    end2 = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   755
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   756
7f561c08de6b Initial load
duke
parents:
diff changeset
   757
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
   758
                    if (endn == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
   759
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   760
                        allocParens();
7f561c08de6b Initial load
duke
parents:
diff changeset
   761
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   762
                    endn[which] = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
   763
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   764
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   765
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   766
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   767
7f561c08de6b Initial load
duke
parents:
diff changeset
   768
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   769
     * Throws an Error representing an internal error condition probably resulting
7f561c08de6b Initial load
duke
parents:
diff changeset
   770
     * from a bug in the regular expression compiler (or possibly data corruption).
7f561c08de6b Initial load
duke
parents:
diff changeset
   771
     * In practice, this should be very rare.
7f561c08de6b Initial load
duke
parents:
diff changeset
   772
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   773
     * @param s Error description
7f561c08de6b Initial load
duke
parents:
diff changeset
   774
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   775
    protected void internalError(String s) throws Error
7f561c08de6b Initial load
duke
parents:
diff changeset
   776
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   777
        throw new Error("RE internal error: " + s);
7f561c08de6b Initial load
duke
parents:
diff changeset
   778
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   779
7f561c08de6b Initial load
duke
parents:
diff changeset
   780
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   781
     * Performs lazy allocation of subexpression arrays
7f561c08de6b Initial load
duke
parents:
diff changeset
   782
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   783
    private final void allocParens()
7f561c08de6b Initial load
duke
parents:
diff changeset
   784
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   785
        // Allocate arrays for subexpressions
7f561c08de6b Initial load
duke
parents:
diff changeset
   786
        startn = new int[maxParen];
7f561c08de6b Initial load
duke
parents:
diff changeset
   787
        endn = new int[maxParen];
7f561c08de6b Initial load
duke
parents:
diff changeset
   788
7f561c08de6b Initial load
duke
parents:
diff changeset
   789
        // Set sub-expression pointers to invalid values
7f561c08de6b Initial load
duke
parents:
diff changeset
   790
        for (int i = 0; i < maxParen; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   791
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   792
            startn[i] = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   793
            endn[i] = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   794
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   795
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   796
7f561c08de6b Initial load
duke
parents:
diff changeset
   797
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   798
     * Try to match a string against a subset of nodes in the program
7f561c08de6b Initial load
duke
parents:
diff changeset
   799
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   800
     * @param firstNode Node to start at in program
7f561c08de6b Initial load
duke
parents:
diff changeset
   801
     * @param lastNode  Last valid node (used for matching a subexpression without
7f561c08de6b Initial load
duke
parents:
diff changeset
   802
     *                  matching the rest of the program as well).
7f561c08de6b Initial load
duke
parents:
diff changeset
   803
     * @param idxStart  Starting position in character array
7f561c08de6b Initial load
duke
parents:
diff changeset
   804
     * @return Final input array index if match succeeded.  -1 if not.
7f561c08de6b Initial load
duke
parents:
diff changeset
   805
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   806
    protected int matchNodes(int firstNode, int lastNode, int idxStart)
7f561c08de6b Initial load
duke
parents:
diff changeset
   807
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   808
        // Our current place in the string
7f561c08de6b Initial load
duke
parents:
diff changeset
   809
        int idx = idxStart;
7f561c08de6b Initial load
duke
parents:
diff changeset
   810
7f561c08de6b Initial load
duke
parents:
diff changeset
   811
        // Loop while node is valid
7f561c08de6b Initial load
duke
parents:
diff changeset
   812
        int next, opcode, opdata;
7f561c08de6b Initial load
duke
parents:
diff changeset
   813
        int idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   814
        char[] instruction = program.instruction;
7f561c08de6b Initial load
duke
parents:
diff changeset
   815
        for (int node = firstNode; node < lastNode; )
7f561c08de6b Initial load
duke
parents:
diff changeset
   816
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   817
            opcode = instruction[node + offsetOpcode];
7f561c08de6b Initial load
duke
parents:
diff changeset
   818
            next   = node + (short)instruction[node + offsetNext];
7f561c08de6b Initial load
duke
parents:
diff changeset
   819
            opdata = instruction[node + offsetOpdata];
7f561c08de6b Initial load
duke
parents:
diff changeset
   820
7f561c08de6b Initial load
duke
parents:
diff changeset
   821
            switch (opcode)
7f561c08de6b Initial load
duke
parents:
diff changeset
   822
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   823
                case OP_RELUCTANTMAYBE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   824
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   825
                        int once = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   826
                        do
7f561c08de6b Initial load
duke
parents:
diff changeset
   827
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   828
                            // Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load
duke
parents:
diff changeset
   829
                            if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   830
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   831
                                return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   832
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   833
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   834
                        while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
7f561c08de6b Initial load
duke
parents:
diff changeset
   835
                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   836
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   837
7f561c08de6b Initial load
duke
parents:
diff changeset
   838
                case OP_RELUCTANTPLUS:
7f561c08de6b Initial load
duke
parents:
diff changeset
   839
                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   840
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   841
                        // Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load
duke
parents:
diff changeset
   842
                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   843
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   844
                            return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   845
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   846
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   847
                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   848
7f561c08de6b Initial load
duke
parents:
diff changeset
   849
                case OP_RELUCTANTSTAR:
7f561c08de6b Initial load
duke
parents:
diff changeset
   850
                    do
7f561c08de6b Initial load
duke
parents:
diff changeset
   851
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   852
                        // Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load
duke
parents:
diff changeset
   853
                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   854
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   855
                            return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   856
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   857
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   858
                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
7f561c08de6b Initial load
duke
parents:
diff changeset
   859
                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   860
7f561c08de6b Initial load
duke
parents:
diff changeset
   861
                case OP_OPEN:
7f561c08de6b Initial load
duke
parents:
diff changeset
   862
7f561c08de6b Initial load
duke
parents:
diff changeset
   863
                    // Match subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   864
                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   865
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   866
                        startBackref[opdata] = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   867
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   868
                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   869
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   870
                        // Increase valid paren count
7f561c08de6b Initial load
duke
parents:
diff changeset
   871
                        if ((opdata + 1) > parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   872
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   873
                            parenCount = opdata + 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   874
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   875
7f561c08de6b Initial load
duke
parents:
diff changeset
   876
                        // Don't set paren if already set later on
7f561c08de6b Initial load
duke
parents:
diff changeset
   877
                        if (getParenStart(opdata) == -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   878
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   879
                            setParenStart(opdata, idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   880
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   881
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   882
                    return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   883
7f561c08de6b Initial load
duke
parents:
diff changeset
   884
                case OP_CLOSE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   885
7f561c08de6b Initial load
duke
parents:
diff changeset
   886
                    // Done matching subexpression
7f561c08de6b Initial load
duke
parents:
diff changeset
   887
                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   888
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   889
                        endBackref[opdata] = idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
   890
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   891
                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   892
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   893
                        // Increase valid paren count
7f561c08de6b Initial load
duke
parents:
diff changeset
   894
                        if ((opdata + 1) > parenCount)
7f561c08de6b Initial load
duke
parents:
diff changeset
   895
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   896
                            parenCount = opdata + 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   897
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   898
7f561c08de6b Initial load
duke
parents:
diff changeset
   899
                        // Don't set paren if already set later on
7f561c08de6b Initial load
duke
parents:
diff changeset
   900
                        if (getParenEnd(opdata) == -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   901
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   902
                            setParenEnd(opdata, idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
   903
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   904
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   905
                    return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
   906
7f561c08de6b Initial load
duke
parents:
diff changeset
   907
                case OP_OPEN_CLUSTER:
7f561c08de6b Initial load
duke
parents:
diff changeset
   908
                case OP_CLOSE_CLUSTER:
7f561c08de6b Initial load
duke
parents:
diff changeset
   909
                    // starting or ending the matching of a subexpression which has no backref.
7f561c08de6b Initial load
duke
parents:
diff changeset
   910
                    return matchNodes( next, maxNode, idx );
7f561c08de6b Initial load
duke
parents:
diff changeset
   911
7f561c08de6b Initial load
duke
parents:
diff changeset
   912
                case OP_BACKREF:
7f561c08de6b Initial load
duke
parents:
diff changeset
   913
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   914
                        // Get the start and end of the backref
7f561c08de6b Initial load
duke
parents:
diff changeset
   915
                        int s = startBackref[opdata];
7f561c08de6b Initial load
duke
parents:
diff changeset
   916
                        int e = endBackref[opdata];
7f561c08de6b Initial load
duke
parents:
diff changeset
   917
7f561c08de6b Initial load
duke
parents:
diff changeset
   918
                        // We don't know the backref yet
7f561c08de6b Initial load
duke
parents:
diff changeset
   919
                        if (s == -1 || e == -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   920
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   921
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   922
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   923
7f561c08de6b Initial load
duke
parents:
diff changeset
   924
                        // The backref is empty size
7f561c08de6b Initial load
duke
parents:
diff changeset
   925
                        if (s == e)
7f561c08de6b Initial load
duke
parents:
diff changeset
   926
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   927
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   928
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   929
7f561c08de6b Initial load
duke
parents:
diff changeset
   930
                        // Get the length of the backref
7f561c08de6b Initial load
duke
parents:
diff changeset
   931
                        int l = e - s;
7f561c08de6b Initial load
duke
parents:
diff changeset
   932
7f561c08de6b Initial load
duke
parents:
diff changeset
   933
                        // If there's not enough input left, give up.
7f561c08de6b Initial load
duke
parents:
diff changeset
   934
                        if (search.isEnd(idx + l - 1))
7f561c08de6b Initial load
duke
parents:
diff changeset
   935
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   936
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   937
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   938
7f561c08de6b Initial load
duke
parents:
diff changeset
   939
                        // Case fold the backref?
7f561c08de6b Initial load
duke
parents:
diff changeset
   940
                        final boolean caseFold =
7f561c08de6b Initial load
duke
parents:
diff changeset
   941
                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
   942
                        // Compare backref to input
7f561c08de6b Initial load
duke
parents:
diff changeset
   943
                        for (int i = 0; i < l; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   944
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   945
                            if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   946
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   947
                                return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   948
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   949
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   950
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   951
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   952
7f561c08de6b Initial load
duke
parents:
diff changeset
   953
                case OP_BOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
   954
7f561c08de6b Initial load
duke
parents:
diff changeset
   955
                    // Fail if we're not at the start of the string
7f561c08de6b Initial load
duke
parents:
diff changeset
   956
                    if (idx != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   957
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   958
                        // If we're multiline matching, we could still be at the start of a line
7f561c08de6b Initial load
duke
parents:
diff changeset
   959
                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
7f561c08de6b Initial load
duke
parents:
diff changeset
   960
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   961
                            // If not at start of line, give up
7f561c08de6b Initial load
duke
parents:
diff changeset
   962
                            if (idx <= 0 || !isNewline(idx - 1)) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   963
                                return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   964
                            } else {
7f561c08de6b Initial load
duke
parents:
diff changeset
   965
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   966
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   967
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   968
                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   969
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   970
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   971
7f561c08de6b Initial load
duke
parents:
diff changeset
   972
                case OP_EOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
   973
7f561c08de6b Initial load
duke
parents:
diff changeset
   974
                    // If we're not at the end of string
7f561c08de6b Initial load
duke
parents:
diff changeset
   975
                    if (!search.isEnd(0) && !search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
   976
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   977
                        // If we're multi-line matching
7f561c08de6b Initial load
duke
parents:
diff changeset
   978
                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
7f561c08de6b Initial load
duke
parents:
diff changeset
   979
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   980
                            // Give up if we're not at the end of a line
7f561c08de6b Initial load
duke
parents:
diff changeset
   981
                            if (!isNewline(idx)) {
7f561c08de6b Initial load
duke
parents:
diff changeset
   982
                                return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   983
                            } else {
7f561c08de6b Initial load
duke
parents:
diff changeset
   984
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   985
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   986
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   987
                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
   988
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   989
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
   990
7f561c08de6b Initial load
duke
parents:
diff changeset
   991
                case OP_ESCAPE:
7f561c08de6b Initial load
duke
parents:
diff changeset
   992
7f561c08de6b Initial load
duke
parents:
diff changeset
   993
                    // Which escape?
7f561c08de6b Initial load
duke
parents:
diff changeset
   994
                    switch (opdata)
7f561c08de6b Initial load
duke
parents:
diff changeset
   995
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   996
                        // Word boundary match
7f561c08de6b Initial load
duke
parents:
diff changeset
   997
                        case E_NBOUND:
7f561c08de6b Initial load
duke
parents:
diff changeset
   998
                        case E_BOUND:
7f561c08de6b Initial load
duke
parents:
diff changeset
   999
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1000
                                char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1001
                                char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1002
                                if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1003
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1004
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1005
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1006
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1007
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1008
7f561c08de6b Initial load
duke
parents:
diff changeset
  1009
                        // Alpha-numeric, digit and whitespace escapes, plus their negated forms
7f561c08de6b Initial load
duke
parents:
diff changeset
  1010
                        case E_ALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1011
                        case E_NALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1012
                        case E_DIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1013
                        case E_NDIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1014
                        case E_SPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1015
                        case E_NSPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1016
7f561c08de6b Initial load
duke
parents:
diff changeset
  1017
                            // Give up if out of input
7f561c08de6b Initial load
duke
parents:
diff changeset
  1018
                            if (search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1019
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1020
                                return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1021
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1022
7f561c08de6b Initial load
duke
parents:
diff changeset
  1023
                            char c = search.charAt(idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1024
7f561c08de6b Initial load
duke
parents:
diff changeset
  1025
                            // Switch on escape
7f561c08de6b Initial load
duke
parents:
diff changeset
  1026
                            switch (opdata)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1027
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1028
                                case E_ALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1029
                                case E_NALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1030
                                    if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1031
                                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1032
                                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1033
                                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1034
                                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1035
7f561c08de6b Initial load
duke
parents:
diff changeset
  1036
                                case E_DIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1037
                                case E_NDIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1038
                                    if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1039
                                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1040
                                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1041
                                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1042
                                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1043
7f561c08de6b Initial load
duke
parents:
diff changeset
  1044
                                case E_SPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1045
                                case E_NSPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1046
                                    if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1047
                                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1048
                                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1049
                                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1050
                                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1051
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1052
                            idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1053
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1054
7f561c08de6b Initial load
duke
parents:
diff changeset
  1055
                        default:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1056
                            internalError("Unrecognized escape '" + opdata + "'");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1057
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1058
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1059
7f561c08de6b Initial load
duke
parents:
diff changeset
  1060
                case OP_ANY:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1061
7f561c08de6b Initial load
duke
parents:
diff changeset
  1062
                    if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1063
                        // Match anything
7f561c08de6b Initial load
duke
parents:
diff changeset
  1064
                        if (search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1065
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1066
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1067
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1068
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1069
                    else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1070
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1071
                        // Match anything but a newline
7f561c08de6b Initial load
duke
parents:
diff changeset
  1072
                        if (search.isEnd(idx) || isNewline(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1073
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1074
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1075
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1076
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1077
                    idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1078
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1079
7f561c08de6b Initial load
duke
parents:
diff changeset
  1080
                case OP_ATOM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1081
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1082
                        // Match an atom value
7f561c08de6b Initial load
duke
parents:
diff changeset
  1083
                        if (search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1084
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1085
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1086
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1087
7f561c08de6b Initial load
duke
parents:
diff changeset
  1088
                        // Get length of atom and starting index
7f561c08de6b Initial load
duke
parents:
diff changeset
  1089
                        int lenAtom = opdata;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1090
                        int startAtom = node + nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1091
7f561c08de6b Initial load
duke
parents:
diff changeset
  1092
                        // Give up if not enough input remains to have a match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1093
                        if (search.isEnd(lenAtom + idx - 1))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1094
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1095
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1096
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1097
7f561c08de6b Initial load
duke
parents:
diff changeset
  1098
                        // Match atom differently depending on casefolding flag
7f561c08de6b Initial load
duke
parents:
diff changeset
  1099
                        final boolean caseFold =
7f561c08de6b Initial load
duke
parents:
diff changeset
  1100
                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1101
7f561c08de6b Initial load
duke
parents:
diff changeset
  1102
                        for (int i = 0; i < lenAtom; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1103
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1104
                            if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1105
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1106
                                return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1107
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1108
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1109
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1110
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1111
7f561c08de6b Initial load
duke
parents:
diff changeset
  1112
                case OP_POSIXCLASS:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1113
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1114
                        // Out of input?
7f561c08de6b Initial load
duke
parents:
diff changeset
  1115
                        if (search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1116
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1117
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1118
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1119
7f561c08de6b Initial load
duke
parents:
diff changeset
  1120
                        switch (opdata)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1121
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1122
                            case POSIX_CLASS_ALNUM:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1123
                                if (!Character.isLetterOrDigit(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1124
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1125
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1126
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1127
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1128
7f561c08de6b Initial load
duke
parents:
diff changeset
  1129
                            case POSIX_CLASS_ALPHA:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1130
                                if (!Character.isLetter(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1131
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1132
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1133
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1134
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1135
7f561c08de6b Initial load
duke
parents:
diff changeset
  1136
                            case POSIX_CLASS_DIGIT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1137
                                if (!Character.isDigit(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1138
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1139
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1140
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1141
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1142
7f561c08de6b Initial load
duke
parents:
diff changeset
  1143
                            case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
7f561c08de6b Initial load
duke
parents:
diff changeset
  1144
                                if (!Character.isSpaceChar(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1145
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1146
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1147
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1148
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1149
7f561c08de6b Initial load
duke
parents:
diff changeset
  1150
                            case POSIX_CLASS_SPACE:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1151
                                if (!Character.isWhitespace(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1152
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1153
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1154
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1155
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1156
7f561c08de6b Initial load
duke
parents:
diff changeset
  1157
                            case POSIX_CLASS_CNTRL:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1158
                                if (Character.getType(search.charAt(idx)) != Character.CONTROL)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1159
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1160
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1161
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1162
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1163
7f561c08de6b Initial load
duke
parents:
diff changeset
  1164
                            case POSIX_CLASS_GRAPH: // JWL - bugbug???
7f561c08de6b Initial load
duke
parents:
diff changeset
  1165
                                switch (Character.getType(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1166
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1167
                                    case Character.MATH_SYMBOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1168
                                    case Character.CURRENCY_SYMBOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1169
                                    case Character.MODIFIER_SYMBOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1170
                                    case Character.OTHER_SYMBOL:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1171
                                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1172
7f561c08de6b Initial load
duke
parents:
diff changeset
  1173
                                    default:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1174
                                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1175
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1176
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1177
7f561c08de6b Initial load
duke
parents:
diff changeset
  1178
                            case POSIX_CLASS_LOWER:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1179
                                if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1180
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1181
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1182
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1183
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1184
7f561c08de6b Initial load
duke
parents:
diff changeset
  1185
                            case POSIX_CLASS_UPPER:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1186
                                if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1187
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1188
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1189
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1190
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1191
7f561c08de6b Initial load
duke
parents:
diff changeset
  1192
                            case POSIX_CLASS_PRINT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1193
                                if (Character.getType(search.charAt(idx)) == Character.CONTROL)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1194
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1195
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1196
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1197
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1198
7f561c08de6b Initial load
duke
parents:
diff changeset
  1199
                            case POSIX_CLASS_PUNCT:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1200
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1201
                                int type = Character.getType(search.charAt(idx));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1202
                                switch(type)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1203
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1204
                                    case Character.DASH_PUNCTUATION:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1205
                                    case Character.START_PUNCTUATION:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1206
                                    case Character.END_PUNCTUATION:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1207
                                    case Character.CONNECTOR_PUNCTUATION:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1208
                                    case Character.OTHER_PUNCTUATION:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1209
                                        break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1210
7f561c08de6b Initial load
duke
parents:
diff changeset
  1211
                                    default:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1212
                                        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1213
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1214
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1215
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1216
7f561c08de6b Initial load
duke
parents:
diff changeset
  1217
                            case POSIX_CLASS_XDIGIT: // JWL - bugbug??
7f561c08de6b Initial load
duke
parents:
diff changeset
  1218
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1219
                                boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') ||
7f561c08de6b Initial load
duke
parents:
diff changeset
  1220
                                                    (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') ||
7f561c08de6b Initial load
duke
parents:
diff changeset
  1221
                                                    (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1222
                                if (!isXDigit)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1223
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1224
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1225
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1226
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1227
                            break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1228
7f561c08de6b Initial load
duke
parents:
diff changeset
  1229
                            case POSIX_CLASS_JSTART:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1230
                                if (!Character.isJavaIdentifierStart(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1231
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1232
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1233
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1234
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1235
7f561c08de6b Initial load
duke
parents:
diff changeset
  1236
                            case POSIX_CLASS_JPART:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1237
                                if (!Character.isJavaIdentifierPart(search.charAt(idx)))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1238
                                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1239
                                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1240
                                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1241
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1242
7f561c08de6b Initial load
duke
parents:
diff changeset
  1243
                            default:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1244
                                internalError("Bad posix class");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1245
                                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1246
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1247
7f561c08de6b Initial load
duke
parents:
diff changeset
  1248
                        // Matched.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1249
                        idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1250
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1251
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1252
7f561c08de6b Initial load
duke
parents:
diff changeset
  1253
                case OP_ANYOF:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1254
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1255
                        // Out of input?
7f561c08de6b Initial load
duke
parents:
diff changeset
  1256
                        if (search.isEnd(idx))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1257
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1258
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1259
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1260
7f561c08de6b Initial load
duke
parents:
diff changeset
  1261
                        // Get character to match against character class and maybe casefold
7f561c08de6b Initial load
duke
parents:
diff changeset
  1262
                        char c = search.charAt(idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1263
                        boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1264
                        // Loop through character class checking our match character
7f561c08de6b Initial load
duke
parents:
diff changeset
  1265
                        int idxRange = node + nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1266
                        int idxEnd = idxRange + (opdata * 2);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1267
                        boolean match = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1268
                        for (int i = idxRange; !match && i < idxEnd; )
7f561c08de6b Initial load
duke
parents:
diff changeset
  1269
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1270
                            // Get start, end and match characters
7f561c08de6b Initial load
duke
parents:
diff changeset
  1271
                            char s = instruction[i++];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1272
                            char e = instruction[i++];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1273
7f561c08de6b Initial load
duke
parents:
diff changeset
  1274
                            match = ((compareChars(c, s, caseFold) >= 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1275
                                     && (compareChars(c, e, caseFold) <= 0));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1276
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1277
7f561c08de6b Initial load
duke
parents:
diff changeset
  1278
                        // Fail if we didn't match the character class
7f561c08de6b Initial load
duke
parents:
diff changeset
  1279
                        if (!match)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1280
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1281
                            return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1282
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1283
                        idx++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1284
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1285
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1286
7f561c08de6b Initial load
duke
parents:
diff changeset
  1287
                case OP_BRANCH:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1288
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1289
                    // Check for choices
7f561c08de6b Initial load
duke
parents:
diff changeset
  1290
                    if (instruction[next + offsetOpcode] != OP_BRANCH)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1291
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1292
                        // If there aren't any other choices, just evaluate this branch.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1293
                        node += nodeSize;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1294
                        continue;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1295
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1296
7f561c08de6b Initial load
duke
parents:
diff changeset
  1297
                    // Try all available branches
7f561c08de6b Initial load
duke
parents:
diff changeset
  1298
                    short nextBranch;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1299
                    do
7f561c08de6b Initial load
duke
parents:
diff changeset
  1300
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1301
                        // Try matching the branch against the string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1302
                        if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1303
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1304
                            return idxNew;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1305
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1306
7f561c08de6b Initial load
duke
parents:
diff changeset
  1307
                        // Go to next branch (if any)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1308
                        nextBranch = (short)instruction[node + offsetNext];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1309
                        node += nextBranch;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1310
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1311
                    while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1312
7f561c08de6b Initial load
duke
parents:
diff changeset
  1313
                    // Failed to match any branch!
7f561c08de6b Initial load
duke
parents:
diff changeset
  1314
                    return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1315
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1316
7f561c08de6b Initial load
duke
parents:
diff changeset
  1317
                case OP_NOTHING:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1318
                case OP_GOTO:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1319
7f561c08de6b Initial load
duke
parents:
diff changeset
  1320
                    // Just advance to the next node without doing anything
7f561c08de6b Initial load
duke
parents:
diff changeset
  1321
                    break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1322
7f561c08de6b Initial load
duke
parents:
diff changeset
  1323
                case OP_END:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1324
7f561c08de6b Initial load
duke
parents:
diff changeset
  1325
                    // Match has succeeded!
7f561c08de6b Initial load
duke
parents:
diff changeset
  1326
                    setParenEnd(0, idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1327
                    return idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1328
7f561c08de6b Initial load
duke
parents:
diff changeset
  1329
                default:
7f561c08de6b Initial load
duke
parents:
diff changeset
  1330
7f561c08de6b Initial load
duke
parents:
diff changeset
  1331
                    // Corrupt program
7f561c08de6b Initial load
duke
parents:
diff changeset
  1332
                    internalError("Invalid opcode '" + opcode + "'");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1333
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1334
7f561c08de6b Initial load
duke
parents:
diff changeset
  1335
            // Advance to the next node in the program
7f561c08de6b Initial load
duke
parents:
diff changeset
  1336
            node = next;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1337
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1338
7f561c08de6b Initial load
duke
parents:
diff changeset
  1339
        // We "should" never end up here
7f561c08de6b Initial load
duke
parents:
diff changeset
  1340
        internalError("Corrupt program");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1341
        return -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1342
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1343
7f561c08de6b Initial load
duke
parents:
diff changeset
  1344
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1345
     * Match the current regular expression program against the current
7f561c08de6b Initial load
duke
parents:
diff changeset
  1346
     * input string, starting at index i of the input string.  This method
7f561c08de6b Initial load
duke
parents:
diff changeset
  1347
     * is only meant for internal use.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1348
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1349
     * @param i The input string index to start matching at
7f561c08de6b Initial load
duke
parents:
diff changeset
  1350
     * @return True if the input matched the expression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1351
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1352
    protected boolean matchAt(int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1353
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1354
        // Initialize start pointer, paren cache and paren count
7f561c08de6b Initial load
duke
parents:
diff changeset
  1355
        start0 = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1356
        end0   = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1357
        start1 = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1358
        end1   = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1359
        start2 = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1360
        end2   = -1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1361
        startn = null;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1362
        endn   = null;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1363
        parenCount = 1;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1364
        setParenStart(0, i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1365
7f561c08de6b Initial load
duke
parents:
diff changeset
  1366
        // Allocate backref arrays (unless optimizations indicate otherwise)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1367
        if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1368
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1369
            startBackref = new int[maxParen];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1370
            endBackref = new int[maxParen];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1371
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1372
7f561c08de6b Initial load
duke
parents:
diff changeset
  1373
        // Match against string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1374
        int idx;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1375
        if ((idx = matchNodes(0, maxNode, i)) != -1)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1376
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1377
            setParenEnd(0, idx);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1378
            return true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1379
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1380
7f561c08de6b Initial load
duke
parents:
diff changeset
  1381
        // Didn't match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1382
        parenCount = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1383
        return false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1384
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1385
7f561c08de6b Initial load
duke
parents:
diff changeset
  1386
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1387
     * Matches the current regular expression program against a character array,
7f561c08de6b Initial load
duke
parents:
diff changeset
  1388
     * starting at a given index.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1389
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1390
     * @param search String to match against
7f561c08de6b Initial load
duke
parents:
diff changeset
  1391
     * @param i Index to start searching at
7f561c08de6b Initial load
duke
parents:
diff changeset
  1392
     * @return True if string matched
7f561c08de6b Initial load
duke
parents:
diff changeset
  1393
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1394
    public boolean match(String search, int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1395
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1396
        return match(new StringCharacterIterator(search), i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1397
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1398
7f561c08de6b Initial load
duke
parents:
diff changeset
  1399
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1400
     * Matches the current regular expression program against a character array,
7f561c08de6b Initial load
duke
parents:
diff changeset
  1401
     * starting at a given index.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1402
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1403
     * @param search String to match against
7f561c08de6b Initial load
duke
parents:
diff changeset
  1404
     * @param i Index to start searching at
7f561c08de6b Initial load
duke
parents:
diff changeset
  1405
     * @return True if string matched
7f561c08de6b Initial load
duke
parents:
diff changeset
  1406
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1407
    public boolean match(CharacterIterator search, int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1408
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1409
        // There is no compiled program to search with!
7f561c08de6b Initial load
duke
parents:
diff changeset
  1410
        if (program == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1411
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1412
            // This should be uncommon enough to be an error case rather
7f561c08de6b Initial load
duke
parents:
diff changeset
  1413
            // than an exception (which would have to be handled everywhere)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1414
            internalError("No RE program to run!");
7f561c08de6b Initial load
duke
parents:
diff changeset
  1415
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1416
7f561c08de6b Initial load
duke
parents:
diff changeset
  1417
        // Save string to search
7f561c08de6b Initial load
duke
parents:
diff changeset
  1418
        this.search = search;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1419
7f561c08de6b Initial load
duke
parents:
diff changeset
  1420
        // Can we optimize the search by looking for a prefix string?
7f561c08de6b Initial load
duke
parents:
diff changeset
  1421
        if (program.prefix == null)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1422
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1423
            // Unprefixed matching must try for a match at each character
7f561c08de6b Initial load
duke
parents:
diff changeset
  1424
            for ( ;! search.isEnd(i - 1); i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1425
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1426
                // Try a match at index i
7f561c08de6b Initial load
duke
parents:
diff changeset
  1427
                if (matchAt(i))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1428
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1429
                    return true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1430
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1431
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1432
            return false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1433
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1434
        else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1435
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1436
            // Prefix-anchored matching is possible
7f561c08de6b Initial load
duke
parents:
diff changeset
  1437
            boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1438
            char[] prefix = program.prefix;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1439
            for ( ; !search.isEnd(i + prefix.length - 1); i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1440
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1441
                int j = i;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1442
                int k = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1443
7f561c08de6b Initial load
duke
parents:
diff changeset
  1444
                boolean match;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1445
                do {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1446
                    // If there's a mismatch of any character in the prefix, give up
7f561c08de6b Initial load
duke
parents:
diff changeset
  1447
                    match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1448
                } while (match && k < prefix.length);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1449
7f561c08de6b Initial load
duke
parents:
diff changeset
  1450
                // See if the whole prefix string matched
7f561c08de6b Initial load
duke
parents:
diff changeset
  1451
                if (k == prefix.length)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1452
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1453
                    // We matched the full prefix at firstChar, so try it
7f561c08de6b Initial load
duke
parents:
diff changeset
  1454
                    if (matchAt(i))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1455
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1456
                        return true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1457
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1458
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1459
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1460
            return false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1461
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1462
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1463
7f561c08de6b Initial load
duke
parents:
diff changeset
  1464
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1465
     * Matches the current regular expression program against a String.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1466
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1467
     * @param search String to match against
7f561c08de6b Initial load
duke
parents:
diff changeset
  1468
     * @return True if string matched
7f561c08de6b Initial load
duke
parents:
diff changeset
  1469
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1470
    public boolean match(String search)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1471
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1472
        return match(search, 0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1473
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1474
7f561c08de6b Initial load
duke
parents:
diff changeset
  1475
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1476
     * Splits a string into an array of strings on regular expression boundaries.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1477
     * This function works the same way as the Perl function of the same name.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1478
     * Given a regular expression of "[ab]+" and a string to split of
7f561c08de6b Initial load
duke
parents:
diff changeset
  1479
     * "xyzzyababbayyzabbbab123", the result would be the array of Strings
7f561c08de6b Initial load
duke
parents:
diff changeset
  1480
     * "[xyzzy, yyz, 123]".
7f561c08de6b Initial load
duke
parents:
diff changeset
  1481
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1482
     * <p>Please note that the first string in the resulting array may be an empty
7f561c08de6b Initial load
duke
parents:
diff changeset
  1483
     * string. This happens when the very first character of input string is
7f561c08de6b Initial load
duke
parents:
diff changeset
  1484
     * matched by the pattern.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1485
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1486
     * @param s String to split on this regular exression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1487
     * @return Array of strings
7f561c08de6b Initial load
duke
parents:
diff changeset
  1488
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1489
    public String[] split(String s)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1490
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1491
        // Create new vector
7f561c08de6b Initial load
duke
parents:
diff changeset
  1492
        Vector v = new Vector();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1493
7f561c08de6b Initial load
duke
parents:
diff changeset
  1494
        // Start at position 0 and search the whole string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1495
        int pos = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1496
        int len = s.length();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1497
7f561c08de6b Initial load
duke
parents:
diff changeset
  1498
        // Try a match at each position
7f561c08de6b Initial load
duke
parents:
diff changeset
  1499
        while (pos < len && match(s, pos))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1500
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1501
            // Get start of match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1502
            int start = getParenStart(0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1503
7f561c08de6b Initial load
duke
parents:
diff changeset
  1504
            // Get end of match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1505
            int newpos = getParenEnd(0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1506
7f561c08de6b Initial load
duke
parents:
diff changeset
  1507
            // Check if no progress was made
7f561c08de6b Initial load
duke
parents:
diff changeset
  1508
            if (newpos == pos)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1509
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1510
                v.addElement(s.substring(pos, start + 1));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1511
                newpos++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1512
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1513
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1514
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1515
                v.addElement(s.substring(pos, start));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1516
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1517
7f561c08de6b Initial load
duke
parents:
diff changeset
  1518
            // Move to new position
7f561c08de6b Initial load
duke
parents:
diff changeset
  1519
            pos = newpos;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1520
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1521
7f561c08de6b Initial load
duke
parents:
diff changeset
  1522
        // Push remainder if it's not empty
7f561c08de6b Initial load
duke
parents:
diff changeset
  1523
        String remainder = s.substring(pos);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1524
        if (remainder.length() != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1525
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1526
            v.addElement(remainder);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1527
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1528
7f561c08de6b Initial load
duke
parents:
diff changeset
  1529
        // Return vector as an array of strings
7f561c08de6b Initial load
duke
parents:
diff changeset
  1530
        String[] ret = new String[v.size()];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1531
        v.copyInto(ret);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1532
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1533
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1534
7f561c08de6b Initial load
duke
parents:
diff changeset
  1535
    /**
     * Flag value (no bits set) that indicates that subst should replace all
     * occurrences of this regular expression.  This is the value passed by the
     * two-argument form of subst.
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1539
    public static final int REPLACE_ALL            = 0x0000;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1540
7f561c08de6b Initial load
duke
parents:
diff changeset
  1541
    /**
     * Flag bit that indicates that subst should only replace the first occurrence
     * of this regular expression.
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1545
    public static final int REPLACE_FIRSTONLY      = 0x0001;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1546
7f561c08de6b Initial load
duke
parents:
diff changeset
  1547
    /**
     * Flag bit that indicates that subst should process backreferences
     * in the substitution string.
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1550
    public static final int REPLACE_BACKREFERENCES = 0x0002;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1551
7f561c08de6b Initial load
duke
parents:
diff changeset
  1552
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1553
     * Substitutes a string for this regular expression in another string.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1554
     * This method works like the Perl function of the same name.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1555
     * Given a regular expression of "a*b", a String to substituteIn of
7f561c08de6b Initial load
duke
parents:
diff changeset
  1556
     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
7f561c08de6b Initial load
duke
parents:
diff changeset
  1557
     * resulting String returned by subst would be "-foo-garply-wacky-".
7f561c08de6b Initial load
duke
parents:
diff changeset
  1558
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1559
     * @param substituteIn String to substitute within
7f561c08de6b Initial load
duke
parents:
diff changeset
  1560
     * @param substitution String to substitute for all matches of this regular expression.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1561
     * @return The string substituteIn with zero or more occurrences of the current
7f561c08de6b Initial load
duke
parents:
diff changeset
  1562
     * regular expression replaced with the substitution String (if this regular
7f561c08de6b Initial load
duke
parents:
diff changeset
  1563
     * expression object doesn't match at any position, the original String is returned
7f561c08de6b Initial load
duke
parents:
diff changeset
  1564
     * unchanged).
7f561c08de6b Initial load
duke
parents:
diff changeset
  1565
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1566
    public String subst(String substituteIn, String substitution)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1567
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1568
        return subst(substituteIn, substitution, REPLACE_ALL);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1569
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1570
7f561c08de6b Initial load
duke
parents:
diff changeset
  1571
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1572
     * Substitutes a string for this regular expression in another string.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1573
     * This method works like the Perl function of the same name.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1574
     * Given a regular expression of "a*b", a String to substituteIn of
7f561c08de6b Initial load
duke
parents:
diff changeset
  1575
     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
7f561c08de6b Initial load
duke
parents:
diff changeset
  1576
     * resulting String returned by subst would be "-foo-garply-wacky-".
7f561c08de6b Initial load
duke
parents:
diff changeset
  1577
     * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
  1578
     * It is also possible to reference the contents of a parenthesized expression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1579
     * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
7f561c08de6b Initial load
duke
parents:
diff changeset
  1580
     * a String to substituteIn of "visit us: http://www.apache.org!" and the
7f561c08de6b Initial load
duke
parents:
diff changeset
  1581
     * substitution String "&lt;a href=\"$0\"&gt;$0&lt;/a&gt;", the resulting String
7f561c08de6b Initial load
duke
parents:
diff changeset
  1582
     * returned by subst would be
7f561c08de6b Initial load
duke
parents:
diff changeset
  1583
     * "visit us: &lt;a href=\"http://www.apache.org\"&gt;http://www.apache.org&lt;/a&gt;!".
7f561c08de6b Initial load
duke
parents:
diff changeset
  1584
     * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
  1585
     * <i>Note:</i> $0 represents the whole match.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1586
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1587
     * @param substituteIn String to substitute within
7f561c08de6b Initial load
duke
parents:
diff changeset
  1588
     * @param substitution String to substitute for matches of this regular expression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1589
     * @param flags One or more bitwise flags from REPLACE_*.  If the REPLACE_FIRSTONLY
7f561c08de6b Initial load
duke
parents:
diff changeset
  1590
     * flag bit is set, only the first occurrence of this regular expression is replaced.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1591
     * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
7f561c08de6b Initial load
duke
parents:
diff changeset
  1592
     * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
7f561c08de6b Initial load
duke
parents:
diff changeset
  1593
     * be processed.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1594
     * @return The string substituteIn with zero or more occurrences of the current
7f561c08de6b Initial load
duke
parents:
diff changeset
  1595
     * regular expression replaced with the substitution String (if this regular
7f561c08de6b Initial load
duke
parents:
diff changeset
  1596
     * expression object doesn't match at any position, the original String is returned
7f561c08de6b Initial load
duke
parents:
diff changeset
  1597
     * unchanged).
7f561c08de6b Initial load
duke
parents:
diff changeset
  1598
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1599
    public String subst(String substituteIn, String substitution, int flags)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1600
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1601
        // String to return
7f561c08de6b Initial load
duke
parents:
diff changeset
  1602
        StringBuffer ret = new StringBuffer();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1603
7f561c08de6b Initial load
duke
parents:
diff changeset
  1604
        // Start at position 0 and search the whole string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1605
        int pos = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1606
        int len = substituteIn.length();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1607
7f561c08de6b Initial load
duke
parents:
diff changeset
  1608
        // Try a match at each position
7f561c08de6b Initial load
duke
parents:
diff changeset
  1609
        while (pos < len && match(substituteIn, pos))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1610
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1611
            // Append string before match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1612
            ret.append(substituteIn.substring(pos, getParenStart(0)));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1613
7f561c08de6b Initial load
duke
parents:
diff changeset
  1614
            if ((flags & REPLACE_BACKREFERENCES) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1615
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1616
                // Process backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
  1617
                int lCurrentPosition = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1618
                int lLastPosition = -2;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1619
                int lLength = substitution.length();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1620
                boolean bAddedPrefix = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1621
7f561c08de6b Initial load
duke
parents:
diff changeset
  1622
                while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1623
                {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1624
                    if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1625
                        && lCurrentPosition+1 < lLength)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1626
                    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1627
                        char c = substitution.charAt(lCurrentPosition + 1);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1628
                        if (c >= '0' && c <= '9')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1629
                        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1630
                            if (bAddedPrefix == false)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1631
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1632
                                // Append everything between the beginning of the
7f561c08de6b Initial load
duke
parents:
diff changeset
  1633
                                // substitution string and the current $ sign
7f561c08de6b Initial load
duke
parents:
diff changeset
  1634
                                ret.append(substitution.substring(0, lCurrentPosition));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1635
                                bAddedPrefix = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1636
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1637
                            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1638
                            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1639
                                // Append everything between the last and the current $ sign
7f561c08de6b Initial load
duke
parents:
diff changeset
  1640
                                ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1641
                            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1642
7f561c08de6b Initial load
duke
parents:
diff changeset
  1643
                            // Append the parenthesized expression
7f561c08de6b Initial load
duke
parents:
diff changeset
  1644
                            // Note: if a parenthesized expression of the requested
7f561c08de6b Initial load
duke
parents:
diff changeset
  1645
                            // index is not available "null" is added to the string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1646
                            ret.append(getParen(c - '0'));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1647
                            lLastPosition = lCurrentPosition;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1648
                        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1649
                    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1650
7f561c08de6b Initial load
duke
parents:
diff changeset
  1651
                    // Move forward, skipping past match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1652
                    lCurrentPosition++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1653
                }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1654
7f561c08de6b Initial load
duke
parents:
diff changeset
  1655
                // Append everything after the last $ sign
7f561c08de6b Initial load
duke
parents:
diff changeset
  1656
                ret.append(substitution.substring(lLastPosition + 2, lLength));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1657
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1658
            else
7f561c08de6b Initial load
duke
parents:
diff changeset
  1659
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1660
                // Append substitution without processing backreferences
7f561c08de6b Initial load
duke
parents:
diff changeset
  1661
                ret.append(substitution);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1662
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1663
7f561c08de6b Initial load
duke
parents:
diff changeset
  1664
            // Move forward, skipping past match
7f561c08de6b Initial load
duke
parents:
diff changeset
  1665
            int newpos = getParenEnd(0);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1666
7f561c08de6b Initial load
duke
parents:
diff changeset
  1667
            // We always want to make progress!
7f561c08de6b Initial load
duke
parents:
diff changeset
  1668
            if (newpos == pos)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1669
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1670
                newpos++;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1671
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1672
7f561c08de6b Initial load
duke
parents:
diff changeset
  1673
            // Try new position
7f561c08de6b Initial load
duke
parents:
diff changeset
  1674
            pos = newpos;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1675
7f561c08de6b Initial load
duke
parents:
diff changeset
  1676
            // Break out if we're only supposed to replace one occurrence
7f561c08de6b Initial load
duke
parents:
diff changeset
  1677
            if ((flags & REPLACE_FIRSTONLY) != 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1678
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1679
                break;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1680
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1681
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1682
7f561c08de6b Initial load
duke
parents:
diff changeset
  1683
        // If there's remaining input, append it
7f561c08de6b Initial load
duke
parents:
diff changeset
  1684
        if (pos < len)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1685
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1686
            ret.append(substituteIn.substring(pos));
7f561c08de6b Initial load
duke
parents:
diff changeset
  1687
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1688
7f561c08de6b Initial load
duke
parents:
diff changeset
  1689
        // Return string buffer as string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1690
        return ret.toString();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1691
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1692
7f561c08de6b Initial load
duke
parents:
diff changeset
  1693
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1694
     * Returns an array of Strings, whose toString representation matches a regular
7f561c08de6b Initial load
duke
parents:
diff changeset
  1695
     * expression. This method works like the Perl function of the same name.  Given
7f561c08de6b Initial load
duke
parents:
diff changeset
  1696
     * a regular expression of "a*b" and an array of String objects of [foo, aab, zzz,
7f561c08de6b Initial load
duke
parents:
diff changeset
  1697
     * aaaab], the array of Strings returned by grep would be [aab, aaaab].
7f561c08de6b Initial load
duke
parents:
diff changeset
  1698
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1699
     * @param search Array of Objects to search
7f561c08de6b Initial load
duke
parents:
diff changeset
  1700
     * @return Array of Strings whose toString() value matches this regular expression.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1701
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1702
    public String[] grep(Object[] search)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1703
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1704
        // Create new vector to hold return items
7f561c08de6b Initial load
duke
parents:
diff changeset
  1705
        Vector v = new Vector();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1706
7f561c08de6b Initial load
duke
parents:
diff changeset
  1707
        // Traverse array of objects
7f561c08de6b Initial load
duke
parents:
diff changeset
  1708
        for (int i = 0; i < search.length; i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1709
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1710
            // Get next object as a string
7f561c08de6b Initial load
duke
parents:
diff changeset
  1711
            String s = search[i].toString();
7f561c08de6b Initial load
duke
parents:
diff changeset
  1712
7f561c08de6b Initial load
duke
parents:
diff changeset
  1713
            // If it matches this regexp, add it to the list
7f561c08de6b Initial load
duke
parents:
diff changeset
  1714
            if (match(s))
7f561c08de6b Initial load
duke
parents:
diff changeset
  1715
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1716
                v.addElement(s);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1717
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1718
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1719
7f561c08de6b Initial load
duke
parents:
diff changeset
  1720
        // Return vector as an array of strings
7f561c08de6b Initial load
duke
parents:
diff changeset
  1721
        String[] ret = new String[v.size()];
7f561c08de6b Initial load
duke
parents:
diff changeset
  1722
        v.copyInto(ret);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1723
        return ret;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1724
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1725
7f561c08de6b Initial load
duke
parents:
diff changeset
  1726
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1727
     * @return true if character at i-th position in the <code>search</code> string is a newline
7f561c08de6b Initial load
duke
parents:
diff changeset
  1728
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1729
    private boolean isNewline(int i)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1730
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1731
        char nextChar = search.charAt(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1732
7f561c08de6b Initial load
duke
parents:
diff changeset
  1733
        if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
7f561c08de6b Initial load
duke
parents:
diff changeset
  1734
            || nextChar == '\u2028' || nextChar == '\u2029')
7f561c08de6b Initial load
duke
parents:
diff changeset
  1735
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1736
            return true;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1737
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1738
7f561c08de6b Initial load
duke
parents:
diff changeset
  1739
        return false;
7f561c08de6b Initial load
duke
parents:
diff changeset
  1740
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1741
7f561c08de6b Initial load
duke
parents:
diff changeset
  1742
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
  1743
     * Compares two characters.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1744
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
  1745
     * @param c1 first character to compare.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1746
     * @param c2 second character to compare.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1747
     * @param caseIndependent whether comparison is case insensitive or not.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1748
     * @return negative, 0, or positive integer as the first character
7f561c08de6b Initial load
duke
parents:
diff changeset
  1749
     *         less than, equal to, or greater than the second.
7f561c08de6b Initial load
duke
parents:
diff changeset
  1750
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
  1751
    private int compareChars(char c1, char c2, boolean caseIndependent)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1752
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1753
        if (caseIndependent)
7f561c08de6b Initial load
duke
parents:
diff changeset
  1754
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
  1755
            c1 = Character.toLowerCase(c1);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1756
            c2 = Character.toLowerCase(c2);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1757
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1758
        return ((int)c1 - (int)c2);
7f561c08de6b Initial load
duke
parents:
diff changeset
  1759
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
  1760
}