--- a/jaxp/THIRD_PARTY_README Tue May 10 00:24:24 2016 -0700
+++ b/jaxp/THIRD_PARTY_README Tue May 10 16:19:44 2016 -0700
@@ -3387,7 +3387,6 @@
Apache Commons Math 2.2
Apache Derby 10.10.1.2 [included with JDK 8]
Apache Jakarta BCEL 5.2
- Apache Jakarta Regexp 1.4
Apache Santuario XML Security for Java 1.5.4
Apache Xalan-Java 2.7.1
Apache Xerces Java 2.10.0
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/bcel/internal/util/InstructionFinder.java Tue May 10 00:24:24 2016 -0700
+++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/bcel/internal/util/InstructionFinder.java Tue May 10 16:19:44 2016 -0700
@@ -4,64 +4,29 @@
*/
package com.sun.org.apache.bcel.internal.util;
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache BCEL" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache BCEL", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
*/
-import java.util.*;
import com.sun.org.apache.bcel.internal.Constants;
import com.sun.org.apache.bcel.internal.generic.*;
-import com.sun.org.apache.regexp.internal.*;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* InstructionFinder is a tool to search for given instructions patterns,
@@ -231,28 +196,22 @@
if(start == -1)
throw new ClassGenException("Instruction handle " + from +
" not found in instruction list.");
- try {
- RE regex = new RE(search);
- ArrayList matches = new ArrayList();
-
- while(start < il_string.length() && regex.match(il_string, start)) {
- int startExpr = regex.getParenStart(0);
- int endExpr = regex.getParenEnd(0);
- int lenExpr = regex.getParenLength(0);
- InstructionHandle[] match = getMatch(startExpr, lenExpr);
+ Pattern regex = Pattern.compile(search);
+ List<InstructionHandle[]> matches = new ArrayList<>();
+ Matcher matcher = regex.matcher(il_string);
+ while(start < il_string.length() && matcher.find(start)) {
+ int startExpr = matcher.start();
+ int endExpr = matcher.end();
+ int lenExpr = endExpr - startExpr;
+ InstructionHandle[] match = getMatch(startExpr, lenExpr);
- if((constraint == null) || constraint.checkCode(match))
- matches.add(match);
- start = endExpr;
- }
-
- return matches.iterator();
- } catch(RESyntaxException e) {
- System.err.println(e);
+ if((constraint == null) || constraint.checkCode(match))
+ matches.add(match);
+ start = endExpr;
}
- return null;
+ return matches.iterator();
}
/**
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/CharacterArrayCharacterIterator.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-/**
- * Encapsulates char[] as CharacterIterator
- *
- * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
- */
-public final class CharacterArrayCharacterIterator implements CharacterIterator
-{
- /** encapsulated */
- private final char[] src;
- /** offset in the char array */
- private final int off;
- /** used portion of the array */
- private final int len;
-
- /** @param src - encapsulated String */
- public CharacterArrayCharacterIterator(char[] src, int off, int len)
- {
- this.src = src;
- this.off = off;
- this.len = len;
- }
-
- /** @return a substring */
- public String substring(int beginIndex, int endIndex)
- {
- if (endIndex > len) {
- throw new IndexOutOfBoundsException("endIndex=" + endIndex
- + "; sequence size=" + len);
- }
- if (beginIndex < 0 || beginIndex > endIndex) {
- throw new IndexOutOfBoundsException("beginIndex=" + beginIndex
- + "; endIndex=" + endIndex);
- }
- return new String(src, off + beginIndex, endIndex - beginIndex);
- }
-
- /** @return a substring */
- public String substring(int beginIndex)
- {
- return substring(beginIndex, len);
- }
-
- /** @return a character at the specified position. */
- public char charAt(int pos)
- {
- return src[off + pos];
- }
-
- /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
- public boolean isEnd(int pos)
- {
- return (pos >= len);
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/CharacterIterator.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-/**
- * Encapsulates different types of character sources - String, InputStream, ...
- * Defines a set of common methods
- *
- * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
- */
-public interface CharacterIterator
-{
- /** @return a substring */
- String substring(int beginIndex, int endIndex);
-
- /** @return a substring */
- String substring(int beginIndex);
-
- /** @return a character at the specified position. */
- char charAt(int pos);
-
- /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
- boolean isEnd(int pos);
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/RE.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1760 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.Serializable;
-import java.util.Vector;
-
-/**
- * RE is an efficient, lightweight regular expression evaluator/matcher
- * class. Regular expressions are pattern descriptions which enable
- * sophisticated matching of strings. In addition to being able to
- * match a string against a pattern, you can also extract parts of the
- * match. This is especially useful in text parsing! Details on the
- * syntax of regular expression patterns are given below.
- *
- * <p>
- * To compile a regular expression (RE), you can simply construct an RE
- * matcher object from the string specification of the pattern, like this:
- *
- * <pre>
- * RE r = new RE("a*b");
- * </pre>
- *
- * <p>
- * Once you have done this, you can call either of the RE.match methods to
- * perform matching on a String. For example:
- *
- * <pre>
- * boolean matched = r.match("aaaab");
- * </pre>
- *
- * will cause the boolean matched to be set to true because the
- * pattern "a*b" matches the string "aaaab".
- *
- * <p>
- * If you were interested in the <i>number</i> of a's which matched the
- * first part of our example expression, you could change the expression to
- * "(a*)b". Then when you compiled the expression and matched it against
- * something like "xaaaab", you would get results like this:
- *
- * <pre>
- * RE r = new RE("(a*)b"); // Compile expression
- * boolean matched = r.match("xaaaab"); // Match against "xaaaab"
- *
- * String wholeExpr = r.getParen(0); // wholeExpr will be 'aaaab'
- * String insideParens = r.getParen(1); // insideParens will be 'aaaa'
- *
- * int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
- * int endWholeExpr = r.getParenEnd(0); // endWholeExpr will be index 6
- * int lenWholeExpr = r.getParenLength(0); // lenWholeExpr will be 5
- *
- * int startInside = r.getParenStart(1); // startInside will be index 1
- * int endInside = r.getParenEnd(1); // endInside will be index 5
- * int lenInside = r.getParenLength(1); // lenInside will be 4
- * </pre>
- *
- * You can also refer to the contents of a parenthesized expression
- * within a regular expression itself. This is called a
- * 'backreference'. The first backreference in a regular expression is
- * denoted by \1, the second by \2 and so on. So the expression:
- *
- * <pre>
- * ([0-9]+)=\1
- * </pre>
- *
- * will match any string of the form n=n (like 0=0 or 2=2).
- *
- * <p>
- * The full regular expression syntax accepted by RE is described here:
- *
- * <pre>
- *
- * <b><font face=times roman>Characters</font></b>
- *
- * <i>unicodeChar</i> Matches any identical unicode character
- * \ Used to quote a meta-character (like '*')
- * \\ Matches a single '\' character
- * \0nnn Matches a given octal character
- * \xhh Matches a given 8-bit hexadecimal character
- * \\uhhhh Matches a given 16-bit hexadecimal character
- * \t Matches an ASCII tab character
- * \n Matches an ASCII newline character
- * \r Matches an ASCII return character
- * \f Matches an ASCII form feed character
- *
- *
- * <b><font face=times roman>Character Classes</font></b>
- *
- * [abc] Simple character class
- * [a-zA-Z] Character class with ranges
- * [^abc] Negated character class
- * </pre>
- *
- * <b>NOTE:</b> Incomplete ranges will be interpreted as "starts
- * from zero" or "ends with last character".
- * <br>
- * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
- * [-] means "all characters".
- *
- * <pre>
- *
- * <b><font face=times roman>Standard POSIX Character Classes</font></b>
- *
- * [:alnum:] Alphanumeric characters.
- * [:alpha:] Alphabetic characters.
- * [:blank:] Space and tab characters.
- * [:cntrl:] Control characters.
- * [:digit:] Numeric characters.
- * [:graph:] Characters that are printable and are also visible.
- * (A space is printable, but not visible, while an
- * `a' is both.)
- * [:lower:] Lower-case alphabetic characters.
- * [:print:] Printable characters (characters that are not
- * control characters.)
- * [:punct:] Punctuation characters (characters that are not letter,
- * digits, control characters, or space characters).
- * [:space:] Space characters (such as space, tab, and formfeed,
- * to name a few).
- * [:upper:] Upper-case alphabetic characters.
- * [:xdigit:] Characters that are hexadecimal digits.
- *
- *
- * <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
- *
- * [:javastart:] Start of a Java identifier
- * [:javapart:] Part of a Java identifier
- *
- *
- * <b><font face=times roman>Predefined Classes</font></b>
- *
- * . Matches any character other than newline
- * \w Matches a "word" character (alphanumeric plus "_")
- * \W Matches a non-word character
- * \s Matches a whitespace character
- * \S Matches a non-whitespace character
- * \d Matches a digit character
- * \D Matches a non-digit character
- *
- *
- * <b><font face=times roman>Boundary Matchers</font></b>
- *
- * ^ Matches only at the beginning of a line
- * $ Matches only at the end of a line
- * \b Matches only at a word boundary
- * \B Matches only at a non-word boundary
- *
- *
- * <b><font face=times roman>Greedy Closures</font></b>
- *
- * A* Matches A 0 or more times (greedy)
- * A+ Matches A 1 or more times (greedy)
- * A? Matches A 1 or 0 times (greedy)
- * A{n} Matches A exactly n times (greedy)
- * A{n,} Matches A at least n times (greedy)
- * A{n,m} Matches A at least n but not more than m times (greedy)
- *
- *
- * <b><font face=times roman>Reluctant Closures</font></b>
- *
- * A*? Matches A 0 or more times (reluctant)
- * A+? Matches A 1 or more times (reluctant)
- * A?? Matches A 0 or 1 times (reluctant)
- *
- *
- * <b><font face=times roman>Logical Operators</font></b>
- *
- * AB Matches A followed by B
- * A|B Matches either A or B
- * (A) Used for subexpression grouping
- * (?:A) Used for subexpression clustering (just like grouping but
- * no backrefs)
- *
- *
- * <b><font face=times roman>Backreferences</font></b>
- *
- * \1 Backreference to 1st parenthesized subexpression
- * \2 Backreference to 2nd parenthesized subexpression
- * \3 Backreference to 3rd parenthesized subexpression
- * \4 Backreference to 4th parenthesized subexpression
- * \5 Backreference to 5th parenthesized subexpression
- * \6 Backreference to 6th parenthesized subexpression
- * \7 Backreference to 7th parenthesized subexpression
- * \8 Backreference to 8th parenthesized subexpression
- * \9 Backreference to 9th parenthesized subexpression
- * </pre>
- *
- * <p>
- * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
- * that they match as many elements of the string as possible without
- * causing the overall match to fail. If you want a closure to be
- * reluctant (non-greedy), you can simply follow it with a '?'. A
- * reluctant closure will match as few elements of the string as
- * possible when finding matches. {m,n} closures don't currently
- * support reluctancy.
- *
- * <p>
- * <b><font face="times roman">Line terminators</font></b>
- * <br>
- * A line terminator is a one- or two-character sequence that marks
- * the end of a line of the input character sequence. The following
- * are recognized as line terminators:
- * <ul>
- * <li>A newline (line feed) character ('\n'),</li>
- * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
- * <li>A standalone carriage-return character ('\r'),</li>
- * <li>A next-line character ('\u0085'),</li>
- * <li>A line-separator character ('\u2028'), or</li>
- * <li>A paragraph-separator character ('\u2029).</li>
- * </ul>
- *
- * <p>
- * RE runs programs compiled by the RECompiler class. But the RE
- * matcher class does not include the actual regular expression compiler
- * for reasons of efficiency. In fact, if you want to pre-compile one
- * or more regular expressions, the 'recompile' class can be invoked
- * from the command line to produce compiled output like this:
- *
- * <pre>
- * // Pre-compiled regular expression "a*b"
- * char[] re1Instructions =
- * {
- * 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
- * 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
- * 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
- * 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
- * 0x0000,
- * };
- *
- *
- * REProgram re1 = new REProgram(re1Instructions);
- * </pre>
- *
- * You can then construct a regular expression matcher (RE) object from
- * the pre-compiled expression re1 and thus avoid the overhead of
- * compiling the expression at runtime. If you require more dynamic
- * regular expressions, you can construct a single RECompiler object and
- * re-use it to compile each expression. Similarly, you can change the
- * program run by a given matcher object at any time. However, RE and
- * RECompiler are not threadsafe (for efficiency reasons, and because
- * requiring thread safety in this class is deemed to be a rare
- * requirement), so you will need to construct a separate compiler or
- * matcher object for each thread (unless you do thread synchronization
- * yourself). Once expression compiled into the REProgram object, REProgram
- * can be safely shared across multiple threads and RE objects.
- *
- * <br><p><br>
- *
- * <font color="red">
- * <i>ISSUES:</i>
- *
- * <ul>
- * <li>com.weusours.util.re is not currently compatible with all
- * standard POSIX regcomp flags</li>
- * <li>com.weusours.util.re does not support POSIX equivalence classes
- * ([=foo=] syntax) (I18N/locale issue)</li>
- * <li>com.weusours.util.re does not support nested POSIX character
- * classes (definitely should, but not completely trivial)</li>
- * <li>com.weusours.util.re Does not support POSIX character collation
- * concepts ([.foo.] syntax) (I18N/locale issue)</li>
- * <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
- * <li>Should RE support character iterators (for backwards RE matching!)?</li>
- * <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
- * <li>Not *all* possibilities are considered for greediness when backreferences
- * are involved (as POSIX suggests should be the case). The POSIX RE
- * "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
- * of acdacaa where \1 is "a". This is not the case in this RE package,
- * and actually Perl doesn't go to this extent either! Until someone
- * actually complains about this, I'm not sure it's worth "fixing".
- * If it ever is fixed, test #137 in RETest.txt should be updated.</li>
- * </ul>
- *
- * </font>
- *
- * @see recompile
- * @see RECompiler
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @author <a href="mailto:ts@sch-fer.de">Tobias Schäfer</a>
- */
-public class RE implements Serializable
-{
- /**
- * Specifies normal, case-sensitive matching behaviour.
- */
- public static final int MATCH_NORMAL = 0x0000;
-
- /**
- * Flag to indicate that matching should be case-independent (folded)
- */
- public static final int MATCH_CASEINDEPENDENT = 0x0001;
-
- /**
- * Newlines should match as BOL/EOL (^ and $)
- */
- public static final int MATCH_MULTILINE = 0x0002;
-
- /**
- * Consider all input a single body of text - newlines are matched by .
- */
- public static final int MATCH_SINGLELINE = 0x0004;
-
- /************************************************
- * *
- * The format of a node in a program is: *
- * *
- * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
- * *
- * char OPCODE - instruction *
- * char OPDATA - modifying data *
- * char OPNEXT - next node (relative offset) *
- * *
- ************************************************/
-
- // Opcode Char Opdata/Operand Meaning
- // ---------- ---------- --------------- --------------------------------------------------
- static final char OP_END = 'E'; // end of program
- static final char OP_BOL = '^'; // match only if at beginning of line
- static final char OP_EOL = '$'; // match only if at end of line
- static final char OP_ANY = '.'; // match any single character except newline
- static final char OP_ANYOF = '['; // count/ranges match any char in the list of ranges
- static final char OP_BRANCH = '|'; // node match this alternative or the next one
- static final char OP_ATOM = 'A'; // length/string length of string followed by string itself
- static final char OP_STAR = '*'; // node kleene closure
- static final char OP_PLUS = '+'; // node positive closure
- static final char OP_MAYBE = '?'; // node optional closure
- static final char OP_ESCAPE = '\\'; // escape special escape code char class (escape is E_* code)
- static final char OP_OPEN = '('; // number nth opening paren
- static final char OP_OPEN_CLUSTER = '<'; // opening cluster
- static final char OP_CLOSE = ')'; // number nth closing paren
- static final char OP_CLOSE_CLUSTER = '>'; // closing cluster
- static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string
- static final char OP_GOTO = 'G'; // nothing but a (back-)pointer
- static final char OP_NOTHING = 'N'; // match null string such as in '(a|)'
- static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '*' (mnemonic for char is unshifted '*')
- static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+')
- static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?')
- static final char OP_POSIXCLASS = 'P'; // classid one of the posix character classes
-
- // Escape codes
- static final char E_ALNUM = 'w'; // Alphanumeric
- static final char E_NALNUM = 'W'; // Non-alphanumeric
- static final char E_BOUND = 'b'; // Word boundary
- static final char E_NBOUND = 'B'; // Non-word boundary
- static final char E_SPACE = 's'; // Whitespace
- static final char E_NSPACE = 'S'; // Non-whitespace
- static final char E_DIGIT = 'd'; // Digit
- static final char E_NDIGIT = 'D'; // Non-digit
-
- // Posix character classes
- static final char POSIX_CLASS_ALNUM = 'w'; // Alphanumerics
- static final char POSIX_CLASS_ALPHA = 'a'; // Alphabetics
- static final char POSIX_CLASS_BLANK = 'b'; // Blanks
- static final char POSIX_CLASS_CNTRL = 'c'; // Control characters
- static final char POSIX_CLASS_DIGIT = 'd'; // Digits
- static final char POSIX_CLASS_GRAPH = 'g'; // Graphic characters
- static final char POSIX_CLASS_LOWER = 'l'; // Lowercase characters
- static final char POSIX_CLASS_PRINT = 'p'; // Printable characters
- static final char POSIX_CLASS_PUNCT = '!'; // Punctuation
- static final char POSIX_CLASS_SPACE = 's'; // Spaces
- static final char POSIX_CLASS_UPPER = 'u'; // Uppercase characters
- static final char POSIX_CLASS_XDIGIT = 'x'; // Hexadecimal digits
- static final char POSIX_CLASS_JSTART = 'j'; // Java identifier start
- static final char POSIX_CLASS_JPART = 'k'; // Java identifier part
-
- // Limits
- static final int maxNode = 65536; // Maximum number of nodes in a program
- static final int MAX_PAREN = 16; // Number of paren pairs (only 9 can be backrefs)
-
- // Node layout constants
- static final int offsetOpcode = 0; // Opcode offset (first character)
- static final int offsetOpdata = 1; // Opdata offset (second char)
- static final int offsetNext = 2; // Next index offset (third char)
- static final int nodeSize = 3; // Node size (in chars)
-
- // State of current program
- REProgram program; // Compiled regular expression 'program'
- transient CharacterIterator search; // The string being matched against
- int matchFlags; // Match behaviour flags
- int maxParen = MAX_PAREN;
-
- // Parenthesized subexpressions
- transient int parenCount; // Number of subexpressions matched (num open parens + 1)
- transient int start0; // Cache of start[0]
- transient int end0; // Cache of start[0]
- transient int start1; // Cache of start[1]
- transient int end1; // Cache of start[1]
- transient int start2; // Cache of start[2]
- transient int end2; // Cache of start[2]
- transient int[] startn; // Lazy-alloced array of sub-expression starts
- transient int[] endn; // Lazy-alloced array of sub-expression ends
-
- // Backreferences
- transient int[] startBackref; // Lazy-alloced array of backref starts
- transient int[] endBackref; // Lazy-alloced array of backref ends
-
- /**
- * Constructs a regular expression matcher from a String by compiling it
- * using a new instance of RECompiler. If you will be compiling many
- * expressions, you may prefer to use a single RECompiler object instead.
- *
- * @param pattern The regular expression pattern to compile.
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- * @see RECompiler
- * @see recompile
- */
- public RE(String pattern) throws RESyntaxException
- {
- this(pattern, MATCH_NORMAL);
- }
-
- /**
- * Constructs a regular expression matcher from a String by compiling it
- * using a new instance of RECompiler. If you will be compiling many
- * expressions, you may prefer to use a single RECompiler object instead.
- *
- * @param pattern The regular expression pattern to compile.
- * @param matchFlags The matching style
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- * @see RECompiler
- * @see recompile
- */
- public RE(String pattern, int matchFlags) throws RESyntaxException
- {
- this(new RECompiler().compile(pattern));
- setMatchFlags(matchFlags);
- }
-
- /**
- * Construct a matcher for a pre-compiled regular expression from program
- * (bytecode) data. Permits special flags to be passed in to modify matching
- * behaviour.
- *
- * @param program Compiled regular expression program (see RECompiler and/or recompile)
- * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
- *
- * <pre>
- * MATCH_NORMAL // Normal (case-sensitive) matching
- * MATCH_CASEINDEPENDENT // Case folded comparisons
- * MATCH_MULTILINE // Newline matches as BOL/EOL
- * </pre>
- *
- * @see RECompiler
- * @see REProgram
- * @see recompile
- */
- public RE(REProgram program, int matchFlags)
- {
- setProgram(program);
- setMatchFlags(matchFlags);
- }
-
- /**
- * Construct a matcher for a pre-compiled regular expression from program
- * (bytecode) data.
- *
- * @param program Compiled regular expression program
- * @see RECompiler
- * @see recompile
- */
- public RE(REProgram program)
- {
- this(program, MATCH_NORMAL);
- }
-
- /**
- * Constructs a regular expression matcher with no initial program.
- * This is likely to be an uncommon practice, but is still supported.
- */
- public RE()
- {
- this((REProgram)null, MATCH_NORMAL);
- }
-
- /**
- * Converts a 'simplified' regular expression to a full regular expression
- *
- * @param pattern The pattern to convert
- * @return The full regular expression
- */
- public static String simplePatternToFullRegularExpression(String pattern)
- {
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < pattern.length(); i++)
- {
- char c = pattern.charAt(i);
- switch (c)
- {
- case '*':
- buf.append(".*");
- break;
-
- case '.':
- case '[':
- case ']':
- case '\\':
- case '+':
- case '?':
- case '{':
- case '}':
- case '$':
- case '^':
- case '|':
- case '(':
- case ')':
- buf.append('\\');
- default:
- buf.append(c);
- break;
- }
- }
- return buf.toString();
- }
-
- /**
- * Sets match behaviour flags which alter the way RE does matching.
- * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
- *
- * <pre>
- * MATCH_NORMAL // Normal (case-sensitive) matching
- * MATCH_CASEINDEPENDENT // Case folded comparisons
- * MATCH_MULTILINE // Newline matches as BOL/EOL
- * </pre>
- */
- public void setMatchFlags(int matchFlags)
- {
- this.matchFlags = matchFlags;
- }
-
- /**
- * Returns the current match behaviour flags.
- * @return Current match behaviour flags (RE.MATCH_*).
- *
- * <pre>
- * MATCH_NORMAL // Normal (case-sensitive) matching
- * MATCH_CASEINDEPENDENT // Case folded comparisons
- * MATCH_MULTILINE // Newline matches as BOL/EOL
- * </pre>
- *
- * @see #setMatchFlags
- */
- public int getMatchFlags()
- {
- return matchFlags;
- }
-
- /**
- * Sets the current regular expression program used by this matcher object.
- *
- * @param program Regular expression program compiled by RECompiler.
- * @see RECompiler
- * @see REProgram
- * @see recompile
- */
- public void setProgram(REProgram program)
- {
- this.program = program;
- if (program != null && program.maxParens != -1) {
- this.maxParen = program.maxParens;
- } else {
- this.maxParen = MAX_PAREN;
- }
- }
-
- /**
- * Returns the current regular expression program in use by this matcher object.
- *
- * @return Regular expression program
- * @see #setProgram
- */
- public REProgram getProgram()
- {
- return program;
- }
-
- /**
- * Returns the number of parenthesized subexpressions available after a successful match.
- *
- * @return Number of available parenthesized subexpressions
- */
- public int getParenCount()
- {
- return parenCount;
- }
-
- /**
- * Gets the contents of a parenthesized subexpression after a successful match.
- *
- * @param which Nesting level of subexpression
- * @return String
- */
- public String getParen(int which)
- {
- int start;
- if (which < parenCount && (start = getParenStart(which)) >= 0)
- {
- return search.substring(start, getParenEnd(which));
- }
- return null;
- }
-
- /**
- * Returns the start index of a given paren level.
- *
- * @param which Nesting level of subexpression
- * @return String index
- */
- public final int getParenStart(int which)
- {
- if (which < parenCount)
- {
- switch (which)
- {
- case 0:
- return start0;
-
- case 1:
- return start1;
-
- case 2:
- return start2;
-
- default:
- if (startn == null)
- {
- allocParens();
- }
- return startn[which];
- }
- }
- return -1;
- }
-
- /**
- * Returns the end index of a given paren level.
- *
- * @param which Nesting level of subexpression
- * @return String index
- */
- public final int getParenEnd(int which)
- {
- if (which < parenCount)
- {
- switch (which)
- {
- case 0:
- return end0;
-
- case 1:
- return end1;
-
- case 2:
- return end2;
-
- default:
- if (endn == null)
- {
- allocParens();
- }
- return endn[which];
- }
- }
- return -1;
- }
-
- /**
- * Returns the length of a given paren level.
- *
- * @param which Nesting level of subexpression
- * @return Number of characters in the parenthesized subexpression
- */
- public final int getParenLength(int which)
- {
- if (which < parenCount)
- {
- return getParenEnd(which) - getParenStart(which);
- }
- return -1;
- }
-
- /**
- * Sets the start of a paren level
- *
- * @param which Which paren level
- * @param i Index in input array
- */
- protected final void setParenStart(int which, int i)
- {
- if (which < parenCount)
- {
- switch (which)
- {
- case 0:
- start0 = i;
- break;
-
- case 1:
- start1 = i;
- break;
-
- case 2:
- start2 = i;
- break;
-
- default:
- if (startn == null)
- {
- allocParens();
- }
- startn[which] = i;
- break;
- }
- }
- }
-
- /**
- * Sets the end of a paren level
- *
- * @param which Which paren level
- * @param i Index in input array
- */
- protected final void setParenEnd(int which, int i)
- {
- if (which < parenCount)
- {
- switch (which)
- {
- case 0:
- end0 = i;
- break;
-
- case 1:
- end1 = i;
- break;
-
- case 2:
- end2 = i;
- break;
-
- default:
- if (endn == null)
- {
- allocParens();
- }
- endn[which] = i;
- break;
- }
- }
- }
-
- /**
- * Throws an Error representing an internal error condition probably resulting
- * from a bug in the regular expression compiler (or possibly data corruption).
- * In practice, this should be very rare.
- *
- * @param s Error description
- */
- protected void internalError(String s) throws Error
- {
- throw new Error("RE internal error: " + s);
- }
-
- /**
- * Performs lazy allocation of subexpression arrays
- */
- private final void allocParens()
- {
- // Allocate arrays for subexpressions
- startn = new int[maxParen];
- endn = new int[maxParen];
-
- // Set sub-expression pointers to invalid values
- for (int i = 0; i < maxParen; i++)
- {
- startn[i] = -1;
- endn[i] = -1;
- }
- }
-
- /**
- * Try to match a string against a subset of nodes in the program
- *
- * @param firstNode Node to start at in program
- * @param lastNode Last valid node (used for matching a subexpression without
- * matching the rest of the program as well).
- * @param idxStart Starting position in character array
- * @return Final input array index if match succeeded. -1 if not.
- */
- protected int matchNodes(int firstNode, int lastNode, int idxStart)
- {
- // Our current place in the string
- int idx = idxStart;
-
- // Loop while node is valid
- int next, opcode, opdata;
- int idxNew;
- char[] instruction = program.instruction;
- for (int node = firstNode; node < lastNode; )
- {
- opcode = instruction[node + offsetOpcode];
- next = node + (short)instruction[node + offsetNext];
- opdata = instruction[node + offsetOpdata];
-
- switch (opcode)
- {
- case OP_RELUCTANTMAYBE:
- {
- int once = 0;
- do
- {
- // Try to match the rest without using the reluctant subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
- return idxNew;
- }
- }
- while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
- return -1;
- }
-
- case OP_RELUCTANTPLUS:
- while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
- {
- // Try to match the rest without using the reluctant subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
- return idxNew;
- }
- }
- return -1;
-
- case OP_RELUCTANTSTAR:
- do
- {
- // Try to match the rest without using the reluctant subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
- return idxNew;
- }
- }
- while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
- return -1;
-
- case OP_OPEN:
-
- // Match subexpression
- if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
- {
- startBackref[opdata] = idx;
- }
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
- // Increase valid paren count
- if ((opdata + 1) > parenCount)
- {
- parenCount = opdata + 1;
- }
-
- // Don't set paren if already set later on
- if (getParenStart(opdata) == -1)
- {
- setParenStart(opdata, idx);
- }
- }
- return idxNew;
-
- case OP_CLOSE:
-
- // Done matching subexpression
- if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
- {
- endBackref[opdata] = idx;
- }
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
- // Increase valid paren count
- if ((opdata + 1) > parenCount)
- {
- parenCount = opdata + 1;
- }
-
- // Don't set paren if already set later on
- if (getParenEnd(opdata) == -1)
- {
- setParenEnd(opdata, idx);
- }
- }
- return idxNew;
-
- case OP_OPEN_CLUSTER:
- case OP_CLOSE_CLUSTER:
- // starting or ending the matching of a subexpression which has no backref.
- return matchNodes( next, maxNode, idx );
-
- case OP_BACKREF:
- {
- // Get the start and end of the backref
- int s = startBackref[opdata];
- int e = endBackref[opdata];
-
- // We don't know the backref yet
- if (s == -1 || e == -1)
- {
- return -1;
- }
-
- // The backref is empty size
- if (s == e)
- {
- break;
- }
-
- // Get the length of the backref
- int l = e - s;
-
- // If there's not enough input left, give up.
- if (search.isEnd(idx + l - 1))
- {
- return -1;
- }
-
- // Case fold the backref?
- final boolean caseFold =
- ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
- // Compare backref to input
- for (int i = 0; i < l; i++)
- {
- if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
- {
- return -1;
- }
- }
- }
- break;
-
- case OP_BOL:
-
- // Fail if we're not at the start of the string
- if (idx != 0)
- {
- // If we're multiline matching, we could still be at the start of a line
- if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
- {
- // If not at start of line, give up
- if (idx <= 0 || !isNewline(idx - 1)) {
- return -1;
- } else {
- break;
- }
- }
- return -1;
- }
- break;
-
- case OP_EOL:
-
- // If we're not at the end of string
- if (!search.isEnd(0) && !search.isEnd(idx))
- {
- // If we're multi-line matching
- if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
- {
- // Give up if we're not at the end of a line
- if (!isNewline(idx)) {
- return -1;
- } else {
- break;
- }
- }
- return -1;
- }
- break;
-
- case OP_ESCAPE:
-
- // Which escape?
- switch (opdata)
- {
- // Word boundary match
- case E_NBOUND:
- case E_BOUND:
- {
- char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
- char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
- if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
- {
- return -1;
- }
- }
- break;
-
- // Alpha-numeric, digit, space, javaLetter, javaLetterOrDigit
- case E_ALNUM:
- case E_NALNUM:
- case E_DIGIT:
- case E_NDIGIT:
- case E_SPACE:
- case E_NSPACE:
-
- // Give up if out of input
- if (search.isEnd(idx))
- {
- return -1;
- }
-
- char c = search.charAt(idx);
-
- // Switch on escape
- switch (opdata)
- {
- case E_ALNUM:
- case E_NALNUM:
- if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
- {
- return -1;
- }
- break;
-
- case E_DIGIT:
- case E_NDIGIT:
- if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
- {
- return -1;
- }
- break;
-
- case E_SPACE:
- case E_NSPACE:
- if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
- {
- return -1;
- }
- break;
- }
- idx++;
- break;
-
- default:
- internalError("Unrecognized escape '" + opdata + "'");
- }
- break;
-
- case OP_ANY:
-
- if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
- // Match anything
- if (search.isEnd(idx))
- {
- return -1;
- }
- }
- else
- {
- // Match anything but a newline
- if (search.isEnd(idx) || isNewline(idx))
- {
- return -1;
- }
- }
- idx++;
- break;
-
- case OP_ATOM:
- {
- // Match an atom value
- if (search.isEnd(idx))
- {
- return -1;
- }
-
- // Get length of atom and starting index
- int lenAtom = opdata;
- int startAtom = node + nodeSize;
-
- // Give up if not enough input remains to have a match
- if (search.isEnd(lenAtom + idx - 1))
- {
- return -1;
- }
-
- // Match atom differently depending on casefolding flag
- final boolean caseFold =
- ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
-
- for (int i = 0; i < lenAtom; i++)
- {
- if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
- {
- return -1;
- }
- }
- }
- break;
-
- case OP_POSIXCLASS:
- {
- // Out of input?
- if (search.isEnd(idx))
- {
- return -1;
- }
-
- switch (opdata)
- {
- case POSIX_CLASS_ALNUM:
- if (!Character.isLetterOrDigit(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_ALPHA:
- if (!Character.isLetter(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_DIGIT:
- if (!Character.isDigit(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
- if (!Character.isSpaceChar(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_SPACE:
- if (!Character.isWhitespace(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_CNTRL:
- if (Character.getType(search.charAt(idx)) != Character.CONTROL)
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_GRAPH: // JWL - bugbug???
- switch (Character.getType(search.charAt(idx)))
- {
- case Character.MATH_SYMBOL:
- case Character.CURRENCY_SYMBOL:
- case Character.MODIFIER_SYMBOL:
- case Character.OTHER_SYMBOL:
- break;
-
- default:
- return -1;
- }
- break;
-
- case POSIX_CLASS_LOWER:
- if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_UPPER:
- if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_PRINT:
- if (Character.getType(search.charAt(idx)) == Character.CONTROL)
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_PUNCT:
- {
- int type = Character.getType(search.charAt(idx));
- switch(type)
- {
- case Character.DASH_PUNCTUATION:
- case Character.START_PUNCTUATION:
- case Character.END_PUNCTUATION:
- case Character.CONNECTOR_PUNCTUATION:
- case Character.OTHER_PUNCTUATION:
- break;
-
- default:
- return -1;
- }
- }
- break;
-
- case POSIX_CLASS_XDIGIT: // JWL - bugbug??
- {
- boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') ||
- (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') ||
- (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
- if (!isXDigit)
- {
- return -1;
- }
- }
- break;
-
- case POSIX_CLASS_JSTART:
- if (!Character.isJavaIdentifierStart(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- case POSIX_CLASS_JPART:
- if (!Character.isJavaIdentifierPart(search.charAt(idx)))
- {
- return -1;
- }
- break;
-
- default:
- internalError("Bad posix class");
- break;
- }
-
- // Matched.
- idx++;
- }
- break;
-
- case OP_ANYOF:
- {
- // Out of input?
- if (search.isEnd(idx))
- {
- return -1;
- }
-
- // Get character to match against character class and maybe casefold
- char c = search.charAt(idx);
- boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
- // Loop through character class checking our match character
- int idxRange = node + nodeSize;
- int idxEnd = idxRange + (opdata * 2);
- boolean match = false;
- for (int i = idxRange; !match && i < idxEnd; )
- {
- // Get start, end and match characters
- char s = instruction[i++];
- char e = instruction[i++];
-
- match = ((compareChars(c, s, caseFold) >= 0)
- && (compareChars(c, e, caseFold) <= 0));
- }
-
- // Fail if we didn't match the character class
- if (!match)
- {
- return -1;
- }
- idx++;
- }
- break;
-
- case OP_BRANCH:
- {
- // Check for choices
- if (instruction[next + offsetOpcode] != OP_BRANCH)
- {
- // If there aren't any other choices, just evaluate this branch.
- node += nodeSize;
- continue;
- }
-
- // Try all available branches
- short nextBranch;
- do
- {
- // Try matching the branch against the string
- if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
- {
- return idxNew;
- }
-
- // Go to next branch (if any)
- nextBranch = (short)instruction[node + offsetNext];
- node += nextBranch;
- }
- while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));
-
- // Failed to match any branch!
- return -1;
- }
-
- case OP_NOTHING:
- case OP_GOTO:
-
- // Just advance to the next node without doing anything
- break;
-
- case OP_END:
-
- // Match has succeeded!
- setParenEnd(0, idx);
- return idx;
-
- default:
-
- // Corrupt program
- internalError("Invalid opcode '" + opcode + "'");
- }
-
- // Advance to the next node in the program
- node = next;
- }
-
- // We "should" never end up here
- internalError("Corrupt program");
- return -1;
- }
-
- /**
- * Match the current regular expression program against the current
- * input string, starting at index i of the input string. This method
- * is only meant for internal use.
- *
- * @param i The input string index to start matching at
- * @return True if the input matched the expression
- */
- protected boolean matchAt(int i)
- {
- // Initialize start pointer, paren cache and paren count
- start0 = -1;
- end0 = -1;
- start1 = -1;
- end1 = -1;
- start2 = -1;
- end2 = -1;
- startn = null;
- endn = null;
- parenCount = 1;
- setParenStart(0, i);
-
- // Allocate backref arrays (unless optimizations indicate otherwise)
- if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
- {
- startBackref = new int[maxParen];
- endBackref = new int[maxParen];
- }
-
- // Match against string
- int idx;
- if ((idx = matchNodes(0, maxNode, i)) != -1)
- {
- setParenEnd(0, idx);
- return true;
- }
-
- // Didn't match
- parenCount = 0;
- return false;
- }
-
- /**
- * Matches the current regular expression program against a character array,
- * starting at a given index.
- *
- * @param search String to match against
- * @param i Index to start searching at
- * @return True if string matched
- */
- public boolean match(String search, int i)
- {
- return match(new StringCharacterIterator(search), i);
- }
-
- /**
- * Matches the current regular expression program against a character array,
- * starting at a given index.
- *
- * @param search String to match against
- * @param i Index to start searching at
- * @return True if string matched
- */
- public boolean match(CharacterIterator search, int i)
- {
- // There is no compiled program to search with!
- if (program == null)
- {
- // This should be uncommon enough to be an error case rather
- // than an exception (which would have to be handled everywhere)
- internalError("No RE program to run!");
- }
-
- // Save string to search
- this.search = search;
-
- // Can we optimize the search by looking for a prefix string?
- if (program.prefix == null)
- {
- // Unprefixed matching must try for a match at each character
- for ( ;! search.isEnd(i - 1); i++)
- {
- // Try a match at index i
- if (matchAt(i))
- {
- return true;
- }
- }
- return false;
- }
- else
- {
- // Prefix-anchored matching is possible
- boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
- char[] prefix = program.prefix;
- for ( ; !search.isEnd(i + prefix.length - 1); i++)
- {
- int j = i;
- int k = 0;
-
- boolean match;
- do {
- // If there's a mismatch of any character in the prefix, give up
- match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
- } while (match && k < prefix.length);
-
- // See if the whole prefix string matched
- if (k == prefix.length)
- {
- // We matched the full prefix at firstChar, so try it
- if (matchAt(i))
- {
- return true;
- }
- }
- }
- return false;
- }
- }
-
- /**
- * Matches the current regular expression program against a String.
- *
- * @param search String to match against
- * @return True if string matched
- */
- public boolean match(String search)
- {
- return match(search, 0);
- }
-
- /**
- * Splits a string into an array of strings on regular expression boundaries.
- * This function works the same way as the Perl function of the same name.
- * Given a regular expression of "[ab]+" and a string to split of
- * "xyzzyababbayyzabbbab123", the result would be the array of Strings
- * "[xyzzy, yyz, 123]".
- *
- * <p>Please note that the first string in the resulting array may be an empty
- * string. This happens when the very first character of input string is
- * matched by the pattern.
- *
- * @param s String to split on this regular exression
- * @return Array of strings
- */
- public String[] split(String s)
- {
- // Create new vector
- Vector v = new Vector();
-
- // Start at position 0 and search the whole string
- int pos = 0;
- int len = s.length();
-
- // Try a match at each position
- while (pos < len && match(s, pos))
- {
- // Get start of match
- int start = getParenStart(0);
-
- // Get end of match
- int newpos = getParenEnd(0);
-
- // Check if no progress was made
- if (newpos == pos)
- {
- v.addElement(s.substring(pos, start + 1));
- newpos++;
- }
- else
- {
- v.addElement(s.substring(pos, start));
- }
-
- // Move to new position
- pos = newpos;
- }
-
- // Push remainder if it's not empty
- String remainder = s.substring(pos);
- if (remainder.length() != 0)
- {
- v.addElement(remainder);
- }
-
- // Return vector as an array of strings
- String[] ret = new String[v.size()];
- v.copyInto(ret);
- return ret;
- }
-
- /**
- * Flag bit that indicates that subst should replace all occurrences of this
- * regular expression.
- */
- public static final int REPLACE_ALL = 0x0000;
-
- /**
- * Flag bit that indicates that subst should only replace the first occurrence
- * of this regular expression.
- */
- public static final int REPLACE_FIRSTONLY = 0x0001;
-
- /**
- * Flag bit that indicates that subst should replace backreferences
- */
- public static final int REPLACE_BACKREFERENCES = 0x0002;
-
- /**
- * Substitutes a string for this regular expression in another string.
- * This method works like the Perl function of the same name.
- * Given a regular expression of "a*b", a String to substituteIn of
- * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
- * resulting String returned by subst would be "-foo-garply-wacky-".
- *
- * @param substituteIn String to substitute within
- * @param substitution String to substitute for all matches of this regular expression.
- * @return The string substituteIn with zero or more occurrences of the current
- * regular expression replaced with the substitution String (if this regular
- * expression object doesn't match at any position, the original String is returned
- * unchanged).
- */
- public String subst(String substituteIn, String substitution)
- {
- return subst(substituteIn, substitution, REPLACE_ALL);
- }
-
- /**
- * Substitutes a string for this regular expression in another string.
- * This method works like the Perl function of the same name.
- * Given a regular expression of "a*b", a String to substituteIn of
- * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
- * resulting String returned by subst would be "-foo-garply-wacky-".
- * <p>
- * It is also possible to reference the contents of a parenthesized expression
- * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
- * a String to substituteIn of "visit us: http://www.apache.org!" and the
- * substitution String "<a href=\"$0\">$0</a>", the resulting String
- * returned by subst would be
- * "visit us: <a href=\"http://www.apache.org\">http://www.apache.org</a>!".
- * <p>
- * <i>Note:</i> $0 represents the whole match.
- *
- * @param substituteIn String to substitute within
- * @param substitution String to substitute for matches of this regular expression
- * @param flags One or more bitwise flags from REPLACE_*. If the REPLACE_FIRSTONLY
- * flag bit is set, only the first occurrence of this regular expression is replaced.
- * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
- * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
- * be processed.
- * @return The string substituteIn with zero or more occurrences of the current
- * regular expression replaced with the substitution String (if this regular
- * expression object doesn't match at any position, the original String is returned
- * unchanged).
- */
- public String subst(String substituteIn, String substitution, int flags)
- {
- // String to return
- StringBuffer ret = new StringBuffer();
-
- // Start at position 0 and search the whole string
- int pos = 0;
- int len = substituteIn.length();
-
- // Try a match at each position
- while (pos < len && match(substituteIn, pos))
- {
- // Append string before match
- ret.append(substituteIn.substring(pos, getParenStart(0)));
-
- if ((flags & REPLACE_BACKREFERENCES) != 0)
- {
- // Process backreferences
- int lCurrentPosition = 0;
- int lLastPosition = -2;
- int lLength = substitution.length();
- boolean bAddedPrefix = false;
-
- while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
- {
- if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
- && lCurrentPosition+1 < lLength)
- {
- char c = substitution.charAt(lCurrentPosition + 1);
- if (c >= '0' && c <= '9')
- {
- if (bAddedPrefix == false)
- {
- // Append everything between the beginning of the
- // substitution string and the current $ sign
- ret.append(substitution.substring(0, lCurrentPosition));
- bAddedPrefix = true;
- }
- else
- {
- // Append everything between the last and the current $ sign
- ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
- }
-
- // Append the parenthesized expression
- // Note: if a parenthesized expression of the requested
- // index is not available "null" is added to the string
- ret.append(getParen(c - '0'));
- lLastPosition = lCurrentPosition;
- }
- }
-
- // Move forward, skipping past match
- lCurrentPosition++;
- }
-
- // Append everything after the last $ sign
- ret.append(substitution.substring(lLastPosition + 2, lLength));
- }
- else
- {
- // Append substitution without processing backreferences
- ret.append(substitution);
- }
-
- // Move forward, skipping past match
- int newpos = getParenEnd(0);
-
- // We always want to make progress!
- if (newpos == pos)
- {
- newpos++;
- }
-
- // Try new position
- pos = newpos;
-
- // Break out if we're only supposed to replace one occurrence
- if ((flags & REPLACE_FIRSTONLY) != 0)
- {
- break;
- }
- }
-
- // If there's remaining input, append it
- if (pos < len)
- {
- ret.append(substituteIn.substring(pos));
- }
-
- // Return string buffer as string
- return ret.toString();
- }
-
- /**
- * Returns an array of Strings, whose toString representation matches a regular
- * expression. This method works like the Perl function of the same name. Given
- * a regular expression of "a*b" and an array of String objects of [foo, aab, zzz,
- * aaaab], the array of Strings returned by grep would be [aab, aaaab].
- *
- * @param search Array of Objects to search
- * @return Array of Strings whose toString() value matches this regular expression.
- */
- public String[] grep(Object[] search)
- {
- // Create new vector to hold return items
- Vector v = new Vector();
-
- // Traverse array of objects
- for (int i = 0; i < search.length; i++)
- {
- // Get next object as a string
- String s = search[i].toString();
-
- // If it matches this regexp, add it to the list
- if (match(s))
- {
- v.addElement(s);
- }
- }
-
- // Return vector as an array of strings
- String[] ret = new String[v.size()];
- v.copyInto(ret);
- return ret;
- }
-
- /**
- * @return true if character at i-th position in the <code>search</code> string is a newline
- */
- private boolean isNewline(int i)
- {
- char nextChar = search.charAt(i);
-
- if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
- || nextChar == '\u2028' || nextChar == '\u2029')
- {
- return true;
- }
-
- return false;
- }
-
- /**
- * Compares two characters.
- *
- * @param c1 first character to compare.
- * @param c2 second character to compare.
- * @param caseIndependent whether comparision is case insensitive or not.
- * @return negative, 0, or positive integer as the first character
- * less than, equal to, or greater then the second.
- */
- private int compareChars(char c1, char c2, boolean caseIndependent)
- {
- if (caseIndependent)
- {
- c1 = Character.toLowerCase(c1);
- c2 = Character.toLowerCase(c2);
- }
- return ((int)c1 - (int)c2);
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/RECompiler.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1520 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import com.sun.org.apache.regexp.internal.RE;
-import java.util.Hashtable;
-
-/**
- * A regular expression compiler class. This class compiles a pattern string into a
- * regular expression program interpretable by the RE evaluator class. The 'recompile'
- * command line tool uses this compiler to pre-compile regular expressions for use
- * with RE. For a description of the syntax accepted by RECompiler and what you can
- * do with regular expressions, see the documentation for the RE matcher class.
- *
- * @see RE
- * @see recompile
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
- */
-public class RECompiler
-{
- // The compiled program
- char[] instruction; // The compiled RE 'program' instruction buffer
- int lenInstruction; // The amount of the program buffer currently in use
-
- // Input state for compiling regular expression
- String pattern; // Input string
- int len; // Length of the pattern string
- int idx; // Current input index into ac
- int parens; // Total number of paren pairs
-
- // Node flags
- static final int NODE_NORMAL = 0; // No flags (nothing special)
- static final int NODE_NULLABLE = 1; // True if node is potentially null
- static final int NODE_TOPLEVEL = 2; // True if top level expr
-
- // Special types of 'escapes'
- static final int ESC_MASK = 0xffff0; // Escape complexity mask
- static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference
- static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character
- static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters
-
- // {m,n} stacks
- int maxBrackets = 10; // Maximum number of bracket pairs
- static final int bracketUnbounded = -1; // Unbounded value
- int brackets = 0; // Number of bracket sets
- int[] bracketStart = null; // Starting point
- int[] bracketEnd = null; // Ending point
- int[] bracketMin = null; // Minimum number of matches
- int[] bracketOpt = null; // Additional optional matches
-
- // Lookup table for POSIX character class names
- static Hashtable hashPOSIX = new Hashtable();
- static
- {
- hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM));
- hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA));
- hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK));
- hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL));
- hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT));
- hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH));
- hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER));
- hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT));
- hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT));
- hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE));
- hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER));
- hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT));
- hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));
- hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART));
- }
-
- /**
- * Constructor. Creates (initially empty) storage for a regular expression program.
- */
- public RECompiler()
- {
- // Start off with a generous, yet reasonable, initial size
- instruction = new char[128];
- lenInstruction = 0;
- }
-
- /**
- * Ensures that n more characters can fit in the program buffer.
- * If n more can't fit, then the size is doubled until it can.
- * @param n Number of additional characters to ensure will fit.
- */
- void ensure(int n)
- {
- // Get current program length
- int curlen = instruction.length;
-
- // If the current length + n more is too much
- if (lenInstruction + n >= curlen)
- {
- // Double the size of the program array until n more will fit
- while (lenInstruction + n >= curlen)
- {
- curlen *= 2;
- }
-
- // Allocate new program array and move data into it
- char[] newInstruction = new char[curlen];
- System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);
- instruction = newInstruction;
- }
- }
-
- /**
- * Emit a single character into the program stream.
- * @param c Character to add
- */
- void emit(char c)
- {
- // Make room for character
- ensure(1);
-
- // Add character
- instruction[lenInstruction++] = c;
- }
-
- /**
- * Inserts a node with a given opcode and opdata at insertAt. The node relative next
- * pointer is initialized to 0.
- * @param opcode Opcode for new node
- * @param opdata Opdata for new node (only the low 16 bits are currently used)
- * @param insertAt Index at which to insert the new node in the program
- */
- void nodeInsert(char opcode, int opdata, int insertAt)
- {
- // Make room for a new node
- ensure(RE.nodeSize);
-
- // Move everything from insertAt to the end down nodeSize elements
- System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
- instruction[insertAt + RE.offsetOpcode] = opcode;
- instruction[insertAt + RE.offsetOpdata] = (char)opdata;
- instruction[insertAt + RE.offsetNext] = 0;
- lenInstruction += RE.nodeSize;
- }
-
- /**
- * Appends a node to the end of a node chain
- * @param node Start of node chain to traverse
- * @param pointTo Node to have the tail of the chain point to
- */
- void setNextOfEnd(int node, int pointTo)
- {
- // Traverse the chain until the next offset is 0
- int next = instruction[node + RE.offsetNext];
- // while the 'node' is not the last in the chain
- // and the 'node' is not the last in the program.
- while ( next != 0 && node < lenInstruction )
- {
- // if the node we are supposed to point to is in the chain then
- // point to the end of the program instead.
- // Michael McCallum <gholam@xtra.co.nz>
- // FIXME: // This is a _hack_ to stop infinite programs.
- // I believe that the implementation of the reluctant matches is wrong but
- // have not worked out a better way yet.
- if ( node == pointTo ) {
- pointTo = lenInstruction;
- }
- node += next;
- next = instruction[node + RE.offsetNext];
- }
- // if we have reached the end of the program then dont set the pointTo.
- // im not sure if this will break any thing but passes all the tests.
- if ( node < lenInstruction ) {
- // Point the last node in the chain to pointTo.
- instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
- }
- }
-
- /**
- * Adds a new node
- * @param opcode Opcode for node
- * @param opdata Opdata for node (only the low 16 bits are currently used)
- * @return Index of new node in program
- */
- int node(char opcode, int opdata)
- {
- // Make room for a new node
- ensure(RE.nodeSize);
-
- // Add new node at end
- instruction[lenInstruction + RE.offsetOpcode] = opcode;
- instruction[lenInstruction + RE.offsetOpdata] = (char)opdata;
- instruction[lenInstruction + RE.offsetNext] = 0;
- lenInstruction += RE.nodeSize;
-
- // Return index of new node
- return lenInstruction - RE.nodeSize;
- }
-
-
- /**
- * Throws a new internal error exception
- * @exception Error Thrown in the event of an internal error.
- */
- void internalError() throws Error
- {
- throw new Error("Internal error!");
- }
-
- /**
- * Throws a new syntax error exception
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- void syntaxError(String s) throws RESyntaxException
- {
- throw new RESyntaxException(s);
- }
-
- /**
- * Allocate storage for brackets only as needed
- */
- void allocBrackets()
- {
- // Allocate bracket stacks if not already done
- if (bracketStart == null)
- {
- // Allocate storage
- bracketStart = new int[maxBrackets];
- bracketEnd = new int[maxBrackets];
- bracketMin = new int[maxBrackets];
- bracketOpt = new int[maxBrackets];
-
- // Initialize to invalid values
- for (int i = 0; i < maxBrackets; i++)
- {
- bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1;
- }
- }
- }
-
- /** Enlarge storage for brackets only as needed. */
- synchronized void reallocBrackets() {
- // trick the tricky
- if (bracketStart == null) {
- allocBrackets();
- }
-
- int new_size = maxBrackets * 2;
- int[] new_bS = new int[new_size];
- int[] new_bE = new int[new_size];
- int[] new_bM = new int[new_size];
- int[] new_bO = new int[new_size];
- // Initialize to invalid values
- for (int i=brackets; i<new_size; i++) {
- new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
- }
- System.arraycopy(bracketStart,0, new_bS,0, brackets);
- System.arraycopy(bracketEnd,0, new_bE,0, brackets);
- System.arraycopy(bracketMin,0, new_bM,0, brackets);
- System.arraycopy(bracketOpt,0, new_bO,0, brackets);
- bracketStart = new_bS;
- bracketEnd = new_bE;
- bracketMin = new_bM;
- bracketOpt = new_bO;
- maxBrackets = new_size;
- }
-
- /**
- * Match bracket {m,n} expression put results in bracket member variables
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- void bracket() throws RESyntaxException
- {
- // Current character must be a '{'
- if (idx >= len || pattern.charAt(idx++) != '{')
- {
- internalError();
- }
-
- // Next char must be a digit
- if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
- {
- syntaxError("Expected digit");
- }
-
- // Get min ('m' of {m,n}) number
- StringBuffer number = new StringBuffer();
- while (idx < len && Character.isDigit(pattern.charAt(idx)))
- {
- number.append(pattern.charAt(idx++));
- }
- try
- {
- bracketMin[brackets] = Integer.parseInt(number.toString());
- }
- catch (NumberFormatException e)
- {
- syntaxError("Expected valid number");
- }
-
- // If out of input, fail
- if (idx >= len)
- {
- syntaxError("Expected comma or right bracket");
- }
-
- // If end of expr, optional limit is 0
- if (pattern.charAt(idx) == '}')
- {
- idx++;
- bracketOpt[brackets] = 0;
- return;
- }
-
- // Must have at least {m,} and maybe {m,n}.
- if (idx >= len || pattern.charAt(idx++) != ',')
- {
- syntaxError("Expected comma");
- }
-
- // If out of input, fail
- if (idx >= len)
- {
- syntaxError("Expected comma or right bracket");
- }
-
- // If {m,} max is unlimited
- if (pattern.charAt(idx) == '}')
- {
- idx++;
- bracketOpt[brackets] = bracketUnbounded;
- return;
- }
-
- // Next char must be a digit
- if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
- {
- syntaxError("Expected digit");
- }
-
- // Get max number
- number.setLength(0);
- while (idx < len && Character.isDigit(pattern.charAt(idx)))
- {
- number.append(pattern.charAt(idx++));
- }
- try
- {
- bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
- }
- catch (NumberFormatException e)
- {
- syntaxError("Expected valid number");
- }
-
- // Optional repetitions must be >= 0
- if (bracketOpt[brackets] < 0)
- {
- syntaxError("Bad range");
- }
-
- // Must have close brace
- if (idx >= len || pattern.charAt(idx++) != '}')
- {
- syntaxError("Missing close brace");
- }
- }
-
- /**
- * Match an escape sequence. Handles quoted chars and octal escapes as well
- * as normal escape characters. Always advances the input stream by the
- * right amount. This code "understands" the subtle difference between an
- * octal escape and a backref. You can access the type of ESC_CLASS or
- * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].
- * @return ESC_* code or character if simple escape
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int escape() throws RESyntaxException
- {
- // "Shouldn't" happen
- if (pattern.charAt(idx) != '\\')
- {
- internalError();
- }
-
- // Escape shouldn't occur as last character in string!
- if (idx + 1 == len)
- {
- syntaxError("Escape terminates string");
- }
-
- // Switch on character after backslash
- idx += 2;
- char escapeChar = pattern.charAt(idx - 1);
- switch (escapeChar)
- {
- case RE.E_BOUND:
- case RE.E_NBOUND:
- return ESC_COMPLEX;
-
- case RE.E_ALNUM:
- case RE.E_NALNUM:
- case RE.E_SPACE:
- case RE.E_NSPACE:
- case RE.E_DIGIT:
- case RE.E_NDIGIT:
- return ESC_CLASS;
-
- case 'u':
- case 'x':
- {
- // Exact required hex digits for escape type
- int hexDigits = (escapeChar == 'u' ? 4 : 2);
-
- // Parse up to hexDigits characters from input
- int val = 0;
- for ( ; idx < len && hexDigits-- > 0; idx++)
- {
- // Get char
- char c = pattern.charAt(idx);
-
- // If it's a hexadecimal digit (0-9)
- if (c >= '0' && c <= '9')
- {
- // Compute new value
- val = (val << 4) + c - '0';
- }
- else
- {
- // If it's a hexadecimal letter (a-f)
- c = Character.toLowerCase(c);
- if (c >= 'a' && c <= 'f')
- {
- // Compute new value
- val = (val << 4) + (c - 'a') + 10;
- }
- else
- {
- // If it's not a valid digit or hex letter, the escape must be invalid
- // because hexDigits of input have not been absorbed yet.
- syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar);
- }
- }
- }
- return val;
- }
-
- case 't':
- return '\t';
-
- case 'n':
- return '\n';
-
- case 'r':
- return '\r';
-
- case 'f':
- return '\f';
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
-
- // An octal escape starts with a 0 or has two digits in a row
- if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0')
- {
- // Handle \nnn octal escapes
- int val = escapeChar - '0';
- if (idx < len && Character.isDigit(pattern.charAt(idx)))
- {
- val = ((val << 3) + (pattern.charAt(idx++) - '0'));
- if (idx < len && Character.isDigit(pattern.charAt(idx)))
- {
- val = ((val << 3) + (pattern.charAt(idx++) - '0'));
- }
- }
- return val;
- }
-
- // It's actually a backreference (\[1-9]), not an escape
- return ESC_BACKREF;
-
- default:
-
- // Simple quoting of a character
- return escapeChar;
- }
- }
-
- /**
- * Compile a character class
- * @return Index of class node
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int characterClass() throws RESyntaxException
- {
- // Check for bad calling or empty class
- if (pattern.charAt(idx) != '[')
- {
- internalError();
- }
-
- // Check for unterminated or empty class
- if ((idx + 1) >= len || pattern.charAt(++idx) == ']')
- {
- syntaxError("Empty or unterminated class");
- }
-
- // Check for POSIX character class
- if (idx < len && pattern.charAt(idx) == ':')
- {
- // Skip colon
- idx++;
-
- // POSIX character classes are denoted with lowercase ASCII strings
- int idxStart = idx;
- while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z')
- {
- idx++;
- }
-
- // Should be a ":]" to terminate the POSIX character class
- if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
- {
- // Get character class
- String charClass = pattern.substring(idxStart, idx);
-
- // Select the POSIX class id
- Character i = (Character)hashPOSIX.get(charClass);
- if (i != null)
- {
- // Move past colon and right bracket
- idx += 2;
-
- // Return new POSIX character class node
- return node(RE.OP_POSIXCLASS, i.charValue());
- }
- syntaxError("Invalid POSIX character class '" + charClass + "'");
- }
- syntaxError("Invalid POSIX character class syntax");
- }
-
- // Try to build a class. Create OP_ANYOF node
- int ret = node(RE.OP_ANYOF, 0);
-
- // Parse class declaration
- char CHAR_INVALID = Character.MAX_VALUE;
- char last = CHAR_INVALID;
- char simpleChar = 0;
- boolean include = true;
- boolean definingRange = false;
- int idxFirst = idx;
- char rangeStart = Character.MIN_VALUE;
- char rangeEnd;
- RERange range = new RERange();
- while (idx < len && pattern.charAt(idx) != ']')
- {
-
- switchOnCharacter:
-
- // Switch on character
- switch (pattern.charAt(idx))
- {
- case '^':
- include = !include;
- if (idx == idxFirst)
- {
- range.include(Character.MIN_VALUE, Character.MAX_VALUE, true);
- }
- idx++;
- continue;
-
- case '\\':
- {
- // Escape always advances the stream
- int c;
- switch (c = escape ())
- {
- case ESC_COMPLEX:
- case ESC_BACKREF:
-
- // Word boundaries and backrefs not allowed in a character class!
- syntaxError("Bad character class");
-
- case ESC_CLASS:
-
- // Classes can't be an endpoint of a range
- if (definingRange)
- {
- syntaxError("Bad character class");
- }
-
- // Handle specific type of class (some are ok)
- switch (pattern.charAt(idx - 1))
- {
- case RE.E_NSPACE:
- case RE.E_NDIGIT:
- case RE.E_NALNUM:
- syntaxError("Bad character class");
-
- case RE.E_SPACE:
- range.include('\t', include);
- range.include('\r', include);
- range.include('\f', include);
- range.include('\n', include);
- range.include('\b', include);
- range.include(' ', include);
- break;
-
- case RE.E_ALNUM:
- range.include('a', 'z', include);
- range.include('A', 'Z', include);
- range.include('_', include);
-
- // Fall through!
-
- case RE.E_DIGIT:
- range.include('0', '9', include);
- break;
- }
-
- // Make last char invalid (can't be a range start)
- last = CHAR_INVALID;
- break;
-
- default:
-
- // Escape is simple so treat as a simple char
- simpleChar = (char) c;
- break switchOnCharacter;
- }
- }
- continue;
-
- case '-':
-
- // Start a range if one isn't already started
- if (definingRange)
- {
- syntaxError("Bad class range");
- }
- definingRange = true;
-
- // If no last character, start of range is 0
- rangeStart = (last == CHAR_INVALID ? 0 : last);
-
- // Premature end of range. define up to Character.MAX_VALUE
- if ((idx + 1) < len && pattern.charAt(++idx) == ']')
- {
- simpleChar = Character.MAX_VALUE;
- break;
- }
- continue;
-
- default:
- simpleChar = pattern.charAt(idx++);
- break;
- }
-
- // Handle simple character simpleChar
- if (definingRange)
- {
- // if we are defining a range make it now
- rangeEnd = simpleChar;
-
- // Actually create a range if the range is ok
- if (rangeStart >= rangeEnd)
- {
- syntaxError("Bad character class");
- }
- range.include(rangeStart, rangeEnd, include);
-
- // We are done defining the range
- last = CHAR_INVALID;
- definingRange = false;
- }
- else
- {
- // If simple character and not start of range, include it
- if (idx >= len || pattern.charAt(idx) != '-')
- {
- range.include(simpleChar, include);
- }
- last = simpleChar;
- }
- }
-
- // Shouldn't be out of input
- if (idx == len)
- {
- syntaxError("Unterminated character class");
- }
-
- // Absorb the ']' end of class marker
- idx++;
-
- // Emit character class definition
- instruction[ret + RE.offsetOpdata] = (char)range.num;
- for (int i = 0; i < range.num; i++)
- {
- emit((char)range.minRange[i]);
- emit((char)range.maxRange[i]);
- }
- return ret;
- }
-
- /**
- * Absorb an atomic character string. This method is a little tricky because
- * it can un-include the last character of string if a closure operator follows.
- * This is correct because *+? have higher precedence than concatentation (thus
- * ABC* means AB(C*) and NOT (ABC)*).
- * @return Index of new atom node
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int atom() throws RESyntaxException
- {
- // Create a string node
- int ret = node(RE.OP_ATOM, 0);
-
- // Length of atom
- int lenAtom = 0;
-
- // Loop while we've got input
-
- atomLoop:
-
- while (idx < len)
- {
- // Is there a next char?
- if ((idx + 1) < len)
- {
- char c = pattern.charAt(idx + 1);
-
- // If the next 'char' is an escape, look past the whole escape
- if (pattern.charAt(idx) == '\\')
- {
- int idxEscape = idx;
- escape();
- if (idx < len)
- {
- c = pattern.charAt(idx);
- }
- idx = idxEscape;
- }
-
- // Switch on next char
- switch (c)
- {
- case '{':
- case '?':
- case '*':
- case '+':
-
- // If the next character is a closure operator and our atom is non-empty, the
- // current character should bind to the closure operator rather than the atom
- if (lenAtom != 0)
- {
- break atomLoop;
- }
- }
- }
-
- // Switch on current char
- switch (pattern.charAt(idx))
- {
- case ']':
- case '^':
- case '$':
- case '.':
- case '[':
- case '(':
- case ')':
- case '|':
- break atomLoop;
-
- case '{':
- case '?':
- case '*':
- case '+':
-
- // We should have an atom by now
- if (lenAtom == 0)
- {
- // No atom before closure
- syntaxError("Missing operand to closure");
- }
- break atomLoop;
-
- case '\\':
-
- {
- // Get the escaped character (advances input automatically)
- int idxBeforeEscape = idx;
- int c = escape();
-
- // Check if it's a simple escape (as opposed to, say, a backreference)
- if ((c & ESC_MASK) == ESC_MASK)
- {
- // Not a simple escape, so backup to where we were before the escape.
- idx = idxBeforeEscape;
- break atomLoop;
- }
-
- // Add escaped char to atom
- emit((char) c);
- lenAtom++;
- }
- break;
-
- default:
-
- // Add normal character to atom
- emit(pattern.charAt(idx++));
- lenAtom++;
- break;
- }
- }
-
- // This "shouldn't" happen
- if (lenAtom == 0)
- {
- internalError();
- }
-
- // Emit the atom length into the program
- instruction[ret + RE.offsetOpdata] = (char)lenAtom;
- return ret;
- }
-
- /**
- * Match a terminal node.
- * @param flags Flags
- * @return Index of terminal node (closeable)
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int terminal(int[] flags) throws RESyntaxException
- {
- switch (pattern.charAt(idx))
- {
- case RE.OP_EOL:
- case RE.OP_BOL:
- case RE.OP_ANY:
- return node(pattern.charAt(idx++), 0);
-
- case '[':
- return characterClass();
-
- case '(':
- return expr(flags);
-
- case ')':
- syntaxError("Unexpected close paren");
-
- case '|':
- internalError();
-
- case ']':
- syntaxError("Mismatched class");
-
- case 0:
- syntaxError("Unexpected end of input");
-
- case '?':
- case '+':
- case '{':
- case '*':
- syntaxError("Missing operand to closure");
-
- case '\\':
- {
- // Don't forget, escape() advances the input stream!
- int idxBeforeEscape = idx;
-
- // Switch on escaped character
- switch (escape())
- {
- case ESC_CLASS:
- case ESC_COMPLEX:
- flags[0] &= ~NODE_NULLABLE;
- return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
-
- case ESC_BACKREF:
- {
- char backreference = (char)(pattern.charAt(idx - 1) - '0');
- if (parens <= backreference)
- {
- syntaxError("Bad backreference");
- }
- flags[0] |= NODE_NULLABLE;
- return node(RE.OP_BACKREF, backreference);
- }
-
- default:
-
- // We had a simple escape and we want to have it end up in
- // an atom, so we back up and fall though to the default handling
- idx = idxBeforeEscape;
- flags[0] &= ~NODE_NULLABLE;
- break;
- }
- }
- }
-
- // Everything above either fails or returns.
- // If it wasn't one of the above, it must be the start of an atom.
- flags[0] &= ~NODE_NULLABLE;
- return atom();
- }
-
- /**
- * Compile a possibly closured terminal
- * @param flags Flags passed by reference
- * @return Index of closured node
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int closure(int[] flags) throws RESyntaxException
- {
- // Before terminal
- int idxBeforeTerminal = idx;
-
- // Values to pass by reference to terminal()
- int[] terminalFlags = { NODE_NORMAL };
-
- // Get terminal symbol
- int ret = terminal(terminalFlags);
-
- // Or in flags from terminal symbol
- flags[0] |= terminalFlags[0];
-
- // Advance input, set NODE_NULLABLE flag and do sanity checks
- if (idx >= len)
- {
- return ret;
- }
- boolean greedy = true;
- char closureType = pattern.charAt(idx);
- switch (closureType)
- {
- case '?':
- case '*':
-
- // The current node can be null
- flags[0] |= NODE_NULLABLE;
-
- case '+':
-
- // Eat closure character
- idx++;
-
- case '{':
-
- // Don't allow blantant stupidity
- int opcode = instruction[ret + RE.offsetOpcode];
- if (opcode == RE.OP_BOL || opcode == RE.OP_EOL)
- {
- syntaxError("Bad closure operand");
- }
- if ((terminalFlags[0] & NODE_NULLABLE) != 0)
- {
- syntaxError("Closure operand can't be nullable");
- }
- break;
- }
-
- // If the next character is a '?', make the closure non-greedy (reluctant)
- if (idx < len && pattern.charAt(idx) == '?')
- {
- idx++;
- greedy = false;
- }
-
- if (greedy)
- {
- // Actually do the closure now
- switch (closureType)
- {
- case '{':
- {
- // We look for our bracket in the list
- boolean found = false;
- int i;
- allocBrackets();
- for (i = 0; i < brackets; i++)
- {
- if (bracketStart[i] == idx)
- {
- found = true;
- break;
- }
- }
-
- // If its not in the list we parse the {m,n}
- if (!found)
- {
- if (brackets >= maxBrackets)
- {
- reallocBrackets();
- }
- bracketStart[brackets] = idx;
- bracket();
- bracketEnd[brackets] = idx;
- i = brackets++;
- }
-
- // Process min first
- if (bracketMin[i]-- > 0)
- {
- if (bracketMin[i] > 0 || bracketOpt[i] != 0) {
- // Rewind stream and run it through again - more matchers coming
- for (int j = 0; j < brackets; j++) {
- if (j != i && bracketStart[j] < idx
- && bracketStart[j] >= idxBeforeTerminal)
- {
- brackets--;
- bracketStart[j] = bracketStart[brackets];
- bracketEnd[j] = bracketEnd[brackets];
- bracketMin[j] = bracketMin[brackets];
- bracketOpt[j] = bracketOpt[brackets];
- }
- }
-
- idx = idxBeforeTerminal;
- } else {
- // Bug #1030: No optinal matches - no need to rewind
- idx = bracketEnd[i];
- }
- break;
- }
-
- // Do the right thing for maximum ({m,})
- if (bracketOpt[i] == bracketUnbounded)
- {
- // Drop through now and closure expression.
- // We are done with the {m,} expr, so skip rest
- closureType = '*';
- bracketOpt[i] = 0;
- idx = bracketEnd[i];
- }
- else
- if (bracketOpt[i]-- > 0)
- {
- if (bracketOpt[i] > 0)
- {
- // More optional matchers - 'play it again sam!'
- idx = idxBeforeTerminal;
- } else {
- // Bug #1030: We are done - this one is last and optional
- idx = bracketEnd[i];
- }
- // Drop through to optionally close
- closureType = '?';
- }
- else
- {
- // Rollback terminal - neither min nor opt matchers present
- lenInstruction = ret;
- node(RE.OP_NOTHING, 0);
-
- // We are done. skip the rest of {m,n} expr
- idx = bracketEnd[i];
- break;
- }
- }
-
- // Fall through!
-
- case '?':
- case '*':
-
- if (!greedy)
- {
- break;
- }
-
- if (closureType == '?')
- {
- // X? is compiled as (X|)
- nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
- setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option
- int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING
- setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING
- setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node
- }
-
- if (closureType == '*')
- {
- // X* is compiled as (X{gotoX}|)
- nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X
- setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option
- setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto
- setNextOfEnd(ret + RE.nodeSize, ret); // the start again
- setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is
- setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING
- }
- break;
-
- case '+':
- {
- // X+ is compiled as X({gotoX}|)
- int branch;
- branch = node(RE.OP_BRANCH, 0); // a new branch
- setNextOfEnd(ret, branch); // is added to the end of X
- setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start
- setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option
- setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING
- }
- break;
- }
- }
- else
- {
- // Add end after closured subexpr
- setNextOfEnd(ret, node(RE.OP_END, 0));
-
- // Actually do the closure now
- switch (closureType)
- {
- case '?':
- nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
- break;
-
- case '*':
- nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
- break;
-
- case '+':
- nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
- break;
- }
-
- // Point to the expr after the closure
- setNextOfEnd(ret, lenInstruction);
- }
- return ret;
- }
-
- /**
- * Compile one branch of an or operator (implements concatenation)
- * @param flags Flags passed by reference
- * @return Pointer to branch node
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int branch(int[] flags) throws RESyntaxException
- {
- // Get each possibly closured piece and concat
- int node;
- int ret = node(RE.OP_BRANCH, 0);
- int chain = -1;
- int[] closureFlags = new int[1];
- boolean nullable = true;
- while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')')
- {
- // Get new node
- closureFlags[0] = NODE_NORMAL;
- node = closure(closureFlags);
- if (closureFlags[0] == NODE_NORMAL)
- {
- nullable = false;
- }
-
- // If there's a chain, append to the end
- if (chain != -1)
- {
- setNextOfEnd(chain, node);
- }
-
- // Chain starts at current
- chain = node;
- }
-
- // If we don't run loop, make a nothing node
- if (chain == -1)
- {
- node(RE.OP_NOTHING, 0);
- }
-
- // Set nullable flag for this branch
- if (nullable)
- {
- flags[0] |= NODE_NULLABLE;
- }
- return ret;
- }
-
- /**
- * Compile an expression with possible parens around it. Paren matching
- * is done at this level so we can tie the branch tails together.
- * @param flags Flag value passed by reference
- * @return Node index of expression in instruction array
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- */
- int expr(int[] flags) throws RESyntaxException
- {
- // Create open paren node unless we were called from the top level (which has no parens)
- int paren = -1;
- int ret = -1;
- int closeParens = parens;
- if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
- {
- // if its a cluster ( rather than a proper subexpression ie with backrefs )
- if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' )
- {
- paren = 2;
- idx += 3;
- ret = node( RE.OP_OPEN_CLUSTER, 0 );
- }
- else
- {
- paren = 1;
- idx++;
- ret = node(RE.OP_OPEN, parens++);
- }
- }
- flags[0] &= ~NODE_TOPLEVEL;
-
- // Create a branch node
- int branch = branch(flags);
- if (ret == -1)
- {
- ret = branch;
- }
- else
- {
- setNextOfEnd(ret, branch);
- }
-
- // Loop through branches
- while (idx < len && pattern.charAt(idx) == '|')
- {
- idx++;
- branch = branch(flags);
- setNextOfEnd(ret, branch);
- }
-
- // Create an ending node (either a close paren or an OP_END)
- int end;
- if ( paren > 0 )
- {
- if (idx < len && pattern.charAt(idx) == ')')
- {
- idx++;
- }
- else
- {
- syntaxError("Missing close paren");
- }
- if ( paren == 1 )
- {
- end = node(RE.OP_CLOSE, closeParens);
- }
- else
- {
- end = node( RE.OP_CLOSE_CLUSTER, 0 );
- }
- }
- else
- {
- end = node(RE.OP_END, 0);
- }
-
- // Append the ending node to the ret nodelist
- setNextOfEnd(ret, end);
-
- // Hook the ends of each branch to the end node
- int currentNode = ret;
- int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
- // while the next node o
- while ( nextNodeOffset != 0 && currentNode < lenInstruction )
- {
- // If branch, make the end of the branch's operand chain point to the end node.
- if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
- {
- setNextOfEnd( currentNode + RE.nodeSize, end );
- }
- nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
- currentNode += nextNodeOffset;
- }
-
- // Return the node list
- return ret;
- }
-
- /**
- * Compiles a regular expression pattern into a program runnable by the pattern
- * matcher class 'RE'.
- * @param pattern Regular expression pattern to compile (see RECompiler class
- * for details).
- * @return A compiled regular expression program.
- * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
- * @see RECompiler
- * @see RE
- */
- public REProgram compile(String pattern) throws RESyntaxException
- {
- // Initialize variables for compilation
- this.pattern = pattern; // Save pattern in instance variable
- len = pattern.length(); // Precompute pattern length for speed
- idx = 0; // Set parsing index to the first character
- lenInstruction = 0; // Set emitted instruction count to zero
- parens = 1; // Set paren level to 1 (the implicit outer parens)
- brackets = 0; // No bracketed closures yet
-
- // Initialize pass by reference flags value
- int[] flags = { NODE_TOPLEVEL };
-
- // Parse expression
- expr(flags);
-
- // Should be at end of input
- if (idx != len)
- {
- if (pattern.charAt(idx) == ')')
- {
- syntaxError("Unmatched close paren");
- }
- syntaxError("Unexpected input remains");
- }
-
- // Return the result
- char[] ins = new char[lenInstruction];
- System.arraycopy(instruction, 0, ins, 0, lenInstruction);
- return new REProgram(parens, ins);
- }
-
- /**
- * Local, nested class for maintaining character ranges for character classes.
- */
- class RERange
- {
- int size = 16; // Capacity of current range arrays
- int[] minRange = new int[size]; // Range minima
- int[] maxRange = new int[size]; // Range maxima
- int num = 0; // Number of range array elements in use
-
- /**
- * Deletes the range at a given index from the range lists
- * @param index Index of range to delete from minRange and maxRange arrays.
- */
- void delete(int index)
- {
- // Return if no elements left or index is out of range
- if (num == 0 || index >= num)
- {
- return;
- }
-
- // Move elements down
- while (++index < num)
- {
- if (index - 1 >= 0)
- {
- minRange[index-1] = minRange[index];
- maxRange[index-1] = maxRange[index];
- }
- }
-
- // One less element now
- num--;
- }
-
- /**
- * Merges a range into the range list, coalescing ranges if possible.
- * @param min Minimum end of range
- * @param max Maximum end of range
- */
- void merge(int min, int max)
- {
- // Loop through ranges
- for (int i = 0; i < num; i++)
- {
- // Min-max is subsumed by minRange[i]-maxRange[i]
- if (min >= minRange[i] && max <= maxRange[i])
- {
- return;
- }
-
- // Min-max subsumes minRange[i]-maxRange[i]
- else if (min <= minRange[i] && max >= maxRange[i])
- {
- delete(i);
- merge(min, max);
- return;
- }
-
- // Min is in the range, but max is outside
- else if (min >= minRange[i] && min <= maxRange[i])
- {
- delete(i);
- min = minRange[i];
- merge(min, max);
- return;
- }
-
- // Max is in the range, but min is outside
- else if (max >= minRange[i] && max <= maxRange[i])
- {
- delete(i);
- max = maxRange[i];
- merge(min, max);
- return;
- }
- }
-
- // Must not overlap any other ranges
- if (num >= size)
- {
- size *= 2;
- int[] newMin = new int[size];
- int[] newMax = new int[size];
- System.arraycopy(minRange, 0, newMin, 0, num);
- System.arraycopy(maxRange, 0, newMax, 0, num);
- minRange = newMin;
- maxRange = newMax;
- }
- minRange[num] = min;
- maxRange[num] = max;
- num++;
- }
-
- /**
- * Removes a range by deleting or shrinking all other ranges
- * @param min Minimum end of range
- * @param max Maximum end of range
- */
- void remove(int min, int max)
- {
- // Loop through ranges
- for (int i = 0; i < num; i++)
- {
- // minRange[i]-maxRange[i] is subsumed by min-max
- if (minRange[i] >= min && maxRange[i] <= max)
- {
- delete(i);
- i--;
- return;
- }
-
- // min-max is subsumed by minRange[i]-maxRange[i]
- else if (min >= minRange[i] && max <= maxRange[i])
- {
- int minr = minRange[i];
- int maxr = maxRange[i];
- delete(i);
- if (minr < min)
- {
- merge(minr, min - 1);
- }
- if (max < maxr)
- {
- merge(max + 1, maxr);
- }
- return;
- }
-
- // minRange is in the range, but maxRange is outside
- else if (minRange[i] >= min && minRange[i] <= max)
- {
- minRange[i] = max + 1;
- return;
- }
-
- // maxRange is in the range, but minRange is outside
- else if (maxRange[i] >= min && maxRange[i] <= max)
- {
- maxRange[i] = min - 1;
- return;
- }
- }
- }
-
- /**
- * Includes (or excludes) the range from min to max, inclusive.
- * @param min Minimum end of range
- * @param max Maximum end of range
- * @param include True if range should be included. False otherwise.
- */
- void include(int min, int max, boolean include)
- {
- if (include)
- {
- merge(min, max);
- }
- else
- {
- remove(min, max);
- }
- }
-
- /**
- * Includes a range with the same min and max
- * @param minmax Minimum and maximum end of range (inclusive)
- * @param include True if range should be included. False otherwise.
- */
- void include(char minmax, boolean include)
- {
- include(minmax, minmax, include);
- }
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/REDebugCompiler.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.PrintWriter;
-import java.util.Hashtable;
-
-/**
- * A subclass of RECompiler which can dump a regular expression program
- * for debugging purposes.
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- */
-public class REDebugCompiler extends RECompiler
-{
- /**
- * Mapping from opcodes to descriptive strings
- */
- static Hashtable hashOpcode = new Hashtable();
- static
- {
- hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR), "OP_RELUCTANTSTAR");
- hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS), "OP_RELUCTANTPLUS");
- hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE), "OP_RELUCTANTMAYBE");
- hashOpcode.put(new Integer(RE.OP_END), "OP_END");
- hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL");
- hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL");
- hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY");
- hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF");
- hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH");
- hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM");
- hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR");
- hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS");
- hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE");
- hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING");
- hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO");
- hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE");
- hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN");
- hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
- hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF");
- hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS");
- hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER), "OP_OPEN_CLUSTER");
- hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER), "OP_CLOSE_CLUSTER");
- }
-
- /**
- * Returns a descriptive string for an opcode.
- * @param opcode Opcode to convert to a string
- * @return Description of opcode
- */
- String opcodeToString(char opcode)
- {
- // Get string for opcode
- String ret =(String)hashOpcode.get(new Integer(opcode));
-
- // Just in case we have a corrupt program
- if (ret == null)
- {
- ret = "OP_????";
- }
- return ret;
- }
-
- /**
- * Return a string describing a (possibly unprintable) character.
- * @param c Character to convert to a printable representation
- * @return String representation of character
- */
- String charToString(char c)
- {
- // If it's unprintable, convert to '\###'
- if (c < ' ' || c > 127)
- {
- return "\\" + (int)c;
- }
-
- // Return the character as a string
- return String.valueOf(c);
- }
-
- /**
- * Returns a descriptive string for a node in a regular expression program.
- * @param node Node to describe
- * @return Description of node
- */
- String nodeToString(int node)
- {
- // Get opcode and opdata for node
- char opcode = instruction[node + RE.offsetOpcode];
- int opdata = (int)instruction[node + RE.offsetOpdata];
-
- // Return opcode as a string and opdata value
- return opcodeToString(opcode) + ", opdata = " + opdata;
- }
-
- /**
- * Inserts a node with a given opcode and opdata at insertAt. The node relative next
- * pointer is initialized to 0.
- * @param opcode Opcode for new node
- * @param opdata Opdata for new node (only the low 16 bits are currently used)
- * @param insertAt Index at which to insert the new node in the program * /
- void nodeInsert(char opcode, int opdata, int insertAt) {
- System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
- PrintWriter writer = new PrintWriter( System.out );
- dumpProgram( writer );
- super.nodeInsert( opcode, opdata, insertAt );
- System.out.println( "====< " );
- dumpProgram( writer );
- writer.flush();
- }/**/
-
-
- /**
- * Appends a node to the end of a node chain
- * @param node Start of node chain to traverse
- * @param pointTo Node to have the tail of the chain point to * /
- void setNextOfEnd(int node, int pointTo) {
- System.out.println( "====> " + node + " " + pointTo );
- PrintWriter writer = new PrintWriter( System.out );
- dumpProgram( writer );
- super.setNextOfEnd( node, pointTo );
- System.out.println( "====< " );
- dumpProgram( writer );
- writer.flush();
- }/**/
-
-
- /**
- * Dumps the current program to a PrintWriter
- * @param p PrintWriter for program dump output
- */
- public void dumpProgram(PrintWriter p)
- {
- // Loop through the whole program
- for (int i = 0; i < lenInstruction; )
- {
- // Get opcode, opdata and next fields of current program node
- char opcode = instruction[i + RE.offsetOpcode];
- char opdata = instruction[i + RE.offsetOpdata];
- short next = (short)instruction[i + RE.offsetNext];
-
- // Display the current program node
- p.print(i + ". " + nodeToString(i) + ", next = ");
-
- // If there's no next, say 'none', otherwise give absolute index of next node
- if (next == 0)
- {
- p.print("none");
- }
- else
- {
- p.print(i + next);
- }
-
- // Move past node
- i += RE.nodeSize;
-
- // If character class
- if (opcode == RE.OP_ANYOF)
- {
- // Opening bracket for start of char class
- p.print(", [");
-
- // Show each range in the char class
- int rangeCount = opdata;
- for (int r = 0; r < rangeCount; r++)
- {
- // Get first and last chars in range
- char charFirst = instruction[i++];
- char charLast = instruction[i++];
-
- // Print range as X-Y, unless range encompasses only one char
- if (charFirst == charLast)
- {
- p.print(charToString(charFirst));
- }
- else
- {
- p.print(charToString(charFirst) + "-" + charToString(charLast));
- }
- }
-
- // Annotate the end of the char class
- p.print("]");
- }
-
- // If atom
- if (opcode == RE.OP_ATOM)
- {
- // Open quote
- p.print(", \"");
-
- // Print each character in the atom
- for (int len = opdata; len-- != 0; )
- {
- p.print(charToString(instruction[i++]));
- }
-
- // Close quote
- p.print("\"");
- }
-
- // Print a newline
- p.println("");
- }
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/REProgram.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.Serializable;
-
-/**
- * A class that holds compiled regular expressions. This is exposed mainly
- * for use by the recompile utility (which helps you produce precompiled
- * REProgram objects). You should not otherwise need to work directly with
- * this class.
-*
- * @see RE
- * @see RECompiler
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- */
-public class REProgram implements Serializable
-{
- static final int OPT_HASBACKREFS = 1;
-
- char[] instruction; // The compiled regular expression 'program'
- int lenInstruction; // The amount of the instruction buffer in use
- char[] prefix; // Prefix string optimization
- int flags; // Optimization flags (REProgram.OPT_*)
- int maxParens = -1;
-
- /**
- * Constructs a program object from a character array
- * @param instruction Character array with RE opcode instructions in it
- */
- public REProgram(char[] instruction)
- {
- this(instruction, instruction.length);
- }
-
- /**
- * Constructs a program object from a character array
- * @param parens Count of parens in the program
- * @param instruction Character array with RE opcode instructions in it
- */
- public REProgram(int parens, char[] instruction)
- {
- this(instruction, instruction.length);
- this.maxParens = parens;
- }
-
- /**
- * Constructs a program object from a character array
- * @param instruction Character array with RE opcode instructions in it
- * @param lenInstruction Amount of instruction array in use
- */
- public REProgram(char[] instruction, int lenInstruction)
- {
- setInstructions(instruction, lenInstruction);
- }
-
- /**
- * Returns a copy of the current regular expression program in a character
- * array that is exactly the right length to hold the program. If there is
- * no program compiled yet, getInstructions() will return null.
- * @return A copy of the current compiled RE program
- */
- public char[] getInstructions()
- {
- // Ensure program has been compiled!
- if (lenInstruction != 0)
- {
- // Return copy of program
- char[] ret = new char[lenInstruction];
- System.arraycopy(instruction, 0, ret, 0, lenInstruction);
- return ret;
- }
- return null;
- }
-
- /**
- * Sets a new regular expression program to run. It is this method which
- * performs any special compile-time search optimizations. Currently only
- * two optimizations are in place - one which checks for backreferences
- * (so that they can be lazily allocated) and another which attempts to
- * find an prefix anchor string so that substantial amounts of input can
- * potentially be skipped without running the actual program.
- * @param instruction Program instruction buffer
- * @param lenInstruction Length of instruction buffer in use
- */
- public void setInstructions(char[] instruction, int lenInstruction)
- {
- // Save reference to instruction array
- this.instruction = instruction;
- this.lenInstruction = lenInstruction;
-
- // Initialize other program-related variables
- flags = 0;
- prefix = null;
-
- // Try various compile-time optimizations if there's a program
- if (instruction != null && lenInstruction != 0)
- {
- // If the first node is a branch
- if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
- {
- // to the end node
- int next = instruction[0 + RE.offsetNext];
- if (instruction[next + RE.offsetOpcode] == RE.OP_END)
- {
- // and the branch starts with an atom
- if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
- {
- // then get that atom as an prefix because there's no other choice
- int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
- prefix = new char[lenAtom];
- System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
- }
- }
- }
-
- BackrefScanLoop:
-
- // Check for backreferences
- for (int i = 0; i < lenInstruction; i += RE.nodeSize)
- {
- switch (instruction[i + RE.offsetOpcode])
- {
- case RE.OP_ANYOF:
- i += (instruction[i + RE.offsetOpdata] * 2);
- break;
-
- case RE.OP_ATOM:
- i += instruction[i + RE.offsetOpdata];
- break;
-
- case RE.OP_BACKREF:
- flags |= OPT_HASBACKREFS;
- break BackrefScanLoop;
- }
- }
- }
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/RESyntaxException.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-/**
- * Exception thrown to indicate a syntax error in a regular expression.
- * This is a non-checked exception because you should only have problems compiling
- * a regular expression during development.
- * If you are making regular expresion programs dynamically then you can catch it
- * if you wish. But should not be forced to.
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @author <a href="mailto:gholam@xtra.co.nz>Michael McCallum</a>
- */
-public class RESyntaxException extends RuntimeException
-{
- /**
- * Constructor.
- * @param s Further description of the syntax error
- */
- public RESyntaxException(String s)
- {
- super("Syntax error: " + s);
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/RETest.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,883 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.io.File;
-import java.io.ByteArrayOutputStream;
-import java.io.ObjectOutputStream;
-import java.io.ByteArrayInputStream;
-import java.io.ObjectInputStream;
-import java.io.StringBufferInputStream;
-import java.io.StringReader;
-import java.io.IOException;
-
-/**
- * Data driven (and optionally interactive) testing harness to exercise regular
- * expression compiler and matching engine.
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
- * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
- */
-public class RETest
-{
- // True if we want to see output from success cases
- static final boolean showSuccesses = false;
-
- // A new line character.
- static final String NEW_LINE = System.getProperty( "line.separator" );
-
- // Construct a debug compiler
- REDebugCompiler compiler = new REDebugCompiler();
-
- /**
- * Main program entrypoint. If an argument is given, it will be compiled
- * and interactive matching will ensue. If no argument is given, the
- * file RETest.txt will be used as automated testing input.
- * @param args Command line arguments (optional regular expression)
- */
- public static void main(String[] args)
- {
- try
- {
- if (!test( args )) {
- System.exit(1);
- }
- }
- catch (Exception e)
- {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Testing entrypoint.
- * @param args Command line arguments
- * @exception Exception thrown in case of error
- */
- public static boolean test( String[] args ) throws Exception
- {
- RETest test = new RETest();
- // Run interactive tests against a single regexp
- if (args.length == 2)
- {
- test.runInteractiveTests(args[1]);
- }
- else if (args.length == 1)
- {
- // Run automated tests
- test.runAutomatedTests(args[0]);
- }
- else
- {
- System.out.println( "Usage: RETest ([-i] [regex]) ([/path/to/testfile.txt])" );
- System.out.println( "By Default will run automated tests from file 'docs/RETest.txt' ..." );
- System.out.println();
- test.runAutomatedTests("docs/RETest.txt");
- }
- return test.failures == 0;
- }
-
- /**
- * Constructor
- */
- public RETest()
- {
- }
-
- /**
- * Compile and test matching against a single expression
- * @param expr Expression to compile and test
- */
- void runInteractiveTests(String expr)
- {
- RE r = new RE();
- try
- {
- // Compile expression
- r.setProgram(compiler.compile(expr));
-
- // Show expression
- say("" + NEW_LINE + "" + expr + "" + NEW_LINE + "");
-
- // Show program for compiled expression
- PrintWriter writer = new PrintWriter( System.out );
- compiler.dumpProgram( writer );
- writer.flush();
-
- boolean running = true;
- // Test matching against compiled expression
- while ( running )
- {
- // Read from keyboard
- BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
- System.out.print("> ");
- System.out.flush();
- String match = br.readLine();
-
- if ( match != null )
- {
- // Try a match against the keyboard input
- if (r.match(match))
- {
- say("Match successful.");
- }
- else
- {
- say("Match failed.");
- }
-
- // Show subparen registers
- showParens(r);
- }
- else
- {
- running = false;
- System.out.println();
- }
- }
- }
- catch (Exception e)
- {
- say("Error: " + e.toString());
- e.printStackTrace();
- }
- }
-
- /**
- * Exit with a fatal error.
- * @param s Last famous words before exiting
- */
- void die(String s)
- {
- say("FATAL ERROR: " + s);
- System.exit(-1);
- }
-
- /**
- * Fail with an error. Will print a big failure message to System.out.
- *
- * @param log Output before failure
- * @param s Failure description
- */
- void fail(StringBuffer log, String s)
- {
- System.out.print(log.toString());
- fail(s);
- }
-
- /**
- * Fail with an error. Will print a big failure message to System.out.
- *
- * @param s Failure description
- */
- void fail(String s)
- {
- failures++;
- say("" + NEW_LINE + "");
- say("*******************************************************");
- say("********************* FAILURE! **********************");
- say("*******************************************************");
- say("" + NEW_LINE + "");
- say(s);
- say("");
- // make sure the writer gets flushed.
- if (compiler != null) {
- PrintWriter writer = new PrintWriter( System.out );
- compiler.dumpProgram( writer );
- writer.flush();
- say("" + NEW_LINE + "");
- }
- }
-
- /**
- * Say something to standard out
- * @param s What to say
- */
- void say(String s)
- {
- System.out.println(s);
- }
-
- /**
- * Dump parenthesized subexpressions found by a regular expression matcher object
- * @param r Matcher object with results to show
- */
- void showParens(RE r)
- {
- // Loop through each paren
- for (int i = 0; i < r.getParenCount(); i++)
- {
- // Show paren register
- say("$" + i + " = " + r.getParen(i));
- }
- }
-
- /*
- * number in automated test
- */
- int testCount = 0;
-
- /*
- * Count of failures in automated test
- */
- int failures = 0;
-
- /**
- * Run automated tests in RETest.txt file (from Perl 4.0 test battery)
- * @exception Exception thrown in case of error
- */
- void runAutomatedTests(String testDocument) throws Exception
- {
- long ms = System.currentTimeMillis();
-
- // Some unit tests
- testPrecompiledRE();
- testSplitAndGrep();
- testSubst();
- testOther();
-
- // Test from script file
- File testInput = new File(testDocument);
- if (! testInput.exists()) {
- throw new Exception ("Could not find: " + testDocument);
- }
-
- BufferedReader br = new BufferedReader(new FileReader(testInput));
- try
- {
- // While input is available, parse lines
- while (br.ready())
- {
- RETestCase testcase = getNextTestCase(br);
- if (testcase != null) {
- testcase.runTest();
- }
- }
- }
- finally
- {
- br.close();
- }
-
- // Show match time
- say(NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms.");
-
- // Print final results
- if (failures > 0) {
- say("*************** THERE ARE FAILURES! *******************");
- }
- say("Tests complete. " + testCount + " tests, " + failures + " failure(s).");
- }
-
- /**
- * Run automated unit test
- * @exception Exception thrown in case of error
- */
- void testOther() throws Exception
- {
- // Serialization test 1: Compile regexp and serialize/deserialize it
- RE r = new RE("(a*)b");
- say("Serialized/deserialized (a*)b");
- ByteArrayOutputStream out = new ByteArrayOutputStream(128);
- new ObjectOutputStream(out).writeObject(r);
- ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
- r = (RE)new ObjectInputStream(in).readObject();
- if (!r.match("aaab"))
- {
- fail("Did not match 'aaab' with deserialized RE.");
- } else {
- say("aaaab = true");
- showParens(r);
- }
-
- // Serialization test 2: serialize/deserialize used regexp
- out.reset();
- say("Deserialized (a*)b");
- new ObjectOutputStream(out).writeObject(r);
- in = new ByteArrayInputStream(out.toByteArray());
- r = (RE)new ObjectInputStream(in).readObject();
- if (r.getParenCount() != 0)
- {
- fail("Has parens after deserialization.");
- }
- if (!r.match("aaab"))
- {
- fail("Did not match 'aaab' with deserialized RE.");
- } else {
- say("aaaab = true");
- showParens(r);
- }
-
- // Test MATCH_CASEINDEPENDENT
- r = new RE("abc(\\w*)");
- say("MATCH_CASEINDEPENDENT abc(\\w*)");
- r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
- say("abc(d*)");
- if (!r.match("abcddd"))
- {
- fail("Did not match 'abcddd'.");
- } else {
- say("abcddd = true");
- showParens(r);
- }
-
- if (!r.match("aBcDDdd"))
- {
- fail("Did not match 'aBcDDdd'.");
- } else {
- say("aBcDDdd = true");
- showParens(r);
- }
-
- if (!r.match("ABCDDDDD"))
- {
- fail("Did not match 'ABCDDDDD'.");
- } else {
- say("ABCDDDDD = true");
- showParens(r);
- }
-
- r = new RE("(A*)b\\1");
- r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
- if (!r.match("AaAaaaBAAAAAA"))
- {
- fail("Did not match 'AaAaaaBAAAAAA'.");
- } else {
- say("AaAaaaBAAAAAA = true");
- showParens(r);
- }
-
- r = new RE("[A-Z]*");
- r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
- if (!r.match("CaBgDe12"))
- {
- fail("Did not match 'CaBgDe12'.");
- } else {
- say("CaBgDe12 = true");
- showParens(r);
- }
-
- // Test MATCH_MULTILINE. Test for eol/bol symbols.
- r = new RE("^abc$", RE.MATCH_MULTILINE);
- if (!r.match("\nabc")) {
- fail("\"\\nabc\" doesn't match \"^abc$\"");
- }
- if (!r.match("\rabc")) {
- fail("\"\\rabc\" doesn't match \"^abc$\"");
- }
- if (!r.match("\r\nabc")) {
- fail("\"\\r\\nabc\" doesn't match \"^abc$\"");
- }
- if (!r.match("\u0085abc")) {
- fail("\"\\u0085abc\" doesn't match \"^abc$\"");
- }
- if (!r.match("\u2028abc")) {
- fail("\"\\u2028abc\" doesn't match \"^abc$\"");
- }
- if (!r.match("\u2029abc")) {
- fail("\"\\u2029abc\" doesn't match \"^abc$\"");
- }
-
- // Test MATCH_MULTILINE. Test that '.' does not matches new line.
- r = new RE("^a.*b$", RE.MATCH_MULTILINE);
- if (r.match("a\nb")) {
- fail("\"a\\nb\" matches \"^a.*b$\"");
- }
- if (r.match("a\rb")) {
- fail("\"a\\rb\" matches \"^a.*b$\"");
- }
- if (r.match("a\r\nb")) {
- fail("\"a\\r\\nb\" matches \"^a.*b$\"");
- }
- if (r.match("a\u0085b")) {
- fail("\"a\\u0085b\" matches \"^a.*b$\"");
- }
- if (r.match("a\u2028b")) {
- fail("\"a\\u2028b\" matches \"^a.*b$\"");
- }
- if (r.match("a\u2029b")) {
- fail("\"a\\u2029b\" matches \"^a.*b$\"");
- }
- }
-
- private void testPrecompiledRE()
- {
- // Pre-compiled regular expression "a*b"
- char[] re1Instructions =
- {
- 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
- 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
- 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
- 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
- 0x0000,
- };
-
- REProgram re1 = new REProgram(re1Instructions);
-
- // Simple test of pre-compiled regular expressions
- RE r = new RE(re1);
- say("a*b");
- boolean result = r.match("aaab");
- say("aaab = " + result);
- showParens(r);
- if (!result) {
- fail("\"aaab\" doesn't match to precompiled \"a*b\"");
- }
-
- result = r.match("b");
- say("b = " + result);
- showParens(r);
- if (!result) {
- fail("\"b\" doesn't match to precompiled \"a*b\"");
- }
-
- result = r.match("c");
- say("c = " + result);
- showParens(r);
- if (result) {
- fail("\"c\" matches to precompiled \"a*b\"");
- }
-
- result = r.match("ccccaaaaab");
- say("ccccaaaaab = " + result);
- showParens(r);
- if (!result) {
- fail("\"ccccaaaaab\" doesn't match to precompiled \"a*b\"");
- }
- }
-
- private void testSplitAndGrep()
- {
- String[] expected = {"xxxx", "xxxx", "yyyy", "zzz"};
- RE r = new RE("a*b");
- String[] s = r.split("xxxxaabxxxxbyyyyaaabzzz");
- for (int i = 0; i < expected.length && i < s.length; i++) {
- assertEquals("Wrong splitted part", expected[i], s[i]);
- }
- assertEquals("Wrong number of splitted parts", expected.length,
- s.length);
-
- r = new RE("x+");
- expected = new String[] {"xxxx", "xxxx"};
- s = r.grep(s);
- for (int i = 0; i < s.length; i++)
- {
- say("s[" + i + "] = " + s[i]);
- assertEquals("Grep fails", expected[i], s[i]);
- }
- assertEquals("Wrong number of string found by grep", expected.length,
- s.length);
- }
-
- private void testSubst()
- {
- RE r = new RE("a*b");
- String expected = "-foo-garply-wacky-";
- String actual = r.subst("aaaabfooaaabgarplyaaabwackyb", "-");
- assertEquals("Wrong result of substitution in \"a*b\"", expected, actual);
-
- // Test subst() with backreferences
- r = new RE("http://[\\.\\w\\-\\?/~_@&=%]+");
- actual = r.subst("visit us: http://www.apache.org!",
- "1234<a href=\"$0\">$0</a>", RE.REPLACE_BACKREFERENCES);
- assertEquals("Wrong subst() result", "visit us: 1234<a href=\"http://www.apache.org\">http://www.apache.org</a>!", actual);
-
- // Test subst() with backreferences without leading characters
- // before first backreference
- r = new RE("(.*?)=(.*)");
- actual = r.subst("variable=value",
- "$1_test_$212", RE.REPLACE_BACKREFERENCES);
- assertEquals("Wrong subst() result", "variable_test_value12", actual);
-
- // Test subst() with NO backreferences
- r = new RE("^a$");
- actual = r.subst("a",
- "b", RE.REPLACE_BACKREFERENCES);
- assertEquals("Wrong subst() result", "b", actual);
-
- // Test subst() with NO backreferences
- r = new RE("^a$", RE.MATCH_MULTILINE);
- actual = r.subst("\r\na\r\n",
- "b", RE.REPLACE_BACKREFERENCES);
- assertEquals("Wrong subst() result", "\r\nb\r\n", actual);
- }
-
- public void assertEquals(String message, String expected, String actual)
- {
- if (expected != null && !expected.equals(actual)
- || actual != null && !actual.equals(expected))
- {
- fail(message + " (expected \"" + expected
- + "\", actual \"" + actual + "\")");
- }
- }
-
- public void assertEquals(String message, int expected, int actual)
- {
- if (expected != actual) {
- fail(message + " (expected \"" + expected
- + "\", actual \"" + actual + "\")");
- }
- }
-
- /**
- * Converts yesno string to boolean.
- * @param yesno string representation of expected result
- * @return true if yesno is "YES", false if yesno is "NO"
- * stops program otherwise.
- */
- private boolean getExpectedResult(String yesno)
- {
- if ("NO".equals(yesno))
- {
- return false;
- }
- else if ("YES".equals(yesno))
- {
- return true;
- }
- else
- {
- // Bad test script
- die("Test script error!");
- return false; //to please javac
- }
- }
-
- /**
- * Finds next test description in a given script.
- * @param br <code>BufferedReader</code> for a script file
- * @return strign tag for next test description
- * @exception IOException if some io problems occured
- */
- private String findNextTest(BufferedReader br) throws IOException
- {
- String number = "";
-
- while (br.ready())
- {
- number = br.readLine();
- if (number == null)
- {
- break;
- }
- number = number.trim();
- if (number.startsWith("#"))
- {
- break;
- }
- if (!number.equals(""))
- {
- say("Script error. Line = " + number);
- System.exit(-1);
- }
- }
- return number;
- }
-
- /**
- * Creates testcase for the next test description in the script file.
- * @param br <code>BufferedReader</code> for script file.
- * @return a new tescase or null.
- * @exception IOException if some io problems occured
- */
- private RETestCase getNextTestCase(BufferedReader br) throws IOException
- {
- // Find next re test case
- final String tag = findNextTest(br);
-
- // Are we done?
- if (!br.ready())
- {
- return null;
- }
-
- // Get expression
- final String expr = br.readLine();
-
- // Get test information
- final String matchAgainst = br.readLine();
- final boolean badPattern = "ERR".equals(matchAgainst);
- boolean shouldMatch = false;
- int expectedParenCount = 0;
- String[] expectedParens = null;
-
- if (!badPattern) {
- shouldMatch = getExpectedResult(br.readLine().trim());
- if (shouldMatch) {
- expectedParenCount = Integer.parseInt(br.readLine().trim());
- expectedParens = new String[expectedParenCount];
- for (int i = 0; i < expectedParenCount; i++) {
- expectedParens[i] = br.readLine();
- }
- }
- }
-
- return new RETestCase(this, tag, expr, matchAgainst, badPattern,
- shouldMatch, expectedParens);
- }
-}
-
-final class RETestCase
-{
- final private StringBuffer log = new StringBuffer();
- final private int number;
- final private String tag; // number from script file
- final private String pattern;
- final private String toMatch;
- final private boolean badPattern;
- final private boolean shouldMatch;
- final private String[] parens;
- final private RETest test;
- private RE regexp;
-
- public RETestCase(RETest test, String tag, String pattern,
- String toMatch, boolean badPattern,
- boolean shouldMatch, String[] parens)
- {
- this.number = ++test.testCount;
- this.test = test;
- this.tag = tag;
- this.pattern = pattern;
- this.toMatch = toMatch;
- this.badPattern = badPattern;
- this.shouldMatch = shouldMatch;
- if (parens != null) {
- this.parens = new String[parens.length];
- for (int i = 0; i < parens.length; i++) {
- this.parens[i] = parens[i];
- }
- } else {
- this.parens = null;
- }
- }
-
- public void runTest()
- {
- test.say(tag + "(" + number + "): " + pattern);
- if (testCreation()) {
- testMatch();
- }
- }
-
- boolean testCreation()
- {
- try
- {
- // Compile it
- regexp = new RE();
- regexp.setProgram(test.compiler.compile(pattern));
- // Expression didn't cause an expected error
- if (badPattern)
- {
- test.fail(log, "Was expected to be an error, but wasn't.");
- return false;
- }
-
- return true;
- }
- // Some expressions *should* cause exceptions to be thrown
- catch (Exception e)
- {
- // If it was supposed to be an error, report success and continue
- if (badPattern)
- {
- log.append(" Match: ERR\n");
- success("Produces an error (" + e.toString() + "), as expected.");
- return false;
- }
-
- // Wasn't supposed to be an error
- String message = (e.getMessage() == null) ? e.toString() : e.getMessage();
- test.fail(log, "Produces an unexpected exception \"" + message + "\"");
- e.printStackTrace();
- }
- catch (Error e)
- {
- // Internal error happened
- test.fail(log, "Compiler threw fatal error \"" + e.getMessage() + "\"");
- e.printStackTrace();
- }
-
- return false;
- }
-
- private void testMatch()
- {
- log.append(" Match against: '" + toMatch + "'\n");
- // Try regular matching
- try
- {
- // Match against the string
- boolean result = regexp.match(toMatch);
- log.append(" Matched: " + (result ? "YES" : "NO") + "\n");
-
- // Check result, parens, and iterators
- if (checkResult(result) && (!shouldMatch || checkParens()))
- {
- // test match(CharacterIterator, int)
- // for every CharacterIterator implementation.
- log.append(" Match using StringCharacterIterator\n");
- if (!tryMatchUsingCI(new StringCharacterIterator(toMatch)))
- return;
-
- log.append(" Match using CharacterArrayCharacterIterator\n");
- if (!tryMatchUsingCI(new CharacterArrayCharacterIterator(toMatch.toCharArray(), 0, toMatch.length())))
- return;
-
- log.append(" Match using StreamCharacterIterator\n");
- if (!tryMatchUsingCI(new StreamCharacterIterator(new StringBufferInputStream(toMatch))))
- return;
-
- log.append(" Match using ReaderCharacterIterator\n");
- if (!tryMatchUsingCI(new ReaderCharacterIterator(new StringReader(toMatch))))
- return;
- }
- }
- // Matcher blew it
- catch(Exception e)
- {
- test.fail(log, "Matcher threw exception: " + e.toString());
- e.printStackTrace();
- }
- // Internal error
- catch(Error e)
- {
- test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\"");
- e.printStackTrace();
- }
- }
-
- private boolean checkResult(boolean result)
- {
- // Write status
- if (result == shouldMatch) {
- success((shouldMatch ? "Matched" : "Did not match")
- + " \"" + toMatch + "\", as expected:");
- return true;
- } else {
- if (shouldMatch) {
- test.fail(log, "Did not match \"" + toMatch + "\", when expected to.");
- } else {
- test.fail(log, "Matched \"" + toMatch + "\", when not expected to.");
- }
- return false;
- }
- }
-
- private boolean checkParens()
- {
- // Show subexpression registers
- if (RETest.showSuccesses)
- {
- test.showParens(regexp);
- }
-
- log.append(" Paren count: " + regexp.getParenCount() + "\n");
- if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
- {
- return false;
- }
-
- // Check registers against expected contents
- for (int p = 0; p < regexp.getParenCount(); p++)
- {
- log.append(" Paren " + p + ": " + regexp.getParen(p) + "\n");
-
- // Compare expected result with actual
- if ("null".equals(parens[p]) && regexp.getParen(p) == null)
- {
- // Consider "null" in test file equal to null
- continue;
- }
- if (!assertEquals(log, "Wrong register " + p, parens[p], regexp.getParen(p)))
- {
- return false;
- }
- }
-
- return true;
- }
-
- boolean tryMatchUsingCI(CharacterIterator matchAgainst)
- {
- try {
- boolean result = regexp.match(matchAgainst, 0);
- log.append(" Match: " + (result ? "YES" : "NO") + "\n");
- return checkResult(result) && (!shouldMatch || checkParens());
- }
- // Matcher blew it
- catch(Exception e)
- {
- test.fail(log, "Matcher threw exception: " + e.toString());
- e.printStackTrace();
- }
- // Internal error
- catch(Error e)
- {
- test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\"");
- e.printStackTrace();
- }
- return false;
- }
-
- public boolean assertEquals(StringBuffer log, String message, String expected, String actual)
- {
- if (expected != null && !expected.equals(actual)
- || actual != null && !actual.equals(expected))
- {
- test.fail(log, message + " (expected \"" + expected
- + "\", actual \"" + actual + "\")");
- return false;
- }
- return true;
- }
-
- public boolean assertEquals(StringBuffer log, String message, int expected, int actual)
- {
- if (expected != actual) {
- test.fail(log, message + " (expected \"" + expected
- + "\", actual \"" + actual + "\")");
- return false;
- }
- return true;
- }
-
- /**
- * Show a success
- * @param s Success story
- */
- void success(String s)
- {
- if (RETest.showSuccesses)
- {
- test.say("" + RETest.NEW_LINE + "-----------------------" + RETest.NEW_LINE + "");
- test.say("Expression #" + (number) + " \"" + pattern + "\" ");
- test.say("Success: " + s);
- }
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/REUtil.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-/**
- * This is a class that contains utility helper methods for this package.
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- */
-public class REUtil
-{
- /** complex: */
- private static final String complexPrefix = "complex:";
-
- /**
- * Creates a regular expression, permitting simple or complex syntax
- * @param expression The expression, beginning with a prefix if it's complex or
- * having no prefix if it's simple
- * @param matchFlags Matching style flags
- * @return The regular expression object
- * @exception RESyntaxException thrown in case of error
- */
- public static RE createRE(String expression, int matchFlags) throws RESyntaxException
- {
- if (expression.startsWith(complexPrefix))
- {
- return new RE(expression.substring(complexPrefix.length()), matchFlags);
- }
- return new RE(RE.simplePatternToFullRegularExpression(expression), matchFlags);
- }
-
- /**
- * Creates a regular expression, permitting simple or complex syntax
- * @param expression The expression, beginning with a prefix if it's complex or
- * having no prefix if it's simple
- * @return The regular expression object
- * @exception RESyntaxException thrown in case of error
- */
- public static RE createRE(String expression) throws RESyntaxException
- {
- return createRE(expression, RE.MATCH_NORMAL);
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/ReaderCharacterIterator.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.Reader;
-import java.io.IOException;
-
-/**
- * Encapsulates java.io.Reader as CharacterIterator
- *
- * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
- */
-public final class ReaderCharacterIterator implements CharacterIterator
-{
- /** Underlying reader */
- private final Reader reader;
-
- /** Buffer of read chars */
- private final StringBuffer buff;
-
- /** read end? */
- private boolean closed;
-
- /** @param reader a Reader, which is parsed */
- public ReaderCharacterIterator(Reader reader)
- {
- this.reader = reader;
- this.buff = new StringBuffer(512);
- this.closed = false;
- }
-
- /** @return a substring */
- public String substring(int beginIndex, int endIndex)
- {
- try
- {
- ensure(endIndex);
- return buff.toString().substring(beginIndex, endIndex);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
- /** @return a substring */
- public String substring(int beginIndex)
- {
- try
- {
- readAll();
- return buff.toString().substring(beginIndex);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
- /** @return a character at the specified position. */
- public char charAt(int pos)
- {
- try
- {
- ensure(pos);
- return buff.charAt(pos);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
- /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
- public boolean isEnd(int pos)
- {
- if (buff.length() > pos)
- {
- return false;
- }
- else
- {
- try
- {
- ensure(pos);
- return (buff.length() <= pos);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
- }
-
- /** Reads n characters from the stream and appends them to the buffer */
- private int read(int n) throws IOException
- {
- if (closed)
- {
- return 0;
- }
-
- char[] c = new char[n];
- int count = 0;
- int read = 0;
-
- do
- {
- read = reader.read(c);
- if (read < 0) // EOF
- {
- closed = true;
- break;
- }
- count += read;
- buff.append(c, 0, read);
- }
- while (count < n);
-
- return count;
- }
-
- /** Reads rest of the stream. */
- private void readAll() throws IOException
- {
- while(! closed)
- {
- read(1000);
- }
- }
-
- /** Reads chars up to the idx */
- private void ensure(int idx) throws IOException
- {
- if (closed)
- {
- return;
- }
-
- if (idx < buff.length())
- {
- return;
- }
- read(idx + 1 - buff.length());
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/StreamCharacterIterator.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,161 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import java.io.InputStream;
-import java.io.IOException;
-
-/**
- * Encapsulates java.io.InputStream as CharacterIterator.
- *
- * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
- */
-public final class StreamCharacterIterator implements CharacterIterator
-{
- /** Underlying is */
- private final InputStream is;
-
- /** Buffer of read chars */
- private final StringBuffer buff;
-
- /** read end? */
- private boolean closed;
-
- /** @param is an InputStream, which is parsed */
- public StreamCharacterIterator(InputStream is)
- {
- this.is = is;
- this.buff = new StringBuffer(512);
- this.closed = false;
- }
-
- /** @return a substring */
- public String substring(int beginIndex, int endIndex)
- {
- try
- {
- ensure(endIndex);
- return buff.toString().substring(beginIndex, endIndex);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
- /** @return a substring */
- public String substring(int beginIndex)
- {
- try
- {
- readAll();
- return buff.toString().substring(beginIndex);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
-
- /** @return a character at the specified position. */
- public char charAt(int pos)
- {
- try
- {
- ensure(pos);
- return buff.charAt(pos);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
-
- /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
- public boolean isEnd(int pos)
- {
- if (buff.length() > pos)
- {
- return false;
- }
- else
- {
- try
- {
- ensure(pos);
- return (buff.length() <= pos);
- }
- catch (IOException e)
- {
- throw new StringIndexOutOfBoundsException(e.getMessage());
- }
- }
- }
-
- /** Reads n characters from the stream and appends them to the buffer */
- private int read(int n) throws IOException
- {
- if (closed)
- {
- return 0;
- }
-
- int c;
- int i = n;
- while (--i >= 0)
- {
- c = is.read();
- if (c < 0) // EOF
- {
- closed = true;
- break;
- }
- buff.append((char) c);
- }
- return n - i;
- }
-
- /** Reads rest of the stream. */
- private void readAll() throws IOException
- {
- while(! closed)
- {
- read(1000);
- }
- }
-
- /** Reads chars up to the idx */
- private void ensure(int idx) throws IOException
- {
- if (closed)
- {
- return;
- }
-
- if (idx < buff.length())
- {
- return;
- }
-
- read(idx + 1 - buff.length());
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/StringCharacterIterator.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-/**
- * Encapsulates String as CharacterIterator.
- *
- * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
- */
-public final class StringCharacterIterator implements CharacterIterator
-{
- /** encapsulated */
- private final String src;
-
- /** @param src - encapsulated String */
- public StringCharacterIterator(String src)
- {
- this.src = src;
- }
-
- /** @return a substring */
- public String substring(int beginIndex, int endIndex)
- {
- return src.substring(beginIndex, endIndex);
- }
-
- /** @return a substring */
- public String substring(int beginIndex)
- {
- return src.substring(beginIndex);
- }
-
- /** @return a character at the specified position. */
- public char charAt(int pos)
- {
- return src.charAt(pos);
- }
-
- /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
- public boolean isEnd(int pos)
- {
- return (pos >= src.length());
- }
-}
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/regexp/internal/recompile.java Tue May 10 00:24:24 2016 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,137 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2004 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.regexp.internal;
-
-import com.sun.org.apache.regexp.internal.RECompiler;
-import com.sun.org.apache.regexp.internal.RESyntaxException;
-
-/**
- * 'recompile' is a command line tool that pre-compiles one or more regular expressions
- * for use with the regular expression matcher class 'RE'. For example, the command
- * "java recompile a*b" produces output like this:
- *
- * <pre>
- *
- * // Pre-compiled regular expression "a*b"
- * char[] re1Instructions =
- * {
- * 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
- * 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
- * 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
- * 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
- * 0x0000,
- * };
- *
- * REProgram re1 = new REProgram(re1Instructions);
- *
- * </pre>
- *
- * By pasting this output into your code, you can construct a regular expression matcher
- * (RE) object directly from the pre-compiled data (the character array re1), thus avoiding
- * the overhead of compiling the expression at runtime. For example:
- *
- * <pre>
- *
- * RE r = new RE(re1);
- *
- * </pre>
- *
- * @see RE
- * @see RECompiler
- *
- * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- */
-public class recompile
-{
- /**
- * Main application entrypoint.
- * @param arg Command line arguments
- */
- static public void main(String[] arg)
- {
- // Create a compiler object
- RECompiler r = new RECompiler();
-
- // Print usage if arguments are incorrect
- if (arg.length <= 0 || arg.length % 2 != 0)
- {
- System.out.println("Usage: recompile <patternname> <pattern>");
- System.exit(0);
- }
-
- // Loop through arguments, compiling each
- for (int i = 0; i < arg.length; i += 2)
- {
- try
- {
- // Compile regular expression
- String name = arg[i];
- String pattern = arg[i+1];
- String instructions = name + "PatternInstructions";
-
- // Output program as a nice, formatted character array
- System.out.print("\n // Pre-compiled regular expression '" + pattern + "'\n"
- + " private static char[] " + instructions + " = \n {");
-
- // Compile program for pattern
- REProgram program = r.compile(pattern);
-
- // Number of columns in output
- int numColumns = 7;
-
- // Loop through program
- char[] p = program.getInstructions();
- for (int j = 0; j < p.length; j++)
- {
- // End of column?
- if ((j % numColumns) == 0)
- {
- System.out.print("\n ");
- }
-
- // Print character as padded hex number
- String hex = Integer.toHexString(p[j]);
- while (hex.length() < 4)
- {
- hex = "0" + hex;
- }
- System.out.print("0x" + hex + ", ");
- }
-
- // End of program block
- System.out.println("\n };");
- System.out.println("\n private static RE " + name + "Pattern = new RE(new REProgram(" + instructions + "));");
- }
- catch (RESyntaxException e)
- {
- System.out.println("Syntax error in expression \"" + arg[i] + "\": " + e.toString());
- }
- catch (Exception e)
- {
- System.out.println("Unexpected exception: " + e.toString());
- }
- catch (Error e)
- {
- System.out.println("Internal error: " + e.toString());
- }
- }
- }
-}