jdk-sandbox: jaxp/src/com/sun/org/apache/regexp/internal/RE.java@9001e536ab4e (annotated)

6 7f561c08de6b Initial load duke parents: diff changeset	1	/*
7f561c08de6b Initial load duke parents: diff changeset	2	* reserved comment block
7f561c08de6b Initial load duke parents: diff changeset	3	* DO NOT REMOVE OR ALTER!
7f561c08de6b Initial load duke parents: diff changeset	4	*/
7f561c08de6b Initial load duke parents: diff changeset	5	/*
7f561c08de6b Initial load duke parents: diff changeset	6	* Copyright 1999-2004 The Apache Software Foundation.
7f561c08de6b Initial load duke parents: diff changeset	7	*
7f561c08de6b Initial load duke parents: diff changeset	8	* Licensed under the Apache License, Version 2.0 (the "License");
7f561c08de6b Initial load duke parents: diff changeset	9	* you may not use this file except in compliance with the License.
7f561c08de6b Initial load duke parents: diff changeset	10	* You may obtain a copy of the License at
7f561c08de6b Initial load duke parents: diff changeset	11	*
7f561c08de6b Initial load duke parents: diff changeset	12	* http://www.apache.org/licenses/LICENSE-2.0
7f561c08de6b Initial load duke parents: diff changeset	13	*
7f561c08de6b Initial load duke parents: diff changeset	14	* Unless required by applicable law or agreed to in writing, software
7f561c08de6b Initial load duke parents: diff changeset	15	* distributed under the License is distributed on an "AS IS" BASIS,
7f561c08de6b Initial load duke parents: diff changeset	16	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
7f561c08de6b Initial load duke parents: diff changeset	17	* See the License for the specific language governing permissions and
7f561c08de6b Initial load duke parents: diff changeset	18	* limitations under the License.
7f561c08de6b Initial load duke parents: diff changeset	19	*/
7f561c08de6b Initial load duke parents: diff changeset	20
7f561c08de6b Initial load duke parents: diff changeset	21	package com.sun.org.apache.regexp.internal;
7f561c08de6b Initial load duke parents: diff changeset	22
7f561c08de6b Initial load duke parents: diff changeset	23	import java.io.Serializable;
7f561c08de6b Initial load duke parents: diff changeset	24	import java.util.Vector;
7f561c08de6b Initial load duke parents: diff changeset	25
7f561c08de6b Initial load duke parents: diff changeset	26	/**
7f561c08de6b Initial load duke parents: diff changeset	27	* RE is an efficient, lightweight regular expression evaluator/matcher
7f561c08de6b Initial load duke parents: diff changeset	28	* class. Regular expressions are pattern descriptions which enable
7f561c08de6b Initial load duke parents: diff changeset	29	* sophisticated matching of strings. In addition to being able to
7f561c08de6b Initial load duke parents: diff changeset	30	* match a string against a pattern, you can also extract parts of the
7f561c08de6b Initial load duke parents: diff changeset	31	* match. This is especially useful in text parsing! Details on the
7f561c08de6b Initial load duke parents: diff changeset	32	* syntax of regular expression patterns are given below.
7f561c08de6b Initial load duke parents: diff changeset	33	*
7f561c08de6b Initial load duke parents: diff changeset	34	* <p>
7f561c08de6b Initial load duke parents: diff changeset	35	* To compile a regular expression (RE), you can simply construct an RE
7f561c08de6b Initial load duke parents: diff changeset	36	* matcher object from the string specification of the pattern, like this:
7f561c08de6b Initial load duke parents: diff changeset	37	*
7f561c08de6b Initial load duke parents: diff changeset	38	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	39	* RE r = new RE("a*b");
7f561c08de6b Initial load duke parents: diff changeset	40	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	41	*
7f561c08de6b Initial load duke parents: diff changeset	42	* <p>
7f561c08de6b Initial load duke parents: diff changeset	43	* Once you have done this, you can call either of the RE.match methods to
7f561c08de6b Initial load duke parents: diff changeset	44	* perform matching on a String. For example:
7f561c08de6b Initial load duke parents: diff changeset	45	*
7f561c08de6b Initial load duke parents: diff changeset	46	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	47	* boolean matched = r.match("aaaab");
7f561c08de6b Initial load duke parents: diff changeset	48	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	49	*
7f561c08de6b Initial load duke parents: diff changeset	50	* will cause the boolean matched to be set to true because the
7f561c08de6b Initial load duke parents: diff changeset	51	* pattern "a*b" matches the string "aaaab".
7f561c08de6b Initial load duke parents: diff changeset	52	*
7f561c08de6b Initial load duke parents: diff changeset	53	* <p>
7f561c08de6b Initial load duke parents: diff changeset	54	* If you were interested in the <i>number</i> of a's which matched the
7f561c08de6b Initial load duke parents: diff changeset	55	* first part of our example expression, you could change the expression to
7f561c08de6b Initial load duke parents: diff changeset	56	* "(a*)b". Then when you compiled the expression and matched it against
7f561c08de6b Initial load duke parents: diff changeset	57	* something like "xaaaab", you would get results like this:
7f561c08de6b Initial load duke parents: diff changeset	58	*
7f561c08de6b Initial load duke parents: diff changeset	59	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	60	* RE r = new RE("(a*)b"); // Compile expression
7f561c08de6b Initial load duke parents: diff changeset	61	* boolean matched = r.match("xaaaab"); // Match against "xaaaab"
7f561c08de6b Initial load duke parents: diff changeset	62	*
7f561c08de6b Initial load duke parents: diff changeset	63	* String wholeExpr = r.getParen(0); // wholeExpr will be 'aaaab'
7f561c08de6b Initial load duke parents: diff changeset	64	* String insideParens = r.getParen(1); // insideParens will be 'aaaa'
7f561c08de6b Initial load duke parents: diff changeset	65	*
7f561c08de6b Initial load duke parents: diff changeset	66	* int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
7f561c08de6b Initial load duke parents: diff changeset	67	* int endWholeExpr = r.getParenEnd(0); // endWholeExpr will be index 6
7f561c08de6b Initial load duke parents: diff changeset	68	* int lenWholeExpr = r.getParenLength(0); // lenWholeExpr will be 5
7f561c08de6b Initial load duke parents: diff changeset	69	*
7f561c08de6b Initial load duke parents: diff changeset	70	* int startInside = r.getParenStart(1); // startInside will be index 1
7f561c08de6b Initial load duke parents: diff changeset	71	* int endInside = r.getParenEnd(1); // endInside will be index 5
7f561c08de6b Initial load duke parents: diff changeset	72	* int lenInside = r.getParenLength(1); // lenInside will be 4
7f561c08de6b Initial load duke parents: diff changeset	73	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	74	*
7f561c08de6b Initial load duke parents: diff changeset	75	* You can also refer to the contents of a parenthesized expression
7f561c08de6b Initial load duke parents: diff changeset	76	* within a regular expression itself. This is called a
7f561c08de6b Initial load duke parents: diff changeset	77	* 'backreference'. The first backreference in a regular expression is
7f561c08de6b Initial load duke parents: diff changeset	78	* denoted by \1, the second by \2 and so on. So the expression:
7f561c08de6b Initial load duke parents: diff changeset	79	*
7f561c08de6b Initial load duke parents: diff changeset	80	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	81	* ([0-9]+)=\1
7f561c08de6b Initial load duke parents: diff changeset	82	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	83	*
7f561c08de6b Initial load duke parents: diff changeset	84	* will match any string of the form n=n (like 0=0 or 2=2).
7f561c08de6b Initial load duke parents: diff changeset	85	*
7f561c08de6b Initial load duke parents: diff changeset	86	* <p>
7f561c08de6b Initial load duke parents: diff changeset	87	* The full regular expression syntax accepted by RE is described here:
7f561c08de6b Initial load duke parents: diff changeset	88	*
7f561c08de6b Initial load duke parents: diff changeset	89	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	90	*
7f561c08de6b Initial load duke parents: diff changeset	91	* <b><font face=times roman>Characters</font></b>
7f561c08de6b Initial load duke parents: diff changeset	92	*
7f561c08de6b Initial load duke parents: diff changeset	93	* <i>unicodeChar</i> Matches any identical unicode character
7f561c08de6b Initial load duke parents: diff changeset	94	* \ Used to quote a meta-character (like '*')
7f561c08de6b Initial load duke parents: diff changeset	95	* \\ Matches a single '\' character
7f561c08de6b Initial load duke parents: diff changeset	96	* \0nnn Matches a given octal character
7f561c08de6b Initial load duke parents: diff changeset	97	* \xhh Matches a given 8-bit hexadecimal character
7f561c08de6b Initial load duke parents: diff changeset	98	* \\uhhhh Matches a given 16-bit hexadecimal character
7f561c08de6b Initial load duke parents: diff changeset	99	* \t Matches an ASCII tab character
7f561c08de6b Initial load duke parents: diff changeset	100	* \n Matches an ASCII newline character
7f561c08de6b Initial load duke parents: diff changeset	101	* \r Matches an ASCII return character
7f561c08de6b Initial load duke parents: diff changeset	102	* \f Matches an ASCII form feed character
7f561c08de6b Initial load duke parents: diff changeset	103	*
7f561c08de6b Initial load duke parents: diff changeset	104	*
7f561c08de6b Initial load duke parents: diff changeset	105	* <b><font face=times roman>Character Classes</font></b>
7f561c08de6b Initial load duke parents: diff changeset	106	*
7f561c08de6b Initial load duke parents: diff changeset	107	* [abc] Simple character class
7f561c08de6b Initial load duke parents: diff changeset	108	* [a-zA-Z] Character class with ranges
7f561c08de6b Initial load duke parents: diff changeset	109	* [^abc] Negated character class
7f561c08de6b Initial load duke parents: diff changeset	110	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	111	*
7f561c08de6b Initial load duke parents: diff changeset	112	* <b>NOTE:</b> Incomplete ranges will be interpreted as "starts
7f561c08de6b Initial load duke parents: diff changeset	113	* from zero" or "ends with last character".
7f561c08de6b Initial load duke parents: diff changeset	114	* <br>
7f561c08de6b Initial load duke parents: diff changeset	115	* I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
7f561c08de6b Initial load duke parents: diff changeset	116	* [-] means "all characters".
7f561c08de6b Initial load duke parents: diff changeset	117	*
7f561c08de6b Initial load duke parents: diff changeset	118	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	119	*
7f561c08de6b Initial load duke parents: diff changeset	120	* <b><font face=times roman>Standard POSIX Character Classes</font></b>
7f561c08de6b Initial load duke parents: diff changeset	121	*
7f561c08de6b Initial load duke parents: diff changeset	122	* [:alnum:] Alphanumeric characters.
7f561c08de6b Initial load duke parents: diff changeset	123	* [:alpha:] Alphabetic characters.
7f561c08de6b Initial load duke parents: diff changeset	124	* [:blank:] Space and tab characters.
7f561c08de6b Initial load duke parents: diff changeset	125	* [:cntrl:] Control characters.
7f561c08de6b Initial load duke parents: diff changeset	126	* [:digit:] Numeric characters.
7f561c08de6b Initial load duke parents: diff changeset	127	* [:graph:] Characters that are printable and are also visible.
7f561c08de6b Initial load duke parents: diff changeset	128	* (A space is printable, but not visible, while an
7f561c08de6b Initial load duke parents: diff changeset	129	* `a' is both.)
7f561c08de6b Initial load duke parents: diff changeset	130	* [:lower:] Lower-case alphabetic characters.
7f561c08de6b Initial load duke parents: diff changeset	131	* [:print:] Printable characters (characters that are not
7f561c08de6b Initial load duke parents: diff changeset	132	* control characters.)
7f561c08de6b Initial load duke parents: diff changeset	133	* [:punct:] Punctuation characters (characters that are not letters,
7f561c08de6b Initial load duke parents: diff changeset	134	* digits, control characters, or space characters).
7f561c08de6b Initial load duke parents: diff changeset	135	* [:space:] Space characters (such as space, tab, and formfeed,
7f561c08de6b Initial load duke parents: diff changeset	136	* to name a few).
7f561c08de6b Initial load duke parents: diff changeset	137	* [:upper:] Upper-case alphabetic characters.
7f561c08de6b Initial load duke parents: diff changeset	138	* [:xdigit:] Characters that are hexadecimal digits.
7f561c08de6b Initial load duke parents: diff changeset	139	*
7f561c08de6b Initial load duke parents: diff changeset	140	*
7f561c08de6b Initial load duke parents: diff changeset	141	* <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
7f561c08de6b Initial load duke parents: diff changeset	142	*
7f561c08de6b Initial load duke parents: diff changeset	143	* [:javastart:] Start of a Java identifier
7f561c08de6b Initial load duke parents: diff changeset	144	* [:javapart:] Part of a Java identifier
7f561c08de6b Initial load duke parents: diff changeset	145	*
7f561c08de6b Initial load duke parents: diff changeset	146	*
7f561c08de6b Initial load duke parents: diff changeset	147	* <b><font face=times roman>Predefined Classes</font></b>
7f561c08de6b Initial load duke parents: diff changeset	148	*
7f561c08de6b Initial load duke parents: diff changeset	149	* . Matches any character other than newline
7f561c08de6b Initial load duke parents: diff changeset	150	* \w Matches a "word" character (alphanumeric plus "_")
7f561c08de6b Initial load duke parents: diff changeset	151	* \W Matches a non-word character
7f561c08de6b Initial load duke parents: diff changeset	152	* \s Matches a whitespace character
7f561c08de6b Initial load duke parents: diff changeset	153	* \S Matches a non-whitespace character
7f561c08de6b Initial load duke parents: diff changeset	154	* \d Matches a digit character
7f561c08de6b Initial load duke parents: diff changeset	155	* \D Matches a non-digit character
7f561c08de6b Initial load duke parents: diff changeset	156	*
7f561c08de6b Initial load duke parents: diff changeset	157	*
7f561c08de6b Initial load duke parents: diff changeset	158	* <b><font face=times roman>Boundary Matchers</font></b>
7f561c08de6b Initial load duke parents: diff changeset	159	*
7f561c08de6b Initial load duke parents: diff changeset	160	* ^ Matches only at the beginning of a line
7f561c08de6b Initial load duke parents: diff changeset	161	* $ Matches only at the end of a line
7f561c08de6b Initial load duke parents: diff changeset	162	* \b Matches only at a word boundary
7f561c08de6b Initial load duke parents: diff changeset	163	* \B Matches only at a non-word boundary
7f561c08de6b Initial load duke parents: diff changeset	164	*
7f561c08de6b Initial load duke parents: diff changeset	165	*
7f561c08de6b Initial load duke parents: diff changeset	166	* <b><font face=times roman>Greedy Closures</font></b>
7f561c08de6b Initial load duke parents: diff changeset	167	*
7f561c08de6b Initial load duke parents: diff changeset	168	* A* Matches A 0 or more times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	169	* A+ Matches A 1 or more times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	170	* A? Matches A 1 or 0 times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	171	* A{n} Matches A exactly n times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	172	* A{n,} Matches A at least n times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	173	* A{n,m} Matches A at least n but not more than m times (greedy)
7f561c08de6b Initial load duke parents: diff changeset	174	*
7f561c08de6b Initial load duke parents: diff changeset	175	*
7f561c08de6b Initial load duke parents: diff changeset	176	* <b><font face=times roman>Reluctant Closures</font></b>
7f561c08de6b Initial load duke parents: diff changeset	177	*
7f561c08de6b Initial load duke parents: diff changeset	178	* A*? Matches A 0 or more times (reluctant)
7f561c08de6b Initial load duke parents: diff changeset	179	* A+? Matches A 1 or more times (reluctant)
7f561c08de6b Initial load duke parents: diff changeset	180	* A?? Matches A 0 or 1 times (reluctant)
7f561c08de6b Initial load duke parents: diff changeset	181	*
7f561c08de6b Initial load duke parents: diff changeset	182	*
7f561c08de6b Initial load duke parents: diff changeset	183	* <b><font face=times roman>Logical Operators</font></b>
7f561c08de6b Initial load duke parents: diff changeset	184	*
7f561c08de6b Initial load duke parents: diff changeset	185	* AB Matches A followed by B
7f561c08de6b Initial load duke parents: diff changeset	186	* A|B Matches either A or B
7f561c08de6b Initial load duke parents: diff changeset	187	* (A) Used for subexpression grouping
7f561c08de6b Initial load duke parents: diff changeset	188	* (?:A) Used for subexpression clustering (just like grouping but
7f561c08de6b Initial load duke parents: diff changeset	189	* no backrefs)
7f561c08de6b Initial load duke parents: diff changeset	190	*
7f561c08de6b Initial load duke parents: diff changeset	191	*
7f561c08de6b Initial load duke parents: diff changeset	192	* <b><font face=times roman>Backreferences</font></b>
7f561c08de6b Initial load duke parents: diff changeset	193	*
7f561c08de6b Initial load duke parents: diff changeset	194	* \1 Backreference to 1st parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	195	* \2 Backreference to 2nd parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	196	* \3 Backreference to 3rd parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	197	* \4 Backreference to 4th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	198	* \5 Backreference to 5th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	199	* \6 Backreference to 6th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	200	* \7 Backreference to 7th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	201	* \8 Backreference to 8th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	202	* \9 Backreference to 9th parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	203	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	204	*
7f561c08de6b Initial load duke parents: diff changeset	205	* <p>
7f561c08de6b Initial load duke parents: diff changeset	206	* All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
7f561c08de6b Initial load duke parents: diff changeset	207	* that they match as many elements of the string as possible without
7f561c08de6b Initial load duke parents: diff changeset	208	* causing the overall match to fail. If you want a closure to be
7f561c08de6b Initial load duke parents: diff changeset	209	* reluctant (non-greedy), you can simply follow it with a '?'. A
7f561c08de6b Initial load duke parents: diff changeset	210	* reluctant closure will match as few elements of the string as
7f561c08de6b Initial load duke parents: diff changeset	211	* possible when finding matches. {m,n} closures don't currently
7f561c08de6b Initial load duke parents: diff changeset	212	* support reluctancy.
7f561c08de6b Initial load duke parents: diff changeset	213	*
7f561c08de6b Initial load duke parents: diff changeset	214	* <p>
7f561c08de6b Initial load duke parents: diff changeset	215	* <b><font face="times roman">Line terminators</font></b>
7f561c08de6b Initial load duke parents: diff changeset	216	* <br>
7f561c08de6b Initial load duke parents: diff changeset	217	* A line terminator is a one- or two-character sequence that marks
7f561c08de6b Initial load duke parents: diff changeset	218	* the end of a line of the input character sequence. The following
7f561c08de6b Initial load duke parents: diff changeset	219	* are recognized as line terminators:
7f561c08de6b Initial load duke parents: diff changeset	220	* <ul>
7f561c08de6b Initial load duke parents: diff changeset	221	* <li>A newline (line feed) character ('\n'),</li>
7f561c08de6b Initial load duke parents: diff changeset	222	* <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
7f561c08de6b Initial load duke parents: diff changeset	223	* <li>A standalone carriage-return character ('\r'),</li>
7f561c08de6b Initial load duke parents: diff changeset	224	* <li>A next-line character ('\u0085'),</li>
7f561c08de6b Initial load duke parents: diff changeset	225	* <li>A line-separator character ('\u2028'), or</li>
7f561c08de6b Initial load duke parents: diff changeset	226	* <li>A paragraph-separator character ('\u2029').</li>
7f561c08de6b Initial load duke parents: diff changeset	227	* </ul>
7f561c08de6b Initial load duke parents: diff changeset	228	*
7f561c08de6b Initial load duke parents: diff changeset	229	* <p>
7f561c08de6b Initial load duke parents: diff changeset	230	* RE runs programs compiled by the RECompiler class. But the RE
7f561c08de6b Initial load duke parents: diff changeset	231	* matcher class does not include the actual regular expression compiler
7f561c08de6b Initial load duke parents: diff changeset	232	* for reasons of efficiency. In fact, if you want to pre-compile one
7f561c08de6b Initial load duke parents: diff changeset	233	* or more regular expressions, the 'recompile' class can be invoked
7f561c08de6b Initial load duke parents: diff changeset	234	* from the command line to produce compiled output like this:
7f561c08de6b Initial load duke parents: diff changeset	235	*
7f561c08de6b Initial load duke parents: diff changeset	236	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	237	* // Pre-compiled regular expression "a*b"
7f561c08de6b Initial load duke parents: diff changeset	238	* char[] re1Instructions =
7f561c08de6b Initial load duke parents: diff changeset	239	* {
7f561c08de6b Initial load duke parents: diff changeset	240	* 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
7f561c08de6b Initial load duke parents: diff changeset	241	* 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
7f561c08de6b Initial load duke parents: diff changeset	242	* 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
7f561c08de6b Initial load duke parents: diff changeset	243	* 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
7f561c08de6b Initial load duke parents: diff changeset	244	* 0x0000,
7f561c08de6b Initial load duke parents: diff changeset	245	* };
7f561c08de6b Initial load duke parents: diff changeset	246	*
7f561c08de6b Initial load duke parents: diff changeset	247	*
7f561c08de6b Initial load duke parents: diff changeset	248	* REProgram re1 = new REProgram(re1Instructions);
7f561c08de6b Initial load duke parents: diff changeset	249	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	250	*
7f561c08de6b Initial load duke parents: diff changeset	251	* You can then construct a regular expression matcher (RE) object from
7f561c08de6b Initial load duke parents: diff changeset	252	* the pre-compiled expression re1 and thus avoid the overhead of
7f561c08de6b Initial load duke parents: diff changeset	253	* compiling the expression at runtime. If you require more dynamic
7f561c08de6b Initial load duke parents: diff changeset	254	* regular expressions, you can construct a single RECompiler object and
7f561c08de6b Initial load duke parents: diff changeset	255	* re-use it to compile each expression. Similarly, you can change the
7f561c08de6b Initial load duke parents: diff changeset	256	* program run by a given matcher object at any time. However, RE and
7f561c08de6b Initial load duke parents: diff changeset	257	* RECompiler are not threadsafe (for efficiency reasons, and because
7f561c08de6b Initial load duke parents: diff changeset	258	* requiring thread safety in this class is deemed to be a rare
7f561c08de6b Initial load duke parents: diff changeset	259	* requirement), so you will need to construct a separate compiler or
7f561c08de6b Initial load duke parents: diff changeset	260	* matcher object for each thread (unless you do thread synchronization
7f561c08de6b Initial load duke parents: diff changeset	261	* yourself). Once an expression is compiled into an REProgram object, that REProgram
7f561c08de6b Initial load duke parents: diff changeset	262	* can be safely shared across multiple threads and RE objects.
7f561c08de6b Initial load duke parents: diff changeset	263	*
7f561c08de6b Initial load duke parents: diff changeset	264	* <br><p><br>
7f561c08de6b Initial load duke parents: diff changeset	265	*
7f561c08de6b Initial load duke parents: diff changeset	266	* <font color="red">
7f561c08de6b Initial load duke parents: diff changeset	267	* <i>ISSUES:</i>
7f561c08de6b Initial load duke parents: diff changeset	268	*
7f561c08de6b Initial load duke parents: diff changeset	269	* <ul>
7f561c08de6b Initial load duke parents: diff changeset	270	* <li>com.weusours.util.re is not currently compatible with all
7f561c08de6b Initial load duke parents: diff changeset	271	* standard POSIX regcomp flags</li>
7f561c08de6b Initial load duke parents: diff changeset	272	* <li>com.weusours.util.re does not support POSIX equivalence classes
7f561c08de6b Initial load duke parents: diff changeset	273	* ([=foo=] syntax) (I18N/locale issue)</li>
7f561c08de6b Initial load duke parents: diff changeset	274	* <li>com.weusours.util.re does not support nested POSIX character
7f561c08de6b Initial load duke parents: diff changeset	275	* classes (definitely should, but not completely trivial)</li>
7f561c08de6b Initial load duke parents: diff changeset	276	* <li>com.weusours.util.re does not support POSIX character collation
7f561c08de6b Initial load duke parents: diff changeset	277	* concepts ([.foo.] syntax) (I18N/locale issue)</li>
7f561c08de6b Initial load duke parents: diff changeset	278	* <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
7f561c08de6b Initial load duke parents: diff changeset	279	* <li>Should RE support character iterators (for backwards RE matching!)?</li>
7f561c08de6b Initial load duke parents: diff changeset	280	* <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
7f561c08de6b Initial load duke parents: diff changeset	281	* <li>Not all possibilities are considered for greediness when backreferences
7f561c08de6b Initial load duke parents: diff changeset	282	* are involved (as POSIX suggests should be the case). The POSIX RE
7f561c08de6b Initial load duke parents: diff changeset	283	* "(ac)cd[ac]*\1", when matched against "acdacaa" should yield a match
7f561c08de6b Initial load duke parents: diff changeset	284	* of acdacaa where \1 is "a". This is not the case in this RE package,
7f561c08de6b Initial load duke parents: diff changeset	285	* and actually Perl doesn't go to this extent either! Until someone
7f561c08de6b Initial load duke parents: diff changeset	286	* actually complains about this, I'm not sure it's worth "fixing".
7f561c08de6b Initial load duke parents: diff changeset	287	* If it ever is fixed, test #137 in RETest.txt should be updated.</li>
7f561c08de6b Initial load duke parents: diff changeset	288	* </ul>
7f561c08de6b Initial load duke parents: diff changeset	289	*
7f561c08de6b Initial load duke parents: diff changeset	290	* </font>
7f561c08de6b Initial load duke parents: diff changeset	291	*
7f561c08de6b Initial load duke parents: diff changeset	292	* @see recompile
7f561c08de6b Initial load duke parents: diff changeset	293	* @see RECompiler
7f561c08de6b Initial load duke parents: diff changeset	294	*
7f561c08de6b Initial load duke parents: diff changeset	295	* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
7f561c08de6b Initial load duke parents: diff changeset	296	* @author <a href="mailto:ts@sch-fer.de">Tobias Schäfer</a>
7f561c08de6b Initial load duke parents: diff changeset	297	*/
7f561c08de6b Initial load duke parents: diff changeset	298	public class RE implements Serializable
7f561c08de6b Initial load duke parents: diff changeset	299	{
7f561c08de6b Initial load duke parents: diff changeset	300	/**
7f561c08de6b Initial load duke parents: diff changeset	301	* Specifies normal, case-sensitive matching behaviour.
7f561c08de6b Initial load duke parents: diff changeset	302	*/
7f561c08de6b Initial load duke parents: diff changeset	303	public static final int MATCH_NORMAL = 0x0000;
7f561c08de6b Initial load duke parents: diff changeset	304
7f561c08de6b Initial load duke parents: diff changeset	305	/**
7f561c08de6b Initial load duke parents: diff changeset	306	* Flag to indicate that matching should be case-independent (folded)
7f561c08de6b Initial load duke parents: diff changeset	307	*/
7f561c08de6b Initial load duke parents: diff changeset	308	public static final int MATCH_CASEINDEPENDENT = 0x0001;
7f561c08de6b Initial load duke parents: diff changeset	309
7f561c08de6b Initial load duke parents: diff changeset	310	/**
7f561c08de6b Initial load duke parents: diff changeset	311	* Newlines should match as BOL/EOL (^ and $)
7f561c08de6b Initial load duke parents: diff changeset	312	*/
7f561c08de6b Initial load duke parents: diff changeset	313	public static final int MATCH_MULTILINE = 0x0002;
7f561c08de6b Initial load duke parents: diff changeset	314
7f561c08de6b Initial load duke parents: diff changeset	315	/**
7f561c08de6b Initial load duke parents: diff changeset	316	* Consider all input a single body of text - newlines are matched by .
7f561c08de6b Initial load duke parents: diff changeset	317	*/
7f561c08de6b Initial load duke parents: diff changeset	318	public static final int MATCH_SINGLELINE = 0x0004;
7f561c08de6b Initial load duke parents: diff changeset	319
7f561c08de6b Initial load duke parents: diff changeset	320	/************************************************
7f561c08de6b Initial load duke parents: diff changeset	321	* *
7f561c08de6b Initial load duke parents: diff changeset	322	* The format of a node in a program is: *
7f561c08de6b Initial load duke parents: diff changeset	323	* *
7f561c08de6b Initial load duke parents: diff changeset	324	* [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
7f561c08de6b Initial load duke parents: diff changeset	325	* *
7f561c08de6b Initial load duke parents: diff changeset	326	* char OPCODE - instruction *
7f561c08de6b Initial load duke parents: diff changeset	327	* char OPDATA - modifying data *
7f561c08de6b Initial load duke parents: diff changeset	328	* char OPNEXT - next node (relative offset) *
7f561c08de6b Initial load duke parents: diff changeset	329	* *
7f561c08de6b Initial load duke parents: diff changeset	330	************************************************/
7f561c08de6b Initial load duke parents: diff changeset	331
7f561c08de6b Initial load duke parents: diff changeset	332	// Opcode Char Opdata/Operand Meaning
7f561c08de6b Initial load duke parents: diff changeset	333	// ---------- ---------- --------------- --------------------------------------------------
7f561c08de6b Initial load duke parents: diff changeset	334	static final char OP_END = 'E'; // end of program
7f561c08de6b Initial load duke parents: diff changeset	335	static final char OP_BOL = '^'; // match only if at beginning of line
7f561c08de6b Initial load duke parents: diff changeset	336	static final char OP_EOL = '$'; // match only if at end of line
7f561c08de6b Initial load duke parents: diff changeset	337	static final char OP_ANY = '.'; // match any single character except newline
7f561c08de6b Initial load duke parents: diff changeset	338	static final char OP_ANYOF = '['; // count/ranges match any char in the list of ranges
7f561c08de6b Initial load duke parents: diff changeset	339	static final char OP_BRANCH = '\|'; // node match this alternative or the next one
7f561c08de6b Initial load duke parents: diff changeset	340	static final char OP_ATOM = 'A'; // length/string length of string followed by string itself
7f561c08de6b Initial load duke parents: diff changeset	341	static final char OP_STAR = '*'; // node kleene closure
7f561c08de6b Initial load duke parents: diff changeset	342	static final char OP_PLUS = '+'; // node positive closure
7f561c08de6b Initial load duke parents: diff changeset	343	static final char OP_MAYBE = '?'; // node optional closure
7f561c08de6b Initial load duke parents: diff changeset	344	static final char OP_ESCAPE = '\\'; // escape special escape code char class (escape is E_* code)
7f561c08de6b Initial load duke parents: diff changeset	345	static final char OP_OPEN = '('; // number nth opening paren
7f561c08de6b Initial load duke parents: diff changeset	346	static final char OP_OPEN_CLUSTER = '<'; // opening cluster
7f561c08de6b Initial load duke parents: diff changeset	347	static final char OP_CLOSE = ')'; // number nth closing paren
7f561c08de6b Initial load duke parents: diff changeset	348	static final char OP_CLOSE_CLUSTER = '>'; // closing cluster
7f561c08de6b Initial load duke parents: diff changeset	349	static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string
7f561c08de6b Initial load duke parents: diff changeset	350	static final char OP_GOTO = 'G'; // nothing but a (back-)pointer
7f561c08de6b Initial load duke parents: diff changeset	351	static final char OP_NOTHING = 'N'; // match null string such as in '(a\|)'
7f561c08de6b Initial load duke parents: diff changeset	352	static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '' (mnemonic for char is unshifted '')
7f561c08de6b Initial load duke parents: diff changeset	353	static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+')
7f561c08de6b Initial load duke parents: diff changeset	354	static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?')
7f561c08de6b Initial load duke parents: diff changeset	355	static final char OP_POSIXCLASS = 'P'; // classid one of the posix character classes
7f561c08de6b Initial load duke parents: diff changeset	356
7f561c08de6b Initial load duke parents: diff changeset	357	// Escape codes
7f561c08de6b Initial load duke parents: diff changeset	358	static final char E_ALNUM = 'w'; // Alphanumeric
7f561c08de6b Initial load duke parents: diff changeset	359	static final char E_NALNUM = 'W'; // Non-alphanumeric
7f561c08de6b Initial load duke parents: diff changeset	360	static final char E_BOUND = 'b'; // Word boundary
7f561c08de6b Initial load duke parents: diff changeset	361	static final char E_NBOUND = 'B'; // Non-word boundary
7f561c08de6b Initial load duke parents: diff changeset	362	static final char E_SPACE = 's'; // Whitespace
7f561c08de6b Initial load duke parents: diff changeset	363	static final char E_NSPACE = 'S'; // Non-whitespace
7f561c08de6b Initial load duke parents: diff changeset	364	static final char E_DIGIT = 'd'; // Digit
7f561c08de6b Initial load duke parents: diff changeset	365	static final char E_NDIGIT = 'D'; // Non-digit
7f561c08de6b Initial load duke parents: diff changeset	366
7f561c08de6b Initial load duke parents: diff changeset	367	// Posix character classes
7f561c08de6b Initial load duke parents: diff changeset	368	static final char POSIX_CLASS_ALNUM = 'w'; // Alphanumerics
7f561c08de6b Initial load duke parents: diff changeset	369	static final char POSIX_CLASS_ALPHA = 'a'; // Alphabetics
7f561c08de6b Initial load duke parents: diff changeset	370	static final char POSIX_CLASS_BLANK = 'b'; // Blanks
7f561c08de6b Initial load duke parents: diff changeset	371	static final char POSIX_CLASS_CNTRL = 'c'; // Control characters
7f561c08de6b Initial load duke parents: diff changeset	372	static final char POSIX_CLASS_DIGIT = 'd'; // Digits
7f561c08de6b Initial load duke parents: diff changeset	373	static final char POSIX_CLASS_GRAPH = 'g'; // Graphic characters
7f561c08de6b Initial load duke parents: diff changeset	374	static final char POSIX_CLASS_LOWER = 'l'; // Lowercase characters
7f561c08de6b Initial load duke parents: diff changeset	375	static final char POSIX_CLASS_PRINT = 'p'; // Printable characters
7f561c08de6b Initial load duke parents: diff changeset	376	static final char POSIX_CLASS_PUNCT = '!'; // Punctuation
7f561c08de6b Initial load duke parents: diff changeset	377	static final char POSIX_CLASS_SPACE = 's'; // Spaces
7f561c08de6b Initial load duke parents: diff changeset	378	static final char POSIX_CLASS_UPPER = 'u'; // Uppercase characters
7f561c08de6b Initial load duke parents: diff changeset	379	static final char POSIX_CLASS_XDIGIT = 'x'; // Hexadecimal digits
7f561c08de6b Initial load duke parents: diff changeset	380	static final char POSIX_CLASS_JSTART = 'j'; // Java identifier start
7f561c08de6b Initial load duke parents: diff changeset	381	static final char POSIX_CLASS_JPART = 'k'; // Java identifier part
7f561c08de6b Initial load duke parents: diff changeset	382
7f561c08de6b Initial load duke parents: diff changeset	383	// Limits
7f561c08de6b Initial load duke parents: diff changeset	384	static final int maxNode = 65536; // Maximum number of nodes in a program
7f561c08de6b Initial load duke parents: diff changeset	385	static final int MAX_PAREN = 16; // Number of paren pairs (only 9 can be backrefs)
7f561c08de6b Initial load duke parents: diff changeset	386
7f561c08de6b Initial load duke parents: diff changeset	387	// Node layout constants
7f561c08de6b Initial load duke parents: diff changeset	388	static final int offsetOpcode = 0; // Opcode offset (first character)
7f561c08de6b Initial load duke parents: diff changeset	389	static final int offsetOpdata = 1; // Opdata offset (second char)
7f561c08de6b Initial load duke parents: diff changeset	390	static final int offsetNext = 2; // Next index offset (third char)
7f561c08de6b Initial load duke parents: diff changeset	391	static final int nodeSize = 3; // Node size (in chars)
7f561c08de6b Initial load duke parents: diff changeset	392
7f561c08de6b Initial load duke parents: diff changeset	393	// State of current program
7f561c08de6b Initial load duke parents: diff changeset	394	REProgram program; // Compiled regular expression 'program'
7f561c08de6b Initial load duke parents: diff changeset	395	transient CharacterIterator search; // The string being matched against
7f561c08de6b Initial load duke parents: diff changeset	396	int matchFlags; // Match behaviour flags
7f561c08de6b Initial load duke parents: diff changeset	397	int maxParen = MAX_PAREN;
7f561c08de6b Initial load duke parents: diff changeset	398
7f561c08de6b Initial load duke parents: diff changeset	399	// Parenthesized subexpressions
7f561c08de6b Initial load duke parents: diff changeset	400	transient int parenCount; // Number of subexpressions matched (num open parens + 1)
7f561c08de6b Initial load duke parents: diff changeset	401	transient int start0; // Cache of start[0]
7f561c08de6b Initial load duke parents: diff changeset	402	transient int end0; // Cache of start[0]
7f561c08de6b Initial load duke parents: diff changeset	403	transient int start1; // Cache of start[1]
7f561c08de6b Initial load duke parents: diff changeset	404	transient int end1; // Cache of start[1]
7f561c08de6b Initial load duke parents: diff changeset	405	transient int start2; // Cache of start[2]
7f561c08de6b Initial load duke parents: diff changeset	406	transient int end2; // Cache of start[2]
7f561c08de6b Initial load duke parents: diff changeset	407	transient int[] startn; // Lazy-alloced array of sub-expression starts
7f561c08de6b Initial load duke parents: diff changeset	408	transient int[] endn; // Lazy-alloced array of sub-expression ends
7f561c08de6b Initial load duke parents: diff changeset	409
7f561c08de6b Initial load duke parents: diff changeset	410	// Backreferences
7f561c08de6b Initial load duke parents: diff changeset	411	transient int[] startBackref; // Lazy-alloced array of backref starts
7f561c08de6b Initial load duke parents: diff changeset	412	transient int[] endBackref; // Lazy-alloced array of backref ends
7f561c08de6b Initial load duke parents: diff changeset	413
/**
 * Creates a matcher for the given pattern string with {@code MATCH_NORMAL}
 * matching behaviour. The pattern is compiled by a freshly created
 * RECompiler; when many expressions have to be compiled, sharing one
 * RECompiler instance and passing the resulting programs in may be
 * preferable.
 *
 * @param pattern The regular expression pattern to compile.
 * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
 * @see RECompiler
 * @see recompile
 */
public RE(String pattern) throws RESyntaxException
{
    this(pattern, MATCH_NORMAL);
}
7f561c08de6b Initial load duke parents: diff changeset	428
/**
 * Creates a matcher for the given pattern string with the given matching
 * behaviour. The pattern is compiled by a freshly created RECompiler; when
 * many expressions have to be compiled, sharing one RECompiler instance
 * and passing the resulting programs in may be preferable.
 *
 * @param pattern The regular expression pattern to compile.
 * @param matchFlags The matching style (one or more of the RE.MATCH_* flags)
 * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
 * @see RECompiler
 * @see recompile
 */
public RE(String pattern, int matchFlags) throws RESyntaxException
{
    // Hand program and flags to the (REProgram, int) constructor in one step,
    // so the flags are set exactly once instead of first being set to
    // MATCH_NORMAL and then overwritten.
    this(new RECompiler().compile(pattern), matchFlags);
}
7f561c08de6b Initial load duke parents: diff changeset	445
/**
 * Creates a matcher that runs an already compiled regular expression
 * program, with explicit control over the matching behaviour.
 *
 * @param program Compiled regular expression program (see RECompiler and/or recompile)
 * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
 *
 * <pre>
 *   MATCH_NORMAL              // Normal (case-sensitive) matching
 *   MATCH_CASEINDEPENDENT     // Case folded comparisons
 *   MATCH_MULTILINE           // Newline matches as BOL/EOL
 * </pre>
 *
 * @see RECompiler
 * @see REProgram
 * @see recompile
 */
public RE(REProgram program, int matchFlags)
{
    // Install the program first (this also fixes the paren limit), then the flags
    this.setProgram(program);
    this.setMatchFlags(matchFlags);
}
7f561c08de6b Initial load duke parents: diff changeset	469
/**
 * Creates a matcher that runs an already compiled regular expression
 * program with {@code MATCH_NORMAL} matching behaviour.
 *
 * @param program Compiled regular expression program
 * @see RECompiler
 * @see recompile
 */
public RE(REProgram program)
{
    this(program, MATCH_NORMAL);
}
7f561c08de6b Initial load duke parents: diff changeset	482
/**
 * Creates a matcher that has no program yet ({@code null} program,
 * {@code MATCH_NORMAL} flags). Uncommon, but supported; a program can be
 * supplied later through {@link #setProgram}.
 */
public RE()
{
    // The cast picks the (REProgram, int) overload; a bare null would be ambiguous
    this((REProgram) null, MATCH_NORMAL);
}
7f561c08de6b Initial load duke parents: diff changeset	491
7f561c08de6b Initial load duke parents: diff changeset	492	/**
7f561c08de6b Initial load duke parents: diff changeset	493	* Converts a 'simplified' regular expression to a full regular expression
7f561c08de6b Initial load duke parents: diff changeset	494	*
7f561c08de6b Initial load duke parents: diff changeset	495	* @param pattern The pattern to convert
7f561c08de6b Initial load duke parents: diff changeset	496	* @return The full regular expression
7f561c08de6b Initial load duke parents: diff changeset	497	*/
7f561c08de6b Initial load duke parents: diff changeset	498	public static String simplePatternToFullRegularExpression(String pattern)
7f561c08de6b Initial load duke parents: diff changeset	499	{
7f561c08de6b Initial load duke parents: diff changeset	500	StringBuffer buf = new StringBuffer();
7f561c08de6b Initial load duke parents: diff changeset	501	for (int i = 0; i < pattern.length(); i++)
7f561c08de6b Initial load duke parents: diff changeset	502	{
7f561c08de6b Initial load duke parents: diff changeset	503	char c = pattern.charAt(i);
7f561c08de6b Initial load duke parents: diff changeset	504	switch (c)
7f561c08de6b Initial load duke parents: diff changeset	505	{
7f561c08de6b Initial load duke parents: diff changeset	506	case '*':
7f561c08de6b Initial load duke parents: diff changeset	507	buf.append(".*");
7f561c08de6b Initial load duke parents: diff changeset	508	break;
7f561c08de6b Initial load duke parents: diff changeset	509
7f561c08de6b Initial load duke parents: diff changeset	510	case '.':
7f561c08de6b Initial load duke parents: diff changeset	511	case '[':
7f561c08de6b Initial load duke parents: diff changeset	512	case ']':
7f561c08de6b Initial load duke parents: diff changeset	513	case '\\':
7f561c08de6b Initial load duke parents: diff changeset	514	case '+':
7f561c08de6b Initial load duke parents: diff changeset	515	case '?':
7f561c08de6b Initial load duke parents: diff changeset	516	case '{':
7f561c08de6b Initial load duke parents: diff changeset	517	case '}':
7f561c08de6b Initial load duke parents: diff changeset	518	case '$':
7f561c08de6b Initial load duke parents: diff changeset	519	case '^':
7f561c08de6b Initial load duke parents: diff changeset	520	case '\|':
7f561c08de6b Initial load duke parents: diff changeset	521	case '(':
7f561c08de6b Initial load duke parents: diff changeset	522	case ')':
7f561c08de6b Initial load duke parents: diff changeset	523	buf.append('\\');
7f561c08de6b Initial load duke parents: diff changeset	524	default:
7f561c08de6b Initial load duke parents: diff changeset	525	buf.append(c);
7f561c08de6b Initial load duke parents: diff changeset	526	break;
7f561c08de6b Initial load duke parents: diff changeset	527	}
7f561c08de6b Initial load duke parents: diff changeset	528	}
7f561c08de6b Initial load duke parents: diff changeset	529	return buf.toString();
7f561c08de6b Initial load duke parents: diff changeset	530	}
7f561c08de6b Initial load duke parents: diff changeset	531
7f561c08de6b Initial load duke parents: diff changeset	532	/**
7f561c08de6b Initial load duke parents: diff changeset	533	* Sets match behaviour flags which alter the way RE does matching.
7f561c08de6b Initial load duke parents: diff changeset	534	* @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
7f561c08de6b Initial load duke parents: diff changeset	535	*
7f561c08de6b Initial load duke parents: diff changeset	536	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	537	* MATCH_NORMAL // Normal (case-sensitive) matching
7f561c08de6b Initial load duke parents: diff changeset	538	* MATCH_CASEINDEPENDENT // Case folded comparisons
7f561c08de6b Initial load duke parents: diff changeset	539	* MATCH_MULTILINE // Newline matches as BOL/EOL
7f561c08de6b Initial load duke parents: diff changeset	540	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	541	*/
7f561c08de6b Initial load duke parents: diff changeset	542	public void setMatchFlags(int matchFlags)
7f561c08de6b Initial load duke parents: diff changeset	543	{
7f561c08de6b Initial load duke parents: diff changeset	544	this.matchFlags = matchFlags;
7f561c08de6b Initial load duke parents: diff changeset	545	}
7f561c08de6b Initial load duke parents: diff changeset	546
7f561c08de6b Initial load duke parents: diff changeset	547	/**
7f561c08de6b Initial load duke parents: diff changeset	548	* Returns the current match behaviour flags.
7f561c08de6b Initial load duke parents: diff changeset	549	* @return Current match behaviour flags (RE.MATCH_*).
7f561c08de6b Initial load duke parents: diff changeset	550	*
7f561c08de6b Initial load duke parents: diff changeset	551	* <pre>
7f561c08de6b Initial load duke parents: diff changeset	552	* MATCH_NORMAL // Normal (case-sensitive) matching
7f561c08de6b Initial load duke parents: diff changeset	553	* MATCH_CASEINDEPENDENT // Case folded comparisons
7f561c08de6b Initial load duke parents: diff changeset	554	* MATCH_MULTILINE // Newline matches as BOL/EOL
7f561c08de6b Initial load duke parents: diff changeset	555	* </pre>
7f561c08de6b Initial load duke parents: diff changeset	556	*
7f561c08de6b Initial load duke parents: diff changeset	557	* @see #setMatchFlags
7f561c08de6b Initial load duke parents: diff changeset	558	*/
7f561c08de6b Initial load duke parents: diff changeset	559	public int getMatchFlags()
7f561c08de6b Initial load duke parents: diff changeset	560	{
7f561c08de6b Initial load duke parents: diff changeset	561	return matchFlags;
7f561c08de6b Initial load duke parents: diff changeset	562	}
7f561c08de6b Initial load duke parents: diff changeset	563
7f561c08de6b Initial load duke parents: diff changeset	564	/**
7f561c08de6b Initial load duke parents: diff changeset	565	* Sets the current regular expression program used by this matcher object.
7f561c08de6b Initial load duke parents: diff changeset	566	*
7f561c08de6b Initial load duke parents: diff changeset	567	* @param program Regular expression program compiled by RECompiler.
7f561c08de6b Initial load duke parents: diff changeset	568	* @see RECompiler
7f561c08de6b Initial load duke parents: diff changeset	569	* @see REProgram
7f561c08de6b Initial load duke parents: diff changeset	570	* @see recompile
7f561c08de6b Initial load duke parents: diff changeset	571	*/
7f561c08de6b Initial load duke parents: diff changeset	572	public void setProgram(REProgram program)
7f561c08de6b Initial load duke parents: diff changeset	573	{
7f561c08de6b Initial load duke parents: diff changeset	574	this.program = program;
7f561c08de6b Initial load duke parents: diff changeset	575	if (program != null && program.maxParens != -1) {
7f561c08de6b Initial load duke parents: diff changeset	576	this.maxParen = program.maxParens;
7f561c08de6b Initial load duke parents: diff changeset	577	} else {
7f561c08de6b Initial load duke parents: diff changeset	578	this.maxParen = MAX_PAREN;
7f561c08de6b Initial load duke parents: diff changeset	579	}
7f561c08de6b Initial load duke parents: diff changeset	580	}
7f561c08de6b Initial load duke parents: diff changeset	581
7f561c08de6b Initial load duke parents: diff changeset	582	/**
7f561c08de6b Initial load duke parents: diff changeset	583	* Returns the current regular expression program in use by this matcher object.
7f561c08de6b Initial load duke parents: diff changeset	584	*
7f561c08de6b Initial load duke parents: diff changeset	585	* @return Regular expression program
7f561c08de6b Initial load duke parents: diff changeset	586	* @see #setProgram
7f561c08de6b Initial load duke parents: diff changeset	587	*/
7f561c08de6b Initial load duke parents: diff changeset	588	public REProgram getProgram()
7f561c08de6b Initial load duke parents: diff changeset	589	{
7f561c08de6b Initial load duke parents: diff changeset	590	return program;
7f561c08de6b Initial load duke parents: diff changeset	591	}
7f561c08de6b Initial load duke parents: diff changeset	592
7f561c08de6b Initial load duke parents: diff changeset	593	/**
7f561c08de6b Initial load duke parents: diff changeset	594	* Returns the number of parenthesized subexpressions available after a successful match.
7f561c08de6b Initial load duke parents: diff changeset	595	*
7f561c08de6b Initial load duke parents: diff changeset	596	* @return Number of available parenthesized subexpressions
7f561c08de6b Initial load duke parents: diff changeset	597	*/
7f561c08de6b Initial load duke parents: diff changeset	598	public int getParenCount()
7f561c08de6b Initial load duke parents: diff changeset	599	{
7f561c08de6b Initial load duke parents: diff changeset	600	return parenCount;
7f561c08de6b Initial load duke parents: diff changeset	601	}
7f561c08de6b Initial load duke parents: diff changeset	602
7f561c08de6b Initial load duke parents: diff changeset	603	/**
7f561c08de6b Initial load duke parents: diff changeset	604	* Gets the contents of a parenthesized subexpression after a successful match.
7f561c08de6b Initial load duke parents: diff changeset	605	*
7f561c08de6b Initial load duke parents: diff changeset	606	* @param which Nesting level of subexpression
7f561c08de6b Initial load duke parents: diff changeset	607	* @return String
7f561c08de6b Initial load duke parents: diff changeset	608	*/
7f561c08de6b Initial load duke parents: diff changeset	609	public String getParen(int which)
7f561c08de6b Initial load duke parents: diff changeset	610	{
7f561c08de6b Initial load duke parents: diff changeset	611	int start;
7f561c08de6b Initial load duke parents: diff changeset	612	if (which < parenCount && (start = getParenStart(which)) >= 0)
7f561c08de6b Initial load duke parents: diff changeset	613	{
7f561c08de6b Initial load duke parents: diff changeset	614	return search.substring(start, getParenEnd(which));
7f561c08de6b Initial load duke parents: diff changeset	615	}
7f561c08de6b Initial load duke parents: diff changeset	616	return null;
7f561c08de6b Initial load duke parents: diff changeset	617	}
7f561c08de6b Initial load duke parents: diff changeset	618
7f561c08de6b Initial load duke parents: diff changeset	619	/**
7f561c08de6b Initial load duke parents: diff changeset	620	* Returns the start index of a given paren level.
7f561c08de6b Initial load duke parents: diff changeset	621	*
7f561c08de6b Initial load duke parents: diff changeset	622	* @param which Nesting level of subexpression
7f561c08de6b Initial load duke parents: diff changeset	623	* @return String index
7f561c08de6b Initial load duke parents: diff changeset	624	*/
7f561c08de6b Initial load duke parents: diff changeset	625	public final int getParenStart(int which)
7f561c08de6b Initial load duke parents: diff changeset	626	{
7f561c08de6b Initial load duke parents: diff changeset	627	if (which < parenCount)
7f561c08de6b Initial load duke parents: diff changeset	628	{
7f561c08de6b Initial load duke parents: diff changeset	629	switch (which)
7f561c08de6b Initial load duke parents: diff changeset	630	{
7f561c08de6b Initial load duke parents: diff changeset	631	case 0:
7f561c08de6b Initial load duke parents: diff changeset	632	return start0;
7f561c08de6b Initial load duke parents: diff changeset	633
7f561c08de6b Initial load duke parents: diff changeset	634	case 1:
7f561c08de6b Initial load duke parents: diff changeset	635	return start1;
7f561c08de6b Initial load duke parents: diff changeset	636
7f561c08de6b Initial load duke parents: diff changeset	637	case 2:
7f561c08de6b Initial load duke parents: diff changeset	638	return start2;
7f561c08de6b Initial load duke parents: diff changeset	639
7f561c08de6b Initial load duke parents: diff changeset	640	default:
7f561c08de6b Initial load duke parents: diff changeset	641	if (startn == null)
7f561c08de6b Initial load duke parents: diff changeset	642	{
7f561c08de6b Initial load duke parents: diff changeset	643	allocParens();
7f561c08de6b Initial load duke parents: diff changeset	644	}
7f561c08de6b Initial load duke parents: diff changeset	645	return startn[which];
7f561c08de6b Initial load duke parents: diff changeset	646	}
7f561c08de6b Initial load duke parents: diff changeset	647	}
7f561c08de6b Initial load duke parents: diff changeset	648	return -1;
7f561c08de6b Initial load duke parents: diff changeset	649	}
7f561c08de6b Initial load duke parents: diff changeset	650
7f561c08de6b Initial load duke parents: diff changeset	651	/**
7f561c08de6b Initial load duke parents: diff changeset	652	* Returns the end index of a given paren level.
7f561c08de6b Initial load duke parents: diff changeset	653	*
7f561c08de6b Initial load duke parents: diff changeset	654	* @param which Nesting level of subexpression
7f561c08de6b Initial load duke parents: diff changeset	655	* @return String index
7f561c08de6b Initial load duke parents: diff changeset	656	*/
7f561c08de6b Initial load duke parents: diff changeset	657	public final int getParenEnd(int which)
7f561c08de6b Initial load duke parents: diff changeset	658	{
7f561c08de6b Initial load duke parents: diff changeset	659	if (which < parenCount)
7f561c08de6b Initial load duke parents: diff changeset	660	{
7f561c08de6b Initial load duke parents: diff changeset	661	switch (which)
7f561c08de6b Initial load duke parents: diff changeset	662	{
7f561c08de6b Initial load duke parents: diff changeset	663	case 0:
7f561c08de6b Initial load duke parents: diff changeset	664	return end0;
7f561c08de6b Initial load duke parents: diff changeset	665
7f561c08de6b Initial load duke parents: diff changeset	666	case 1:
7f561c08de6b Initial load duke parents: diff changeset	667	return end1;
7f561c08de6b Initial load duke parents: diff changeset	668
7f561c08de6b Initial load duke parents: diff changeset	669	case 2:
7f561c08de6b Initial load duke parents: diff changeset	670	return end2;
7f561c08de6b Initial load duke parents: diff changeset	671
7f561c08de6b Initial load duke parents: diff changeset	672	default:
7f561c08de6b Initial load duke parents: diff changeset	673	if (endn == null)
7f561c08de6b Initial load duke parents: diff changeset	674	{
7f561c08de6b Initial load duke parents: diff changeset	675	allocParens();
7f561c08de6b Initial load duke parents: diff changeset	676	}
7f561c08de6b Initial load duke parents: diff changeset	677	return endn[which];
7f561c08de6b Initial load duke parents: diff changeset	678	}
7f561c08de6b Initial load duke parents: diff changeset	679	}
7f561c08de6b Initial load duke parents: diff changeset	680	return -1;
7f561c08de6b Initial load duke parents: diff changeset	681	}
7f561c08de6b Initial load duke parents: diff changeset	682
7f561c08de6b Initial load duke parents: diff changeset	683	/**
7f561c08de6b Initial load duke parents: diff changeset	684	* Returns the length of a given paren level.
7f561c08de6b Initial load duke parents: diff changeset	685	*
7f561c08de6b Initial load duke parents: diff changeset	686	* @param which Nesting level of subexpression
7f561c08de6b Initial load duke parents: diff changeset	687	* @return Number of characters in the parenthesized subexpression
7f561c08de6b Initial load duke parents: diff changeset	688	*/
7f561c08de6b Initial load duke parents: diff changeset	689	public final int getParenLength(int which)
7f561c08de6b Initial load duke parents: diff changeset	690	{
7f561c08de6b Initial load duke parents: diff changeset	691	if (which < parenCount)
7f561c08de6b Initial load duke parents: diff changeset	692	{
7f561c08de6b Initial load duke parents: diff changeset	693	return getParenEnd(which) - getParenStart(which);
7f561c08de6b Initial load duke parents: diff changeset	694	}
7f561c08de6b Initial load duke parents: diff changeset	695	return -1;
7f561c08de6b Initial load duke parents: diff changeset	696	}
7f561c08de6b Initial load duke parents: diff changeset	697
7f561c08de6b Initial load duke parents: diff changeset	698	/**
7f561c08de6b Initial load duke parents: diff changeset	699	* Sets the start of a paren level
7f561c08de6b Initial load duke parents: diff changeset	700	*
7f561c08de6b Initial load duke parents: diff changeset	701	* @param which Which paren level
7f561c08de6b Initial load duke parents: diff changeset	702	* @param i Index in input array
7f561c08de6b Initial load duke parents: diff changeset	703	*/
7f561c08de6b Initial load duke parents: diff changeset	704	protected final void setParenStart(int which, int i)
7f561c08de6b Initial load duke parents: diff changeset	705	{
7f561c08de6b Initial load duke parents: diff changeset	706	if (which < parenCount)
7f561c08de6b Initial load duke parents: diff changeset	707	{
7f561c08de6b Initial load duke parents: diff changeset	708	switch (which)
7f561c08de6b Initial load duke parents: diff changeset	709	{
7f561c08de6b Initial load duke parents: diff changeset	710	case 0:
7f561c08de6b Initial load duke parents: diff changeset	711	start0 = i;
7f561c08de6b Initial load duke parents: diff changeset	712	break;
7f561c08de6b Initial load duke parents: diff changeset	713
7f561c08de6b Initial load duke parents: diff changeset	714	case 1:
7f561c08de6b Initial load duke parents: diff changeset	715	start1 = i;
7f561c08de6b Initial load duke parents: diff changeset	716	break;
7f561c08de6b Initial load duke parents: diff changeset	717
7f561c08de6b Initial load duke parents: diff changeset	718	case 2:
7f561c08de6b Initial load duke parents: diff changeset	719	start2 = i;
7f561c08de6b Initial load duke parents: diff changeset	720	break;
7f561c08de6b Initial load duke parents: diff changeset	721
7f561c08de6b Initial load duke parents: diff changeset	722	default:
7f561c08de6b Initial load duke parents: diff changeset	723	if (startn == null)
7f561c08de6b Initial load duke parents: diff changeset	724	{
7f561c08de6b Initial load duke parents: diff changeset	725	allocParens();
7f561c08de6b Initial load duke parents: diff changeset	726	}
7f561c08de6b Initial load duke parents: diff changeset	727	startn[which] = i;
7f561c08de6b Initial load duke parents: diff changeset	728	break;
7f561c08de6b Initial load duke parents: diff changeset	729	}
7f561c08de6b Initial load duke parents: diff changeset	730	}
7f561c08de6b Initial load duke parents: diff changeset	731	}
7f561c08de6b Initial load duke parents: diff changeset	732
7f561c08de6b Initial load duke parents: diff changeset	733	/**
7f561c08de6b Initial load duke parents: diff changeset	734	* Sets the end of a paren level
7f561c08de6b Initial load duke parents: diff changeset	735	*
7f561c08de6b Initial load duke parents: diff changeset	736	* @param which Which paren level
7f561c08de6b Initial load duke parents: diff changeset	737	* @param i Index in input array
7f561c08de6b Initial load duke parents: diff changeset	738	*/
7f561c08de6b Initial load duke parents: diff changeset	739	protected final void setParenEnd(int which, int i)
7f561c08de6b Initial load duke parents: diff changeset	740	{
7f561c08de6b Initial load duke parents: diff changeset	741	if (which < parenCount)
7f561c08de6b Initial load duke parents: diff changeset	742	{
7f561c08de6b Initial load duke parents: diff changeset	743	switch (which)
7f561c08de6b Initial load duke parents: diff changeset	744	{
7f561c08de6b Initial load duke parents: diff changeset	745	case 0:
7f561c08de6b Initial load duke parents: diff changeset	746	end0 = i;
7f561c08de6b Initial load duke parents: diff changeset	747	break;
7f561c08de6b Initial load duke parents: diff changeset	748
7f561c08de6b Initial load duke parents: diff changeset	749	case 1:
7f561c08de6b Initial load duke parents: diff changeset	750	end1 = i;
7f561c08de6b Initial load duke parents: diff changeset	751	break;
7f561c08de6b Initial load duke parents: diff changeset	752
7f561c08de6b Initial load duke parents: diff changeset	753	case 2:
7f561c08de6b Initial load duke parents: diff changeset	754	end2 = i;
7f561c08de6b Initial load duke parents: diff changeset	755	break;
7f561c08de6b Initial load duke parents: diff changeset	756
7f561c08de6b Initial load duke parents: diff changeset	757	default:
7f561c08de6b Initial load duke parents: diff changeset	758	if (endn == null)
7f561c08de6b Initial load duke parents: diff changeset	759	{
7f561c08de6b Initial load duke parents: diff changeset	760	allocParens();
7f561c08de6b Initial load duke parents: diff changeset	761	}
7f561c08de6b Initial load duke parents: diff changeset	762	endn[which] = i;
7f561c08de6b Initial load duke parents: diff changeset	763	break;
7f561c08de6b Initial load duke parents: diff changeset	764	}
7f561c08de6b Initial load duke parents: diff changeset	765	}
7f561c08de6b Initial load duke parents: diff changeset	766	}
7f561c08de6b Initial load duke parents: diff changeset	767
7f561c08de6b Initial load duke parents: diff changeset	768	/**
7f561c08de6b Initial load duke parents: diff changeset	769	* Throws an Error representing an internal error condition probably resulting
7f561c08de6b Initial load duke parents: diff changeset	770	* from a bug in the regular expression compiler (or possibly data corruption).
7f561c08de6b Initial load duke parents: diff changeset	771	* In practice, this should be very rare.
7f561c08de6b Initial load duke parents: diff changeset	772	*
7f561c08de6b Initial load duke parents: diff changeset	773	* @param s Error description
7f561c08de6b Initial load duke parents: diff changeset	774	*/
7f561c08de6b Initial load duke parents: diff changeset	775	protected void internalError(String s) throws Error
7f561c08de6b Initial load duke parents: diff changeset	776	{
7f561c08de6b Initial load duke parents: diff changeset	777	throw new Error("RE internal error: " + s);
7f561c08de6b Initial load duke parents: diff changeset	778	}
7f561c08de6b Initial load duke parents: diff changeset	779
7f561c08de6b Initial load duke parents: diff changeset	780	/**
7f561c08de6b Initial load duke parents: diff changeset	781	* Performs lazy allocation of subexpression arrays
7f561c08de6b Initial load duke parents: diff changeset	782	*/
7f561c08de6b Initial load duke parents: diff changeset	783	private final void allocParens()
7f561c08de6b Initial load duke parents: diff changeset	784	{
7f561c08de6b Initial load duke parents: diff changeset	785	// Allocate arrays for subexpressions
7f561c08de6b Initial load duke parents: diff changeset	786	startn = new int[maxParen];
7f561c08de6b Initial load duke parents: diff changeset	787	endn = new int[maxParen];
7f561c08de6b Initial load duke parents: diff changeset	788
7f561c08de6b Initial load duke parents: diff changeset	789	// Set sub-expression pointers to invalid values
7f561c08de6b Initial load duke parents: diff changeset	790	for (int i = 0; i < maxParen; i++)
7f561c08de6b Initial load duke parents: diff changeset	791	{
7f561c08de6b Initial load duke parents: diff changeset	792	startn[i] = -1;
7f561c08de6b Initial load duke parents: diff changeset	793	endn[i] = -1;
7f561c08de6b Initial load duke parents: diff changeset	794	}
7f561c08de6b Initial load duke parents: diff changeset	795	}
7f561c08de6b Initial load duke parents: diff changeset	796
7f561c08de6b Initial load duke parents: diff changeset	797	/**
7f561c08de6b Initial load duke parents: diff changeset	798	* Try to match a string against a subset of nodes in the program
7f561c08de6b Initial load duke parents: diff changeset	799	*
7f561c08de6b Initial load duke parents: diff changeset	800	* @param firstNode Node to start at in program
7f561c08de6b Initial load duke parents: diff changeset	801	* @param lastNode Last valid node (used for matching a subexpression without
7f561c08de6b Initial load duke parents: diff changeset	802	* matching the rest of the program as well).
7f561c08de6b Initial load duke parents: diff changeset	803	* @param idxStart Starting position in character array
7f561c08de6b Initial load duke parents: diff changeset	804	* @return Final input array index if match succeeded. -1 if not.
7f561c08de6b Initial load duke parents: diff changeset	805	*/
7f561c08de6b Initial load duke parents: diff changeset	806	protected int matchNodes(int firstNode, int lastNode, int idxStart)
7f561c08de6b Initial load duke parents: diff changeset	807	{
7f561c08de6b Initial load duke parents: diff changeset	808	// Our current place in the string
7f561c08de6b Initial load duke parents: diff changeset	809	int idx = idxStart;
7f561c08de6b Initial load duke parents: diff changeset	810
7f561c08de6b Initial load duke parents: diff changeset	811	// Loop while node is valid
7f561c08de6b Initial load duke parents: diff changeset	812	int next, opcode, opdata;
7f561c08de6b Initial load duke parents: diff changeset	813	int idxNew;
7f561c08de6b Initial load duke parents: diff changeset	814	char[] instruction = program.instruction;
7f561c08de6b Initial load duke parents: diff changeset	815	for (int node = firstNode; node < lastNode; )
7f561c08de6b Initial load duke parents: diff changeset	816	{
7f561c08de6b Initial load duke parents: diff changeset	817	opcode = instruction[node + offsetOpcode];
7f561c08de6b Initial load duke parents: diff changeset	818	next = node + (short)instruction[node + offsetNext];
7f561c08de6b Initial load duke parents: diff changeset	819	opdata = instruction[node + offsetOpdata];
7f561c08de6b Initial load duke parents: diff changeset	820
7f561c08de6b Initial load duke parents: diff changeset	821	switch (opcode)
7f561c08de6b Initial load duke parents: diff changeset	822	{
7f561c08de6b Initial load duke parents: diff changeset	823	case OP_RELUCTANTMAYBE:
7f561c08de6b Initial load duke parents: diff changeset	824	{
7f561c08de6b Initial load duke parents: diff changeset	825	int once = 0;
7f561c08de6b Initial load duke parents: diff changeset	826	do
7f561c08de6b Initial load duke parents: diff changeset	827	{
7f561c08de6b Initial load duke parents: diff changeset	828	// Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load duke parents: diff changeset	829	if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	830	{
7f561c08de6b Initial load duke parents: diff changeset	831	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	832	}
7f561c08de6b Initial load duke parents: diff changeset	833	}
7f561c08de6b Initial load duke parents: diff changeset	834	while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
7f561c08de6b Initial load duke parents: diff changeset	835	return -1;
7f561c08de6b Initial load duke parents: diff changeset	836	}
7f561c08de6b Initial load duke parents: diff changeset	837
7f561c08de6b Initial load duke parents: diff changeset	838	case OP_RELUCTANTPLUS:
7f561c08de6b Initial load duke parents: diff changeset	839	while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	840	{
7f561c08de6b Initial load duke parents: diff changeset	841	// Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load duke parents: diff changeset	842	if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	843	{
7f561c08de6b Initial load duke parents: diff changeset	844	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	845	}
7f561c08de6b Initial load duke parents: diff changeset	846	}
7f561c08de6b Initial load duke parents: diff changeset	847	return -1;
7f561c08de6b Initial load duke parents: diff changeset	848
7f561c08de6b Initial load duke parents: diff changeset	849	case OP_RELUCTANTSTAR:
7f561c08de6b Initial load duke parents: diff changeset	850	do
7f561c08de6b Initial load duke parents: diff changeset	851	{
7f561c08de6b Initial load duke parents: diff changeset	852	// Try to match the rest without using the reluctant subexpr
7f561c08de6b Initial load duke parents: diff changeset	853	if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	854	{
7f561c08de6b Initial load duke parents: diff changeset	855	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	856	}
7f561c08de6b Initial load duke parents: diff changeset	857	}
7f561c08de6b Initial load duke parents: diff changeset	858	while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
7f561c08de6b Initial load duke parents: diff changeset	859	return -1;
7f561c08de6b Initial load duke parents: diff changeset	860
7f561c08de6b Initial load duke parents: diff changeset	861	case OP_OPEN:
7f561c08de6b Initial load duke parents: diff changeset	862
7f561c08de6b Initial load duke parents: diff changeset	863	// Match subexpression
7f561c08de6b Initial load duke parents: diff changeset	864	if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load duke parents: diff changeset	865	{
7f561c08de6b Initial load duke parents: diff changeset	866	startBackref[opdata] = idx;
7f561c08de6b Initial load duke parents: diff changeset	867	}
7f561c08de6b Initial load duke parents: diff changeset	868	if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	869	{
7f561c08de6b Initial load duke parents: diff changeset	870	// Increase valid paren count
7f561c08de6b Initial load duke parents: diff changeset	871	if ((opdata + 1) > parenCount)
7f561c08de6b Initial load duke parents: diff changeset	872	{
7f561c08de6b Initial load duke parents: diff changeset	873	parenCount = opdata + 1;
7f561c08de6b Initial load duke parents: diff changeset	874	}
7f561c08de6b Initial load duke parents: diff changeset	875
7f561c08de6b Initial load duke parents: diff changeset	876	// Don't set paren if already set later on
7f561c08de6b Initial load duke parents: diff changeset	877	if (getParenStart(opdata) == -1)
7f561c08de6b Initial load duke parents: diff changeset	878	{
7f561c08de6b Initial load duke parents: diff changeset	879	setParenStart(opdata, idx);
7f561c08de6b Initial load duke parents: diff changeset	880	}
7f561c08de6b Initial load duke parents: diff changeset	881	}
7f561c08de6b Initial load duke parents: diff changeset	882	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	883
7f561c08de6b Initial load duke parents: diff changeset	884	case OP_CLOSE:
7f561c08de6b Initial load duke parents: diff changeset	885
7f561c08de6b Initial load duke parents: diff changeset	886	// Done matching subexpression
7f561c08de6b Initial load duke parents: diff changeset	887	if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load duke parents: diff changeset	888	{
7f561c08de6b Initial load duke parents: diff changeset	889	endBackref[opdata] = idx;
7f561c08de6b Initial load duke parents: diff changeset	890	}
7f561c08de6b Initial load duke parents: diff changeset	891	if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	892	{
7f561c08de6b Initial load duke parents: diff changeset	893	// Increase valid paren count
7f561c08de6b Initial load duke parents: diff changeset	894	if ((opdata + 1) > parenCount)
7f561c08de6b Initial load duke parents: diff changeset	895	{
7f561c08de6b Initial load duke parents: diff changeset	896	parenCount = opdata + 1;
7f561c08de6b Initial load duke parents: diff changeset	897	}
7f561c08de6b Initial load duke parents: diff changeset	898
7f561c08de6b Initial load duke parents: diff changeset	899	// Don't set paren if already set later on
7f561c08de6b Initial load duke parents: diff changeset	900	if (getParenEnd(opdata) == -1)
7f561c08de6b Initial load duke parents: diff changeset	901	{
7f561c08de6b Initial load duke parents: diff changeset	902	setParenEnd(opdata, idx);
7f561c08de6b Initial load duke parents: diff changeset	903	}
7f561c08de6b Initial load duke parents: diff changeset	904	}
7f561c08de6b Initial load duke parents: diff changeset	905	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	906
7f561c08de6b Initial load duke parents: diff changeset	907	case OP_OPEN_CLUSTER:
7f561c08de6b Initial load duke parents: diff changeset	908	case OP_CLOSE_CLUSTER:
7f561c08de6b Initial load duke parents: diff changeset	909	// starting or ending the matching of a subexpression which has no backref.
7f561c08de6b Initial load duke parents: diff changeset	910	return matchNodes( next, maxNode, idx );
7f561c08de6b Initial load duke parents: diff changeset	911
7f561c08de6b Initial load duke parents: diff changeset	912	case OP_BACKREF:
7f561c08de6b Initial load duke parents: diff changeset	913	{
7f561c08de6b Initial load duke parents: diff changeset	914	// Get the start and end of the backref
7f561c08de6b Initial load duke parents: diff changeset	915	int s = startBackref[opdata];
7f561c08de6b Initial load duke parents: diff changeset	916	int e = endBackref[opdata];
7f561c08de6b Initial load duke parents: diff changeset	917
7f561c08de6b Initial load duke parents: diff changeset	918	// We don't know the backref yet
7f561c08de6b Initial load duke parents: diff changeset	919	if (s == -1 \|\| e == -1)
7f561c08de6b Initial load duke parents: diff changeset	920	{
7f561c08de6b Initial load duke parents: diff changeset	921	return -1;
7f561c08de6b Initial load duke parents: diff changeset	922	}
7f561c08de6b Initial load duke parents: diff changeset	923
7f561c08de6b Initial load duke parents: diff changeset	924	// The backref is empty size
7f561c08de6b Initial load duke parents: diff changeset	925	if (s == e)
7f561c08de6b Initial load duke parents: diff changeset	926	{
7f561c08de6b Initial load duke parents: diff changeset	927	break;
7f561c08de6b Initial load duke parents: diff changeset	928	}
7f561c08de6b Initial load duke parents: diff changeset	929
7f561c08de6b Initial load duke parents: diff changeset	930	// Get the length of the backref
7f561c08de6b Initial load duke parents: diff changeset	931	int l = e - s;
7f561c08de6b Initial load duke parents: diff changeset	932
7f561c08de6b Initial load duke parents: diff changeset	933	// If there's not enough input left, give up.
7f561c08de6b Initial load duke parents: diff changeset	934	if (search.isEnd(idx + l - 1))
7f561c08de6b Initial load duke parents: diff changeset	935	{
7f561c08de6b Initial load duke parents: diff changeset	936	return -1;
7f561c08de6b Initial load duke parents: diff changeset	937	}
7f561c08de6b Initial load duke parents: diff changeset	938
7f561c08de6b Initial load duke parents: diff changeset	939	// Case fold the backref?
7f561c08de6b Initial load duke parents: diff changeset	940	final boolean caseFold =
7f561c08de6b Initial load duke parents: diff changeset	941	((matchFlags & MATCH_CASEINDEPENDENT) != 0);
7f561c08de6b Initial load duke parents: diff changeset	942	// Compare backref to input
7f561c08de6b Initial load duke parents: diff changeset	943	for (int i = 0; i < l; i++)
7f561c08de6b Initial load duke parents: diff changeset	944	{
7f561c08de6b Initial load duke parents: diff changeset	945	if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
7f561c08de6b Initial load duke parents: diff changeset	946	{
7f561c08de6b Initial load duke parents: diff changeset	947	return -1;
7f561c08de6b Initial load duke parents: diff changeset	948	}
7f561c08de6b Initial load duke parents: diff changeset	949	}
7f561c08de6b Initial load duke parents: diff changeset	950	}
7f561c08de6b Initial load duke parents: diff changeset	951	break;
7f561c08de6b Initial load duke parents: diff changeset	952
7f561c08de6b Initial load duke parents: diff changeset	953	case OP_BOL:
7f561c08de6b Initial load duke parents: diff changeset	954
7f561c08de6b Initial load duke parents: diff changeset	955	// Fail if we're not at the start of the string
7f561c08de6b Initial load duke parents: diff changeset	956	if (idx != 0)
7f561c08de6b Initial load duke parents: diff changeset	957	{
7f561c08de6b Initial load duke parents: diff changeset	958	// If we're multiline matching, we could still be at the start of a line
7f561c08de6b Initial load duke parents: diff changeset	959	if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
7f561c08de6b Initial load duke parents: diff changeset	960	{
7f561c08de6b Initial load duke parents: diff changeset	961	// If not at start of line, give up
7f561c08de6b Initial load duke parents: diff changeset	962	if (idx <= 0 \|\| !isNewline(idx - 1)) {
7f561c08de6b Initial load duke parents: diff changeset	963	return -1;
7f561c08de6b Initial load duke parents: diff changeset	964	} else {
7f561c08de6b Initial load duke parents: diff changeset	965	break;
7f561c08de6b Initial load duke parents: diff changeset	966	}
7f561c08de6b Initial load duke parents: diff changeset	967	}
7f561c08de6b Initial load duke parents: diff changeset	968	return -1;
7f561c08de6b Initial load duke parents: diff changeset	969	}
7f561c08de6b Initial load duke parents: diff changeset	970	break;
7f561c08de6b Initial load duke parents: diff changeset	971
7f561c08de6b Initial load duke parents: diff changeset	972	case OP_EOL:
7f561c08de6b Initial load duke parents: diff changeset	973
7f561c08de6b Initial load duke parents: diff changeset	974	// If we're not at the end of string
7f561c08de6b Initial load duke parents: diff changeset	975	if (!search.isEnd(0) && !search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	976	{
7f561c08de6b Initial load duke parents: diff changeset	977	// If we're multi-line matching
7f561c08de6b Initial load duke parents: diff changeset	978	if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
7f561c08de6b Initial load duke parents: diff changeset	979	{
7f561c08de6b Initial load duke parents: diff changeset	980	// Give up if we're not at the end of a line
7f561c08de6b Initial load duke parents: diff changeset	981	if (!isNewline(idx)) {
7f561c08de6b Initial load duke parents: diff changeset	982	return -1;
7f561c08de6b Initial load duke parents: diff changeset	983	} else {
7f561c08de6b Initial load duke parents: diff changeset	984	break;
7f561c08de6b Initial load duke parents: diff changeset	985	}
7f561c08de6b Initial load duke parents: diff changeset	986	}
7f561c08de6b Initial load duke parents: diff changeset	987	return -1;
7f561c08de6b Initial load duke parents: diff changeset	988	}
7f561c08de6b Initial load duke parents: diff changeset	989	break;
7f561c08de6b Initial load duke parents: diff changeset	990
7f561c08de6b Initial load duke parents: diff changeset	991	case OP_ESCAPE:
7f561c08de6b Initial load duke parents: diff changeset	992
7f561c08de6b Initial load duke parents: diff changeset	993	// Which escape?
7f561c08de6b Initial load duke parents: diff changeset	994	switch (opdata)
7f561c08de6b Initial load duke parents: diff changeset	995	{
7f561c08de6b Initial load duke parents: diff changeset	996	// Word boundary match
7f561c08de6b Initial load duke parents: diff changeset	997	case E_NBOUND:
7f561c08de6b Initial load duke parents: diff changeset	998	case E_BOUND:
7f561c08de6b Initial load duke parents: diff changeset	999	{
7f561c08de6b Initial load duke parents: diff changeset	1000	char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
7f561c08de6b Initial load duke parents: diff changeset	1001	char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
7f561c08de6b Initial load duke parents: diff changeset	1002	if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
7f561c08de6b Initial load duke parents: diff changeset	1003	{
7f561c08de6b Initial load duke parents: diff changeset	1004	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1005	}
7f561c08de6b Initial load duke parents: diff changeset	1006	}
7f561c08de6b Initial load duke parents: diff changeset	1007	break;
7f561c08de6b Initial load duke parents: diff changeset	1008
7f561c08de6b Initial load duke parents: diff changeset	1009	// Alpha-numeric, digit, space, javaLetter, javaLetterOrDigit
7f561c08de6b Initial load duke parents: diff changeset	1010	case E_ALNUM:
7f561c08de6b Initial load duke parents: diff changeset	1011	case E_NALNUM:
7f561c08de6b Initial load duke parents: diff changeset	1012	case E_DIGIT:
7f561c08de6b Initial load duke parents: diff changeset	1013	case E_NDIGIT:
7f561c08de6b Initial load duke parents: diff changeset	1014	case E_SPACE:
7f561c08de6b Initial load duke parents: diff changeset	1015	case E_NSPACE:
7f561c08de6b Initial load duke parents: diff changeset	1016
7f561c08de6b Initial load duke parents: diff changeset	1017	// Give up if out of input
7f561c08de6b Initial load duke parents: diff changeset	1018	if (search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	1019	{
7f561c08de6b Initial load duke parents: diff changeset	1020	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1021	}
7f561c08de6b Initial load duke parents: diff changeset	1022
7f561c08de6b Initial load duke parents: diff changeset	1023	char c = search.charAt(idx);
7f561c08de6b Initial load duke parents: diff changeset	1024
7f561c08de6b Initial load duke parents: diff changeset	1025	// Switch on escape
7f561c08de6b Initial load duke parents: diff changeset	1026	switch (opdata)
7f561c08de6b Initial load duke parents: diff changeset	1027	{
7f561c08de6b Initial load duke parents: diff changeset	1028	case E_ALNUM:
7f561c08de6b Initial load duke parents: diff changeset	1029	case E_NALNUM:
7f561c08de6b Initial load duke parents: diff changeset	1030	if (!((Character.isLetterOrDigit(c) \|\| c == '_') == (opdata == E_ALNUM)))
7f561c08de6b Initial load duke parents: diff changeset	1031	{
7f561c08de6b Initial load duke parents: diff changeset	1032	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1033	}
7f561c08de6b Initial load duke parents: diff changeset	1034	break;
7f561c08de6b Initial load duke parents: diff changeset	1035
7f561c08de6b Initial load duke parents: diff changeset	1036	case E_DIGIT:
7f561c08de6b Initial load duke parents: diff changeset	1037	case E_NDIGIT:
7f561c08de6b Initial load duke parents: diff changeset	1038	if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
7f561c08de6b Initial load duke parents: diff changeset	1039	{
7f561c08de6b Initial load duke parents: diff changeset	1040	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1041	}
7f561c08de6b Initial load duke parents: diff changeset	1042	break;
7f561c08de6b Initial load duke parents: diff changeset	1043
7f561c08de6b Initial load duke parents: diff changeset	1044	case E_SPACE:
7f561c08de6b Initial load duke parents: diff changeset	1045	case E_NSPACE:
7f561c08de6b Initial load duke parents: diff changeset	1046	if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
7f561c08de6b Initial load duke parents: diff changeset	1047	{
7f561c08de6b Initial load duke parents: diff changeset	1048	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1049	}
7f561c08de6b Initial load duke parents: diff changeset	1050	break;
7f561c08de6b Initial load duke parents: diff changeset	1051	}
7f561c08de6b Initial load duke parents: diff changeset	1052	idx++;
7f561c08de6b Initial load duke parents: diff changeset	1053	break;
7f561c08de6b Initial load duke parents: diff changeset	1054
7f561c08de6b Initial load duke parents: diff changeset	1055	default:
7f561c08de6b Initial load duke parents: diff changeset	1056	internalError("Unrecognized escape '" + opdata + "'");
7f561c08de6b Initial load duke parents: diff changeset	1057	}
7f561c08de6b Initial load duke parents: diff changeset	1058	break;
7f561c08de6b Initial load duke parents: diff changeset	1059
7f561c08de6b Initial load duke parents: diff changeset	1060	case OP_ANY:
7f561c08de6b Initial load duke parents: diff changeset	1061
7f561c08de6b Initial load duke parents: diff changeset	1062	if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
7f561c08de6b Initial load duke parents: diff changeset	1063	// Match anything
7f561c08de6b Initial load duke parents: diff changeset	1064	if (search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	1065	{
7f561c08de6b Initial load duke parents: diff changeset	1066	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1067	}
7f561c08de6b Initial load duke parents: diff changeset	1068	}
7f561c08de6b Initial load duke parents: diff changeset	1069	else
7f561c08de6b Initial load duke parents: diff changeset	1070	{
7f561c08de6b Initial load duke parents: diff changeset	1071	// Match anything but a newline
7f561c08de6b Initial load duke parents: diff changeset	1072	if (search.isEnd(idx) \|\| isNewline(idx))
7f561c08de6b Initial load duke parents: diff changeset	1073	{
7f561c08de6b Initial load duke parents: diff changeset	1074	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1075	}
7f561c08de6b Initial load duke parents: diff changeset	1076	}
7f561c08de6b Initial load duke parents: diff changeset	1077	idx++;
7f561c08de6b Initial load duke parents: diff changeset	1078	break;
7f561c08de6b Initial load duke parents: diff changeset	1079
7f561c08de6b Initial load duke parents: diff changeset	1080	case OP_ATOM:
7f561c08de6b Initial load duke parents: diff changeset	1081	{
7f561c08de6b Initial load duke parents: diff changeset	1082	// Match an atom value
7f561c08de6b Initial load duke parents: diff changeset	1083	if (search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	1084	{
7f561c08de6b Initial load duke parents: diff changeset	1085	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1086	}
7f561c08de6b Initial load duke parents: diff changeset	1087
7f561c08de6b Initial load duke parents: diff changeset	1088	// Get length of atom and starting index
7f561c08de6b Initial load duke parents: diff changeset	1089	int lenAtom = opdata;
7f561c08de6b Initial load duke parents: diff changeset	1090	int startAtom = node + nodeSize;
7f561c08de6b Initial load duke parents: diff changeset	1091
7f561c08de6b Initial load duke parents: diff changeset	1092	// Give up if not enough input remains to have a match
7f561c08de6b Initial load duke parents: diff changeset	1093	if (search.isEnd(lenAtom + idx - 1))
7f561c08de6b Initial load duke parents: diff changeset	1094	{
7f561c08de6b Initial load duke parents: diff changeset	1095	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1096	}
7f561c08de6b Initial load duke parents: diff changeset	1097
7f561c08de6b Initial load duke parents: diff changeset	1098	// Match atom differently depending on casefolding flag
7f561c08de6b Initial load duke parents: diff changeset	1099	final boolean caseFold =
7f561c08de6b Initial load duke parents: diff changeset	1100	((matchFlags & MATCH_CASEINDEPENDENT) != 0);
7f561c08de6b Initial load duke parents: diff changeset	1101
7f561c08de6b Initial load duke parents: diff changeset	1102	for (int i = 0; i < lenAtom; i++)
7f561c08de6b Initial load duke parents: diff changeset	1103	{
7f561c08de6b Initial load duke parents: diff changeset	1104	if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
7f561c08de6b Initial load duke parents: diff changeset	1105	{
7f561c08de6b Initial load duke parents: diff changeset	1106	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1107	}
7f561c08de6b Initial load duke parents: diff changeset	1108	}
7f561c08de6b Initial load duke parents: diff changeset	1109	}
7f561c08de6b Initial load duke parents: diff changeset	1110	break;
7f561c08de6b Initial load duke parents: diff changeset	1111
7f561c08de6b Initial load duke parents: diff changeset	1112	case OP_POSIXCLASS:
7f561c08de6b Initial load duke parents: diff changeset	1113	{
7f561c08de6b Initial load duke parents: diff changeset	1114	// Out of input?
7f561c08de6b Initial load duke parents: diff changeset	1115	if (search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	1116	{
7f561c08de6b Initial load duke parents: diff changeset	1117	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1118	}
7f561c08de6b Initial load duke parents: diff changeset	1119
7f561c08de6b Initial load duke parents: diff changeset	1120	switch (opdata)
7f561c08de6b Initial load duke parents: diff changeset	1121	{
7f561c08de6b Initial load duke parents: diff changeset	1122	case POSIX_CLASS_ALNUM:
7f561c08de6b Initial load duke parents: diff changeset	1123	if (!Character.isLetterOrDigit(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1124	{
7f561c08de6b Initial load duke parents: diff changeset	1125	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1126	}
7f561c08de6b Initial load duke parents: diff changeset	1127	break;
7f561c08de6b Initial load duke parents: diff changeset	1128
7f561c08de6b Initial load duke parents: diff changeset	1129	case POSIX_CLASS_ALPHA:
7f561c08de6b Initial load duke parents: diff changeset	1130	if (!Character.isLetter(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1131	{
7f561c08de6b Initial load duke parents: diff changeset	1132	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1133	}
7f561c08de6b Initial load duke parents: diff changeset	1134	break;
7f561c08de6b Initial load duke parents: diff changeset	1135
7f561c08de6b Initial load duke parents: diff changeset	1136	case POSIX_CLASS_DIGIT:
7f561c08de6b Initial load duke parents: diff changeset	1137	if (!Character.isDigit(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1138	{
7f561c08de6b Initial load duke parents: diff changeset	1139	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1140	}
7f561c08de6b Initial load duke parents: diff changeset	1141	break;
7f561c08de6b Initial load duke parents: diff changeset	1142
7f561c08de6b Initial load duke parents: diff changeset	1143	case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
7f561c08de6b Initial load duke parents: diff changeset	1144	if (!Character.isSpaceChar(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1145	{
7f561c08de6b Initial load duke parents: diff changeset	1146	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1147	}
7f561c08de6b Initial load duke parents: diff changeset	1148	break;
7f561c08de6b Initial load duke parents: diff changeset	1149
7f561c08de6b Initial load duke parents: diff changeset	1150	case POSIX_CLASS_SPACE:
7f561c08de6b Initial load duke parents: diff changeset	1151	if (!Character.isWhitespace(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1152	{
7f561c08de6b Initial load duke parents: diff changeset	1153	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1154	}
7f561c08de6b Initial load duke parents: diff changeset	1155	break;
7f561c08de6b Initial load duke parents: diff changeset	1156
7f561c08de6b Initial load duke parents: diff changeset	1157	case POSIX_CLASS_CNTRL:
7f561c08de6b Initial load duke parents: diff changeset	1158	if (Character.getType(search.charAt(idx)) != Character.CONTROL)
7f561c08de6b Initial load duke parents: diff changeset	1159	{
7f561c08de6b Initial load duke parents: diff changeset	1160	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1161	}
7f561c08de6b Initial load duke parents: diff changeset	1162	break;
7f561c08de6b Initial load duke parents: diff changeset	1163
7f561c08de6b Initial load duke parents: diff changeset	1164	case POSIX_CLASS_GRAPH: // JWL - bugbug???
7f561c08de6b Initial load duke parents: diff changeset	1165	switch (Character.getType(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1166	{
7f561c08de6b Initial load duke parents: diff changeset	1167	case Character.MATH_SYMBOL:
7f561c08de6b Initial load duke parents: diff changeset	1168	case Character.CURRENCY_SYMBOL:
7f561c08de6b Initial load duke parents: diff changeset	1169	case Character.MODIFIER_SYMBOL:
7f561c08de6b Initial load duke parents: diff changeset	1170	case Character.OTHER_SYMBOL:
7f561c08de6b Initial load duke parents: diff changeset	1171	break;
7f561c08de6b Initial load duke parents: diff changeset	1172
7f561c08de6b Initial load duke parents: diff changeset	1173	default:
7f561c08de6b Initial load duke parents: diff changeset	1174	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1175	}
7f561c08de6b Initial load duke parents: diff changeset	1176	break;
7f561c08de6b Initial load duke parents: diff changeset	1177
7f561c08de6b Initial load duke parents: diff changeset	1178	case POSIX_CLASS_LOWER:
7f561c08de6b Initial load duke parents: diff changeset	1179	if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
7f561c08de6b Initial load duke parents: diff changeset	1180	{
7f561c08de6b Initial load duke parents: diff changeset	1181	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1182	}
7f561c08de6b Initial load duke parents: diff changeset	1183	break;
7f561c08de6b Initial load duke parents: diff changeset	1184
7f561c08de6b Initial load duke parents: diff changeset	1185	case POSIX_CLASS_UPPER:
7f561c08de6b Initial load duke parents: diff changeset	1186	if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
7f561c08de6b Initial load duke parents: diff changeset	1187	{
7f561c08de6b Initial load duke parents: diff changeset	1188	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1189	}
7f561c08de6b Initial load duke parents: diff changeset	1190	break;
7f561c08de6b Initial load duke parents: diff changeset	1191
7f561c08de6b Initial load duke parents: diff changeset	1192	case POSIX_CLASS_PRINT:
7f561c08de6b Initial load duke parents: diff changeset	1193	if (Character.getType(search.charAt(idx)) == Character.CONTROL)
7f561c08de6b Initial load duke parents: diff changeset	1194	{
7f561c08de6b Initial load duke parents: diff changeset	1195	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1196	}
7f561c08de6b Initial load duke parents: diff changeset	1197	break;
7f561c08de6b Initial load duke parents: diff changeset	1198
7f561c08de6b Initial load duke parents: diff changeset	1199	case POSIX_CLASS_PUNCT:
7f561c08de6b Initial load duke parents: diff changeset	1200	{
7f561c08de6b Initial load duke parents: diff changeset	1201	int type = Character.getType(search.charAt(idx));
7f561c08de6b Initial load duke parents: diff changeset	1202	switch(type)
7f561c08de6b Initial load duke parents: diff changeset	1203	{
7f561c08de6b Initial load duke parents: diff changeset	1204	case Character.DASH_PUNCTUATION:
7f561c08de6b Initial load duke parents: diff changeset	1205	case Character.START_PUNCTUATION:
7f561c08de6b Initial load duke parents: diff changeset	1206	case Character.END_PUNCTUATION:
7f561c08de6b Initial load duke parents: diff changeset	1207	case Character.CONNECTOR_PUNCTUATION:
7f561c08de6b Initial load duke parents: diff changeset	1208	case Character.OTHER_PUNCTUATION:
7f561c08de6b Initial load duke parents: diff changeset	1209	break;
7f561c08de6b Initial load duke parents: diff changeset	1210
7f561c08de6b Initial load duke parents: diff changeset	1211	default:
7f561c08de6b Initial load duke parents: diff changeset	1212	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1213	}
7f561c08de6b Initial load duke parents: diff changeset	1214	}
7f561c08de6b Initial load duke parents: diff changeset	1215	break;
7f561c08de6b Initial load duke parents: diff changeset	1216
7f561c08de6b Initial load duke parents: diff changeset	1217	case POSIX_CLASS_XDIGIT: // JWL - bugbug??
7f561c08de6b Initial load duke parents: diff changeset	1218	{
7f561c08de6b Initial load duke parents: diff changeset	1219	boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') \|\|
7f561c08de6b Initial load duke parents: diff changeset	1220	(search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') \|\|
7f561c08de6b Initial load duke parents: diff changeset	1221	(search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
7f561c08de6b Initial load duke parents: diff changeset	1222	if (!isXDigit)
7f561c08de6b Initial load duke parents: diff changeset	1223	{
7f561c08de6b Initial load duke parents: diff changeset	1224	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1225	}
7f561c08de6b Initial load duke parents: diff changeset	1226	}
7f561c08de6b Initial load duke parents: diff changeset	1227	break;
7f561c08de6b Initial load duke parents: diff changeset	1228
7f561c08de6b Initial load duke parents: diff changeset	1229	case POSIX_CLASS_JSTART:
7f561c08de6b Initial load duke parents: diff changeset	1230	if (!Character.isJavaIdentifierStart(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1231	{
7f561c08de6b Initial load duke parents: diff changeset	1232	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1233	}
7f561c08de6b Initial load duke parents: diff changeset	1234	break;
7f561c08de6b Initial load duke parents: diff changeset	1235
7f561c08de6b Initial load duke parents: diff changeset	1236	case POSIX_CLASS_JPART:
7f561c08de6b Initial load duke parents: diff changeset	1237	if (!Character.isJavaIdentifierPart(search.charAt(idx)))
7f561c08de6b Initial load duke parents: diff changeset	1238	{
7f561c08de6b Initial load duke parents: diff changeset	1239	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1240	}
7f561c08de6b Initial load duke parents: diff changeset	1241	break;
7f561c08de6b Initial load duke parents: diff changeset	1242
7f561c08de6b Initial load duke parents: diff changeset	1243	default:
7f561c08de6b Initial load duke parents: diff changeset	1244	internalError("Bad posix class");
7f561c08de6b Initial load duke parents: diff changeset	1245	break;
7f561c08de6b Initial load duke parents: diff changeset	1246	}
7f561c08de6b Initial load duke parents: diff changeset	1247
7f561c08de6b Initial load duke parents: diff changeset	1248	// Matched.
7f561c08de6b Initial load duke parents: diff changeset	1249	idx++;
7f561c08de6b Initial load duke parents: diff changeset	1250	}
7f561c08de6b Initial load duke parents: diff changeset	1251	break;
7f561c08de6b Initial load duke parents: diff changeset	1252
7f561c08de6b Initial load duke parents: diff changeset	1253	case OP_ANYOF:
7f561c08de6b Initial load duke parents: diff changeset	1254	{
7f561c08de6b Initial load duke parents: diff changeset	1255	// Out of input?
7f561c08de6b Initial load duke parents: diff changeset	1256	if (search.isEnd(idx))
7f561c08de6b Initial load duke parents: diff changeset	1257	{
7f561c08de6b Initial load duke parents: diff changeset	1258	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1259	}
7f561c08de6b Initial load duke parents: diff changeset	1260
7f561c08de6b Initial load duke parents: diff changeset	1261	// Get character to match against character class and maybe casefold
7f561c08de6b Initial load duke parents: diff changeset	1262	char c = search.charAt(idx);
7f561c08de6b Initial load duke parents: diff changeset	1263	boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
7f561c08de6b Initial load duke parents: diff changeset	1264	// Loop through character class checking our match character
7f561c08de6b Initial load duke parents: diff changeset	1265	int idxRange = node + nodeSize;
7f561c08de6b Initial load duke parents: diff changeset	1266	int idxEnd = idxRange + (opdata * 2);
7f561c08de6b Initial load duke parents: diff changeset	1267	boolean match = false;
7f561c08de6b Initial load duke parents: diff changeset	1268	for (int i = idxRange; !match && i < idxEnd; )
7f561c08de6b Initial load duke parents: diff changeset	1269	{
7f561c08de6b Initial load duke parents: diff changeset	1270	// Get start, end and match characters
7f561c08de6b Initial load duke parents: diff changeset	1271	char s = instruction[i++];
7f561c08de6b Initial load duke parents: diff changeset	1272	char e = instruction[i++];
7f561c08de6b Initial load duke parents: diff changeset	1273
7f561c08de6b Initial load duke parents: diff changeset	1274	match = ((compareChars(c, s, caseFold) >= 0)
7f561c08de6b Initial load duke parents: diff changeset	1275	&& (compareChars(c, e, caseFold) <= 0));
7f561c08de6b Initial load duke parents: diff changeset	1276	}
7f561c08de6b Initial load duke parents: diff changeset	1277
7f561c08de6b Initial load duke parents: diff changeset	1278	// Fail if we didn't match the character class
7f561c08de6b Initial load duke parents: diff changeset	1279	if (!match)
7f561c08de6b Initial load duke parents: diff changeset	1280	{
7f561c08de6b Initial load duke parents: diff changeset	1281	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1282	}
7f561c08de6b Initial load duke parents: diff changeset	1283	idx++;
7f561c08de6b Initial load duke parents: diff changeset	1284	}
7f561c08de6b Initial load duke parents: diff changeset	1285	break;
7f561c08de6b Initial load duke parents: diff changeset	1286
7f561c08de6b Initial load duke parents: diff changeset	1287	case OP_BRANCH:
7f561c08de6b Initial load duke parents: diff changeset	1288	{
7f561c08de6b Initial load duke parents: diff changeset	1289	// Check for choices
7f561c08de6b Initial load duke parents: diff changeset	1290	if (instruction[next + offsetOpcode] != OP_BRANCH)
7f561c08de6b Initial load duke parents: diff changeset	1291	{
7f561c08de6b Initial load duke parents: diff changeset	1292	// If there aren't any other choices, just evaluate this branch.
7f561c08de6b Initial load duke parents: diff changeset	1293	node += nodeSize;
7f561c08de6b Initial load duke parents: diff changeset	1294	continue;
7f561c08de6b Initial load duke parents: diff changeset	1295	}
7f561c08de6b Initial load duke parents: diff changeset	1296
7f561c08de6b Initial load duke parents: diff changeset	1297	// Try all available branches
7f561c08de6b Initial load duke parents: diff changeset	1298	short nextBranch;
7f561c08de6b Initial load duke parents: diff changeset	1299	do
7f561c08de6b Initial load duke parents: diff changeset	1300	{
7f561c08de6b Initial load duke parents: diff changeset	1301	// Try matching the branch against the string
7f561c08de6b Initial load duke parents: diff changeset	1302	if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	1303	{
7f561c08de6b Initial load duke parents: diff changeset	1304	return idxNew;
7f561c08de6b Initial load duke parents: diff changeset	1305	}
7f561c08de6b Initial load duke parents: diff changeset	1306
7f561c08de6b Initial load duke parents: diff changeset	1307	// Go to next branch (if any)
7f561c08de6b Initial load duke parents: diff changeset	1308	nextBranch = (short)instruction[node + offsetNext];
7f561c08de6b Initial load duke parents: diff changeset	1309	node += nextBranch;
7f561c08de6b Initial load duke parents: diff changeset	1310	}
7f561c08de6b Initial load duke parents: diff changeset	1311	while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));
7f561c08de6b Initial load duke parents: diff changeset	1312
7f561c08de6b Initial load duke parents: diff changeset	1313	// Failed to match any branch!
7f561c08de6b Initial load duke parents: diff changeset	1314	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1315	}
7f561c08de6b Initial load duke parents: diff changeset	1316
7f561c08de6b Initial load duke parents: diff changeset	1317	case OP_NOTHING:
7f561c08de6b Initial load duke parents: diff changeset	1318	case OP_GOTO:
7f561c08de6b Initial load duke parents: diff changeset	1319
7f561c08de6b Initial load duke parents: diff changeset	1320	// Just advance to the next node without doing anything
7f561c08de6b Initial load duke parents: diff changeset	1321	break;
7f561c08de6b Initial load duke parents: diff changeset	1322
7f561c08de6b Initial load duke parents: diff changeset	1323	case OP_END:
7f561c08de6b Initial load duke parents: diff changeset	1324
7f561c08de6b Initial load duke parents: diff changeset	1325	// Match has succeeded!
7f561c08de6b Initial load duke parents: diff changeset	1326	setParenEnd(0, idx);
7f561c08de6b Initial load duke parents: diff changeset	1327	return idx;
7f561c08de6b Initial load duke parents: diff changeset	1328
7f561c08de6b Initial load duke parents: diff changeset	1329	default:
7f561c08de6b Initial load duke parents: diff changeset	1330
7f561c08de6b Initial load duke parents: diff changeset	1331	// Corrupt program
7f561c08de6b Initial load duke parents: diff changeset	1332	internalError("Invalid opcode '" + opcode + "'");
7f561c08de6b Initial load duke parents: diff changeset	1333	}
7f561c08de6b Initial load duke parents: diff changeset	1334
7f561c08de6b Initial load duke parents: diff changeset	1335	// Advance to the next node in the program
7f561c08de6b Initial load duke parents: diff changeset	1336	node = next;
7f561c08de6b Initial load duke parents: diff changeset	1337	}
7f561c08de6b Initial load duke parents: diff changeset	1338
7f561c08de6b Initial load duke parents: diff changeset	1339	// We "should" never end up here
7f561c08de6b Initial load duke parents: diff changeset	1340	internalError("Corrupt program");
7f561c08de6b Initial load duke parents: diff changeset	1341	return -1;
7f561c08de6b Initial load duke parents: diff changeset	1342	}
7f561c08de6b Initial load duke parents: diff changeset	1343
7f561c08de6b Initial load duke parents: diff changeset	1344	/**
7f561c08de6b Initial load duke parents: diff changeset	1345	* Match the current regular expression program against the current
7f561c08de6b Initial load duke parents: diff changeset	1346	* input string, starting at index i of the input string. This method
7f561c08de6b Initial load duke parents: diff changeset	1347	* is only meant for internal use.
7f561c08de6b Initial load duke parents: diff changeset	1348	*
7f561c08de6b Initial load duke parents: diff changeset	1349	* @param i The input string index to start matching at
7f561c08de6b Initial load duke parents: diff changeset	1350	* @return True if the input matched the expression
7f561c08de6b Initial load duke parents: diff changeset	1351	*/
7f561c08de6b Initial load duke parents: diff changeset	1352	protected boolean matchAt(int i)
7f561c08de6b Initial load duke parents: diff changeset	1353	{
7f561c08de6b Initial load duke parents: diff changeset	1354	// Initialize start pointer, paren cache and paren count
7f561c08de6b Initial load duke parents: diff changeset	1355	start0 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1356	end0 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1357	start1 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1358	end1 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1359	start2 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1360	end2 = -1;
7f561c08de6b Initial load duke parents: diff changeset	1361	startn = null;
7f561c08de6b Initial load duke parents: diff changeset	1362	endn = null;
7f561c08de6b Initial load duke parents: diff changeset	1363	parenCount = 1;
7f561c08de6b Initial load duke parents: diff changeset	1364	setParenStart(0, i);
7f561c08de6b Initial load duke parents: diff changeset	1365
7f561c08de6b Initial load duke parents: diff changeset	1366	// Allocate backref arrays (unless optimizations indicate otherwise)
7f561c08de6b Initial load duke parents: diff changeset	1367	if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
7f561c08de6b Initial load duke parents: diff changeset	1368	{
7f561c08de6b Initial load duke parents: diff changeset	1369	startBackref = new int[maxParen];
7f561c08de6b Initial load duke parents: diff changeset	1370	endBackref = new int[maxParen];
7f561c08de6b Initial load duke parents: diff changeset	1371	}
7f561c08de6b Initial load duke parents: diff changeset	1372
7f561c08de6b Initial load duke parents: diff changeset	1373	// Match against string
7f561c08de6b Initial load duke parents: diff changeset	1374	int idx;
7f561c08de6b Initial load duke parents: diff changeset	1375	if ((idx = matchNodes(0, maxNode, i)) != -1)
7f561c08de6b Initial load duke parents: diff changeset	1376	{
7f561c08de6b Initial load duke parents: diff changeset	1377	setParenEnd(0, idx);
7f561c08de6b Initial load duke parents: diff changeset	1378	return true;
7f561c08de6b Initial load duke parents: diff changeset	1379	}
7f561c08de6b Initial load duke parents: diff changeset	1380
7f561c08de6b Initial load duke parents: diff changeset	1381	// Didn't match
7f561c08de6b Initial load duke parents: diff changeset	1382	parenCount = 0;
7f561c08de6b Initial load duke parents: diff changeset	1383	return false;
7f561c08de6b Initial load duke parents: diff changeset	1384	}
7f561c08de6b Initial load duke parents: diff changeset	1385
7f561c08de6b Initial load duke parents: diff changeset	1386	/**
7f561c08de6b Initial load duke parents: diff changeset	1387	* Matches the current regular expression program against a character array,
7f561c08de6b Initial load duke parents: diff changeset	1388	* starting at a given index.
7f561c08de6b Initial load duke parents: diff changeset	1389	*
7f561c08de6b Initial load duke parents: diff changeset	1390	* @param search String to match against
7f561c08de6b Initial load duke parents: diff changeset	1391	* @param i Index to start searching at
7f561c08de6b Initial load duke parents: diff changeset	1392	* @return True if string matched
7f561c08de6b Initial load duke parents: diff changeset	1393	*/
7f561c08de6b Initial load duke parents: diff changeset	1394	public boolean match(String search, int i)
7f561c08de6b Initial load duke parents: diff changeset	1395	{
7f561c08de6b Initial load duke parents: diff changeset	1396	return match(new StringCharacterIterator(search), i);
7f561c08de6b Initial load duke parents: diff changeset	1397	}
7f561c08de6b Initial load duke parents: diff changeset	1398
7f561c08de6b Initial load duke parents: diff changeset	1399	/**
7f561c08de6b Initial load duke parents: diff changeset	1400	* Matches the current regular expression program against a character array,
7f561c08de6b Initial load duke parents: diff changeset	1401	* starting at a given index.
7f561c08de6b Initial load duke parents: diff changeset	1402	*
7f561c08de6b Initial load duke parents: diff changeset	1403	* @param search String to match against
7f561c08de6b Initial load duke parents: diff changeset	1404	* @param i Index to start searching at
7f561c08de6b Initial load duke parents: diff changeset	1405	* @return True if string matched
7f561c08de6b Initial load duke parents: diff changeset	1406	*/
7f561c08de6b Initial load duke parents: diff changeset	1407	public boolean match(CharacterIterator search, int i)
7f561c08de6b Initial load duke parents: diff changeset	1408	{
7f561c08de6b Initial load duke parents: diff changeset	1409	// There is no compiled program to search with!
7f561c08de6b Initial load duke parents: diff changeset	1410	if (program == null)
7f561c08de6b Initial load duke parents: diff changeset	1411	{
7f561c08de6b Initial load duke parents: diff changeset	1412	// This should be uncommon enough to be an error case rather
7f561c08de6b Initial load duke parents: diff changeset	1413	// than an exception (which would have to be handled everywhere)
7f561c08de6b Initial load duke parents: diff changeset	1414	internalError("No RE program to run!");
7f561c08de6b Initial load duke parents: diff changeset	1415	}
7f561c08de6b Initial load duke parents: diff changeset	1416
7f561c08de6b Initial load duke parents: diff changeset	1417	// Save string to search
7f561c08de6b Initial load duke parents: diff changeset	1418	this.search = search;
7f561c08de6b Initial load duke parents: diff changeset	1419
7f561c08de6b Initial load duke parents: diff changeset	1420	// Can we optimize the search by looking for a prefix string?
7f561c08de6b Initial load duke parents: diff changeset	1421	if (program.prefix == null)
7f561c08de6b Initial load duke parents: diff changeset	1422	{
7f561c08de6b Initial load duke parents: diff changeset	1423	// Unprefixed matching must try for a match at each character
7f561c08de6b Initial load duke parents: diff changeset	1424	for ( ;! search.isEnd(i - 1); i++)
7f561c08de6b Initial load duke parents: diff changeset	1425	{
7f561c08de6b Initial load duke parents: diff changeset	1426	// Try a match at index i
7f561c08de6b Initial load duke parents: diff changeset	1427	if (matchAt(i))
7f561c08de6b Initial load duke parents: diff changeset	1428	{
7f561c08de6b Initial load duke parents: diff changeset	1429	return true;
7f561c08de6b Initial load duke parents: diff changeset	1430	}
7f561c08de6b Initial load duke parents: diff changeset	1431	}
7f561c08de6b Initial load duke parents: diff changeset	1432	return false;
7f561c08de6b Initial load duke parents: diff changeset	1433	}
7f561c08de6b Initial load duke parents: diff changeset	1434	else
7f561c08de6b Initial load duke parents: diff changeset	1435	{
7f561c08de6b Initial load duke parents: diff changeset	1436	// Prefix-anchored matching is possible
7f561c08de6b Initial load duke parents: diff changeset	1437	boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
7f561c08de6b Initial load duke parents: diff changeset	1438	char[] prefix = program.prefix;
7f561c08de6b Initial load duke parents: diff changeset	1439	for ( ; !search.isEnd(i + prefix.length - 1); i++)
7f561c08de6b Initial load duke parents: diff changeset	1440	{
7f561c08de6b Initial load duke parents: diff changeset	1441	int j = i;
7f561c08de6b Initial load duke parents: diff changeset	1442	int k = 0;
7f561c08de6b Initial load duke parents: diff changeset	1443
7f561c08de6b Initial load duke parents: diff changeset	1444	boolean match;
7f561c08de6b Initial load duke parents: diff changeset	1445	do {
7f561c08de6b Initial load duke parents: diff changeset	1446	// If there's a mismatch of any character in the prefix, give up
7f561c08de6b Initial load duke parents: diff changeset	1447	match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
7f561c08de6b Initial load duke parents: diff changeset	1448	} while (match && k < prefix.length);
7f561c08de6b Initial load duke parents: diff changeset	1449
7f561c08de6b Initial load duke parents: diff changeset	1450	// See if the whole prefix string matched
7f561c08de6b Initial load duke parents: diff changeset	1451	if (k == prefix.length)
7f561c08de6b Initial load duke parents: diff changeset	1452	{
7f561c08de6b Initial load duke parents: diff changeset	1453	// We matched the full prefix at firstChar, so try it
7f561c08de6b Initial load duke parents: diff changeset	1454	if (matchAt(i))
7f561c08de6b Initial load duke parents: diff changeset	1455	{
7f561c08de6b Initial load duke parents: diff changeset	1456	return true;
7f561c08de6b Initial load duke parents: diff changeset	1457	}
7f561c08de6b Initial load duke parents: diff changeset	1458	}
7f561c08de6b Initial load duke parents: diff changeset	1459	}
7f561c08de6b Initial load duke parents: diff changeset	1460	return false;
7f561c08de6b Initial load duke parents: diff changeset	1461	}
7f561c08de6b Initial load duke parents: diff changeset	1462	}
7f561c08de6b Initial load duke parents: diff changeset	1463
7f561c08de6b Initial load duke parents: diff changeset	1464	/**
7f561c08de6b Initial load duke parents: diff changeset	1465	* Matches the current regular expression program against a String, searching from index 0.
7f561c08de6b Initial load duke parents: diff changeset	1466	*
7f561c08de6b Initial load duke parents: diff changeset	1467	* @param search String to match against
7f561c08de6b Initial load duke parents: diff changeset	1468	* @return True if string matched
7f561c08de6b Initial load duke parents: diff changeset	1469	*/
7f561c08de6b Initial load duke parents: diff changeset	1470	public boolean match(String search)
7f561c08de6b Initial load duke parents: diff changeset	1471	{
7f561c08de6b Initial load duke parents: diff changeset	1472	return match(search, 0);
7f561c08de6b Initial load duke parents: diff changeset	1473	}
7f561c08de6b Initial load duke parents: diff changeset	1474
7f561c08de6b Initial load duke parents: diff changeset	1475	/**
7f561c08de6b Initial load duke parents: diff changeset	1476	* Splits a string into an array of strings on regular expression boundaries.
7f561c08de6b Initial load duke parents: diff changeset	1477	* This function works the same way as the Perl function of the same name.
7f561c08de6b Initial load duke parents: diff changeset	1478	* Given a regular expression of "[ab]+" and a string to split of
7f561c08de6b Initial load duke parents: diff changeset	1479	* "xyzzyababbayyzabbbab123", the result would be the array of Strings
7f561c08de6b Initial load duke parents: diff changeset	1480	* "[xyzzy, yyz, 123]".
7f561c08de6b Initial load duke parents: diff changeset	1481	*
7f561c08de6b Initial load duke parents: diff changeset	1482	* <p>Please note that the first string in the resulting array may be an empty
7f561c08de6b Initial load duke parents: diff changeset	1483	* string. This happens when the very first character of input string is
7f561c08de6b Initial load duke parents: diff changeset	1484	* matched by the pattern.
7f561c08de6b Initial load duke parents: diff changeset	1485	*
7f561c08de6b Initial load duke parents: diff changeset	1486	* @param s String to split on this regular exression
7f561c08de6b Initial load duke parents: diff changeset	1487	* @return Array of strings
7f561c08de6b Initial load duke parents: diff changeset	1488	*/
7f561c08de6b Initial load duke parents: diff changeset	1489	public String[] split(String s)
7f561c08de6b Initial load duke parents: diff changeset	1490	{
7f561c08de6b Initial load duke parents: diff changeset	1491	// Create new vector
7f561c08de6b Initial load duke parents: diff changeset	1492	Vector v = new Vector();
7f561c08de6b Initial load duke parents: diff changeset	1493
7f561c08de6b Initial load duke parents: diff changeset	1494	// Start at position 0 and search the whole string
7f561c08de6b Initial load duke parents: diff changeset	1495	int pos = 0;
7f561c08de6b Initial load duke parents: diff changeset	1496	int len = s.length();
7f561c08de6b Initial load duke parents: diff changeset	1497
7f561c08de6b Initial load duke parents: diff changeset	1498	// Try a match at each position
7f561c08de6b Initial load duke parents: diff changeset	1499	while (pos < len && match(s, pos))
7f561c08de6b Initial load duke parents: diff changeset	1500	{
7f561c08de6b Initial load duke parents: diff changeset	1501	// Get start of match
7f561c08de6b Initial load duke parents: diff changeset	1502	int start = getParenStart(0);
7f561c08de6b Initial load duke parents: diff changeset	1503
7f561c08de6b Initial load duke parents: diff changeset	1504	// Get end of match
7f561c08de6b Initial load duke parents: diff changeset	1505	int newpos = getParenEnd(0);
7f561c08de6b Initial load duke parents: diff changeset	1506
7f561c08de6b Initial load duke parents: diff changeset	1507	// Check if no progress was made
7f561c08de6b Initial load duke parents: diff changeset	1508	if (newpos == pos)
7f561c08de6b Initial load duke parents: diff changeset	1509	{
7f561c08de6b Initial load duke parents: diff changeset	1510	v.addElement(s.substring(pos, start + 1));
7f561c08de6b Initial load duke parents: diff changeset	1511	newpos++;
7f561c08de6b Initial load duke parents: diff changeset	1512	}
7f561c08de6b Initial load duke parents: diff changeset	1513	else
7f561c08de6b Initial load duke parents: diff changeset	1514	{
7f561c08de6b Initial load duke parents: diff changeset	1515	v.addElement(s.substring(pos, start));
7f561c08de6b Initial load duke parents: diff changeset	1516	}
7f561c08de6b Initial load duke parents: diff changeset	1517
7f561c08de6b Initial load duke parents: diff changeset	1518	// Move to new position
7f561c08de6b Initial load duke parents: diff changeset	1519	pos = newpos;
7f561c08de6b Initial load duke parents: diff changeset	1520	}
7f561c08de6b Initial load duke parents: diff changeset	1521
7f561c08de6b Initial load duke parents: diff changeset	1522	// Push remainder if it's not empty
7f561c08de6b Initial load duke parents: diff changeset	1523	String remainder = s.substring(pos);
7f561c08de6b Initial load duke parents: diff changeset	1524	if (remainder.length() != 0)
7f561c08de6b Initial load duke parents: diff changeset	1525	{
7f561c08de6b Initial load duke parents: diff changeset	1526	v.addElement(remainder);
7f561c08de6b Initial load duke parents: diff changeset	1527	}
7f561c08de6b Initial load duke parents: diff changeset	1528
7f561c08de6b Initial load duke parents: diff changeset	1529	// Return vector as an array of strings
7f561c08de6b Initial load duke parents: diff changeset	1530	String[] ret = new String[v.size()];
7f561c08de6b Initial load duke parents: diff changeset	1531	v.copyInto(ret);
7f561c08de6b Initial load duke parents: diff changeset	1532	return ret;
7f561c08de6b Initial load duke parents: diff changeset	1533	}
7f561c08de6b Initial load duke parents: diff changeset	1534
7f561c08de6b Initial load duke parents: diff changeset	1535	/**
7f561c08de6b Initial load duke parents: diff changeset	1536	* Flag value that indicates that subst should replace all occurrences of this
7f561c08de6b Initial load duke parents: diff changeset	1537	* regular expression. Its value is zero (no bit set), so it cannot be detected with a bit mask.
7f561c08de6b Initial load duke parents: diff changeset	1538	*/
7f561c08de6b Initial load duke parents: diff changeset	1539	public static final int REPLACE_ALL = 0x0000;
7f561c08de6b Initial load duke parents: diff changeset	1540
7f561c08de6b Initial load duke parents: diff changeset	1541	/**
7f561c08de6b Initial load duke parents: diff changeset	1542	* Flag bit (0x0001) that indicates that subst should only replace the first occurrence
7f561c08de6b Initial load duke parents: diff changeset	1543	* of this regular expression.
7f561c08de6b Initial load duke parents: diff changeset	1544	*/
7f561c08de6b Initial load duke parents: diff changeset	1545	public static final int REPLACE_FIRSTONLY = 0x0001;
7f561c08de6b Initial load duke parents: diff changeset	1546
7f561c08de6b Initial load duke parents: diff changeset	1547	/**
7f561c08de6b Initial load duke parents: diff changeset	1548	* Flag bit (0x0002) that indicates that subst should replace backreferences.
7f561c08de6b Initial load duke parents: diff changeset	1549	*/
7f561c08de6b Initial load duke parents: diff changeset	1550	public static final int REPLACE_BACKREFERENCES = 0x0002;
7f561c08de6b Initial load duke parents: diff changeset	1551
7f561c08de6b Initial load duke parents: diff changeset	1552	/**
7f561c08de6b Initial load duke parents: diff changeset	1553	* Substitutes a string for this regular expression in another string.
7f561c08de6b Initial load duke parents: diff changeset	1554	* This method works like the Perl function of the same name.
7f561c08de6b Initial load duke parents: diff changeset	1555	* Given a regular expression of "a*b", a String to substituteIn of
7f561c08de6b Initial load duke parents: diff changeset	1556	* "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
7f561c08de6b Initial load duke parents: diff changeset	1557	* resulting String returned by subst would be "-foo-garply-wacky-".
7f561c08de6b Initial load duke parents: diff changeset	1558	*
7f561c08de6b Initial load duke parents: diff changeset	1559	* @param substituteIn String to substitute within
7f561c08de6b Initial load duke parents: diff changeset	1560	* @param substitution String to substitute for all matches of this regular expression.
7f561c08de6b Initial load duke parents: diff changeset	1561	* @return The string substituteIn with zero or more occurrences of the current
7f561c08de6b Initial load duke parents: diff changeset	1562	* regular expression replaced with the substitution String (if this regular
7f561c08de6b Initial load duke parents: diff changeset	1563	* expression object doesn't match at any position, the original String is returned
7f561c08de6b Initial load duke parents: diff changeset	1564	* unchanged).
7f561c08de6b Initial load duke parents: diff changeset	1565	*/
7f561c08de6b Initial load duke parents: diff changeset	1566	public String subst(String substituteIn, String substitution)
7f561c08de6b Initial load duke parents: diff changeset	1567	{
7f561c08de6b Initial load duke parents: diff changeset	1568	return subst(substituteIn, substitution, REPLACE_ALL);
7f561c08de6b Initial load duke parents: diff changeset	1569	}
7f561c08de6b Initial load duke parents: diff changeset	1570
7f561c08de6b Initial load duke parents: diff changeset	1571	/**
7f561c08de6b Initial load duke parents: diff changeset	1572	* Substitutes a string for this regular expression in another string.
7f561c08de6b Initial load duke parents: diff changeset	1573	* This method works like the Perl function of the same name.
7f561c08de6b Initial load duke parents: diff changeset	1574	* Given a regular expression of "a*b", a String to substituteIn of
7f561c08de6b Initial load duke parents: diff changeset	1575	* "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
7f561c08de6b Initial load duke parents: diff changeset	1576	* resulting String returned by subst would be "-foo-garply-wacky-".
7f561c08de6b Initial load duke parents: diff changeset	1577	* <p>
7f561c08de6b Initial load duke parents: diff changeset	1578	* It is also possible to reference the contents of a parenthesized expression
7f561c08de6b Initial load duke parents: diff changeset	1579	* with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
7f561c08de6b Initial load duke parents: diff changeset	1580	* a String to substituteIn of "visit us: http://www.apache.org!" and the
7f561c08de6b Initial load duke parents: diff changeset	1581	* substitution String "<a href=\"$0\">$0</a>", the resulting String
7f561c08de6b Initial load duke parents: diff changeset	1582	* returned by subst would be
7f561c08de6b Initial load duke parents: diff changeset	1583	* "visit us: <a href=\"http://www.apache.org\">http://www.apache.org</a>!".
7f561c08de6b Initial load duke parents: diff changeset	1584	* <p>
7f561c08de6b Initial load duke parents: diff changeset	1585	* <i>Note:</i> $0 represents the whole match.
7f561c08de6b Initial load duke parents: diff changeset	1586	*
7f561c08de6b Initial load duke parents: diff changeset	1587	* @param substituteIn String to substitute within
7f561c08de6b Initial load duke parents: diff changeset	1588	* @param substitution String to substitute for matches of this regular expression
7f561c08de6b Initial load duke parents: diff changeset	1589	* @param flags One or more bitwise flags from REPLACE_*. If the REPLACE_FIRSTONLY
7f561c08de6b Initial load duke parents: diff changeset	1590	* flag bit is set, only the first occurrence of this regular expression is replaced.
7f561c08de6b Initial load duke parents: diff changeset	1591	* If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
7f561c08de6b Initial load duke parents: diff changeset	1592	* replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
7f561c08de6b Initial load duke parents: diff changeset	1593	* be processed.
7f561c08de6b Initial load duke parents: diff changeset	1594	* @return The string substituteIn with zero or more occurrences of the current
7f561c08de6b Initial load duke parents: diff changeset	1595	* regular expression replaced with the substitution String (if this regular
7f561c08de6b Initial load duke parents: diff changeset	1596	* expression object doesn't match at any position, the original String is returned
7f561c08de6b Initial load duke parents: diff changeset	1597	* unchanged).
7f561c08de6b Initial load duke parents: diff changeset	1598	*/
7f561c08de6b Initial load duke parents: diff changeset	1599	public String subst(String substituteIn, String substitution, int flags)
7f561c08de6b Initial load duke parents: diff changeset	1600	{
7f561c08de6b Initial load duke parents: diff changeset	1601	// String to return
7f561c08de6b Initial load duke parents: diff changeset	1602	StringBuffer ret = new StringBuffer();
7f561c08de6b Initial load duke parents: diff changeset	1603
7f561c08de6b Initial load duke parents: diff changeset	1604	// Start at position 0 and search the whole string
7f561c08de6b Initial load duke parents: diff changeset	1605	int pos = 0;
7f561c08de6b Initial load duke parents: diff changeset	1606	int len = substituteIn.length();
7f561c08de6b Initial load duke parents: diff changeset	1607
7f561c08de6b Initial load duke parents: diff changeset	1608	// Try a match at each position
7f561c08de6b Initial load duke parents: diff changeset	1609	while (pos < len && match(substituteIn, pos))
7f561c08de6b Initial load duke parents: diff changeset	1610	{
7f561c08de6b Initial load duke parents: diff changeset	1611	// Append string before match
7f561c08de6b Initial load duke parents: diff changeset	1612	ret.append(substituteIn.substring(pos, getParenStart(0)));
7f561c08de6b Initial load duke parents: diff changeset	1613
7f561c08de6b Initial load duke parents: diff changeset	1614	if ((flags & REPLACE_BACKREFERENCES) != 0)
7f561c08de6b Initial load duke parents: diff changeset	1615	{
7f561c08de6b Initial load duke parents: diff changeset	1616	// Process backreferences
7f561c08de6b Initial load duke parents: diff changeset	1617	int lCurrentPosition = 0;
7f561c08de6b Initial load duke parents: diff changeset	1618	int lLastPosition = -2;
7f561c08de6b Initial load duke parents: diff changeset	1619	int lLength = substitution.length();
7f561c08de6b Initial load duke parents: diff changeset	1620	boolean bAddedPrefix = false;
7f561c08de6b Initial load duke parents: diff changeset	1621
7f561c08de6b Initial load duke parents: diff changeset	1622	while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
7f561c08de6b Initial load duke parents: diff changeset	1623	{
7f561c08de6b Initial load duke parents: diff changeset	1624	if ((lCurrentPosition == 0 \|\| substitution.charAt(lCurrentPosition - 1) != '\\')
7f561c08de6b Initial load duke parents: diff changeset	1625	&& lCurrentPosition+1 < lLength)
7f561c08de6b Initial load duke parents: diff changeset	1626	{
7f561c08de6b Initial load duke parents: diff changeset	1627	char c = substitution.charAt(lCurrentPosition + 1);
7f561c08de6b Initial load duke parents: diff changeset	1628	if (c >= '0' && c <= '9')
7f561c08de6b Initial load duke parents: diff changeset	1629	{
7f561c08de6b Initial load duke parents: diff changeset	1630	if (bAddedPrefix == false)
7f561c08de6b Initial load duke parents: diff changeset	1631	{
7f561c08de6b Initial load duke parents: diff changeset	1632	// Append everything between the beginning of the
7f561c08de6b Initial load duke parents: diff changeset	1633	// substitution string and the current $ sign
7f561c08de6b Initial load duke parents: diff changeset	1634	ret.append(substitution.substring(0, lCurrentPosition));
7f561c08de6b Initial load duke parents: diff changeset	1635	bAddedPrefix = true;
7f561c08de6b Initial load duke parents: diff changeset	1636	}
7f561c08de6b Initial load duke parents: diff changeset	1637	else
7f561c08de6b Initial load duke parents: diff changeset	1638	{
7f561c08de6b Initial load duke parents: diff changeset	1639	// Append everything between the last and the current $ sign
7f561c08de6b Initial load duke parents: diff changeset	1640	ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
7f561c08de6b Initial load duke parents: diff changeset	1641	}
7f561c08de6b Initial load duke parents: diff changeset	1642
7f561c08de6b Initial load duke parents: diff changeset	1643	// Append the parenthesized expression
7f561c08de6b Initial load duke parents: diff changeset	1644	// Note: if a parenthesized expression of the requested
7f561c08de6b Initial load duke parents: diff changeset	1645	// index is not available "null" is added to the string
7f561c08de6b Initial load duke parents: diff changeset	1646	ret.append(getParen(c - '0'));
7f561c08de6b Initial load duke parents: diff changeset	1647	lLastPosition = lCurrentPosition;
7f561c08de6b Initial load duke parents: diff changeset	1648	}
7f561c08de6b Initial load duke parents: diff changeset	1649	}
7f561c08de6b Initial load duke parents: diff changeset	1650
7f561c08de6b Initial load duke parents: diff changeset	1651	// Move forward, skipping past match
7f561c08de6b Initial load duke parents: diff changeset	1652	lCurrentPosition++;
7f561c08de6b Initial load duke parents: diff changeset	1653	}
7f561c08de6b Initial load duke parents: diff changeset	1654
7f561c08de6b Initial load duke parents: diff changeset	1655	// Append everything after the last $ sign
7f561c08de6b Initial load duke parents: diff changeset	1656	ret.append(substitution.substring(lLastPosition + 2, lLength));
7f561c08de6b Initial load duke parents: diff changeset	1657	}
7f561c08de6b Initial load duke parents: diff changeset	1658	else
7f561c08de6b Initial load duke parents: diff changeset	1659	{
7f561c08de6b Initial load duke parents: diff changeset	1660	// Append substitution without processing backreferences
7f561c08de6b Initial load duke parents: diff changeset	1661	ret.append(substitution);
7f561c08de6b Initial load duke parents: diff changeset	1662	}
7f561c08de6b Initial load duke parents: diff changeset	1663
7f561c08de6b Initial load duke parents: diff changeset	1664	// Move forward, skipping past match
7f561c08de6b Initial load duke parents: diff changeset	1665	int newpos = getParenEnd(0);
7f561c08de6b Initial load duke parents: diff changeset	1666
7f561c08de6b Initial load duke parents: diff changeset	1667	// We always want to make progress!
7f561c08de6b Initial load duke parents: diff changeset	1668	if (newpos == pos)
7f561c08de6b Initial load duke parents: diff changeset	1669	{
7f561c08de6b Initial load duke parents: diff changeset	1670	newpos++;
7f561c08de6b Initial load duke parents: diff changeset	1671	}
7f561c08de6b Initial load duke parents: diff changeset	1672
7f561c08de6b Initial load duke parents: diff changeset	1673	// Try new position
7f561c08de6b Initial load duke parents: diff changeset	1674	pos = newpos;
7f561c08de6b Initial load duke parents: diff changeset	1675
7f561c08de6b Initial load duke parents: diff changeset	1676	// Break out if we're only supposed to replace one occurrence
7f561c08de6b Initial load duke parents: diff changeset	1677	if ((flags & REPLACE_FIRSTONLY) != 0)
7f561c08de6b Initial load duke parents: diff changeset	1678	{
7f561c08de6b Initial load duke parents: diff changeset	1679	break;
7f561c08de6b Initial load duke parents: diff changeset	1680	}
7f561c08de6b Initial load duke parents: diff changeset	1681	}
7f561c08de6b Initial load duke parents: diff changeset	1682
7f561c08de6b Initial load duke parents: diff changeset	1683	// If there's remaining input, append it
7f561c08de6b Initial load duke parents: diff changeset	1684	if (pos < len)
7f561c08de6b Initial load duke parents: diff changeset	1685	{
7f561c08de6b Initial load duke parents: diff changeset	1686	ret.append(substituteIn.substring(pos));
7f561c08de6b Initial load duke parents: diff changeset	1687	}
7f561c08de6b Initial load duke parents: diff changeset	1688
7f561c08de6b Initial load duke parents: diff changeset	1689	// Return string buffer as string
7f561c08de6b Initial load duke parents: diff changeset	1690	return ret.toString();
7f561c08de6b Initial load duke parents: diff changeset	1691	}
7f561c08de6b Initial load duke parents: diff changeset	1692
7f561c08de6b Initial load duke parents: diff changeset	1693	/**
7f561c08de6b Initial load duke parents: diff changeset	1694	* Returns an array of Strings, whose toString representation matches a regular
7f561c08de6b Initial load duke parents: diff changeset	1695	* expression. This method works like the Perl function of the same name. Given
7f561c08de6b Initial load duke parents: diff changeset	1696	* a regular expression of "a*b" and an array of String objects of [foo, aab, zzz,
7f561c08de6b Initial load duke parents: diff changeset	1697	* aaaab], the array of Strings returned by grep would be [aab, aaaab].
7f561c08de6b Initial load duke parents: diff changeset	1698	*
7f561c08de6b Initial load duke parents: diff changeset	1699	* @param search Array of Objects to search
7f561c08de6b Initial load duke parents: diff changeset	1700	* @return Array of Strings whose toString() value matches this regular expression.
7f561c08de6b Initial load duke parents: diff changeset	1701	*/
7f561c08de6b Initial load duke parents: diff changeset	1702	public String[] grep(Object[] search)
7f561c08de6b Initial load duke parents: diff changeset	1703	{
7f561c08de6b Initial load duke parents: diff changeset	1704	// Create new vector to hold return items
7f561c08de6b Initial load duke parents: diff changeset	1705	Vector v = new Vector();
7f561c08de6b Initial load duke parents: diff changeset	1706
7f561c08de6b Initial load duke parents: diff changeset	1707	// Traverse array of objects
7f561c08de6b Initial load duke parents: diff changeset	1708	for (int i = 0; i < search.length; i++)
7f561c08de6b Initial load duke parents: diff changeset	1709	{
7f561c08de6b Initial load duke parents: diff changeset	1710	// Get next object as a string
7f561c08de6b Initial load duke parents: diff changeset	1711	String s = search[i].toString();
7f561c08de6b Initial load duke parents: diff changeset	1712
7f561c08de6b Initial load duke parents: diff changeset	1713	// If it matches this regexp, add it to the list
7f561c08de6b Initial load duke parents: diff changeset	1714	if (match(s))
7f561c08de6b Initial load duke parents: diff changeset	1715	{
7f561c08de6b Initial load duke parents: diff changeset	1716	v.addElement(s);
7f561c08de6b Initial load duke parents: diff changeset	1717	}
7f561c08de6b Initial load duke parents: diff changeset	1718	}
7f561c08de6b Initial load duke parents: diff changeset	1719
7f561c08de6b Initial load duke parents: diff changeset	1720	// Return vector as an array of strings
7f561c08de6b Initial load duke parents: diff changeset	1721	String[] ret = new String[v.size()];
7f561c08de6b Initial load duke parents: diff changeset	1722	v.copyInto(ret);
7f561c08de6b Initial load duke parents: diff changeset	1723	return ret;
7f561c08de6b Initial load duke parents: diff changeset	1724	}
7f561c08de6b Initial load duke parents: diff changeset	1725
7f561c08de6b Initial load duke parents: diff changeset	1726	/**
7f561c08de6b Initial load duke parents: diff changeset	1727	* @return true if character at i-th position in the <code>search</code> string is a newline
7f561c08de6b Initial load duke parents: diff changeset	1728	*/
7f561c08de6b Initial load duke parents: diff changeset	1729	private boolean isNewline(int i)
7f561c08de6b Initial load duke parents: diff changeset	1730	{
7f561c08de6b Initial load duke parents: diff changeset	1731	char nextChar = search.charAt(i);
7f561c08de6b Initial load duke parents: diff changeset	1732
7f561c08de6b Initial load duke parents: diff changeset	1733	if (nextChar == '\n' \|\| nextChar == '\r' \|\| nextChar == '\u0085'
7f561c08de6b Initial load duke parents: diff changeset	1734	\|\| nextChar == '\u2028' \|\| nextChar == '\u2029')
7f561c08de6b Initial load duke parents: diff changeset	1735	{
7f561c08de6b Initial load duke parents: diff changeset	1736	return true;
7f561c08de6b Initial load duke parents: diff changeset	1737	}
7f561c08de6b Initial load duke parents: diff changeset	1738
7f561c08de6b Initial load duke parents: diff changeset	1739	return false;
7f561c08de6b Initial load duke parents: diff changeset	1740	}
7f561c08de6b Initial load duke parents: diff changeset	1741
7f561c08de6b Initial load duke parents: diff changeset	1742	/**
7f561c08de6b Initial load duke parents: diff changeset	1743	* Compares two characters.
7f561c08de6b Initial load duke parents: diff changeset	1744	*
7f561c08de6b Initial load duke parents: diff changeset	1745	* @param c1 first character to compare.
7f561c08de6b Initial load duke parents: diff changeset	1746	* @param c2 second character to compare.
7f561c08de6b Initial load duke parents: diff changeset	1747	* @param caseIndependent whether comparision is case insensitive or not.
7f561c08de6b Initial load duke parents: diff changeset	1748	* @return negative, 0, or positive integer as the first character
7f561c08de6b Initial load duke parents: diff changeset	1749	* less than, equal to, or greater then the second.
7f561c08de6b Initial load duke parents: diff changeset	1750	*/
7f561c08de6b Initial load duke parents: diff changeset	1751	private int compareChars(char c1, char c2, boolean caseIndependent)
7f561c08de6b Initial load duke parents: diff changeset	1752	{
7f561c08de6b Initial load duke parents: diff changeset	1753	if (caseIndependent)
7f561c08de6b Initial load duke parents: diff changeset	1754	{
7f561c08de6b Initial load duke parents: diff changeset	1755	c1 = Character.toLowerCase(c1);
7f561c08de6b Initial load duke parents: diff changeset	1756	c2 = Character.toLowerCase(c2);
7f561c08de6b Initial load duke parents: diff changeset	1757	}
7f561c08de6b Initial load duke parents: diff changeset	1758	return ((int)c1 - (int)c2);
7f561c08de6b Initial load duke parents: diff changeset	1759	}
7f561c08de6b Initial load duke parents: diff changeset	1760	}

author	mchung
	Mon, 26 Nov 2012 22:49:06 -0800
changeset 16098	9001e536ab4e
parent 12457	c348e06f0e82
permissions	-rw-r--r--