--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/misc/RegexpPool.java Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,319 @@
+/*
+ * Copyright 1995-2001 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.misc;
+import java.io.*;
+
+/**
+ * A class to represent a pool of regular expressions. A string
+ * can be matched against the whole pool all at once. It is much
+ * faster than doing individual regular expression matches one-by-one.
+ *
+ * @see java.misc.RegexpTarget
+ * @author James Gosling
+ */
+
+public class RegexpPool {
+ private RegexpNode prefixMachine = new RegexpNode();
+ private RegexpNode suffixMachine = new RegexpNode();
+ private static final int BIG = 0x7FFFFFFF;
+ private int lastDepth = BIG;
+
+ public RegexpPool () {
+ }
+
+ /**
+ * Add a regular expression to the pool of regular expressions.
+ * @param re The regular expression to add to the pool.
+ For now, only handles strings that either begin or end with
+ a '*'.
+ * @param ret The object to be returned when this regular expression is
+ matched. If ret is an instance of the RegexpTarget class, ret.found
+ is called with the string fragment that matched the '*' as its
+ parameter.
+ * @exception REException error
+ */
+ public void add(String re, Object ret) throws REException {
+ add(re, ret, false);
+ }
+
+ /**
+ * Replace the target for the regular expression with a different
+ * target.
+ *
+ * @param re The regular expression to be replaced in the pool.
+ * For now, only handles strings that either begin or end with
+ * a '*'.
+ * @param ret The object to be returned when this regular expression is
+ * matched. If ret is an instance of the RegexpTarget class, ret.found
+ * is called with the string fragment that matched the '*' as its
+ * parameter.
+ */
+ public void replace(String re, Object ret) {
+ try {
+ add(re, ret, true);
+ } catch(Exception e) {
+ // should never occur if replace is true
+ }
+ }
+
+ /**
+ * Delete the regular expression and its target.
+ * @param re The regular expression to be deleted from the pool.
+ * must begin or end with a '*'
+ * @return target - the old target.
+ */
+ public Object delete(String re) {
+ Object o = null;
+ RegexpNode p = prefixMachine;
+ RegexpNode best = p;
+ int len = re.length() - 1;
+ int i;
+ boolean prefix = true;
+
+ if (!re.startsWith("*") ||
+ !re.endsWith("*"))
+ len++;
+
+ if (len <= 0)
+ return null;
+
+ /* March forward through the prefix machine */
+ for (i = 0; p != null; i++) {
+ if (p.result != null && p.depth < BIG
+ && (!p.exact || i == len)) {
+ best = p;
+ }
+ if (i >= len)
+ break;
+ p = p.find(re.charAt(i));
+ }
+
+ /* march backward through the suffix machine */
+ p = suffixMachine;
+ for (i = len; --i >= 0 && p != null;) {
+ if (p.result != null && p.depth < BIG) {
+ prefix = false;
+ best = p;
+ }
+ p = p.find(re.charAt(i));
+ }
+
+ // delete only if there is an exact match
+ if (prefix) {
+ if (re.equals(best.re)) {
+ o = best.result;
+ best.result = null;
+
+ }
+ } else {
+ if (re.equals(best.re)) {
+ o = best.result;
+ best.result = null;
+ }
+ }
+ return o;
+ }
+
+ /** Search for a match to a string & return the object associated
+ with it with the match. When multiple regular expressions
+ would match the string, the best match is returned first.
+ The next best match is returned the next time matchNext is
+ called.
+ @param s The string to match against the regular expressions
+ in the pool.
+ @return null on failure, otherwise the object associated with
+ the regular expression when it was added to the pool.
+ If the object is an instance of RegexpTarget, then
+ the return value is the result from calling
+ return.found(string_that_matched_wildcard).
+ */
+ public Object match(String s) {
+ return matchAfter(s, BIG);
+ }
+
+ /** Identical to match except that it will only find matches to
+ regular expressions that were added to the pool <i>after</i>
+ the last regular expression that matched in the last call
+ to match() or matchNext() */
+ public Object matchNext(String s) {
+ return matchAfter(s, lastDepth);
+ }
+
+ private void add(String re, Object ret, boolean replace) throws REException {
+ int len = re.length();
+ RegexpNode p;
+ if (re.charAt(0) == '*') {
+ p = suffixMachine;
+ while (len > 1)
+ p = p.add(re.charAt(--len));
+ } else {
+ boolean exact = false;
+ if (re.charAt(len - 1) == '*')
+ len--;
+ else
+ exact = true;
+ p = prefixMachine;
+ for (int i = 0; i < len; i++)
+ p = p.add(re.charAt(i));
+ p.exact = exact;
+ }
+
+ if (p.result != null && !replace)
+ throw new REException(re + " is a duplicate");
+
+ p.re = re;
+ p.result = ret;
+ }
+
+ private Object matchAfter(String s, int lastMatchDepth) {
+ RegexpNode p = prefixMachine;
+ RegexpNode best = p;
+ int bst = 0;
+ int bend = 0;
+ int len = s.length();
+ int i;
+ if (len <= 0)
+ return null;
+ /* March forward through the prefix machine */
+ for (i = 0; p != null; i++) {
+ if (p.result != null && p.depth < lastMatchDepth
+ && (!p.exact || i == len)) {
+ lastDepth = p.depth;
+ best = p;
+ bst = i;
+ bend = len;
+ }
+ if (i >= len)
+ break;
+ p = p.find(s.charAt(i));
+ }
+ /* march backward through the suffix machine */
+ p = suffixMachine;
+ for (i = len; --i >= 0 && p != null;) {
+ if (p.result != null && p.depth < lastMatchDepth) {
+ lastDepth = p.depth;
+ best = p;
+ bst = 0;
+ bend = i+1;
+ }
+ p = p.find(s.charAt(i));
+ }
+ Object o = best.result;
+ if (o != null && o instanceof RegexpTarget)
+ o = ((RegexpTarget) o).found(s.substring(bst, bend));
+ return o;
+ }
+
+ /** Resets the pool so that the next call to matchNext looks
+ at all regular expressions in the pool. match(s); is equivalent
+ to reset(); matchNext(s);
+ <p><b>Multithreading note:</b> reset/nextMatch leave state in the
+ regular expression pool. If multiple threads could be using this
+ pool this way, they should be syncronized to avoid race hazards.
+ match() was done in such a way that there are no such race
+ hazards: multiple threads can be matching in the same pool
+ simultaneously. */
+ public void reset() {
+ lastDepth = BIG;
+ }
+
+ /** Print this pool to standard output */
+ public void print(PrintStream out) {
+ out.print("Regexp pool:\n");
+ if (suffixMachine.firstchild != null) {
+ out.print(" Suffix machine: ");
+ suffixMachine.firstchild.print(out);
+ out.print("\n");
+ }
+ if (prefixMachine.firstchild != null) {
+ out.print(" Prefix machine: ");
+ prefixMachine.firstchild.print(out);
+ out.print("\n");
+ }
+ }
+
+}
+
+/* A node in a regular expression finite state machine. */
+class RegexpNode {
+ char c;
+ RegexpNode firstchild;
+ RegexpNode nextsibling;
+ int depth;
+ boolean exact;
+ Object result;
+ String re = null;
+
+ RegexpNode () {
+ c = '#';
+ depth = 0;
+ }
+ RegexpNode (char C, int depth) {
+ c = C;
+ this.depth = depth;
+ }
+ RegexpNode add(char C) {
+ RegexpNode p = firstchild;
+ if (p == null)
+ p = new RegexpNode (C, depth+1);
+ else {
+ while (p != null)
+ if (p.c == C)
+ return p;
+ else
+ p = p.nextsibling;
+ p = new RegexpNode (C, depth+1);
+ p.nextsibling = firstchild;
+ }
+ firstchild = p;
+ return p;
+ }
+ RegexpNode find(char C) {
+ for (RegexpNode p = firstchild;
+ p != null;
+ p = p.nextsibling)
+ if (p.c == C)
+ return p;
+ return null;
+ }
+ void print(PrintStream out) {
+ if (nextsibling != null) {
+ RegexpNode p = this;
+ out.print("(");
+ while (p != null) {
+ out.write(p.c);
+ if (p.firstchild != null)
+ p.firstchild.print(out);
+ p = p.nextsibling;
+ out.write(p != null ? '|' : ')');
+ }
+ } else {
+ out.write(c);
+ if (firstchild != null)
+ firstchild.print(out);
+ }
+ }
+}