jaxp/src/com/sun/org/apache/regexp/internal/REDebugCompiler.java
author ehelin
Mon, 31 Mar 2014 14:02:40 +0200
changeset 23544 e6362a5ba011
parent 12457 c348e06f0e82
permissions -rw-r--r--
8033251: Use DWARF debug symbols for Linux 32-bit as default Reviewed-by: dcubed, dholmes, coleenp

/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.regexp.internal;

import java.io.PrintWriter;
import java.util.Hashtable;

/**
 * A subclass of RECompiler which can dump a regular expression program
 * for debugging purposes.
 *
 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
 */
public class REDebugCompiler extends RECompiler
{
    /**
     * Mapping from opcodes to descriptive strings
     */
    static Hashtable hashOpcode = new Hashtable();
    static
    {
        hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR),    "OP_RELUCTANTSTAR");
        hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS),    "OP_RELUCTANTPLUS");
        hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE),   "OP_RELUCTANTMAYBE");
        hashOpcode.put(new Integer(RE.OP_END),              "OP_END");
        hashOpcode.put(new Integer(RE.OP_BOL),              "OP_BOL");
        hashOpcode.put(new Integer(RE.OP_EOL),              "OP_EOL");
        hashOpcode.put(new Integer(RE.OP_ANY),              "OP_ANY");
        hashOpcode.put(new Integer(RE.OP_ANYOF),            "OP_ANYOF");
        hashOpcode.put(new Integer(RE.OP_BRANCH),           "OP_BRANCH");
        hashOpcode.put(new Integer(RE.OP_ATOM),             "OP_ATOM");
        hashOpcode.put(new Integer(RE.OP_STAR),             "OP_STAR");
        hashOpcode.put(new Integer(RE.OP_PLUS),             "OP_PLUS");
        hashOpcode.put(new Integer(RE.OP_MAYBE),            "OP_MAYBE");
        hashOpcode.put(new Integer(RE.OP_NOTHING),          "OP_NOTHING");
        hashOpcode.put(new Integer(RE.OP_GOTO),             "OP_GOTO");
        hashOpcode.put(new Integer(RE.OP_ESCAPE),           "OP_ESCAPE");
        hashOpcode.put(new Integer(RE.OP_OPEN),             "OP_OPEN");
        hashOpcode.put(new Integer(RE.OP_CLOSE),            "OP_CLOSE");
        hashOpcode.put(new Integer(RE.OP_BACKREF),          "OP_BACKREF");
        hashOpcode.put(new Integer(RE.OP_POSIXCLASS),       "OP_POSIXCLASS");
        hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER),      "OP_OPEN_CLUSTER");
        hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER),      "OP_CLOSE_CLUSTER");
    }

    /**
     * Returns a descriptive string for an opcode.
     * @param opcode Opcode to convert to a string
     * @return Description of opcode
     */
    String opcodeToString(char opcode)
    {
        // Get string for opcode
        String ret =(String)hashOpcode.get(new Integer(opcode));

        // Just in case we have a corrupt program
        if (ret == null)
        {
            ret = "OP_????";
        }
        return ret;
    }

    /**
     * Return a string describing a (possibly unprintable) character.
     * @param c Character to convert to a printable representation
     * @return String representation of character
     */
    String charToString(char c)
    {
        // If it's unprintable, convert to '\###'
        if (c < ' ' || c > 127)
        {
            return "\\" + (int)c;
        }

        // Return the character as a string
        return String.valueOf(c);
    }

    /**
     * Returns a descriptive string for a node in a regular expression program.
     * @param node Node to describe
     * @return Description of node
     */
    String nodeToString(int node)
    {
        // Get opcode and opdata for node
        char opcode =      instruction[node + RE.offsetOpcode];
        int opdata  = (int)instruction[node + RE.offsetOpdata];

        // Return opcode as a string and opdata value
        return opcodeToString(opcode) + ", opdata = " + opdata;
    }

    /**
     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
     * pointer is initialized to 0.
     * @param opcode Opcode for new node
     * @param opdata Opdata for new node (only the low 16 bits are currently used)
     * @param insertAt Index at which to insert the new node in the program * /
    void nodeInsert(char opcode, int opdata, int insertAt) {
        System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
        PrintWriter writer = new PrintWriter( System.out );
        dumpProgram( writer );
        super.nodeInsert( opcode, opdata, insertAt );
        System.out.println( "====< " );
        dumpProgram( writer );
        writer.flush();
    }/**/


    /**
    * Appends a node to the end of a node chain
    * @param node Start of node chain to traverse
    * @param pointTo Node to have the tail of the chain point to * /
    void setNextOfEnd(int node, int pointTo) {
        System.out.println( "====> " + node + " " + pointTo );
        PrintWriter writer = new PrintWriter( System.out );
        dumpProgram( writer );
        super.setNextOfEnd( node, pointTo );
        System.out.println( "====< " );
        dumpProgram( writer );
        writer.flush();
    }/**/


    /**
     * Dumps the current program to a PrintWriter
     * @param p PrintWriter for program dump output
     */
    public void dumpProgram(PrintWriter p)
    {
        // Loop through the whole program
        for (int i = 0; i < lenInstruction; )
        {
            // Get opcode, opdata and next fields of current program node
            char opcode =        instruction[i + RE.offsetOpcode];
            char opdata =        instruction[i + RE.offsetOpdata];
            short next  = (short)instruction[i + RE.offsetNext];

            // Display the current program node
            p.print(i + ". " + nodeToString(i) + ", next = ");

            // If there's no next, say 'none', otherwise give absolute index of next node
            if (next == 0)
            {
                p.print("none");
            }
            else
            {
                p.print(i + next);
            }

            // Move past node
            i += RE.nodeSize;

            // If character class
            if (opcode == RE.OP_ANYOF)
            {
                // Opening bracket for start of char class
                p.print(", [");

                // Show each range in the char class
                int rangeCount = opdata;
                for (int r = 0; r < rangeCount; r++)
                {
                    // Get first and last chars in range
                    char charFirst = instruction[i++];
                    char charLast  = instruction[i++];

                    // Print range as X-Y, unless range encompasses only one char
                    if (charFirst == charLast)
                    {
                        p.print(charToString(charFirst));
                    }
                    else
                    {
                        p.print(charToString(charFirst) + "-" + charToString(charLast));
                    }
                }

                // Annotate the end of the char class
                p.print("]");
            }

            // If atom
            if (opcode == RE.OP_ATOM)
            {
                // Open quote
                p.print(", \"");

                // Print each character in the atom
                for (int len = opdata; len-- != 0; )
                {
                    p.print(charToString(instruction[i++]));
                }

                // Close quote
                p.print("\"");
            }

            // Print a newline
            p.println("");
        }
    }
}