jdk/make/src/classes/build/tools/generatebreakiteratordata/RuleBasedBreakIteratorBuilder.java
author rfield
Mon, 13 Feb 2017 08:50:26 -0800
changeset 43856 fcdebb803c62
parent 23010 6dadb192ad81
permissions -rw-r--r--
8174797: jshell tool: invalid module path crashes tool 8174796: jshell tool: regression: user home (tilde) not translated Reviewed-by: jlahoda
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
23010
6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013
lana
parents: 21805
diff changeset
     2
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package build.tools.generatebreakiteratordata;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import java.util.Enumeration;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import java.util.Hashtable;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
import java.util.Stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import java.util.Vector;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import java.util.zip.CRC32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import sun.text.CompactByteArray;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This class has the job of constructing a RuleBasedBreakIterator from a
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * textual description. A Builder is constructed by GenerateBreakIteratorData,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 * which uses it to construct the iterator itself and then throws it away.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 * <p>The construction logic is separated out into its own class for two primary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * reasons:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * <ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * <li>The construction logic is quite sophisticated and large. Separating
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * it out into its own class means the code must only be loaded into memory
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * while a RuleBasedBreakIterator is being constructed, and can be purged after
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * that.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * <li>There is a fair amount of state that must be maintained throughout the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * construction process that is not needed by the iterator after construction.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * Separating this state out into another class prevents all of the functions
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * that construct the iterator from having to have really long parameter lists,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * (hopefully) contributing to readability and maintainability.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * It'd be really nice if this could be an independent class rather than an
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * inner class, because that would shorten the source file considerably, but
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * making Builder an inner class of RuleBasedBreakIterator allows it direct
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * access to RuleBasedBreakIterator's private members, which saves us from
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * having to provide some kind of "back door" to the Builder class that could
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * then also be used by other classes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
class RuleBasedBreakIteratorBuilder {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
     * A token used as a character-category value to identify ignore characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
    protected static final byte IGNORE = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
     * Tables that indexes from character values to character category numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
    private CompactByteArray charCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
    private SupplementaryCharacterData supplementaryCharCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
     * The table of state transitions used for forward iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
    private short[] stateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
     * The table of state transitions used to sync up the iterator with the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
     * text in backwards and random-access iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    private short[] backwardsStateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
     * A list of flags indicating which states in the state table are accepting
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
     * ("end") states
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    private boolean[] endStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     * A list of flags indicating which states in the state table are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
     * lookahead states (states which turn lookahead on and off)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    private boolean[] lookaheadStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * A table for additional data. May be used by a subclass of
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     * RuleBasedBreakIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    private byte[] additionalData = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
     * The number of character categories (and, thus, the number of columns in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     * the state tables)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    private int numCategories;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     * A temporary holding place used for calculating the character categories.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     * This object contains CharSet objects.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   113
    protected Vector<CharSet> categories = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     * A table used to map parts of regexp text to lists of character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
     * categories, rather than having to figure them out from scratch each time
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   119
    protected Hashtable<String, Object> expressions = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
     * A temporary holding place for the list of ignore characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
    protected CharSet ignoreChars = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
     * A temporary holding place where the forward state table is built
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   129
    protected Vector<short[]> tempStateTable = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
     * A list of all the states that have to be filled in with transitions to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
     * the next state that is created.  Used when building the state table from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
     * the regular expressions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   136
    protected Vector<Integer> decisionPointList = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
     * A stack for holding decision point lists.  This is used to handle nested
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
     * parentheses and braces in regexps.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   142
    protected Stack<Vector<Integer>> decisionPointStack = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
     * A list of states that loop back on themselves.  Used to handle .*?
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   147
    protected Vector<Integer> loopingStates = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
     * Looping states actually have to be backfilled later in the process
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
     * than everything else.  This is where a the list of states to backfill
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
     * is accumulated.  This is also used to handle .*?
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   154
    protected Vector<Integer> statesToBackfill = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
     * A list mapping pairs of state numbers for states that are to be combined
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     * to the state number of the state representing their combination.  Used
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     * in the process of making the state table deterministic to prevent
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
     * infinite recursion.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   162
    protected Vector<int[]> mergeList = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
     * A flag that is used to indicate when the list of looping states can
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
     * be reset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
    protected boolean clearLoopingStates = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
     * A bit mask used to indicate a bit in the table's flags column that marks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
     * a state as an accepting state.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
    protected static final int END_STATE_FLAG = 0x8000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
     * A bit mask used to indicate a bit in the table's flags column that marks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
     * a state as one the builder shouldn't loop to any looping states
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
    protected static final int DONT_LOOP_FLAG = 0x4000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
     * A bit mask used to indicate a bit in the table's flags column that marks
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
     * a state as a lookahead state.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
    protected static final int LOOKAHEAD_STATE_FLAG = 0x2000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
     * A bit mask representing the union of the mask values listed above.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
     * Used for clearing or masking off the flag bits.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
    protected static final int ALL_FLAGS = END_STATE_FLAG
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
                                         | LOOKAHEAD_STATE_FLAG
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
                                         | DONT_LOOP_FLAG;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
     * This is the main function for setting up the BreakIterator's tables. It
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
     * just vectors different parts of the job off to other functions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
    public RuleBasedBreakIteratorBuilder(String description) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   201
        Vector<String> tempRuleList = buildRuleList(description);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
        buildCharCategories(tempRuleList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
        buildStateTable(tempRuleList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
        buildBackwardsStateTable(tempRuleList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
     * Thus function has three main purposes:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
     * <ul><li>Perform general syntax checking on the description, so the rest
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
     * of the build code can assume that it's parsing a legal description.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
     * <li>Split the description into separate rules
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
     * <li>Perform variable-name substitutions (so that no one else sees
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
     * variable names)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
     * </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   216
    private Vector<String> buildRuleList(String description) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        // invariants:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        // - parentheses must be balanced: ()[]{}<>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
        // - nothing can be nested inside <>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
        // - nothing can be nested inside [] except more []s
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        // - pairs of ()[]{}<> must not be empty
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
        // - ; can only occur at the outer level
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
        // - | can only appear inside ()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
        // - only one = or / can occur in a single rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
        // - = and / cannot both occur in the same rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
        // - <> can only occur on the left side of a = expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
        //   (because we'll perform substitutions to eliminate them other places)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
        // - the left-hand side of a = expression can only be a single character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
        //   (possibly with \) or text inside <>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
        // - the right-hand side of a = expression must be enclosed in [] or ()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
        // - * may not occur at the beginning of a rule, nor may it follow
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
        //   =, /, (, (, |, }, ;, or *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
        // - ? may only follow *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
        // - the rule list must contain at least one / rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
        // - no rule may be empty
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
        // - all printing characters in the ASCII range except letters and digits
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
        //   are reserved and must be preceded by \
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
        // - ! may only occur at the beginning of a rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        // set up a vector to contain the broken-up description (each entry in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
        // vector is a separate rule) and a stack for keeping track of opening
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
        // punctuation
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   243
        Vector<String> tempRuleList = new Vector<>();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   244
        Stack<Character> parenStack = new Stack<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
        int p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
        int ruleStart = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
        int c = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
        int lastC = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
        int lastOpen = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
        boolean haveEquals = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
        boolean havePipe = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        boolean sawVarName = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        final String charsThatCantPrecedeAsterisk = "=/{(|}*;\u0000";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
        // if the description doesn't end with a semicolon, tack a semicolon onto the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
        if (description.length() != 0 &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
            description.codePointAt(description.length() - 1) != ';') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
            description = description + ";";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
        // for each character, do...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
        while (p < description.length()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
            c = description.codePointAt(p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
                // if the character is a backslash, skip the character that follows it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
                // (it'll get treated as a literal character)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
                case '\\':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
                    ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
                // if the character is opening punctuation, verify that no nesting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
                // rules are broken, and push the character onto the stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
                case '{':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
                case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
                case '[':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
                case '(':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
                    if (lastOpen == '<') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
                        error("Can't nest brackets inside <>", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
                    if (lastOpen == '[' && c != '[') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
                        error("Can't nest anything in [] but []", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
                    // if we see < anywhere except on the left-hand side of =,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
                    // we must be seeing a variable name that was never defined
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
                    if (c == '<' && (haveEquals || havePipe)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
                        error("Unknown variable name", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
                    lastOpen = c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
                    parenStack.push(new Character((char)c));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
                    if (c == '<') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
                        sawVarName = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
                // if the character is closing punctuation, verify that it matches the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
                // last opening punctuation we saw, and that the brackets contain
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
                // something, then pop the stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
                case '}':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
                case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
                case ']':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
                case ')':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
                    char expectedClose = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
                    switch (lastOpen) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
                        case '{':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
                            expectedClose = '}';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
                        case '[':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
                            expectedClose = ']';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
                        case '(':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
                            expectedClose = ')';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
                        case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
                            expectedClose = '>';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
                    if (c != expectedClose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
                        error("Unbalanced parentheses", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
                    if (lastC == lastOpen) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
                        error("Parens don't contain anything", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
                    parenStack.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
                    if (!parenStack.empty()) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   329
                        lastOpen = parenStack.peek().charValue();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
                        lastOpen = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
                // if the character is an asterisk, make sure it occurs in a place
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
                // where an asterisk can legally go
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
                case '*':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
                    if (charsThatCantPrecedeAsterisk.indexOf(lastC) != -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
                        error("Misplaced asterisk", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
                // if the character is a question mark, make sure it follows an asterisk
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
                case '?':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
                    if (lastC != '*') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
                        error("Misplaced ?", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
                // if the character is an equals sign, make sure we haven't seen another
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
                // equals sign or a slash yet
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
                case '=':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
                    if (haveEquals || havePipe) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
                        error("More than one = or / in rule", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
                    haveEquals = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
                // if the character is a slash, make sure we haven't seen another slash
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
                // or an equals sign yet
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
                case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
                    if (haveEquals || havePipe) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
                        error("More than one = or / in rule", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
                    if (sawVarName) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
                        error("Unknown variable name", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
                    havePipe = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
                // if the character is an exclamation point, make sure it occurs only
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
                // at the beginning of a rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
                case '!':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
                    if (lastC != ';' && lastC != '\u0000') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
                        error("! can only occur at the beginning of a rule", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
                // we don't have to do anything special on a period
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
                case '.':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
                // if the character is a syntax character that can only occur
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
                // inside [], make sure that it does in fact only occur inside [].
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
                case '^':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
                case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
                case ':':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
                    if (lastOpen != '[' && lastOpen != '<') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
                        error("Illegal character", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
                // if the character is a semicolon, do the following...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
                case ';':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
                    // make sure the rule contains something and that there are no
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
                    // unbalanced parentheses or brackets
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
                    if (lastC == ';' || lastC == '\u0000') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
                        error("Empty rule", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
                    if (!parenStack.empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
                        error("Unbalanced parenheses", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
                    if (parenStack.empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
                        // if the rule contained an = sign, call processSubstitution()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
                        // to replace the substitution name with the substitution text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
                        // wherever it appears in the description
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
                        if (haveEquals) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
                            description = processSubstitution(description.substring(ruleStart,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
                                            p), description, p + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
                            // otherwise, check to make sure the rule doesn't reference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
                            // any undefined substitutions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
                            if (sawVarName) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
                                error("Unknown variable name", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
                            // then add it to tempRuleList
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
                            tempRuleList.addElement(description.substring(ruleStart, p));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
                        // and reset everything to process the next rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
                        ruleStart = p + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
                        haveEquals = havePipe = sawVarName = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                // if the character is a vertical bar, check to make sure that it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                // occurs inside a () expression and that the character that precedes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
                // it isn't also a vertical bar
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
                case '|':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                    if (lastC == '|') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                        error("Empty alternative", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
                    if (parenStack.empty() || lastOpen != '(') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
                        error("Misplaced |", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
                // if the character is anything else (escaped characters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
                // skipped and don't make it here), it's an error
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
                default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                    if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                        && !Character.isDigit((char)c)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                        error("Illegal character", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                    if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                        ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
            lastC = c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
            ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
        if (tempRuleList.size() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
            error("No valid rules in description", p, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
        return tempRuleList;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
     * This function performs variable-name substitutions.  First it does syntax
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
     * checking on the variable-name definition.  If it's syntactically valid, it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
     * then goes through the remainder of the description and does a simple
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
     * find-and-replace of the variable name with its text.  (The variable text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
     * must be enclosed in either [] or () for this to work.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
    protected String processSubstitution(String substitutionRule, String description,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                    int startPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
        // isolate out the text on either side of the equals sign
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
        String replace;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        String replaceWith;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        int equalPos = substitutionRule.indexOf('=');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
        replace = substitutionRule.substring(0, equalPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
        replaceWith = substitutionRule.substring(equalPos + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
        // check to see whether the substitution name is something we've declared
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        // to be "special".  For RuleBasedBreakIterator itself, this is "<ignore>".
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
        // This function takes care of any extra processing that has to be done
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
        // with "special" substitution names.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
        handleSpecialSubstitution(replace, replaceWith, startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
        // perform various other syntax checks on the rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
        if (replaceWith.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
            error("Nothing on right-hand side of =", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
        if (replace.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
            error("Nothing on left-hand side of =", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
        if (replace.length() == 2 && replace.charAt(0) != '\\') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
            error("Illegal left-hand side for =", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        if (replace.length() >= 3 && replace.charAt(0) != '<' &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
            replace.codePointBefore(equalPos) != '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
            error("Illegal left-hand side for =", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
        if (!(replaceWith.charAt(0) == '[' &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
              replaceWith.charAt(replaceWith.length() - 1) == ']') &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
            !(replaceWith.charAt(0) == '(' &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
              replaceWith.charAt(replaceWith.length() - 1) == ')')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
            error("Illegal right-hand side for =", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        // now go through the rest of the description (which hasn't been broken up
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
        // into separate rules yet) and replace every occurrence of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        // substitution name with the substitution body
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
        StringBuffer result = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
        result.append(description.substring(0, startPos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
        int lastPos = startPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
        int pos = description.indexOf(replace, startPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
        while (pos != -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
            result.append(description.substring(lastPos, pos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
            result.append(replaceWith);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
            lastPos = pos + replace.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
            pos = description.indexOf(replace, lastPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
        result.append(description.substring(lastPos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
        return result.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
     * This function defines a protocol for handling substitution names that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
     * are "special," i.e., that have some property beyond just being
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
     * substitutions.  At the RuleBasedBreakIterator level, we have one
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
     * special substitution name, "<ignore>".  Subclasses can override this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
     * function to add more.  Any special processing that has to go on beyond
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
     * that which is done by the normal substitution-processing code is done
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
     * here.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
    protected void handleSpecialSubstitution(String replace, String replaceWith,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
                int startPos, String description) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
        // if we get a definition for a substitution called "ignore", it defines
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
        // the ignore characters for the iterator.  Check to make sure the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
        // is a [] expression, and if it is, parse it and store the characters off
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
        // to the side.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
        if (replace.equals("<ignore>")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
            if (replaceWith.charAt(0) == '(') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
                error("Ignore group can't be enclosed in (", startPos, description);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
            ignoreChars = CharSet.parseString(replaceWith);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
     * This function builds the character category table.  On entry,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
     * tempRuleList is a vector of break rules that has had variable names substituted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
     * On exit, the charCategoryTable data member has been initialized to hold the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
     * character category table, and tempRuleList's rules have been munged to contain
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
     * character category numbers everywhere a literal character or a [] expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
     * originally occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   555
    @SuppressWarnings("fallthrough")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   556
    protected void buildCharCategories(Vector<String> tempRuleList) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
        int bracketLevel = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
        int p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
        int lineNum = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
        // build hash table of every literal character or [] expression in the rule list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
        // and use CharSet.parseString() to derive a CharSet object representing the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
        // characters each refers to
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   564
        expressions = new Hashtable<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
        while (lineNum < tempRuleList.size()) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   566
            String line = tempRuleList.elementAt(lineNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
            p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
            while (p < line.length()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
                int c = line.codePointAt(p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
                switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
                    // skip over all syntax characters except [
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
                    case '{': case '}': case '(': case ')': case '*': case '.':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
                    case '/': case '|': case ';': case '?': case '!':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
                    // for [, find the matching ] (taking nested [] pairs into account)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
                    // and add the whole expression to the expression list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
                    case '[':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
                        int q = p + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
                        ++bracketLevel;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
                        while (q < line.length() && bracketLevel != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
                            c = line.codePointAt(q);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
                            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
                            case '\\':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
                                q++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
                                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
                            case '[':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
                                ++bracketLevel;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
                                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
                            case ']':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
                                --bracketLevel;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
                                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
                            q = q + Character.charCount(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
                        if (expressions.get(line.substring(p, q)) == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
                            expressions.put(line.substring(p, q), CharSet.parseString(line.substring(p, q)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
                        p = q - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
                    // for \ sequences, just move to the next character and treat
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
                    // it as a single character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
                    case '\\':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
                        ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
                        c = line.codePointAt(p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
                        // DON'T break; fall through into "default" clause
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
                    // for an isolated single character, add it to the expression list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
                    default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
                        expressions.put(line.substring(p, p + 1), CharSet.parseString(line.substring(p, p + 1)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
                p += Character.charCount(line.codePointAt(p));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
            ++lineNum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
        // dump CharSet's internal expression cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
        CharSet.releaseExpressionCache();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
        // create the temporary category table (which is a vector of CharSet objects)
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   622
        categories = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
        if (ignoreChars != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
            categories.addElement(ignoreChars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
            categories.addElement(new CharSet());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
        ignoreChars = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
        // this is a hook to allow subclasses to add categories on their own
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
        mungeExpressionList(expressions);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
        // Derive the character categories.  Go through the existing character categories
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
        // looking for overlap.  Any time there's overlap, we create a new character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
        // category for the characters that overlapped and remove them from their original
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
        // category.  At the end, any characters that are left in the expression haven't
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
        // been mentioned in any category, so another new category is created for them.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
        // For example, if the first expression is [abc], then a, b, and c will be placed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
        // into a single character category.  If the next expression is [bcd], we will first
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
        // remove b and c from their existing category (leaving a behind), create a new
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
        // category for b and c, and then create another new category for d (which hadn't
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
        // been mentioned in the previous expression).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
        // At no time should a character ever occur in more than one character category.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
        // for each expression in the expressions list, do...
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   647
        for (Enumeration<Object> iter = expressions.elements(); iter.hasMoreElements(); ) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
            // initialize the working char set to the chars in the current expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
            CharSet e = (CharSet)iter.nextElement();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
            // for each category in the category list, do...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
            for (int j = categories.size() - 1; !e.empty() && j > 0; j--) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
                // if there's overlap between the current working set of chars
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
                // and the current category...
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   656
                CharSet that = categories.elementAt(j);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
                if (!that.intersection(e).empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
                    // add a new category for the characters that were in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
                    // current category but not in the working char set
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
                    CharSet temp = that.difference(e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
                    if (!temp.empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
                        categories.addElement(temp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
                    // remove those characters from the working char set and replace
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
                    // the current category with the characters that it did
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
                    // have in common with the current working char set
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
                    temp = e.intersection(that);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
                    e = e.difference(that);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
                    if (!temp.equals(that)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
                        categories.setElementAt(temp, j);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
            // if there are still characters left in the working char set,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
            // add a new category containing them
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
            if (!e.empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
                categories.addElement(e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
        // we have the ignore characters stored in position 0.  Make an extra pass through
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
        // the character category list and remove anything from the ignore list that shows
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
        // up in some other category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
        CharSet allChars = new CharSet();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
        for (int i = 1; i < categories.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   689
            allChars = allChars.union(categories.elementAt(i));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
        }
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   691
        CharSet ignoreChars = categories.elementAt(0);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
        ignoreChars = ignoreChars.difference(allChars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
        categories.setElementAt(ignoreChars, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        // now that we've derived the character categories, go back through the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
        // list and replace each CharSet object with a String that represents the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
        // character categories that expression refers to.  The String is encoded: each
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
        // character is a character category number (plus 0x100 to avoid confusing them
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
        // with syntax characters in the rule grammar)
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   700
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   701
        for (Enumeration<String> iter = expressions.keys(); iter.hasMoreElements(); ) {
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   702
            String key = iter.nextElement();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
            CharSet cs = (CharSet)expressions.get(key);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
            StringBuffer cats = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
            // for each category...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
            for (int j = 0; j < categories.size(); j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
                // if the current expression contains characters in that category...
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   710
                CharSet temp = cs.intersection(categories.elementAt(j));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
                if (!temp.empty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
                    // then add the encoded category number to the String for this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
                    // expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
                    cats.append((char)(0x100 + j));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
                    if (temp.equals(cs)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
            // once we've finished building the encoded String for this expression,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
            // replace the CharSet object with it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
            expressions.put(key, cats.toString());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        // and finally, we turn the temporary category table into a permanent category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
        // table, which is a CompactByteArray. (we skip category 0, which by definition
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
        // refers to all characters not mentioned specifically in the rules)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
        charCategoryTable = new CompactByteArray((byte)0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
        supplementaryCharCategoryTable = new SupplementaryCharacterData((byte)0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
        // for each category...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
        for (int i = 0; i < categories.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   735
            CharSet chars = categories.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
            // go through the character ranges in the category one by one...
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   738
            Enumeration<int[]> enum_ = chars.getChars();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
            while (enum_.hasMoreElements()) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   740
                int[] range = enum_.nextElement();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
                // and set the corresponding elements in the CompactArray accordingly
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
                if (i != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
                    if (range[0] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
                        if (range[1] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
                            charCategoryTable.setElementAt((char)range[0], (char)range[1], (byte)i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
                            charCategoryTable.setElementAt((char)range[0], (char)0xFFFF, (byte)i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
                            supplementaryCharCategoryTable.appendElement(Character.MIN_SUPPLEMENTARY_CODE_POINT, range[1], (byte)i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
                        supplementaryCharCategoryTable.appendElement(range[0], range[1], (byte)i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
                // (category 0 is special-- it's the hiding place for the ignore
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
                // characters, whose real category number in the CompactArray is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
                // -1 [this is because category 0 contains all characters not
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
                // specifically mentioned anywhere in the rules] )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
                    if (range[0] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
                        if (range[1] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
                            charCategoryTable.setElementAt((char)range[0], (char)range[1], IGNORE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
                            charCategoryTable.setElementAt((char)range[0], (char)0xFFFF, IGNORE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
                            supplementaryCharCategoryTable.appendElement(Character.MIN_SUPPLEMENTARY_CODE_POINT, range[1], IGNORE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
                        supplementaryCharCategoryTable.appendElement(range[0], range[1], IGNORE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
        // once we've populated the CompactArray, compact it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
        charCategoryTable.compact();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
        // And, complete the category table for supplementary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        supplementaryCharCategoryTable.complete();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        // initialize numCategories
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
        numCategories = categories.size();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   785
    protected void mungeExpressionList(Hashtable<String, Object> expressions) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        // empty in the parent class.  This function provides a hook for subclasses
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
        // to mess with the character category table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
     * This is the function that builds the forward state table.  Most of the real
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
     * work is done in parseRule(), which is called once for each rule in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
     * description.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   795
    private void buildStateTable(Vector<String> tempRuleList) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
        // initialize our temporary state table, and fill it with two states:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        // state 0 is a dummy state that allows state 1 to be the starting state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
        // and 0 to represent "stop".  State 1 is added here to seed things
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        // before we start parsing
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   800
        tempStateTable = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
        tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
        // call parseRule() for every rule in the rule list (except those which
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
        // start with !, which are actually backwards-iteration rules)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
        for (int i = 0; i < tempRuleList.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   807
            String rule = tempRuleList.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
            if (rule.charAt(0) != '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
                parseRule(rule, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
        // finally, use finishBuildingStateTable() to minimize the number of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
        // states in the table and perform some other cleanup work
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
        finishBuildingStateTable(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
     * This is where most of the work really happens.  This routine parses a single
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
     * rule in the rule description, adding and modifying states in the state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
     * table according to the new expression.  The state table is kept deterministic
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
     * throughout the whole operation, although some ugly postprocessing is needed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
     * to handle the *? token.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
    private void parseRule(String rule, boolean forward) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
        // algorithm notes:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
        //   - The basic idea here is to read successive character-category groups
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
        //   from the input string.  For each group, you create a state and point
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
        //   the appropriate entries in the previous state to it.  This produces a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
        //   straight line from the start state to the end state.  The {}, *, and (|)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
        //   idioms produce branches in this straight line.  These branches (states
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
        //   that can transition to more than one other state) are called "decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
        //   points."  A list of decision points is kept.  This contains a list of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
        //   all states that can transition to the next state to be created.  For a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
        //   straight line progression, the only thing in the decision-point list is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
        //   the current state.  But if there's a branch, the decision-point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        //   will contain all of the beginning points of the branch when the next
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
        //   state to be created represents the end point of the branch.  A stack is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
        //   used to save decision point lists in the presence of nested parentheses
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
        //   and the like.  For example, when a { is encountered, the current decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
        //   point list is saved on the stack and restored when the corresponding }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
        //   is encountered.  This way, after the } is read, the decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
        //   will contain both the state right before the } _and_ the state before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
        //   the whole {} expression.  Both of these states can transition to the next
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
        //   state after the {} expression.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
        //   - one complication arises when we have to stamp a transition value into
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
        //   an array cell that already contains one.  The updateStateTable() and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
        //   mergeStates() functions handle this case.  Their basic approach is to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
        //   create a new state that combines the two states that conflict and point
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
        //   at it when necessary.  This happens recursively, so if the merged states
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
        //   also conflict, they're resolved in the same way, and so on.  There are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
        //   a number of tests aimed at preventing infinite recursion.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
        //   - another complication arises with repeating characters.  It's somewhat
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
        //   ambiguous whether the user wants a greedy or non-greedy match in these cases.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
        //   (e.g., whether "[a-z]*abc" means the SHORTEST sequence of letters ending in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
        //   "abc" or the LONGEST sequence of letters ending in "abc".  We've adopted
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
        //   the *? to mean "shortest" and * by itself to mean "longest".  (You get the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
        //   same result with both if there's no overlap between the repeating character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
        //   group and the group immediately following it.)  Handling the *? token is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
        //   rather complicated and involves keeping track of whether a state needs to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
        //   be merged (as described above) or merely overwritten when you update one of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
        //   its cells, and copying the contents of a state that loops with a *? token
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
        //   into some of the states that follow it after the rest of the table-building
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
        //   process is complete ("backfilling").
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
        // implementation notes:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
        //   - This function assumes syntax checking has been performed on the input string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
        //   prior to its being passed in here.  It assumes that parentheses are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
        //   balanced, all literal characters are enclosed in [] and turned into category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
        //   numbers, that there are no illegal characters or character sequences, and so
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
        //   on.  Violation of these invariants will lead to undefined behavior.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
        //   - It'd probably be better to use linked lists rather than Vector and Stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
        //   to maintain the decision point list and stack.  I went for simplicity in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
        //   this initial implementation.  If performance is critical enough, we can go
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
        //   back and fix this later.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
        //   -There are a number of important limitations on the *? token.  It does not work
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
        //   right when followed by a repeating character sequence (e.g., ".*?(abc)*")
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
        //   (although it does work right when followed by a single repeating character).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
        //   It will not always work right when nested in parentheses or braces (although
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
        //   sometimes it will).  It also will not work right if the group of repeating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
        //   characters and the group of characters that follows overlap partially
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
        //   (e.g., "[a-g]*?[e-j]").  None of these capabilites was deemed necessary for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
        //   describing breaking rules we know about, so we left them out for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
        //   expeditiousness.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
        //   - Rules such as "[a-z]*?abc;" will be treated the same as "[a-z]*?aa*bc;"--
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
        //   that is, if the string ends in "aaaabc", the break will go before the first
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
        //   "a" rather than the last one.  Both of these are limitations in the design
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
        //   of RuleBasedBreakIterator and not limitations of the rule parser.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
        int p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
        int currentState = 1;   // don't use state number 0; 0 means "stop"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
        int lastState = currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
        String pendingChars = "";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   894
        decisionPointStack = new Stack<>();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   895
        decisionPointList = new Vector<>();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   896
        loopingStates = new Vector<>();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   897
        statesToBackfill = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
        short[] state;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
        boolean sawEarlyBreak = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
        // if we're adding rules to the backward state table, mark the initial state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
        // as a looping state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
        if (!forward) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
            loopingStates.addElement(new Integer(1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
        // put the current state on the decision point list before we start
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
        decisionPointList.addElement(new Integer(currentState)); // we want currentState to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
                                                                 // be 1 here...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
        currentState = tempStateTable.size() - 1;   // but after that, we want it to be
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
                                                    // 1 less than the state number of the next state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
        while (p < rule.length()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
            int c = rule.codePointAt(p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
            clearLoopingStates = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
            // this section handles literal characters, escaped characters (which are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
            // effectively literal characters too), the . token, and [] expressions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
            if (c == '['
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
                || c == '\\'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
                || Character.isLetter(c)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
                || Character.isDigit(c)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
                || c < ' '
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
                || c == '.'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
                || c >= '\u007f') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
                // if we're not on a period, isolate the expression and look up
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
                // the corresponding category list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
                if (c != '.') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
                    int q = p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
                    // if we're on a backslash, the expression is the character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
                    // after the backslash
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
                    if (c == '\\') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
                        q = p + 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
                        ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
                    // if we're on an opening bracket, scan to the closing bracket
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
                    // to isolate the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
                    else if (c == '[') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
                        int bracketLevel = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
                        q += Character.charCount(rule.codePointAt(q));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
                        while (bracketLevel > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
                            c = rule.codePointAt(q);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
                            if (c == '[') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                                ++bracketLevel;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
                            else if (c == ']') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
                                --bracketLevel;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
                            else if (c == '\\') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
                                c = rule.codePointAt(++q);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
                            q += Character.charCount(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                    // otherwise, the expression is just the character itself
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
                        q = p + Character.charCount(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
                    // look up the category list for the expression and store it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
                    // in pendingChars
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
                    pendingChars = (String)expressions.get(rule.substring(p, q));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
                    // advance the current position past the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
                    p = q - Character.charCount(rule.codePointBefore(q));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
                // if the character we're on is a period, we end up down here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
                else {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   975
                    int rowNum = decisionPointList.lastElement().intValue();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
   976
                    state = tempStateTable.elementAt(rowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
                    // if the period is followed by an asterisk, then just set the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
                    // state to loop back on itself
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
                    if (p + 1 < rule.length() && rule.charAt(p + 1) == '*' && state[0] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
                        decisionPointList.addElement(new Integer(state[0]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
                        pendingChars = "";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
                        ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
                    // otherwise, fabricate a category list ("pendingChars") with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
                    // every category in it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
                        StringBuffer temp = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
                        for (int i = 0; i < numCategories; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
                            temp.append((char)(i + 0x100));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
                        pendingChars = temp.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
                // we'll end up in here for all expressions except for .*, which is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
                // special-cased above
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
                if (pendingChars.length() != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
                    // if the expression is followed by an asterisk, then push a copy
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
                    // of the current desicion point list onto the stack (this is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
                    // the same thing we do on an opening brace)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
                    if (p + 1 < rule.length() && rule.charAt(p + 1) == '*') {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1004
                        @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1005
                        Vector<Integer> clone = (Vector<Integer>)decisionPointList.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1006
                        decisionPointStack.push(clone);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
                    // create a new state, add it to the list of states to backfill
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
                    // if we have looping states to worry about, set its "don't make
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
                    // me an accepting state" flag if we've seen a slash, and add
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
                    // it to the end of the state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
                    int newState = tempStateTable.size();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
                    if (loopingStates.size() != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
                        statesToBackfill.addElement(new Integer(newState));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
                    state = new short[numCategories + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
                    if (sawEarlyBreak) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
                        state[numCategories] = DONT_LOOP_FLAG;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
                    tempStateTable.addElement(state);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
                    // update everybody in the decision point list to point to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
                    // the new state (this also performs all the reconciliation
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
                    // needed to make the table deterministic), then clear the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
                    // decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
                    updateStateTable(decisionPointList, pendingChars, (short)newState);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
                    decisionPointList.removeAllElements();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
                    // add all states created since the last literal character we've
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
                    // seen to the decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
                    lastState = currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
                    do {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
                        ++currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
                        decisionPointList.addElement(new Integer(currentState));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
                    } while (currentState + 1 < tempStateTable.size());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
            // a { marks the beginning of an optional run of characters.  Push a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
            // copy of the current decision point list onto the stack.  This saves
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
            // it, preventing it from being affected by whatever's inside the parentheses.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
            // This decision point list is restored when a } is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
            else if (c == '{') {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1045
                @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1046
                Vector<Integer> clone = (Vector<Integer>)decisionPointList.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1047
                decisionPointStack.push(clone);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
            // a } marks the end of an optional run of characters.  Pop the last decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
            // point list off the stack and merge it with the current decision point list.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
            // a * denotes a repeating character or group (* after () is handled separately
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
            // below).  In addition to restoring the decision point list, modify the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
            // current state to point to itself on the appropriate character categories.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
            else if (c == '}' || c == '*') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
                // when there's a *, update the current state to loop back on itself
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
                // on the character categories that caused us to enter this state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
                if (c == '*') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
                    for (int i = lastState + 1; i < tempStateTable.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1060
                        Vector<Integer> temp = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
                        temp.addElement(new Integer(i));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
                        updateStateTable(temp, pendingChars, (short)(lastState + 1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
                // pop the top element off the decision point stack and merge
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
                // it with the current decision point list (this causes the divergent
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
                // paths through the state table to come together again on the next
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
                // new state)
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1070
                Vector<Integer> temp = decisionPointStack.pop();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
                for (int i = 0; i < decisionPointList.size(); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
                    temp.addElement(decisionPointList.elementAt(i));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
                decisionPointList = temp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
            // a ? after a * modifies the behavior of * in cases where there is overlap
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
            // between the set of characters that repeat and the characters which follow.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
            // Without the ?, all states following the repeating state, up to a state which
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
            // is reached by a character that doesn't overlap, will loop back into the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
            // repeating state.  With the ?, the mark states following the *? DON'T loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
            // back into the repeating state.  Thus, "[a-z]*xyz" will match the longest
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
            // sequence of letters that ends in "xyz," while "[a-z]*? will match the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
            // _shortest_ sequence of letters that ends in "xyz".
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
            // We use extra bookkeeping to achieve this effect, since everything else works
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
            // according to the "longest possible match" principle.  The basic principle
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
            // is that transitions out of a looping state are written in over the looping
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
            // value instead of being reconciled, and that we copy the contents of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
            // looping state into empty cells of all non-terminal states that follow the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
            // looping state.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
            else if (c == '?') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
                setLoopingStates(decisionPointList, decisionPointList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
            // a ( marks the beginning of a sequence of characters.  Parentheses can either
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
            // contain several alternative character sequences (i.e., "(ab|cd|ef)"), or
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
            // they can contain a sequence of characters that can repeat (i.e., "(abc)*").  Thus,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
            // A () group can have multiple entry and exit points.  To keep track of this,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
            // we reserve TWO spots on the decision-point stack.  The top of the stack is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
            // the list of exit points, which becomes the current decision point list when
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
            // the ) is reached.  The next entry down is the decision point list at the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
            // beginning of the (), which becomes the current decision point list at every
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
            // entry point.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
            // In addition to keeping track of the exit points and the active decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
            // points before the ( (i.e., the places from which the () can be entered),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
            // we need to keep track of the entry points in case the expression loops
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
            // (i.e., is followed by *).  We do that by creating a dummy state in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
            // state table and adding it to the decision point list (BEFORE it's duplicated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
            // on the stack).  Nobody points to this state, so it'll get optimized out
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
            // at the end.  It exists only to hold the entry points in case the ()
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
            // expression loops.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
            else if (c == '(') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
                // add a new state to the state table to hold the entry points into
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
                // the () expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
                tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
                // we have to adjust lastState and currentState to account for the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
                // new dummy state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
                lastState = currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
                ++currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
                // add the current state to the decision point list (add it at the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
                // BEGINNING so we can find it later)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
                decisionPointList.insertElementAt(new Integer(currentState), 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
                // finally, push a copy of the current decision point list onto the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
                // stack (this keeps track of the active decision point list before
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
                // the () expression), followed by an empty decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
                // (this will hold the exit points)
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1130
                @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1131
                Vector<Integer> clone = (Vector<Integer>)decisionPointList.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1132
                decisionPointStack.push(clone);
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1133
                decisionPointStack.push(new Vector<Integer>());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
            // a | separates alternative character sequences in a () expression.  When
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
            // a | is encountered, we add the current decision point list to the exit-point
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
            // list, and restore the decision point list to its state prior to the (.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            else if (c == '|') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
                // pick out the top two decision point lists on the stack
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1142
                Vector<Integer> oneDown = decisionPointStack.pop();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1143
                Vector<Integer> twoDown = decisionPointStack.peek();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
                decisionPointStack.push(oneDown);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
                // append the current decision point list to the list below it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
                // on the stack (the list of exit points), and restore the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
                // current decision point list to its state before the () expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                for (int i = 0; i < decisionPointList.size(); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                    oneDown.addElement(decisionPointList.elementAt(i));
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1151
                @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1152
                Vector<Integer> clone = (Vector<Integer>)twoDown.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1153
                decisionPointList = clone;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
            // a ) marks the end of a sequence of characters.  We do one of two things
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
            // depending on whether the sequence repeats (i.e., whether the ) is followed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
            // by *):  If the sequence doesn't repeat, then the exit-point list is merged
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
            // with the current decision point list and the decision point list from before
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
            // the () is thrown away.  If the sequence does repeat, then we fish out the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
            // state we were in before the ( and copy all of its forward transitions
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
            // (i.e., every transition added by the () expression) into every state in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
            // exit-point list and the current decision point list.  The current decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
            // point list is then merged with both the exit-point list AND the saved version
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
            // of the decision point list from before the ().  Then we throw out the *.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
            else if (c == ')') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
                // pull the exit point list off the stack, merge it with the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
                // decision point list, and make the merged version the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
                // decision point list
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1171
                Vector<Integer> exitPoints = decisionPointStack.pop();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
                for (int i = 0; i < decisionPointList.size(); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
                    exitPoints.addElement(decisionPointList.elementAt(i));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
                decisionPointList = exitPoints;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
                // if the ) isn't followed by a *, then all we have to do is throw
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
                // away the other list on the decision point stack, and we're done
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
                if (p + 1 >= rule.length() || rule.charAt(p + 1) != '*') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
                    decisionPointStack.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
                // but if the sequence repeats, we have a lot more work to do...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
                    // now exitPoints and decisionPointList have to point to equivalent
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
                    // vectors, but not the SAME vector
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1187
                    @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1188
                    Vector<Integer> clone = (Vector<Integer>)decisionPointList.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1189
                    exitPoints = clone;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
                    // pop the original decision point list off the stack
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1192
                    Vector<Integer> temp = decisionPointStack.pop();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
                    // we squirreled away the row number of our entry point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
                    // at the beginning of the original decision point list.  Fish
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
                    // that state number out and retrieve the entry point list
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1197
                    int tempStateNum = temp.firstElement().intValue();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1198
                    short[] tempState = tempStateTable.elementAt(tempStateNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
                    // merge the original decision point list with the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
                    // decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
                    for (int i = 0; i < decisionPointList.size(); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
                        temp.addElement(decisionPointList.elementAt(i));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
                    decisionPointList = temp;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
                    // finally, copy every forward reference from the entry point
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                    // list into every state in the new decision point list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                    for (int i = 0; i < tempState.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
                        if (tempState[i] > tempStateNum) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
                            updateStateTable(exitPoints,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
                                             new Character((char)(i + 0x100)).toString(),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
                                             tempState[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
                    // update lastState and currentState, and throw away the *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
                    lastState = currentState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
                    currentState = tempStateTable.size() - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
                    ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
            // a / marks the position where the break is to go if the character sequence
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
            // matches this rule.  We update the flag word of every state on the decision
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
            // point list to mark them as ending states, and take note of the fact that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
            // we've seen the slash
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
            else if (c == '/') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
                sawEarlyBreak = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
                for (int i = 0; i < decisionPointList.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1230
                    state = tempStateTable.elementAt(decisionPointList.
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1231
                                    elementAt(i).intValue());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
                    state[numCategories] |= LOOKAHEAD_STATE_FLAG;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
            // if we get here without executing any of the above clauses, we have a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
            // syntax error.  However, for now we just ignore the offending character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
            // and move on
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
            // clearLoopingStates is a signal back from updateStateTable() that we've
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
            // transitioned to a state that won't loop back to the current looping
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
            // state.  (In other words, we've gotten to a point where we can no longer
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
            // go back into a *? we saw earlier.)  Clear out the list of looping states
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
            // and backfill any states that need to be backfilled.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
            if (clearLoopingStates) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                setLoopingStates(null, decisionPointList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
            // advance to the next character, now that we've processed the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
            // character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
            p += Character.charCount(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
        // this takes care of backfilling any states that still need to be backfilled
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
        setLoopingStates(null, decisionPointList);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
        // when we reach the end of the string, we do a postprocessing step to mark the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
        // end states.  The decision point list contains every state that can transition
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
        // to the end state-- that is, every state that is the last state in a sequence
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
        // that matches the rule.  All of these states are considered "mark states"
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
        // or "accepting states"-- that is, states that cause the position returned from
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
        // next() to be updated.  A mark state represents a possible break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
        // This allows us to look ahead and remember how far the rule matched
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
        // before following the new branch (see next() for more information).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
        // The temporary state table has an extra "flag column" at the end where this
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
        // information is stored.  We mark the end states by setting a flag in their
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
        // flag column.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
        // Now if we saw the / in the rule, then everything after it is lookahead
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
        // material and the break really goes where the slash is.  In this case,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
        // we mark these states as BOTH accepting states and lookahead states.  This
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
        // signals that these states cause the break position to be updated to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
        // position of the slash rather than the current break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
        for (int i = 0; i < decisionPointList.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1274
            int rowNum = decisionPointList.elementAt(i).intValue();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1275
            state = tempStateTable.elementAt(rowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
            state[numCategories] |= END_STATE_FLAG;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
            if (sawEarlyBreak) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
                state[numCategories] |= LOOKAHEAD_STATE_FLAG;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
     * Update entries in the state table, and merge states when necessary to keep
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
     * the table deterministic.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
     * @param rows The list of rows that need updating (the decision point list)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
     * @param pendingChars A character category list, encoded in a String.  This is the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
     * list of the columns that need updating.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
     * @param newValue Update the cells specfied above to contain this value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1292
    private void updateStateTable(Vector<Integer> rows,
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
                                  String pendingChars,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                                  short newValue) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
        // create a dummy state that has the specified row number (newValue) in
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
        // the cells that need to be updated (those specified by pendingChars)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
        // and 0 in the other cells
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
        short[] newValues = new short[numCategories + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
        for (int i = 0; i < pendingChars.length(); i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
            newValues[(int)(pendingChars.charAt(i)) - 0x100] = newValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
        // go through the list of rows to update, and update them by calling
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
        // mergeStates() to merge them the the dummy state we created
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
        for (int i = 0; i < rows.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1305
            mergeStates(rows.elementAt(i).intValue(), newValues, rows);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
     * The real work of making the state table deterministic happens here.  This function
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
     * merges a state in the state table (specified by rowNum) with a state that is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
     * passed in (newValues).  The basic process is to copy the nonzero cells in newStates
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
     * into the state in the state table (we'll call that oldValues).  If there's a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
     * collision (i.e., if the same cell has a nonzero value in both states, and it's
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
     * not the SAME value), then we have to reconcile the collision.  We do this by
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
     * creating a new state, adding it to the end of the state table, and using this
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
     * function recursively to merge the original two states into a single, combined
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
     * state.  This process may happen recursively (i.e., each successive level may
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
     * involve collisions).  To prevent infinite recursion, we keep a log of merge
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
     * operations.  Any time we're merging two states we've merged before, we can just
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
     * supply the row number for the result of that merge operation rather than creating
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
     * a new state just like it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
     * @param rowNum The row number in the state table of the state to be updated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
     * @param newValues The state to merge it with.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
     * @param rowsBeingUpdated A copy of the list of rows passed to updateStateTable()
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
     * (itself a copy of the decision point list from parseRule()).  Newly-created
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
     * states get added to the decision point list if their "parents" were on it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
    private void mergeStates(int rowNum,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
                             short[] newValues,
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1331
                             Vector<Integer> rowsBeingUpdated) {
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1332
        short[] oldValues = tempStateTable.elementAt(rowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
        boolean isLoopingState = loopingStates.contains(new Integer(rowNum));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
        // for each of the cells in the rows we're reconciling, do...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
        for (int i = 0; i < oldValues.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
            // if they contain the same value, we don't have to do anything
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
            if (oldValues[i] == newValues[i]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
            // if oldValues is a looping state and the state the current cell points to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
            // is too, then we can just stomp over the current value of that cell (and
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
            // set the clear-looping-states flag if necessary)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
            else if (isLoopingState && loopingStates.contains(new Integer(oldValues[i]))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
                if (newValues[i] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
                    if (oldValues[i] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
                        clearLoopingStates = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
                    oldValues[i] = newValues[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
            // if the current cell in oldValues is 0, copy in the corresponding value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
            // from newValues
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
            else if (oldValues[i] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
                oldValues[i] = newValues[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
            // the last column of each row is the flag column.  Take care to merge the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
            // flag words correctly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
            else if (i == numCategories) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
                oldValues[i] = (short)((newValues[i] & ALL_FLAGS) | oldValues[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
            // if both newValues and oldValues have a nonzero value in the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
            // cell, and it isn't the same value both places...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
            else if (oldValues[i] != 0 && newValues[i] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
                // look up this pair of cell values in the merge list.  If it's
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
                // found, update the cell in oldValues to point to the merged state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
                int combinedRowNum = searchMergeList(oldValues[i], newValues[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
                if (combinedRowNum != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
                    oldValues[i] = (short)combinedRowNum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                // otherwise, we have to reconcile them...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
                    // copy our row numbers into variables to make things easier
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                    int oldRowNum = oldValues[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
                    int newRowNum = newValues[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                    combinedRowNum = tempStateTable.size();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
                    // add this pair of row numbers to the merge list (create it first
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                    // if we haven't created the merge list yet)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
                    if (mergeList == null) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1388
                        mergeList = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
                    mergeList.addElement(new int[] { oldRowNum, newRowNum, combinedRowNum });
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                    // create a new row to represent the merged state, and copy the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                    // contents of oldRow into it, then add it to the end of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
                    // state table and update the original row (oldValues) to point
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
                    // to the new, merged, state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
                    short[] newRow = new short[numCategories + 1];
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1397
                    short[] oldRow = tempStateTable.elementAt(oldRowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
                    System.arraycopy(oldRow, 0, newRow, 0, numCategories + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
                    tempStateTable.addElement(newRow);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
                    oldValues[i] = (short)combinedRowNum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
                    // if the decision point list contains either of the parent rows,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
                    // update it to include the new row as well
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                    if ((decisionPointList.contains(new Integer(oldRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                            || decisionPointList.contains(new Integer(newRowNum)))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
                        && !decisionPointList.contains(new Integer(combinedRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                    ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
                        decisionPointList.addElement(new Integer(combinedRowNum));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
                    // do the same thing with the list of rows being updated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
                    if ((rowsBeingUpdated.contains(new Integer(oldRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
                            || rowsBeingUpdated.contains(new Integer(newRowNum)))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
                        && !rowsBeingUpdated.contains(new Integer(combinedRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
                    ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
                        decisionPointList.addElement(new Integer(combinedRowNum));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
                    // now (groan) do the same thing for all the entries on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
                    // decision point stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
                    for (int k = 0; k < decisionPointStack.size(); k++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1421
                        Vector<Integer> dpl = decisionPointStack.elementAt(k);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
                        if ((dpl.contains(new Integer(oldRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
                                || dpl.contains(new Integer(newRowNum)))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
                            && !dpl.contains(new Integer(combinedRowNum))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
                        ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
                            dpl.addElement(new Integer(combinedRowNum));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
                    // FINALLY (puff puff puff), call mergeStates() recursively to copy
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                    // the row referred to by newValues into the new row and resolve any
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                    // conflicts that come up at that level
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1433
                    mergeStates(combinedRowNum, tempStateTable.elementAt(
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1434
                                    newValues[i]), rowsBeingUpdated);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
     * The merge list is a list of pairs of rows that have been merged somewhere in
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
     * the process of building this state table, along with the row number of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
     * row containing the merged state.  This function looks up a pair of row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
     * and returns the row number of the row they combine into.  (It returns 0 if
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
     * this pair of rows isn't in the merge list.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
    private int searchMergeList(int a, int b) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
        // if there is no merge list, there obviously isn't anything in it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
        if (mergeList == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
        // otherwise, for each element in the merge list...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
            int[] entry;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
            for (int i = 0; i < mergeList.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1458
                entry = mergeList.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
                // we have a hit if the two row numbers match the two row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
                // in the beginning of the entry (the two that combine), in either
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
                // order
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
                if ((entry[0] == a && entry[1] == b) || (entry[0] == b && entry[1] == a)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
                    return entry[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                // we also have a hit if one of the two row numbers matches the marged
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
                // row number and the other one matches one of the original row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
                if ((entry[2] == a && (entry[0] == b || entry[1] == b))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
                    return entry[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
                if ((entry[2] == b && (entry[0] == a || entry[1] == a))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
                    return entry[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
     * This function is used to update the list of current loooping states (i.e.,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
     * states that are controlled by a *? construct).  It backfills values from
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
     * the looping states into unpopulated cells of the states that are currently
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
     * marked for backfilling, and then updates the list of looping states to be
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
     * the new list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
     * @param newLoopingStates The list of new looping states
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
     * @param endStates The list of states to treat as end states (states that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
     * can exit the loop).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1490
    private void setLoopingStates(Vector<Integer> newLoopingStates,
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1491
                                  Vector<Integer> endStates) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
        // if the current list of looping states isn't empty, we have to backfill
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
        // values from the looping states into the states that are waiting to be
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
        // backfilled
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
        if (!loopingStates.isEmpty()) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1497
            int loopingState = loopingStates.lastElement().intValue();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
            int rowNum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
            // don't backfill into an end state OR any state reachable from an end state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
            // (since the search for reachable states is recursive, it's split out into
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
            // a separate function, eliminateBackfillStates(), below)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
            for (int i = 0; i < endStates.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1504
                eliminateBackfillStates(endStates.elementAt(i).intValue());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
            // we DON'T actually backfill the states that need to be backfilled here.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
            // Instead, we MARK them for backfilling.  The reason for this is that if
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
            // there are multiple rules in the state-table description, the looping
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
            // states may have some of their values changed by a succeeding rule, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
            // this wouldn't be reflected in the backfilled states.  We mark a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
            // for backfilling by putting the row number of the state to copy from
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
            // into the flag cell at the end of the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
            for (int i = 0; i < statesToBackfill.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1515
                rowNum = statesToBackfill.elementAt(i).intValue();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1516
                short[] state = tempStateTable.elementAt(rowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
                state[numCategories] =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
                    (short)((state[numCategories] & ALL_FLAGS) | loopingState);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
            statesToBackfill.removeAllElements();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
            loopingStates.removeAllElements();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
        if (newLoopingStates != null) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1525
            @SuppressWarnings("unchecked")
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1526
            Vector<Integer> clone = (Vector<Integer>)newLoopingStates.clone();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1527
            loopingStates = clone;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
     * This removes "ending states" and states reachable from them from the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
     * list of states to backfill.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
     * @param The row number of the state to remove from the backfill list
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
    private void eliminateBackfillStates(int baseState) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
        // don't do anything unless this state is actually in the backfill list...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
        if (statesToBackfill.contains(new Integer(baseState))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
            // if it is, take it out
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
            statesToBackfill.removeElement(new Integer(baseState));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
            // then go through and recursively call this function for every
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
            // state that the base state points to
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1546
            short[] state = tempStateTable.elementAt(baseState);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
            for (int i = 0; i < numCategories; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
                if (state[i] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
                    eliminateBackfillStates(state[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
     * This function completes the backfilling process by actually doing the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
     * backfilling on the states that are marked for it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
    private void backfillLoopingStates() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
        short[] state;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
        short[] loopingState = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
        int loopingStateRowNum = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
        int fromState;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
        // for each state in the state table...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
        for (int i = 0; i < tempStateTable.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1567
            state = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
            // check the state's flag word to see if it's marked for backfilling
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
            // (it's marked for backfilling if any bits other than the two high-order
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
            // bits are set-- if they are, then the flag word, minus the two high bits,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
            // is the row number to copy from)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
            fromState = state[numCategories] & ~ALL_FLAGS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
            if (fromState > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
                // load up the state to copy from (if we haven't already)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
                if (fromState != loopingStateRowNum) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
                    loopingStateRowNum = fromState;
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1579
                    loopingState = tempStateTable.elementAt(loopingStateRowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
                // clear out the backfill part of the flag word
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
                state[numCategories] &= ALL_FLAGS;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
                // then fill all zero cells in the current state with values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
                // from the corresponding cells of the fromState
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
                for (int j = 0; j < state.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
                    if (state[j] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
                        state[j] = loopingState[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
                    else if (state[j] == DONT_LOOP_FLAG) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
                        state[j] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
     * This function completes the state-table-building process by doing several
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
     * postprocessing steps and copying everything into its final resting place
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
     * in the iterator itself
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
     * @param forward True if we're working on the forward state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
    private void finishBuildingStateTable(boolean forward) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1606
        // start by backfilling the looping states
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
        backfillLoopingStates();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
        int[] rowNumMap = new int[tempStateTable.size()];
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1610
        Stack<Integer> rowsToFollow = new Stack<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
        rowsToFollow.push(new Integer(1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
        rowNumMap[1] = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
        // determine which states are no longer reachable from the start state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
        // (the reachable states will have their row numbers in the row number
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
        // map, and the nonreachable states will have zero in the row number map)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
        while (rowsToFollow.size() != 0) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1618
            int rowNum = rowsToFollow.pop().intValue();
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1619
            short[] row = tempStateTable.elementAt(rowNum);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
            for (int i = 0; i < numCategories; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
                if (row[i] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
                    if (rowNumMap[row[i]] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
                        rowNumMap[row[i]] = row[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
                        rowsToFollow.push(new Integer(row[i]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
        boolean madeChange;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
        int newRowNum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
        // algorithm for minimizing the number of states in the table adapted from
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
        // Aho & Ullman, "Principles of Compiler Design"
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
        // The basic idea here is to organize the states into classes.  When we're done,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
        // all states in the same class can be considered identical and all but one eliminated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
        // initially assign states to classes based on the number of populated cells they
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
        // contain (the class number is the number of populated cells)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
        int[] stateClasses = new int[tempStateTable.size()];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
        int nextClass = numCategories + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
        short[] state1, state2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
        for (int i = 1; i < stateClasses.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
            if (rowNumMap[i] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
            }
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1648
            state1 = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
            for (int j = 0; j < numCategories; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
                if (state1[j] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
                    ++stateClasses[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
            if (stateClasses[i] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
                stateClasses[i] = nextClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
        ++nextClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
        // then, for each class, elect the first member of that class as that class's
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
        // "representative".  For each member of the class, compare it to the "representative."
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
        // If there's a column position where the state being tested transitions to a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
        // state in a DIFFERENT class from the class where the "representative" transitions,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
        // then move the state into a new class.  Repeat this process until no new classes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
        // are created.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
        int currentClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
        int lastClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
        boolean split;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
        do {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
            currentClass = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
            lastClass = nextClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
            while (currentClass < nextClass) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
                split = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
                state1 = state2 = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
                for (int i = 0; i < stateClasses.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
                    if (stateClasses[i] == currentClass) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                        if (state1 == null) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1679
                            state1 = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1681
                        else {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1682
                            state2 = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1683
                            for (int j = 0; j < state2.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1684
                                if ((j == numCategories && state1[j] != state2[j] && forward)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1685
                                        || (j != numCategories && stateClasses[state1[j]]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1686
                                        != stateClasses[state2[j]])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1687
                                    stateClasses[i] = nextClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1688
                                    split = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1689
                                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1690
                                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1691
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1692
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1693
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1694
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1695
                if (split) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1696
                    ++nextClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1697
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1698
                ++currentClass;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1699
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1700
        } while (lastClass != nextClass);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1701
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1702
        // at this point, all of the states in a class except the first one (the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1703
        //"representative") can be eliminated, so update the row-number map accordingly
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1704
        int[] representatives = new int[nextClass];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1705
        for (int i = 1; i < stateClasses.length; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1706
            if (representatives[stateClasses[i]] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1707
                representatives[stateClasses[i]] = i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1708
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1709
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1710
                rowNumMap[i] = representatives[stateClasses[i]];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1711
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1712
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1713
        // renumber all remaining rows...
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1714
        // first drop all that are either unreferenced or not a class representative
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1715
        for (int i = 1; i < rowNumMap.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1716
            if (rowNumMap[i] != i) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1717
                tempStateTable.setElementAt(null, i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1718
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1719
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1720
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1721
        // then calculate everybody's new row number and update the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1722
        // number map appropriately (the first pass updates the row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1723
        // of all the class representatives [the rows we're keeping], and the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1724
        // second pass updates the cross references for all the rows that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1725
        // are being deleted)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1726
        newRowNum = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1727
        for (int i = 1; i < rowNumMap.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1728
            if (tempStateTable.elementAt(i) != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1729
                rowNumMap[i] = newRowNum++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1730
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1731
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1732
        for (int i = 1; i < rowNumMap.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1733
            if (tempStateTable.elementAt(i) == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1734
                rowNumMap[i] = rowNumMap[rowNumMap[i]];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1735
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1736
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1737
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1738
        // allocate the permanent state table, and copy the remaining rows into it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1739
        // (adjusting all the cell values, of course)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1740
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1741
        // this section does that for the forward state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1742
        if (forward) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1743
            endStates = new boolean[newRowNum];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1744
            lookaheadStates = new boolean[newRowNum];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1745
            stateTable = new short[newRowNum * numCategories];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1746
            int p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1747
            int p2 = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1748
            for (int i = 0; i < tempStateTable.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1749
                short[] row = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1750
                if (row == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1751
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1752
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1753
                for (int j = 0; j < numCategories; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1754
                    stateTable[p] = (short)(rowNumMap[row[j]]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1755
                    ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1756
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1757
                endStates[p2] = ((row[numCategories] & END_STATE_FLAG) != 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1758
                lookaheadStates[p2] = ((row[numCategories] & LOOKAHEAD_STATE_FLAG) != 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1759
                ++p2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1760
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1761
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1762
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1763
        // and this section does it for the backward state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1764
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1765
            backwardsStateTable = new short[newRowNum * numCategories];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1766
            int p = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1767
            for (int i = 0; i < tempStateTable.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1768
                short[] row = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1769
                if (row == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1770
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1771
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1772
                for (int j = 0; j < numCategories; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1773
                    backwardsStateTable[p] = (short)(rowNumMap[row[j]]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1774
                    ++p;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1775
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1776
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1777
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1778
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1779
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1780
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1781
     * This function builds the backward state table from the forward state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1782
     * table and any additional rules (identified by the ! on the front)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1783
     * supplied in the description
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1784
     */
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1785
    private void buildBackwardsStateTable(Vector<String> tempRuleList) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1786
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1787
        // create the temporary state table and seed it with two rows (row 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1788
        // isn't used for anything, and we have to create row 1 (the initial
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1789
        // state) before we can do anything else
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1790
        tempStateTable = new Vector<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1791
        tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1792
        tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1793
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1794
        // although the backwards state table is built automatically from the forward
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1795
        // state table, there are some situations (the default sentence-break rules,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1796
        // for example) where this doesn't yield enough stop states, causing a dramatic
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1797
        // drop in performance.  To help with these cases, the user may supply
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1798
        // supplemental rules that are added to the backward state table.  These have
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1799
        // the same syntax as the normal break rules, but begin with '!' to distinguish
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1800
        // them from normal break rules
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1801
        for (int i = 0; i < tempRuleList.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1802
            String rule = tempRuleList.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1803
            if (rule.charAt(0) == '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1804
                parseRule(rule.substring(1), false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1805
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1806
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1807
        backfillLoopingStates();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1808
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1809
        // Backwards iteration is qualitatively different from forwards iteration.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1810
        // This is because backwards iteration has to be made to operate from no context
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1811
        // at all-- the user should be able to ask BreakIterator for the break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1812
        // immediately on either side of some arbitrary offset in the text.  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1813
        // forward iteration table doesn't let us do that-- it assumes complete
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1814
        // information on the context, which means starting from the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1815
        // document.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1816
        // The way we do backward and random-access iteration is to back up from the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1817
        // current (or user-specified) position until we see something we're sure is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1818
        // a break position (it may not be the last break position immediately
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1819
        // preceding our starting point, however).  Then we roll forward from there to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1820
        // locate the actual break position we're after.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1821
        // This means that the backwards state table doesn't have to identify every
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1822
        // break position, allowing the building algorithm to be much simpler.  Here,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1823
        // we use a "pairs" approach, scanning the forward-iteration state table for
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1824
        // pairs of character categories we ALWAYS break between, and building a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1825
        // table from that information.  No context is required-- all this state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1826
        // looks at is a pair of adjacent characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1827
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1828
        // It's possible that the user has supplied supplementary rules (see above).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1829
        // This has to be done first to keep parseRule() and friends from becoming
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1830
        // EVEN MORE complicated.  The automatically-generated states are appended
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1831
        // onto the end of the state table, and then the two sets of rules are
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1832
        // stitched together at the end.  Take note of the row number of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1833
        // first row of the auromatically-generated part.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1834
        int backTableOffset = tempStateTable.size();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1835
        if (backTableOffset > 2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1836
            ++backTableOffset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1837
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1838
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1839
        // the automatically-generated part of the table models a two-dimensional
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1840
        // array where the two dimensions represent the two characters we're currently
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1841
        // looking at.  To model this as a state table, we actually need one additional
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1842
        // row to represent the initial state.  It gets populated with the row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1843
        // of the other rows (in order).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1844
        for (int i = 0; i < numCategories + 1; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1845
            tempStateTable.addElement(new short[numCategories + 1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1846
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1847
        short[] state = tempStateTable.elementAt(backTableOffset - 1);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1848
        for (int i = 0; i < numCategories; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1849
            state[i] = (short)(i + backTableOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1850
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1851
        // scavenge the forward state table for pairs of character categories
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1852
        // that always have a break between them.  The algorithm is as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1853
        // Look down each column in the state table.  For each nonzero cell in
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1854
        // that column, look up the row it points to.  For each nonzero cell in
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1855
        // that row, populate a cell in the backwards state table: the row number
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1856
        // of that cell is the number of the column we were scanning (plus the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1857
        // offset that locates this sub-table), and the column number of that cell
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1858
        // is the column number of the nonzero cell we just found.  This cell is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1859
        // populated with its own column number (adjusted according to the actual
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1860
        // location of the sub-table).  This process will produce a state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1861
        // whose behavior is the same as looking up successive pairs of characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1862
        // in an array of Booleans to determine whether there is a break.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1863
        int numRows = stateTable.length / numCategories;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1864
        for (int column = 0; column < numCategories; column++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1865
            for (int row = 0; row < numRows; row++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1866
                int nextRow = lookupState(row, column);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1867
                if (nextRow != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1868
                    for (int nextColumn = 0; nextColumn < numCategories; nextColumn++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1869
                        int cellValue = lookupState(nextRow, nextColumn);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1870
                        if (cellValue != 0) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1871
                            state = tempStateTable.elementAt(nextColumn +
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1872
                                            backTableOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1873
                            state[column] = (short)(column + backTableOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1874
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1875
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1876
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1877
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1878
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1879
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1880
        // if the user specified some backward-iteration rules with the ! token,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1881
        // we have to merge the resulting state table with the auto-generated one
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1882
        // above.  First copy the populated cells from row 1 over the populated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1883
        // cells in the auto-generated table.  Then copy values from row 1 of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1884
        // auto-generated table into all of the the unpopulated cells of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1885
        // rule-based table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1886
        if (backTableOffset > 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1887
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1888
            // for every row in the auto-generated sub-table, if a cell is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1889
            // populated that is also populated in row 1 of the rule-based
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1890
            // sub-table, copy the value from row 1 over the value in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1891
            // auto-generated sub-table
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1892
            state = tempStateTable.elementAt(1);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1893
            for (int i = backTableOffset - 1; i < tempStateTable.size(); i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1894
                short[] state2 = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1895
                for (int j = 0; j < numCategories; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1896
                    if (state[j] != 0 && state2[j] != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1897
                        state2[j] = state[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1898
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1899
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1900
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1901
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1902
            // now, for every row in the rule-based sub-table that is not
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1903
            // an end state, fill in all unpopulated cells with the values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1904
            // of the corresponding cells in the first row of the auto-
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1905
            // generated sub-table.
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1906
            state = tempStateTable.elementAt(backTableOffset - 1);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1907
            for (int i = 1; i < backTableOffset - 1; i++) {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 5506
diff changeset
  1908
                short[] state2 = tempStateTable.elementAt(i);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1909
                if ((state2[numCategories] & END_STATE_FLAG) == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1910
                    for (int j = 0; j < numCategories; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1911
                        if (state2[j] == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1912
                            state2[j] = state[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1913
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1914
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1915
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1916
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1917
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1918
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1919
        // finally, clean everything up and copy it into the actual BreakIterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1920
        // by calling finishBuildingStateTable()
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1921
        finishBuildingStateTable(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1922
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1923
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1924
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1925
     * Given a current state and a character category, looks up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1926
     * next state to transition to in the state table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1927
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1928
    protected int lookupState(int state, int category) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1929
        return stateTable[state * numCategories + category];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1930
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1931
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1932
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1933
     * Throws an IllegalArgumentException representing a syntax error in the rule
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1934
     * description.  The exception's message contains some debugging information.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1935
     * @param message A message describing the problem
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1936
     * @param position The position in the description where the problem was
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1937
     * discovered
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1938
     * @param context The string containing the error
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1939
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1940
    protected void error(String message, int position, String context) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1941
        throw new IllegalArgumentException("Parse error at position (" + position + "): " + message + "\n" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1942
                context.substring(0, position) + " -here- " + context.substring(position));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1943
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1944
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1945
    void makeFile(String filename) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1946
        writeTables(filename);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1947
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1948
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1949
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1950
     * Magic number for the BreakIterator data file format.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1951
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1952
    private static final byte[] LABEL = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1953
        (byte)'B', (byte)'I', (byte)'d', (byte)'a', (byte)'t', (byte)'a',
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1954
        (byte)'\0'
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1955
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1956
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1957
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1958
     * Version number of the dictionary that was read in.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1959
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1960
    private static final byte[] supportedVersion = { (byte)1 };
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1961
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1962
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1963
     * Header size in byte count
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1964
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1965
     private static final int HEADER_LENGTH = 36;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1966
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1967
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1968
     * Array length of indices for BMP characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1969
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1970
     private static final int BMP_INDICES_LENGTH = 512;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1971
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1972
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1973
     * Read datafile. The datafile's format is as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1974
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1975
     *   BreakIteratorData {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1976
     *       u1           magic[7];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1977
     *       u1           version;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1978
     *       u4           totalDataSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1979
     *       header_info  header;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1980
     *       body         value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1981
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1982
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1983
     * <code>totalDataSize</code> is the summation of the size of
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1984
     * <code>header_info</code> and <code>body</code> in byte count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1985
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1986
     * In <code>header</code>, each field except for checksum implies the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1987
     * length of each field. Since <code>BMPdataLength</code> is a fixed-length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1988
     *  data(512 entries), its length isn't included in <code>header</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1989
     * <code>checksum</code> is a CRC32 value of all in <code>body</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1990
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1991
     *   header_info {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1992
     *       u4           stateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1993
     *       u4           backwardsStateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1994
     *       u4           endStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1995
     *       u4           lookaheadStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1996
     *       u4           BMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1997
     *       u4           nonBMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1998
     *       u4           additionalDataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1999
     *       u8           checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2000
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2001
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2002
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2003
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2004
     * Finally, <code>BMPindices</code> and <code>BMPdata</code> are set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2005
     * <code>charCategoryTable</code>. <code>nonBMPdata</code> is set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2006
     * <code>supplementaryCharCategoryTable</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2007
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2008
     *   body {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2009
     *       u2           stateTable[stateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2010
     *       u2           backwardsStateTable[backwardsStateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2011
     *       u1           endStates[endStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2012
     *       u1           lookaheadStates[lookaheadStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2013
     *       u2           BMPindices[512];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2014
     *       u1           BMPdata[BMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2015
     *       u4           nonBMPdata[numNonBMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2016
     *       u1           additionalData[additionalDataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2017
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2018
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2019
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2020
    protected void writeTables(String datafile) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2021
        final String filename;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2022
        final String outputDir;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2023
        String tmpbuf = GenerateBreakIteratorData.getOutputDirectory();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2024
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2025
        if (tmpbuf.equals("")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2026
            filename = datafile;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2027
            outputDir = "";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2028
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2029
            char sep = File.separatorChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2030
            if (sep == '/') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2031
                outputDir = tmpbuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2032
            } else if (sep == '\\') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2033
                outputDir = tmpbuf.replaceAll("/", "\\\\");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2034
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2035
                outputDir = tmpbuf.replaceAll("/", String.valueOf(sep));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2036
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2037
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2038
            filename = outputDir + sep + datafile;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2039
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2040
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2041
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2042
            if (!outputDir.equals("")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2043
                new File(outputDir).mkdirs();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2044
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2045
            BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(filename));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2046
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2047
            byte[] BMPdata = charCategoryTable.getStringArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2048
            short[] BMPindices = charCategoryTable.getIndexArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2049
            int[] nonBMPdata = supplementaryCharCategoryTable.getArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2050
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2051
            if (BMPdata.length <= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2052
                throw new InternalError("Wrong BMP data length(" + BMPdata.length + ")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2053
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2054
            if (BMPindices.length != BMP_INDICES_LENGTH) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2055
                throw new InternalError("Wrong BMP indices length(" + BMPindices.length + ")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2056
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2057
            if (nonBMPdata.length <= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2058
                throw new InternalError("Wrong non-BMP data length(" + nonBMPdata.length + ")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2059
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2060
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2061
            int len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2062
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2063
            /* Compute checksum */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2064
            CRC32 crc32 = new CRC32();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2065
            len = stateTable.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2066
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2067
                crc32.update(stateTable[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2068
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2069
            len = backwardsStateTable.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2070
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2071
                crc32.update(backwardsStateTable[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2072
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2073
            crc32.update(toByteArray(endStates));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2074
            crc32.update(toByteArray(lookaheadStates));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2075
            for (int i = 0; i < BMP_INDICES_LENGTH; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2076
                crc32.update(BMPindices[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2077
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2078
            crc32.update(BMPdata);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2079
            len = nonBMPdata.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2080
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2081
                crc32.update(nonBMPdata[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2082
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2083
            if (additionalData != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2084
                len = additionalData.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2085
                for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2086
                    crc32.update(additionalData[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2087
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2088
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2089
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2090
            /* First, write magic, version, and totalDataSize. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2091
            len = HEADER_LENGTH +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2092
                  (stateTable.length + backwardsStateTable.length) * 2 +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2093
                  endStates.length + lookaheadStates.length + 1024 +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2094
                  BMPdata.length + nonBMPdata.length * 4 +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2095
                  ((additionalData == null) ? 0 : additionalData.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2096
            out.write(LABEL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2097
            out.write(supportedVersion);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2098
            out.write(toByteArray(len));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2099
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2100
            /* Write header_info. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2101
            out.write(toByteArray(stateTable.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2102
            out.write(toByteArray(backwardsStateTable.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2103
            out.write(toByteArray(endStates.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2104
            out.write(toByteArray(lookaheadStates.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2105
            out.write(toByteArray(BMPdata.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2106
            out.write(toByteArray(nonBMPdata.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2107
            if (additionalData == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2108
                out.write(toByteArray(0));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2109
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2110
                out.write(toByteArray(additionalData.length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2111
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2112
            out.write(toByteArray(crc32.getValue()));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2113
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2114
            /* Write stateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2115
            len = stateTable.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2116
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2117
                out.write(toByteArray(stateTable[i]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2118
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2119
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2120
            /* Write backwardsStateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2121
            len = backwardsStateTable.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2122
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2123
                out.write(toByteArray(backwardsStateTable[i]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2124
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2125
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2126
            /* Write endStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2127
            out.write(toByteArray(endStates));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2128
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2129
            /* Write lookaheadStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2130
            out.write(toByteArray(lookaheadStates));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2131
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2132
            for (int i = 0; i < BMP_INDICES_LENGTH; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2133
                out.write(toByteArray(BMPindices[i]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2134
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2135
            BMPindices = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2136
            out.write(BMPdata);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2137
            BMPdata = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2138
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2139
            /* Write a category table for non-BMP characters. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2140
            len = nonBMPdata.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2141
            for (int i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2142
                out.write(toByteArray(nonBMPdata[i]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2143
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2144
            nonBMPdata = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2145
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2146
            /* Write additional data */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2147
            if (additionalData != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2148
                out.write(additionalData);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2149
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2150
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2151
            out.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2152
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2153
        catch (Exception e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2154
            throw new InternalError(e.toString());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2155
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2156
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2157
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2158
    byte[] toByteArray(short val) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2159
        byte[] buf = new byte[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2160
        buf[0] = (byte)((val>>>8) & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2161
        buf[1] = (byte)(val & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2162
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2163
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2164
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2165
    byte[] toByteArray(int val) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2166
        byte[] buf = new byte[4];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2167
        buf[0] = (byte)((val>>>24) & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2168
        buf[1] = (byte)((val>>>16) & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2169
        buf[2] = (byte)((val>>>8) & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2170
        buf[3] = (byte)(val & 0xFF);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2171
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2172
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2173
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2174
    byte[] toByteArray(long val) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2175
        byte[] buf = new byte[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2176
        buf[0] = (byte)((val>>>56) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2177
        buf[1] = (byte)((val>>>48) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2178
        buf[2] = (byte)((val>>>40) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2179
        buf[3] = (byte)((val>>>32) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2180
        buf[4] = (byte)((val>>>24) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2181
        buf[5] = (byte)((val>>>16) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2182
        buf[6] = (byte)((val>>>8) & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2183
        buf[7] = (byte)(val & 0xff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2184
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2185
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2186
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2187
    byte[] toByteArray(boolean[] data) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2188
        byte[] buf = new byte[data.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2189
        for (int i = 0; i < data.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2190
            buf[i] = data[i] ? (byte)1 : (byte)0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2191
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2192
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2193
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2194
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2195
    void setAdditionalData(byte[] data) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2196
        additionalData = data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2197
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2198
}