jdk/src/share/classes/java/text/DictionaryBasedBreakIterator.java
author xdono
Wed, 02 Jul 2008 12:55:45 -0700
changeset 715 f16baef3a20e
parent 438 2ae294e4518c
child 5506 202f599c92aa
permissions -rw-r--r--
6719955: Update copyright year Summary: Update copyright year for files that have been modified in 2008 Reviewed-by: ohair, tbell
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
715
f16baef3a20e 6719955: Update copyright year
xdono
parents: 438
diff changeset
     2
 * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Sun designates this
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
     9
 * by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    21
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    22
 * CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    23
 * have any questions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
package java.text;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
import java.util.Vector;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
import java.util.Stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
import java.util.Hashtable;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
import java.text.CharacterIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
import java.io.InputStream;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
import java.io.IOException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * to further subdivide ranges of text beyond what is possible using just the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * state-table-based algorithm.  This is necessary, for example, to handle
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * word and line breaking in Thai, which doesn't use spaces between words.  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * state-table-based algorithm used by RuleBasedBreakIterator is used to divide
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * up text as far as possible, and then contiguous ranges of letters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * repeatedly compared against a list of known words (i.e., the dictionary)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * to divide them up into words.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * but adds one more special substitution name: <dictionary>.  This substitution
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * name is used to identify characters in words in the dictionary.  The idea is that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * if the iterator passes over a chunk of text that includes two or more characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * in a row that are included in <dictionary>, it goes back through that range and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * derives additional break positions (if possible) using the dictionary.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * DictionaryBasedBreakIterator is also constructed with the filename of a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * file.  It follows a prescribed search path to locate the dictionary (right now,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * it looks for it in /com/ibm/text/resources in each directory in the classpath,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * and won't find it in JAR files, but this location is likely to change).  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * dictionary file is in a serialized binary format.  We have a very primitive (and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * slow) BuildDictionaryFile utility for creating dictionary files, but aren't
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 * currently making it public.  Contact us for help.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
     * a list of known words that is used to divide up contiguous ranges of letters,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
     * stored in a compressed, indexed, format that offers fast access
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
    private BreakDictionary dictionary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
     * a list of flags indicating which character categories are contained in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
     * the dictionary file (this is used to determine which ranges of characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
     * to apply the dictionary to)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    private boolean[] categoryFlags;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
     * a temporary hiding place for the number of dictionary characters in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     * last range passed over by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    private int dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
     * when a range of characters is divided up using the dictionary, the break
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * positions that are discovered are stored here, preventing us from having
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     * to use either the dictionary or the state table again until the iterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     * leaves this range of text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
    private int[] cachedBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     * if cachedBreakPositions is not null, this indicates which item in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     * cache the current iteration position refers to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
    private int positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     * Constructs a DictionaryBasedBreakIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     * @param description Same as the description parameter on RuleBasedBreakIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * except for the special meaning of "<dictionary>".  This parameter is just
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     * passed through to RuleBasedBreakIterator's constructor.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     * @param dictionaryFilename The filename of the dictionary file to use
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
    public DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
                                        throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
        super(dataFile);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
        byte[] tmp = super.getAdditionalData();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
        if (tmp != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
            prepareCategoryFlags(tmp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
            super.setAdditionalData(null);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
        dictionary = new BreakDictionary(dictionaryFile);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
    private void prepareCategoryFlags(byte[] data) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
        categoryFlags = new boolean[data.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
        for (int i = 0; i < data.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
            categoryFlags[i] = (data[i] == (byte)1) ? true : false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
        super.setText(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
     * Sets the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
     * (i.e., the CharacterIterator's starting offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
     * @return The offset of the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    public int first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
        return super.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
     * Sets the current iteration position to the end of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
     * (i.e., the CharacterIterator's ending offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
     * @return The text's past-the-end offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
    public int last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
        return super.last();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
     * Advances the iterator one step backwards.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
     * @return The position of the last boundary position before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
     * current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        // if we have cached break positions and we're still in the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        // covered by them, just move one step backward in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
        if (cachedBreakPositions != null && positionInCache > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        // otherwise, dump the cache and use the inherited previous() method to move
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        // backward.  This may fill up the cache with new break positions, in which
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        // case we have to mark our position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
            int result = super.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
            if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
                positionInCache = cachedBreakPositions.length - 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
            return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
     * Sets the current iteration position to the last boundary position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
     * before the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
     * @param offset The position to begin searching from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
     * @return The position of the last boundary before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
    public int preceding(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
        // if we have no cached break positions, or "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        // range covered by the cache, we can just call the inherited routine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
        // (which will eventually call other routines in this class that may
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
        // refresh the cache)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
        if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
                offset > cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
            return super.preceding(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
        // on the other hand, if "offset" is within the range covered by the cache,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
        // then all we have to do is search the cache for the last break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        // before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
                   && offset > cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
     * Sets the current iteration position to the first boundary position after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
     * the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
     * @param offset The position to begin searching forward from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
     * @return The position of the first boundary after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
    public int following(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        // if we have no cached break positions, or if "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
        // range covered by the cache, then dump the cache and call our
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
        // inherited following() method.  This will call other methods in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        // class that may refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
        if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
                offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
            return super.following(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
        // on the other hand, if "offset" is within the range covered by the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
        // cache, then just search the cache for the first break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
        // after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
                   && offset >= cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
     * This is the implementation function for next().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
    protected int handleNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
        // if there are no cached break positions, or if we've just moved
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
        // off the end of the range covered by the cache, we have to dump
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
        // and possibly regenerate the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
        if (cachedBreakPositions == null ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
            positionInCache == cachedBreakPositions.length - 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
            // start by using the inherited handleNext() to find a tentative return
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
            // value.   dictionaryCharCount tells us how many dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
            // we passed over on our way to the tentative return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
            int startPos = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
            int result = super.handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
            // if we passed over more than one dictionary character, then we use
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            // divideUpDictionaryRange() to regenerate the cached break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            // for the new range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            if (dictionaryCharCount > 1 && result - startPos > 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
                divideUpDictionaryRange(startPos, result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            // otherwise, the value we got back from the inherited fuction
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
            // is our return value, and we can dump the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
                cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
                return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
        // if the cache of break positions has been regenerated (or existed all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
        // along), then just advance to the next break position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
        // and return it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
        if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
            ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
        return -9999;   // SHOULD NEVER GET HERE!
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
     * Looks up a character category for a character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
    protected int lookupCategory(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
        // this override of lookupCategory() exists only to keep track of whether we've
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        // passed over any dictionary characters.  It calls the inherited lookupCategory()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
        // to do the real work, and then checks whether its return value is one of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        // categories represented in the dictionary.  If it is, bump the dictionary-
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
        // character count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
        int result = super.lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
        if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
            ++dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
     * This is the function that actually implements the dictionary-based
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
     * algorithm.  Given the endpoints of a range of text, it uses the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
     * dictionary to determine the positions of any boundaries in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
     * range.  It stores all the boundary positions it discovers in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
     * cachedBreakPositions so that we only have to do this work once
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
     * for each time we enter the range.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
    private void divideUpDictionaryRange(int startPos, int endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
        // the range we're dividing may begin or end with non-dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        // (i.e., for line breaking, we may have leading or trailing punctuation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        // that needs to be kept with the word).  Seek from the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
        // range to the first dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
        text.setIndex(startPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
        int category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
        while (category == IGNORE || !categoryFlags[category]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
            c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
        // initialize.  We maintain two stacks: currentBreakPositions contains
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        // the list of break positions that will be returned if we successfully
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        // finish traversing the whole range now.  possibleBreakPositions lists
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        // all other possible word ends we've passed along the way.  (Whenever
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
        // we reach an error [a sequence of characters that can't begin any word
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
        // in the dictionary], we back up, possibly delete some breaks from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        // currentBreakPositions, move a break from possibleBreakPositions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        // to currentBreakPositions, and start over from there.  This process
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
        // continues in this way until we either successfully make it all the way
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        // across the range, or exhaust all of our combinations of break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
        // positions.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        Stack currentBreakPositions = new Stack();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
        Stack possibleBreakPositions = new Stack();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        Vector wrongBreakPositions = new Vector();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
        // the dictionary is implemented as a trie, which is treated as a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        // machine.  -1 represents the end of a legal word.  Every word in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
        // dictionary is represented by a path from the root node to -1.  A path
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        // that ends in state 0 is an illegal combination of characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
        int state = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
        // these two variables are used for error handling.  We keep track of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
        // farthest we've gotten through the range being divided, and the combination
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
        // of breaks that got us that far.  If we use up all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        // combinations, the text contains an error or a word that's not in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
        // dictionary.  In this case, we "bless" the break positions that got us the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        // farthest as real break positions, and then start over from scratch with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        // the character where the error occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
        int farthestEndPoint = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
        Stack bestBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        // initialize (we always exit the loop with a break statement)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
        c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
            // if we can transition to state "-1" from our current state, we're
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
            // on the last character of a legal word.  Push that position onto
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
            // the possible-break-positions stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
            if (dictionary.getNextState(state, 0) == -1) {
438
2ae294e4518c 6613529: Avoid duplicate object creation within JDK packages
dav
parents: 2
diff changeset
   387
                possibleBreakPositions.push(Integer.valueOf(text.getIndex()));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
            // look up the new state to transition to in the dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
            state = dictionary.getNextStateFromCharacter(state, c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
            // the "end of word" state, then it was a non-dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
            // and we've successfully traversed the whole range.  Drop out
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
            // of the loop.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
            if (state == -1) {
438
2ae294e4518c 6613529: Avoid duplicate object creation within JDK packages
dav
parents: 2
diff changeset
   398
                currentBreakPositions.push(Integer.valueOf(text.getIndex()));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
            // the error state, or if we've gone off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
            // without transitioning to the "end of word" state, we've hit
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            // an error...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
            else if (state == 0 || text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
                // if this is the farthest we've gotten, take note of it in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
                // case there's an error in the text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
                if (text.getIndex() > farthestEndPoint) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
                    farthestEndPoint = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
                    bestBreakPositions = (Stack)(currentBreakPositions.clone());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
                // wrongBreakPositions is a list of all break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
                // we've tried starting that didn't allow us to traverse
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
                // all the way through the text.  Every time we pop a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
                //break position off of currentBreakPositions, we put it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
                // into wrongBreakPositions to avoid trying it again later.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
                // If we make it to this spot, we're either going to back
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
                // up to a break in possibleBreakPositions and try starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
                // over from there, or we've exhausted all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
                // positions and are going to do the fallback procedure.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
                // This loop prevents us from messing with anything in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
                // possibleBreakPositions that didn't work as a starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
                // point the last time we tried it (this is to prevent a bunch of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
                // repetitive checks from slowing down some extreme cases)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
                Integer newStartingSpot = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
                while (!possibleBreakPositions.isEmpty() && wrongBreakPositions.contains(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
                            possibleBreakPositions.peek())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                    possibleBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
                // if we've used up all possible break-position combinations, there's
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                // an error or an unknown word in the text.  In this case, we start
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                // over, treating the farthest character we've reached as the beginning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
                // of the range, and "blessing" the break positions that got us that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
                // far as real break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
                if (possibleBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
                    if (bestBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
                        currentBreakPositions = bestBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
                        if (farthestEndPoint < endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
                            text.setIndex(farthestEndPoint + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                        if ((currentBreakPositions.size() == 0 ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                             ((Integer)(currentBreakPositions.peek())).intValue() != text.getIndex())
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                            && text.getIndex() != startPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                            currentBreakPositions.push(new Integer(text.getIndex()));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
                        getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
                        currentBreakPositions.push(new Integer(text.getIndex()));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
                // if we still have more break positions we can try, then promote the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                // last break in possibleBreakPositions into currentBreakPositions,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
                // and get rid of all entries in currentBreakPositions that come after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
                // it.  Then back up to that position and start over from there (i.e.,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
                // treat that position as the beginning of a new word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
                    Integer temp = (Integer)possibleBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
                    Object temp2 = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
                    while (!currentBreakPositions.isEmpty() && temp.intValue() <
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
                           ((Integer)currentBreakPositions.peek()).intValue()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
                        temp2 = currentBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
                        wrongBreakPositions.addElement(temp2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
                    currentBreakPositions.push(temp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
                    text.setIndex(((Integer)currentBreakPositions.peek()).intValue());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
                // re-sync "c" for the next go-round, and drop out of the loop if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
                // we've made it off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
                if (text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
            // if we didn't hit any exceptional conditions on this last iteration,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
            // just advance to the next character and loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
                c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
        // dump the last break position in the list, and replace it with the actual
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
        // end of the range (which may be the same character, or may be further on
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
        // because the range actually ended with non-dictionary characters we want to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        // keep with the word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        if (!currentBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
            currentBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
        }
438
2ae294e4518c 6613529: Avoid duplicate object creation within JDK packages
dav
parents: 2
diff changeset
   499
        currentBreakPositions.push(Integer.valueOf(endPos));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
        // create a regular array to hold the break positions and copy
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
        // the break positions from the stack to the array (in addition,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
        // our starting position goes into this array as a break position).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
        // This array becomes the cache of break positions used by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        // and previous(), so this is where we actually refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
        cachedBreakPositions = new int[currentBreakPositions.size() + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        cachedBreakPositions[0] = startPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        for (int i = 0; i < currentBreakPositions.size(); i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
            cachedBreakPositions[i + 1] = ((Integer)currentBreakPositions.elementAt(i)).intValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
}