src/java.base/share/classes/sun/text/DictionaryBasedBreakIterator.java
author erikj
Tue, 12 Sep 2017 19:03:39 +0200
changeset 47216 71c04702a3d5
parent 41750 jdk/src/java.base/share/classes/sun/text/DictionaryBasedBreakIterator.java@25ee1c2ee27e
permissions -rw-r--r--
8187443: Forest Consolidation: Move files to unified layout Reviewed-by: darcy, ihse
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
     2
 * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
    41
package sun.text;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    43
import java.text.CharacterIterator;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    44
import java.util.ArrayList;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    45
import java.util.List;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
import java.util.Stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * to further subdivide ranges of text beyond what is possible using just the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * state-table-based algorithm.  This is necessary, for example, to handle
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * word and line breaking in Thai, which doesn't use spaces between words.  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * state-table-based algorithm used by RuleBasedBreakIterator is used to divide
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * up text as far as possible, and then contiguous ranges of letters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * repeatedly compared against a list of known words (i.e., the dictionary)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * to divide them up into words.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * but adds one more special substitution name: {@code <dictionary>}.  This substitution
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * name is used to identify characters in words in the dictionary.  The idea is that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * if the iterator passes over a chunk of text that includes two or more characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * in a row that are included in {@code <dictionary>}, it goes back through that range and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * derives additional break positions (if possible) using the dictionary.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * DictionaryBasedBreakIterator is also constructed with the filename of a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * file.  It follows a prescribed search path to locate the dictionary (right now,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * it looks for it in /com/ibm/text/resources in each directory in the classpath,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * and won't find it in JAR files, but this location is likely to change).  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * dictionary file is in a serialized binary format.  We have a very primitive (and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * slow) BuildDictionaryFile utility for creating dictionary files, but aren't
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * currently making it public.  Contact us for help.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 */
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
    73
public class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
     * a list of known words that is used to divide up contiguous ranges of letters,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
     * stored in a compressed, indexed, format that offers fast access
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    private BreakDictionary dictionary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
     * a list of flags indicating which character categories are contained in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
     * the dictionary file (this is used to determine which ranges of characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
     * to apply the dictionary to)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
    private boolean[] categoryFlags;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
     * a temporary hiding place for the number of dictionary characters in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
     * last range passed over by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
    private int dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
     * when a range of characters is divided up using the dictionary, the break
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
     * positions that are discovered are stored here, preventing us from having
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
     * to use either the dictionary or the state table again until the iterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * leaves this range of text
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    private int[] cachedBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
     * if cachedBreakPositions is not null, this indicates which item in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
     * cache the current iteration position refers to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
    private int positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
     * Constructs a DictionaryBasedBreakIterator.
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   110
     *
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   111
     * @param ruleFile       the name of the rule data file
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   112
     * @param ruleData       the rule data loaded from the rule data file
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   113
     * @param dictionaryFile the name of the dictionary file
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   114
     * @param dictionaryData the dictionary data loaded from the dictionary file
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   115
     * @throws java.util.MissingResourceException if rule data or dictionary initialization failed
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     */
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   117
    public DictionaryBasedBreakIterator(String ruleFile, byte[] ruleData,
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   118
                                        String dictionaryFile, byte[] dictionaryData) {
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   119
        super(ruleFile, ruleData);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
        byte[] tmp = super.getAdditionalData();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
        if (tmp != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
            prepareCategoryFlags(tmp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
            super.setAdditionalData(null);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
        }
41750
25ee1c2ee27e 8165804: Revisit the way of loading BreakIterator rules/dictionaries
okutsu
parents: 36511
diff changeset
   125
        dictionary = new BreakDictionary(dictionaryFile, dictionaryData);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
    private void prepareCategoryFlags(byte[] data) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
        categoryFlags = new boolean[data.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
        for (int i = 0; i < data.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
            categoryFlags[i] = (data[i] == (byte)1) ? true : false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   135
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
        super.setText(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
     * Sets the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
     * (i.e., the CharacterIterator's starting offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
     * @return The offset of the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   148
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    public int first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
        return super.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
     * Sets the current iteration position to the end of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     * (i.e., the CharacterIterator's ending offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     * @return The text's past-the-end offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   161
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
    public int last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
        return super.last();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
     * Advances the iterator one step backwards.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
     * @return The position of the last boundary position before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
     * current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   174
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
        // if we have cached break positions and we're still in the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        // covered by them, just move one step backward in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        if (cachedBreakPositions != null && positionInCache > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        // otherwise, dump the cache and use the inherited previous() method to move
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
        // backward.  This may fill up the cache with new break positions, in which
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        // case we have to mark our position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
            int result = super.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
            if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
                positionInCache = cachedBreakPositions.length - 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
            return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
     * Sets the current iteration position to the last boundary position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
     * before the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
     * @param offset The position to begin searching from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
     * @return The position of the last boundary before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   205
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
    public int preceding(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        // if we have no cached break positions, or "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
        // range covered by the cache, we can just call the inherited routine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
        // (which will eventually call other routines in this class that may
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        // refresh the cache)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
        if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
                offset > cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
            return super.preceding(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
        // on the other hand, if "offset" is within the range covered by the cache,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        // then all we have to do is search the cache for the last break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
        // before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
                   && offset > cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
     * Sets the current iteration position to the first boundary position after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
     * the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
     * @param offset The position to begin searching forward from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
     * @return The position of the first boundary after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   241
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
    public int following(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
        // if we have no cached break positions, or if "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
        // range covered by the cache, then dump the cache and call our
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
        // inherited following() method.  This will call other methods in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
        // class that may refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
        if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
                offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
            return super.following(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
        // on the other hand, if "offset" is within the range covered by the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
        // cache, then just search the cache for the first break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
        // after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
                   && offset >= cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
     * This is the implementation function for next().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   273
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
    protected int handleNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
        // if there are no cached break positions, or if we've just moved
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
        // off the end of the range covered by the cache, we have to dump
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
        // and possibly regenerate the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
        if (cachedBreakPositions == null ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
            positionInCache == cachedBreakPositions.length - 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
            // start by using the inherited handleNext() to find a tentative return
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            // value.   dictionaryCharCount tells us how many dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            // we passed over on our way to the tentative return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            int startPos = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
            dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
            int result = super.handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            // if we passed over more than one dictionary character, then we use
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
            // divideUpDictionaryRange() to regenerate the cached break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            // for the new range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            if (dictionaryCharCount > 1 && result - startPos > 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
                divideUpDictionaryRange(startPos, result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            // otherwise, the value we got back from the inherited function
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
            // is our return value, and we can dump the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
                cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
                return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
        // if the cache of break positions has been regenerated (or existed all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
        // along), then just advance to the next break position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
        // and return it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
        if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
            ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
        return -9999;   // SHOULD NEVER GET HERE!
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
     * Looks up a character category for a character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   319
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
    protected int lookupCategory(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
        // this override of lookupCategory() exists only to keep track of whether we've
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
        // passed over any dictionary characters.  It calls the inherited lookupCategory()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
        // to do the real work, and then checks whether its return value is one of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
        // categories represented in the dictionary.  If it is, bump the dictionary-
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
        // character count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
        int result = super.lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
        if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
            ++dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
     * This is the function that actually implements the dictionary-based
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
     * algorithm.  Given the endpoints of a range of text, it uses the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
     * dictionary to determine the positions of any boundaries in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
     * range.  It stores all the boundary positions it discovers in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
     * cachedBreakPositions so that we only have to do this work once
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
     * for each time we enter the range.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   341
    @SuppressWarnings("unchecked")
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
    private void divideUpDictionaryRange(int startPos, int endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
        // the range we're dividing may begin or end with non-dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
        // (i.e., for line breaking, we may have leading or trailing punctuation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
        // that needs to be kept with the word).  Seek from the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
        // range to the first dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        text.setIndex(startPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        int category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
        while (category == IGNORE || !categoryFlags[category]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
            c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        // initialize.  We maintain two stacks: currentBreakPositions contains
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
        // the list of break positions that will be returned if we successfully
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        // finish traversing the whole range now.  possibleBreakPositions lists
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
        // all other possible word ends we've passed along the way.  (Whenever
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        // we reach an error [a sequence of characters that can't begin any word
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
        // in the dictionary], we back up, possibly delete some breaks from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
        // currentBreakPositions, move a break from possibleBreakPositions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        // to currentBreakPositions, and start over from there.  This process
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
        // continues in this way until we either successfully make it all the way
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        // across the range, or exhaust all of our combinations of break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
        // positions.)
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   368
        Stack<Integer> currentBreakPositions = new Stack<>();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   369
        Stack<Integer> possibleBreakPositions = new Stack<>();
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   370
        List<Integer> wrongBreakPositions = new ArrayList<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        // the dictionary is implemented as a trie, which is treated as a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
        // machine.  -1 represents the end of a legal word.  Every word in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        // dictionary is represented by a path from the root node to -1.  A path
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        // that ends in state 0 is an illegal combination of characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
        int state = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
        // these two variables are used for error handling.  We keep track of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        // farthest we've gotten through the range being divided, and the combination
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
        // of breaks that got us that far.  If we use up all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
        // combinations, the text contains an error or a word that's not in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        // dictionary.  In this case, we "bless" the break positions that got us the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
        // farthest as real break positions, and then start over from scratch with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
        // the character where the error occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
        int farthestEndPoint = text.getIndex();
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   386
        Stack<Integer> bestBreakPositions = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
        // initialize (we always exit the loop with a break statement)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
        c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
            // if we can transition to state "-1" from our current state, we're
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
            // on the last character of a legal word.  Push that position onto
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
            // the possible-break-positions stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
            if (dictionary.getNextState(state, 0) == -1) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   396
                possibleBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
            // look up the new state to transition to in the dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
            state = dictionary.getNextStateFromCharacter(state, c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
            // the "end of word" state, then it was a non-dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
            // and we've successfully traversed the whole range.  Drop out
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            // of the loop.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
            if (state == -1) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   407
                currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
            // the error state, or if we've gone off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
            // without transitioning to the "end of word" state, we've hit
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
            // an error...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
            else if (state == 0 || text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
                // if this is the farthest we've gotten, take note of it in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
                // case there's an error in the text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
                if (text.getIndex() > farthestEndPoint) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
                    farthestEndPoint = text.getIndex();
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   421
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   422
                    @SuppressWarnings("unchecked")
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   423
                    Stack<Integer> currentBreakPositionsCopy = (Stack<Integer>) currentBreakPositions.clone();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   424
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   425
                    bestBreakPositions = currentBreakPositionsCopy;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
                // wrongBreakPositions is a list of all break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
                // we've tried starting that didn't allow us to traverse
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
                // all the way through the text.  Every time we pop a
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   431
                // break position off of currentBreakPositions, we put it
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                // into wrongBreakPositions to avoid trying it again later.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
                // If we make it to this spot, we're either going to back
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
                // up to a break in possibleBreakPositions and try starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                // over from there, or we've exhausted all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                // positions and are going to do the fallback procedure.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
                // This loop prevents us from messing with anything in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
                // possibleBreakPositions that didn't work as a starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
                // point the last time we tried it (this is to prevent a bunch of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
                // repetitive checks from slowing down some extreme cases)
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   441
                while (!possibleBreakPositions.isEmpty()
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   442
                        && wrongBreakPositions.contains(possibleBreakPositions.peek())) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
                    possibleBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                // if we've used up all possible break-position combinations, there's
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                // an error or an unknown word in the text.  In this case, we start
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                // over, treating the farthest character we've reached as the beginning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                // of the range, and "blessing" the break positions that got us that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                // far as real break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                if (possibleBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                    if (bestBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                        currentBreakPositions = bestBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
                        if (farthestEndPoint < endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
                            text.setIndex(farthestEndPoint + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
                        if ((currentBreakPositions.size() == 0 ||
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   463
                             currentBreakPositions.peek().intValue() != text.getIndex())
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
                            && text.getIndex() != startPos) {
25522
10d789df41bb 8049892: Replace uses of 'new Integer()' with appropriate alternative across core classes
prr
parents: 13583
diff changeset
   465
                            currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
                        getNext();
25522
10d789df41bb 8049892: Replace uses of 'new Integer()' with appropriate alternative across core classes
prr
parents: 13583
diff changeset
   468
                        currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                // if we still have more break positions we can try, then promote the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
                // last break in possibleBreakPositions into currentBreakPositions,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
                // and get rid of all entries in currentBreakPositions that come after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
                // it.  Then back up to that position and start over from there (i.e.,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
                // treat that position as the beginning of a new word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
                else {
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   478
                    Integer temp = possibleBreakPositions.pop();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   479
                    Integer temp2 = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
                    while (!currentBreakPositions.isEmpty() && temp.intValue() <
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   481
                           currentBreakPositions.peek().intValue()) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
                        temp2 = currentBreakPositions.pop();
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   483
                        wrongBreakPositions.add(temp2);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
                    currentBreakPositions.push(temp);
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   486
                    text.setIndex(currentBreakPositions.peek().intValue());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
                // re-sync "c" for the next go-round, and drop out of the loop if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
                // we've made it off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
                c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
                if (text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
            // if we didn't hit any exceptional conditions on this last iteration,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
            // just advance to the next character and loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
                c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
        // dump the last break position in the list, and replace it with the actual
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        // end of the range (which may be the same character, or may be further on
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
        // because the range actually ended with non-dictionary characters we want to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        // keep with the word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
        if (!currentBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
            currentBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
        }
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   511
        currentBreakPositions.push(endPos);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
        // create a regular array to hold the break positions and copy
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
        // the break positions from the stack to the array (in addition,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
        // our starting position goes into this array as a break position).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
        // This array becomes the cache of break positions used by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
        // and previous(), so this is where we actually refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
        cachedBreakPositions = new int[currentBreakPositions.size() + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
        cachedBreakPositions[0] = startPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
        for (int i = 0; i < currentBreakPositions.size(); i++) {
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   522
            cachedBreakPositions[i + 1] = currentBreakPositions.elementAt(i).intValue();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
}