jdk/src/java.base/share/classes/sun/util/locale/provider/DictionaryBasedBreakIterator.java
author chegar
Sun, 17 Aug 2014 15:54:13 +0100
changeset 25859 3317bb8137f4
parent 25522 jdk/src/share/classes/sun/util/locale/provider/DictionaryBasedBreakIterator.java@10d789df41bb
child 36511 9d0388c6b336
permissions -rw-r--r--
8054834: Modular Source Code Reviewed-by: alanb, chegar, ihse, mduigou Contributed-by: alan.bateman@oracle.com, alex.buckley@oracle.com, chris.hegarty@oracle.com, erik.joelsson@oracle.com, jonathan.gibbons@oracle.com, karen.kinnear@oracle.com, magnus.ihse.bursie@oracle.com, mandy.chung@oracle.com, mark.reinhold@oracle.com, paul.sandoz@oracle.com
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
     2
 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 715
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    41
package sun.util.locale.provider;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    43
import java.io.IOException;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    44
import java.text.CharacterIterator;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    45
import java.util.ArrayList;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    46
import java.util.List;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
import java.util.Stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * to further subdivide ranges of text beyond what is possible using just the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * state-table-based algorithm.  This is necessary, for example, to handle
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * word and line breaking in Thai, which doesn't use spaces between words.  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * state-table-based algorithm used by RuleBasedBreakIterator is used to divide
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * up text as far as possible, and then contiguous ranges of letters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * repeatedly compared against a list of known words (i.e., the dictionary)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * to divide them up into words.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * DictionaryBasedBreakIterator uses the same rule language as RuleBasedBreakIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * but adds one more special substitution name: <dictionary>.  This substitution
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * name is used to identify characters in words in the dictionary.  The idea is that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * if the iterator passes over a chunk of text that includes two or more characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * in a row that are included in <dictionary>, it goes back through that range and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * derives additional break positions (if possible) using the dictionary.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * DictionaryBasedBreakIterator is also constructed with the filename of a dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * file.  It follows a prescribed search path to locate the dictionary (right now,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * it looks for it in /com/ibm/text/resources in each directory in the classpath,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * and won't find it in JAR files, but this location is likely to change).  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * dictionary file is in a serialized binary format.  We have a very primitive (and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * slow) BuildDictionaryFile utility for creating dictionary files, but aren't
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * currently making it public.  Contact us for help.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
class DictionaryBasedBreakIterator extends RuleBasedBreakIterator {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
     * a list of known words that is used to divide up contiguous ranges of letters,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
     * stored in a compressed, indexed, format that offers fast access
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
    private BreakDictionary dictionary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
     * a list of flags indicating which character categories are contained in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
     * the dictionary file (this is used to determine which ranges of characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
     * to apply the dictionary to)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
    private boolean[] categoryFlags;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
     * a temporary hiding place for the number of dictionary characters in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
     * last range passed over by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
    private int dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
     * when a range of characters is divided up using the dictionary, the break
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
     * positions that are discovered are stored here, preventing us from having
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * to use either the dictionary or the state table again until the iterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     * leaves this range of text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    private int[] cachedBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
     * if cachedBreakPositions is not null, this indicates which item in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     * cache the current iteration position refers to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    private int positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     * Constructs a DictionaryBasedBreakIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     * @param description Same as the description parameter on RuleBasedBreakIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     * except for the special meaning of "<dictionary>".  This parameter is just
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * passed through to RuleBasedBreakIterator's constructor.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     * @param dictionaryFilename The filename of the dictionary file to use
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   116
    DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
                                        throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
        super(dataFile);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
        byte[] tmp = super.getAdditionalData();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
        if (tmp != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
            prepareCategoryFlags(tmp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
            super.setAdditionalData(null);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
        dictionary = new BreakDictionary(dictionaryFile);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
    private void prepareCategoryFlags(byte[] data) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
        categoryFlags = new boolean[data.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
        for (int i = 0; i < data.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
            categoryFlags[i] = (data[i] == (byte)1) ? true : false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   134
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
        super.setText(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
     * Sets the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
     * (i.e., the CharacterIterator's starting offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
     * @return The offset of the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   147
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
    public int first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
        return super.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
     * Sets the current iteration position to the end of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
     * (i.e., the CharacterIterator's ending offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     * @return The text's past-the-end offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   160
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    public int last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
        cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
        dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
        return super.last();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
     * Advances the iterator one step backwards.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
     * @return The position of the last boundary position before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
     * current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   173
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
        // if we have cached break positions and we're still in the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
        // covered by them, just move one step backward in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        if (cachedBreakPositions != null && positionInCache > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        // otherwise, dump the cache and use the inherited previous() method to move
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        // backward.  This may fill up the cache with new break positions, in which
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
        // case we have to mark our position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
            int result = super.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
            if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
                positionInCache = cachedBreakPositions.length - 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
            return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
     * Sets the current iteration position to the last boundary position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
     * before the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
     * @param offset The position to begin searching from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
     * @return The position of the last boundary before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   204
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
    public int preceding(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
        // if we have no cached break positions, or "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        // range covered by the cache, we can just call the inherited routine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
        // (which will eventually call other routines in this class that may
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
        // refresh the cache)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        if (cachedBreakPositions == null || offset <= cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
                offset > cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
            return super.preceding(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
        // on the other hand, if "offset" is within the range covered by the cache,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
        // then all we have to do is search the cache for the last break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        // before "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
                   && offset > cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
            --positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
     * Sets the current iteration position to the first boundary position after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
     * the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
     * @param offset The position to begin searching forward from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
     * @return The position of the first boundary after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   240
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    public int following(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
        // if we have no cached break positions, or if "offset" is outside the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
        // range covered by the cache, then dump the cache and call our
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
        // inherited following() method.  This will call other methods in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
        // class that may refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
        if (cachedBreakPositions == null || offset < cachedBreakPositions[0] ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
                offset >= cachedBreakPositions[cachedBreakPositions.length - 1]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
            cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
            return super.following(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
        // on the other hand, if "offset" is within the range covered by the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
        // cache, then just search the cache for the first break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
        // after "offset"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
            positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
            while (positionInCache < cachedBreakPositions.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
                   && offset >= cachedBreakPositions[positionInCache]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
                ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
            return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
     * This is the implementation function for next().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   272
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
    protected int handleNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
        // if there are no cached break positions, or if we've just moved
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
        // off the end of the range covered by the cache, we have to dump
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
        // and possibly regenerate the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
        if (cachedBreakPositions == null ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            positionInCache == cachedBreakPositions.length - 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
            // start by using the inherited handleNext() to find a tentative return
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
            // value.   dictionaryCharCount tells us how many dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            // we passed over on our way to the tentative return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            int startPos = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            dictionaryCharCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
            int result = super.handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
            // if we passed over more than one dictionary character, then we use
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            // divideUpDictionaryRange() to regenerate the cached break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
            // for the new range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            if (dictionaryCharCount > 1 && result - startPos > 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
                divideUpDictionaryRange(startPos, result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
            // otherwise, the value we got back from the inherited fuction
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            // is our return value, and we can dump the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
                cachedBreakPositions = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
                return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
        // if the cache of break positions has been regenerated (or existed all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
        // along), then just advance to the next break position in the cache
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
        // and return it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
        if (cachedBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
            ++positionInCache;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
            text.setIndex(cachedBreakPositions[positionInCache]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
            return cachedBreakPositions[positionInCache];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        return -9999;   // SHOULD NEVER GET HERE!
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
     * Looks up a character category for a character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   318
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
    protected int lookupCategory(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
        // this override of lookupCategory() exists only to keep track of whether we've
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
        // passed over any dictionary characters.  It calls the inherited lookupCategory()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
        // to do the real work, and then checks whether its return value is one of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
        // categories represented in the dictionary.  If it is, bump the dictionary-
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
        // character count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
        int result = super.lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
        if (result != RuleBasedBreakIterator.IGNORE && categoryFlags[result]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
            ++dictionaryCharCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
     * This is the function that actually implements the dictionary-based
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
     * algorithm.  Given the endpoints of a range of text, it uses the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
     * dictionary to determine the positions of any boundaries in this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
     * range.  It stores all the boundary positions it discovers in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
     * cachedBreakPositions so that we only have to do this work once
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
     * for each time we enter the range.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   340
    @SuppressWarnings("unchecked")
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
    private void divideUpDictionaryRange(int startPos, int endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
        // the range we're dividing may begin or end with non-dictionary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
        // (i.e., for line breaking, we may have leading or trailing punctuation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
        // that needs to be kept with the word).  Seek from the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
        // range to the first dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
        text.setIndex(startPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        int category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        while (category == IGNORE || !categoryFlags[category]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
        // initialize.  We maintain two stacks: currentBreakPositions contains
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        // the list of break positions that will be returned if we successfully
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
        // finish traversing the whole range now.  possibleBreakPositions lists
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        // all other possible word ends we've passed along the way.  (Whenever
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
        // we reach an error [a sequence of characters that can't begin any word
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        // in the dictionary], we back up, possibly delete some breaks from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
        // currentBreakPositions, move a break from possibleBreakPositions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
        // to currentBreakPositions, and start over from there.  This process
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        // continues in this way until we either successfully make it all the way
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
        // across the range, or exhaust all of our combinations of break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        // positions.)
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   367
        Stack<Integer> currentBreakPositions = new Stack<>();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   368
        Stack<Integer> possibleBreakPositions = new Stack<>();
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   369
        List<Integer> wrongBreakPositions = new ArrayList<>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
        // the dictionary is implemented as a trie, which is treated as a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        // machine.  -1 represents the end of a legal word.  Every word in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
        // dictionary is represented by a path from the root node to -1.  A path
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        // that ends in state 0 is an illegal combination of characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        int state = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
        // these two variables are used for error handling.  We keep track of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
        // farthest we've gotten through the range being divided, and the combination
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        // of breaks that got us that far.  If we use up all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
        // combinations, the text contains an error or a word that's not in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
        // dictionary.  In this case, we "bless" the break positions that got us the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        // farthest as real break positions, and then start over from scratch with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
        // the character where the error occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
        int farthestEndPoint = text.getIndex();
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   385
        Stack<Integer> bestBreakPositions = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
        // initialize (we always exit the loop with a break statement)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
        c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
            // if we can transition to state "-1" from our current state, we're
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
            // on the last character of a legal word.  Push that position onto
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
            // the possible-break-positions stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
            if (dictionary.getNextState(state, 0) == -1) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   395
                possibleBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
            // look up the new state to transition to in the dictionary
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
            state = dictionary.getNextStateFromCharacter(state, c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
            // the "end of word" state, then it was a non-dictionary character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
            // and we've successfully traversed the whole range.  Drop out
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
            // of the loop.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            if (state == -1) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   406
                currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
            // if the character we're sitting on causes us to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
            // the error state, or if we've gone off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
            // without transitioning to the "end of word" state, we've hit
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
            // an error...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
            else if (state == 0 || text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
                // if this is the farthest we've gotten, take note of it in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
                // case there's an error in the text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
                if (text.getIndex() > farthestEndPoint) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
                    farthestEndPoint = text.getIndex();
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   420
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   421
                    @SuppressWarnings("unchecked")
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   422
                    Stack<Integer> currentBreakPositionsCopy = (Stack<Integer>) currentBreakPositions.clone();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   423
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   424
                    bestBreakPositions = currentBreakPositionsCopy;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
                // wrongBreakPositions is a list of all break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
                // we've tried starting that didn't allow us to traverse
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
                // all the way through the text.  Every time we pop a
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   430
                // break position off of currentBreakPositions, we put it
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                // into wrongBreakPositions to avoid trying it again later.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                // If we make it to this spot, we're either going to back
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
                // up to a break in possibleBreakPositions and try starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
                // over from there, or we've exhausted all possible break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                // positions and are going to do the fallback procedure.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                // This loop prevents us from messing with anything in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
                // possibleBreakPositions that didn't work as a starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
                // point the last time we tried it (this is to prevent a bunch of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
                // repetitive checks from slowing down some extreme cases)
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   440
                while (!possibleBreakPositions.isEmpty()
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   441
                        && wrongBreakPositions.contains(possibleBreakPositions.peek())) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
                    possibleBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
                // if we've used up all possible break-position combinations, there's
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                // an error or an unknown word in the text.  In this case, we start
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                // over, treating the farthest character we've reached as the beginning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                // of the range, and "blessing" the break positions that got us that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                // far as real break positions
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                if (possibleBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                    if (bestBreakPositions != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                        currentBreakPositions = bestBreakPositions;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                        if (farthestEndPoint < endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
                            text.setIndex(farthestEndPoint + 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                        if ((currentBreakPositions.size() == 0 ||
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   462
                             currentBreakPositions.peek().intValue() != text.getIndex())
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
                            && text.getIndex() != startPos) {
25522
10d789df41bb 8049892: Replace uses of 'new Integer()' with appropriate alternative across core classes
prr
parents: 13583
diff changeset
   464
                            currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
                        getNext();
25522
10d789df41bb 8049892: Replace uses of 'new Integer()' with appropriate alternative across core classes
prr
parents: 13583
diff changeset
   467
                        currentBreakPositions.push(text.getIndex());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
                // if we still have more break positions we can try, then promote the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                // last break in possibleBreakPositions into currentBreakPositions,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
                // and get rid of all entries in currentBreakPositions that come after
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
                // it.  Then back up to that position and start over from there (i.e.,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
                // treat that position as the beginning of a new word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
                else {
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   477
                    Integer temp = possibleBreakPositions.pop();
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   478
                    Integer temp2 = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                    while (!currentBreakPositions.isEmpty() && temp.intValue() <
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   480
                           currentBreakPositions.peek().intValue()) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
                        temp2 = currentBreakPositions.pop();
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   482
                        wrongBreakPositions.add(temp2);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
                    currentBreakPositions.push(temp);
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   485
                    text.setIndex(currentBreakPositions.peek().intValue());
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
                // re-sync "c" for the next go-round, and drop out of the loop if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
                // we've made it off the end of the range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
                c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
                if (text.getIndex() >= endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
            // if we didn't hit any exceptional conditions on this last iteration,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
            // just advance to the next character and loop
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
                c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
        // dump the last break position in the list, and replace it with the actual
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
        // end of the range (which may be the same character, or may be further on
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        // because the range actually ended with non-dictionary characters we want to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
        // keep with the word)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        if (!currentBreakPositions.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
            currentBreakPositions.pop();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        }
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   510
        currentBreakPositions.push(endPos);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
        // create a regular array to hold the break positions and copy
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
        // the break positions from the stack to the array (in addition,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
        // our starting position goes into this array as a break position).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
        // This array becomes the cache of break positions used by next()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
        // and previous(), so this is where we actually refresh the cache.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
        cachedBreakPositions = new int[currentBreakPositions.size() + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
        cachedBreakPositions[0] = startPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
        for (int i = 0; i < currentBreakPositions.size(); i++) {
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 5506
diff changeset
   521
            cachedBreakPositions[i + 1] = currentBreakPositions.elementAt(i).intValue();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
        positionInCache = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
}