jdk/src/java.base/share/classes/sun/util/locale/provider/RuleBasedBreakIterator.java
author igerasim
Wed, 27 Aug 2014 22:08:19 +0400
changeset 26219 1a19360ff122
parent 25859 3317bb8137f4
child 36511 9d0388c6b336
permissions -rw-r--r--
8054714: Use StringJoiner where it makes the code cleaner Reviewed-by: psandoz, redestad
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
23010
6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013
lana
parents: 21278
diff changeset
     2
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    41
package sun.util.locale.provider;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
import java.io.BufferedInputStream;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
import java.io.IOException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
import java.security.AccessController;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
import java.security.PrivilegedActionException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
import java.security.PrivilegedExceptionAction;
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    48
import java.text.BreakIterator;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
import java.text.CharacterIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
import java.text.StringCharacterIterator;
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    51
import java.util.MissingResourceException;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
import sun.text.CompactByteArray;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
import sun.text.SupplementaryCharacterData;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * <p>There are two kinds of rules, which are separated by semicolons: <i>substitutions</i>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * and <i>regular expressions.</i></p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * <p>A substitution rule defines a name that can be used in place of an expression. It
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * consists of a name, which is a string of characters contained in angle brackets, an equals
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * sign, and an expression. (There can be no whitespace on either side of the equals sign.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * To keep its syntactic meaning intact, the expression must be enclosed in parentheses or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * square brackets. A substitution is visible after its definition, and is filled in using
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * simple textual substitution. Substitution definitions can contain other substitutions, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * long as those substitutions have been defined first. Substitutions are generally used to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * make the regular expressions (which can get quite complex) shorter and easier to read.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * They typically define either character categories or commonly-used subexpressions.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * <p>There is one special substitution.&nbsp; If the description defines a substitution
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * called &quot;&lt;ignore&gt;&quot;, the expression must be a [] expression, and the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 * expression defines a set of characters (the &quot;<em>ignore characters</em>&quot;) that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * will be transparent to the BreakIterator.&nbsp; A sequence of characters will break the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * same way it would if any ignore characters it contains are taken out.&nbsp; Break
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 * positions never occur before ignore characters.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
 * <p>A regular expression uses a subset of the normal Unix regular-expression syntax, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
 * defines a sequence of characters to be kept together. With one significant exception, the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
 * iterator uses a longest-possible-match algorithm when matching text to regular
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
 * expressions. The iterator also treats descriptions containing multiple regular expressions
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
 * as if they were ORed together (i.e., as if they were separated by |).</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
 * <p>The special characters recognized by the regular-expression parser are as follows:</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
 * <blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
 *   <table border="1" width="100%">
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
 *       <td width="6%">*</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
 *       <td width="94%">Specifies that the expression preceding the asterisk may occur any number
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
 *       of times (including not at all).</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
 *       <td width="6%">{}</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
 *       <td width="94%">Encloses a sequence of characters that is optional.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
 *       <td width="6%">()</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
 *       <td width="94%">Encloses a sequence of characters.&nbsp; If followed by *, the sequence
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
 *       repeats.&nbsp; Otherwise, the parentheses are just a grouping device and a way to delimit
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
 *       the ends of expressions containing |.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
 *       <td width="6%">|</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
 *       <td width="94%">Separates two alternative sequences of characters.&nbsp; Either one
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
 *       sequence or the other, but not both, matches this expression.&nbsp; The | character can
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
 *       only occur inside ().</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
 *       <td width="6%">.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
 *       <td width="94%">Matches any character.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
 *       <td width="6%">*?</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
 *       <td width="94%">Specifies a non-greedy asterisk.&nbsp; *? works the same way as *, except
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
 *       when there is overlap between the last group of characters in the expression preceding the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
 *       * and the first group of characters following the *.&nbsp; When there is this kind of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
 *       overlap, * will match the longest sequence of characters that match the expression before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
 *       the *, and *? will match the shortest sequence of characters matching the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
 *       before the *?.&nbsp; For example, if you have &quot;xxyxyyyxyxyxxyxyxyy&quot; in the text,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
 *       &quot;x[xy]*x&quot; will match through to the last x (i.e., &quot;<strong>xxyxyyyxyxyxxyxyx</strong>yy&quot;),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
 *       but &quot;x[xy]*?x&quot; will only match the first two xes (&quot;<strong>xx</strong>yxyyyxyxyxxyxyxyy&quot;).</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
 *       <td width="6%">[]</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
 *       <td width="94%">Specifies a group of alternative characters.&nbsp; A [] expression will
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
 *       match any single character that is specified in the [] expression.&nbsp; For more on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
 *       syntax of [] expressions, see below.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
 *       <td width="6%">/</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
 *       <td width="94%">Specifies where the break position should go if text matches this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
 *       expression.&nbsp; (e.g., &quot;[a-z]&#42;/[:Zs:]*[0-9]&quot; will match if the iterator sees a run
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
 *       of letters, followed by a run of whitespace, followed by a digit, but the break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
 *       will actually go before the whitespace).&nbsp; Expressions that don't contain / put the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
 *       break position at the end of the matching text.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
 *       <td width="6%">\</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
 *       <td width="94%">Escape character.&nbsp; The \ itself is ignored, but causes the next
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
 *       character to be treated as a literal character.&nbsp; This has no effect for many
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
 *       characters, but for the characters listed above, this deprives them of their special
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
 *       meaning.&nbsp; (There are no special escape sequences for Unicode characters, or tabs and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
 *       newlines; these are all handled by a higher-level protocol.&nbsp; In a Java string,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
 *       &quot;\n&quot; will be converted to a literal newline character by the time the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
 *       regular-expression parser sees it.&nbsp; Of course, this means that \ sequences that are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
 *       visible to the regexp parser must be written as \\ when inside a Java string.)&nbsp; All
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
 *       characters in the ASCII range except for letters, digits, and control characters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
 *       reserved characters to the parser and must be preceded by \ even if they currently don't
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
 *       mean anything.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
 *       <td width="6%">!</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
 *       <td width="94%">If ! appears at the beginning of a regular expression, it tells the regexp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
 *       parser that this expression specifies the backwards-iteration behavior of the iterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
 *       and not its normal iteration behavior.&nbsp; This is generally only used in situations
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
 *       where the automatically-generated backwards-iteration behavior doesn't produce
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
 *       satisfactory results and must be supplemented with extra client-specified rules.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
 *       <td width="6%"><em>(all others)</em></td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
 *       <td width="94%">All other characters are treated as literal characters, which must match
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
 *       the corresponding character(s) in the text exactly.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
 *   </table>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
 * </blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
 * <p>Within a [] expression, a number of other special characters can be used to specify
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
 * groups of characters:</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
 * <blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
 *   <table border="1" width="100%">
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
 *       <td width="6%">-</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
 *       <td width="94%">Specifies a range of matching characters.&nbsp; For example
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
 *       &quot;[a-p]&quot; matches all lowercase Latin letters from a to p (inclusive).&nbsp; The -
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
 *       sign specifies ranges of contiguous Unicode numeric values, not ranges of characters in a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
 *       language's alphabetical order: &quot;[a-z]&quot; doesn't include capital letters, nor does
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
 *       it include accented letters such as a-umlaut.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
 *       <td width="6%">::</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
 *       <td width="94%">A pair of colons containing a one- or two-letter code matches all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
 *       characters in the corresponding Unicode category.&nbsp; The two-letter codes are the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
 *       as the two-letter codes in the Unicode database (for example, &quot;[:Sc::Sm:]&quot;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
 *       matches all currency symbols and all math symbols).&nbsp; Specifying a one-letter code is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
 *       the same as specifying all two-letter codes that begin with that letter (for example,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
 *       &quot;[:L:]&quot; matches all letters, and is equivalent to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
 *       &quot;[:Lu::Ll::Lo::Lm::Lt:]&quot;).&nbsp; Anything other than a valid two-letter Unicode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
 *       category code or a single letter that begins a Unicode category code is illegal within
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
 *       colons.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
 *       <td width="6%">[]</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
 *       <td width="94%">[] expressions can nest.&nbsp; This has no effect, except when used in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
 *       conjunction with the ^ token.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
 *       <td width="6%">^</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
 *       <td width="94%">Excludes the character (or the characters in the [] expression) following
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
 *       it from the group of characters.&nbsp; For example, &quot;[a-z^p]&quot; matches all Latin
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
 *       lowercase letters except p.&nbsp; &quot;[:L:^[&#92;u4e00-&#92;u9fff]]&quot; matches all letters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
 *       except the Han ideographs.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
 *       <td width="6%"><em>(all others)</em></td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
 *       <td width="94%">All other characters are treated as literal characters.&nbsp; (For
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
 *       example, &quot;[aeiou]&quot; specifies just the letters a, e, i, o, and u.)</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
 *   </table>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
 * </blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
 * <p>For a more complete explanation, see <a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
 * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
 * &nbsp; For examples, see the resource data (which is annotated).</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
 * @author Richard Gillam
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
class RuleBasedBreakIterator extends BreakIterator {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
     * A token used as a character-category value to identify ignore characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
    protected static final byte IGNORE = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
     * The state number of the starting state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
    private static final short START_STATE = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
     * The state-transition value indicating "stop"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
    private static final short STOP_STATE = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
     * Magic number for the BreakIterator data file format.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
    static final byte[] LABEL = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        (byte)'B', (byte)'I', (byte)'d', (byte)'a', (byte)'t', (byte)'a',
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
        (byte)'\0'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
    static final int    LABEL_LENGTH = LABEL.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
     * Version number of the BreakIterator data file format that this class supports.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
    static final byte supportedVersion = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
     * Header size in byte count
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
    private static final int HEADER_LENGTH = 36;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
     * The length of the index array for BMP characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    private static final int BMP_INDICES_LENGTH = 512;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
     * Tables that index from character values to character category numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
    private CompactByteArray charCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    private SupplementaryCharacterData supplementaryCharCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
     * The table of state transitions used for forward iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
    private short[] stateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
     * The table of state transitions used to sync up the iterator with the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
     * text in backwards and random-access iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
    private short[] backwardsStateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
     * A list of flags indicating which states in the state table are accepting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
     * ("end") states
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
    private boolean[] endStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
     * A list of flags indicating which states in the state table are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
     * lookahead states (states which turn lookahead on and off)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
    private boolean[] lookaheadStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
     * A table for additional data. May be used by a subclass of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
     * RuleBasedBreakIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
    private byte[] additionalData = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
     * The number of character categories (and, thus, the number of columns in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
     * the state tables)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
    private int numCategories;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
     * The character iterator through which this BreakIterator accesses the text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
    private CharacterIterator text = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
     * A CRC32 value of all data in datafile
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
    private long checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
    // constructors
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
    /**
     * Creates a RuleBasedBreakIterator whose tables are loaded from the
     * given data file.
     *
     * @param datafile name of the data file handed to
     *        {@code readTables(String)}
     * @throws IOException propagated from {@code readTables}
     * @throws MissingResourceException propagated from {@code readTables}
     */
    RuleBasedBreakIterator(String datafile)
            throws IOException, MissingResourceException {
        this.readTables(datafile);
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
     * Read datafile. The datafile's format is as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
     *   BreakIteratorData {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
     *       u1           magic[7];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
     *       u1           version;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
     *       u4           totalDataSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
     *       header_info  header;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
     *       body         value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
     * <code>totalDataSize</code> is the summation of the size of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
     * <code>header_info</code> and <code>body</code> in byte count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
     * In <code>header</code>, each field except for checksum implies the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
     * length of each field. Since <code>BMPdataLength</code> is a fixed-length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
     *  data(512 entries), its length isn't included in <code>header</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
     * <code>checksum</code> is a CRC32 value of all in <code>body</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
     *   header_info {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
     *       u4           stateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
     *       u4           backwardsStateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
     *       u4           endStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
     *       u4           lookaheadStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
     *       u4           BMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
     *       u4           nonBMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
     *       u4           additionalDataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
     *       u8           checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
     * Finally, <code>BMPindices</code> and <code>BMPdata</code> are set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
     * <code>charCategoryTable</code>. <code>nonBMPdata</code> is set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
     * <code>supplementaryCharCategoryTable</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
     *   body {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
     *       u2           stateTable[stateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
     *       u2           backwardsStateTable[backwardsStateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
     *       u1           endStates[endStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
     *       u1           lookaheadStates[lookaheadStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
     *       u2           BMPindices[512];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
     *       u1           BMPdata[BMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
     *       u4           nonBMPdata[numNonBMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
     *       u1           additionalData[additionalDataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   372
    protected final void readTables(String datafile)
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        byte[] buffer = readFile(datafile);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
        /* Read header_info. */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   378
        int stateTableLength = getInt(buffer, 0);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   379
        int backwardsStateTableLength = getInt(buffer, 4);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   380
        int endStatesLength = getInt(buffer, 8);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   381
        int lookaheadStatesLength = getInt(buffer, 12);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   382
        int BMPdataLength = getInt(buffer, 16);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   383
        int nonBMPdataLength = getInt(buffer, 20);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   384
        int additionalDataLength = getInt(buffer, 24);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   385
        checksum = getLong(buffer, 28);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
        /* Read stateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
        stateTable = new short[stateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
        int offset = HEADER_LENGTH;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
        for (int i = 0; i < stateTableLength; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   391
           stateTable[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
        /* Read backwardsStateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
        backwardsStateTable = new short[backwardsStateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
        for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   397
           backwardsStateTable[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
        /* Read endStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
        endStates = new boolean[endStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
        for (int i = 0; i < endStatesLength; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
           endStates[i] = buffer[offset] == 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
        /* Read lookaheadStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
        lookaheadStates = new boolean[lookaheadStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
        for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
           lookaheadStates[i] = buffer[offset] == 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
        /* Read a category table and indices for BMP characters. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
        short[] temp1 = new short[BMP_INDICES_LENGTH];  // BMPindices
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
        for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   415
            temp1[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
        byte[] temp2 = new byte[BMPdataLength];  // BMPdata
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
        System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        offset += BMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
        charCategoryTable = new CompactByteArray(temp1, temp2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
        /* Read a category table for non-BMP characters. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
        int[] temp3 = new int[nonBMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
        for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   425
            temp3[i] = getInt(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
        supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
        /* Read additional data */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
        if (additionalDataLength > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
            additionalData = new byte[additionalDataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
            System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
        /* Set numCategories */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
        numCategories = stateTable.length / endStates.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
    protected byte[] readFile(final String datafile)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
        BufferedInputStream is;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
        try {
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 10419
diff changeset
   444
            is = AccessController.doPrivileged(
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 10419
diff changeset
   445
                new PrivilegedExceptionAction<BufferedInputStream>() {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   446
                    @Override
12848
da701d422d2c 7117230: clean up warnings in java.text
dbhole
parents: 10419
diff changeset
   447
                    public BufferedInputStream run() throws Exception {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                        return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + datafile));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
            );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
        catch (PrivilegedActionException e) {
10419
12c063b39232 7084245: Update usages of InternalError to use exception chaining
sherman
parents: 7668
diff changeset
   454
            throw new InternalError(e.toString(), e);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        int offset = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
        /* First, read magic, version, and header_info. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
        int len = LABEL_LENGTH + 5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
        byte[] buf = new byte[len];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
        if (is.read(buf) != len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
            throw new MissingResourceException("Wrong header length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
        /* Validate the magic number. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
        for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
            if (buf[offset] != LABEL[offset]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
                throw new MissingResourceException("Wrong magic number",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
                                                   datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        /* Validate the version number. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        if (buf[offset] != supportedVersion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
            throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        /* Read data: totalDataSize + 8(for checksum) */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   482
        len = getInt(buf, ++offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
        buf = new byte[len];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
        if (is.read(buf) != len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
            throw new MissingResourceException("Wrong data length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
        is.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
    byte[] getAdditionalData() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        return additionalData;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
    void setAdditionalData(byte[] b) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
        additionalData = b;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
    // boilerplate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
     * Clones this iterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
     * @return A newly-constructed RuleBasedBreakIterator with the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
     * behavior as this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   510
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
    public Object clone() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
        RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
        if (text != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
            result.text = (CharacterIterator) text.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
     * Returns true if both BreakIterators are of the same class, have the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
     * rules, and iterate over the same text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   523
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
    public boolean equals(Object that) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
            if (that == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
                return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
            RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
            if (checksum != other.checksum) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
                return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
            if (text == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
                return other.text == null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
                return text.equals(other.text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
        catch(ClassCastException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
            return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
     * Returns text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   548
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
    public String toString() {
26219
1a19360ff122 8054714: Use StringJoiner where it makes the code cleaner
igerasim
parents: 25859
diff changeset
   550
        return "[checksum=0x" + Long.toHexString(checksum) + ']';
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
     * Compute a hashcode for this BreakIterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
     * @return A hash code
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   557
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
    public int hashCode() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
        return (int)checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
    // BreakIterator overrides
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
     * Sets the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
     * (i.e., the CharacterIterator's starting offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
     * @return The offset of the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   571
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
    public int first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
        CharacterIterator t = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
        t.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
        return t.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
     * Sets the current iteration position to the end of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
     * (i.e., the CharacterIterator's ending offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
     * @return The text's past-the-end offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   584
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
    public int last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
        CharacterIterator t = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
        // I'm not sure why, but t.last() returns the offset of the last character,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
        // rather than the past-the-end offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
        t.setIndex(t.getEndIndex());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
        return t.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
     * Advances the iterator either forward or backward the specified number of steps.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
     * Negative values move backward, and positive values move forward.  This is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
     * equivalent to repeatedly calling next() or previous().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
     * @param n The number of steps to move.  The sign indicates the direction
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
     * (negative is backwards, and positive is forwards).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
     * @return The character offset of the boundary position n boundaries away from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
     * the current one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   603
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
    public int next(int n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
        int result = current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
        while (n > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
            --n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
        while (n < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
            result = previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
            ++n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
     * Advances the iterator to the next boundary position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
     * @return The position of the first boundary after this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   621
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
    public int next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
        return handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   626
    // Boundary most recently computed by previous() or following().  Those
    // methods resume scanning from it, instead of calling handlePrevious(),
    // when it lies strictly before the position they are searching from.
    // Holds BreakIterator.DONE when nothing is cached (initially, and after
    // setText() installs new text).
    private int cachedLastKnownBreak = BreakIterator.DONE;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   627
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
     * Advances the iterator backwards, to the last boundary preceding this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
     * @return The position of the last boundary position preceding this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   632
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
        // if we're already sitting at the beginning of the text, return DONE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
        if (current() == text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
            return BreakIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
        // set things up.  handlePrevious() will back us up to some valid
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
        // break position before the current position (we back our internal
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
        // iterator up one step to prevent handlePrevious() from returning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
        // the current position), but not necessarily the last one before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
        // where we started
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
        int start = current();
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   646
        int lastResult = cachedLastKnownBreak;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   647
        if (lastResult >= start || lastResult <= BreakIterator.DONE) {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   648
            getPrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   649
            lastResult = handlePrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   650
        } else {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   651
            //it might be better to check if handlePrevious() give us closer
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   652
            //safe value but handlePrevious() is slow too
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   653
            //So, this has to be done carefully
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   654
            text.setIndex(lastResult);
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   655
        }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
        int result = lastResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
        // iterate forward from the known break position until we pass our
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
        // starting point.  The last break position before the starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
        // point is our return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
        while (result != BreakIterator.DONE && result < start) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
            lastResult = result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
        // set the current iteration position to be the last break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
        // before where we started, and then return that value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
        text.setIndex(lastResult);
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   669
        cachedLastKnownBreak = lastResult;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
        return lastResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
     * Returns previous character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
    private int getPrevious() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
        char c2 = text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
        if (Character.isLowSurrogate(c2) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
            text.getIndex() > text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
            char c1 = text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
            if (Character.isHighSurrogate(c1)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                return Character.toCodePoint(c1, c2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
                text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
        return (int)c2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
     * Returns current character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
    int getCurrent() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        char c1 = text.current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        if (Character.isHighSurrogate(c1) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
            text.getIndex() < text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
            char c2 = text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
            text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
            if (Character.isLowSurrogate(c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
                return Character.toCodePoint(c1, c2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
        return (int)c1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
     * Returns the count of next character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
    private int getCurrentCodePointCount() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
        char c1 = text.current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
        if (Character.isHighSurrogate(c1) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
            text.getIndex() < text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
            char c2 = text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
            text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
            if (Character.isLowSurrogate(c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
                return 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
        return 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
     * Returns next character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
    int getNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
        int index = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        int endIndex = text.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
        if (index == endIndex ||
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   729
            (index += getCurrentCodePointCount()) >= endIndex) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
            return CharacterIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
        text.setIndex(index);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
        return getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
     * Returns the position of next character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
    private int getNextIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
        int index = text.getIndex() + getCurrentCodePointCount();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
        int endIndex = text.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
        if (index > endIndex) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
            return endIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
            return index;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
     * Throw IllegalArgumentException unless begin <= offset < end.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
    protected static final void checkOffset(int offset, CharacterIterator text) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
        if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
            throw new IllegalArgumentException("offset out of bounds");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
     * Sets the iterator to refer to the first boundary position following
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
     * the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
     * @offset The position from which to begin searching for a break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
     * @return The position of the first break after the current position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   764
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
    public int following(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
        // Set our internal iteration position (temporarily)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        // to the position passed in.  If this is the _beginning_ position,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
        // then we can just use next() to get our return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        text.setIndex(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
        if (offset == text.getBeginIndex()) {
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   775
            cachedLastKnownBreak = handleNext();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   776
            return cachedLastKnownBreak;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        // otherwise, we have to sync up first.  Use handlePrevious() to back
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        // us up to a known break position before the specified position (if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        // we can determine that the specified position is a break position,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
        // we don't back up at all).  This may or may not be the last break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
        // position at or before our starting position.  Advance forward
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
        // from here until we've passed the starting position.  The position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        // we stop on will be the first break position after the specified one.
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   786
        int result = cachedLastKnownBreak;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   787
        if (result >= offset || result <= BreakIterator.DONE) {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   788
            result = handlePrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   789
        } else {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   790
            //it might be better to check if handlePrevious() give us closer
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   791
            //safe value but handlePrevious() is slow too
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   792
            //So, this has to be done carefully
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   793
            text.setIndex(result);
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   794
        }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
        while (result != BreakIterator.DONE && result <= offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        }
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   798
        cachedLastKnownBreak = result;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
     * Sets the iterator to refer to the last boundary position before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
     * specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
     * @offset The position to begin searching for a break from.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
     * @return The position of the last boundary before the starting position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   808
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
    public int preceding(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
        // if we start by updating the current iteration position to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
        // position specified by the caller, we can just use previous()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
        // to carry out this operation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
        text.setIndex(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
        return previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
    /**
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 13583
diff changeset
   820
     * Returns true if the specified position is a boundary position.  As a side
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
     * effect, leaves the iterator pointing to the first boundary position at
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
     * or after "offset".
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
     * @param offset the offset to check.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
     * @return True if "offset" is a boundary position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   826
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
    public boolean isBoundary(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
        if (offset == text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
        // to check whether this is a boundary, we can use following() on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
        // position before the specified one and return true if the position we
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
        // get back is the one the user specified
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
            return following(offset - 1) == offset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
     * Returns the current iteration position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
     * @return The current iteration position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   846
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
    public int current() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
        return getText().getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
     * Return a CharacterIterator over the text being analyzed.  This version
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
     * of this method returns the actual CharacterIterator we're using internally.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
     * Changing the state of this iterator can have undefined consequences.  If
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
     * you need to change it, clone it first.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
     * @return An iterator over the text being analyzed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   858
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
    public CharacterIterator getText() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
        // The iterator is initialized pointing to no text at all, so if this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
        // function is called while we're in that state, we have to fudge an
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
        // iterator to return.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
        if (text == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
            text = new StringCharacterIterator("");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
        return text;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
     * Set the iterator to analyze a new piece of text.  This function resets
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
     * the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
     * @param newText An iterator over the text to analyze.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   874
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
        // Test iterator to see if we need to wrap it in a SafeCharIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
        // The correct behavior for CharacterIterators is to allow the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
        // position to be set to the endpoint of the iterator.  Many
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
        // CharacterIterators do not uphold this, so this is a workaround
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
        // to permit them to use this class.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
        int end = newText.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
        boolean goodIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
            newText.setIndex(end);  // some buggy iterators throw an exception here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
            goodIterator = newText.getIndex() == end;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
        catch(IllegalArgumentException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
            goodIterator = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
        if (goodIterator) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
            text = newText;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
            text = new SafeCharIterator(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
        text.first();
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   898
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   899
        cachedLastKnownBreak = BreakIterator.DONE;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
    // implementation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
     * This method is the actual implementation of the next() method.  All iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
     * vectors through here.  This method initializes the state machine to state 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
     * and advances through the text character by character until we reach the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
     * of the text or the state machine transitions to state 0.  We update our return
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
     * value every time the state machine passes through a possible end state.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
    protected int handleNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
        // if we're already at the end of the text, return DONE.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
        if (text.getIndex() == text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
            return BreakIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
        // no matter what, we always advance at least one character forward
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
        int result = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
        int lookaheadResult = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
        // begin in state 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
        int state = START_STATE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
        int category;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
        // loop until we reach the end of the text or transition to state 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
        while (c != CharacterIterator.DONE && state != STOP_STATE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
            // look up the current character's character category (which tells us
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
            // which column in the state table to look at)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
            // if the character isn't an ignore character, look up a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
            // transition in the state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
            if (category != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
                state = lookupState(state, category);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
            // if the state we've just transitioned to is a lookahead state,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
            // (but not also an end state), save its position.  If it's
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
            // both a lookahead state and an end state, update the break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
            // to the last saved lookup-state position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
            if (lookaheadStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                if (endStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
                    result = lookaheadResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
                    lookaheadResult = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
            // otherwise, if the state we've just transitioned to is an accepting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
            // state, update the break position to be the current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
                if (endStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                    result = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
            c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
        // if we've run off the end of the text, and the very last character took us into
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
        // a lookahead state, advance the break position to the lookahead position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
        // (the theory here is that if there are no characters at all after the lookahead
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
        // position, that always matches the lookahead criteria)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
        if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
            result = lookaheadResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
        text.setIndex(result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
     * This method backs the iterator back up to a "safe position" in the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
     * This is a position that we know, without any context, must be a break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
     * The various calling methods then iterate forward from this safe position to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
     * the appropriate position to return.  (For more information, see the description
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
    protected int handlePrevious() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
        int state = START_STATE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
        int category = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
        int lastCategory = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
        // loop until we reach the beginning of the text or transition to state 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
        while (c != CharacterIterator.DONE && state != STOP_STATE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
            // save the last character's category and look up the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
            // character's category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
            lastCategory = category;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
            // if the current character isn't an ignore character, look up a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
            // state transition in the backwards state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
            if (category != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
                state = lookupBackwardState(state, category);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
            // then advance one character backwards
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
            c = getPrevious();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
        // if we didn't march off the beginning of the text, we're either one or two
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
        // positions away from the real break position.  (One because of the call to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
        // previous() at the end of the loop above, and another because the character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
        // that takes us into the stop state will always be the character BEFORE
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
        // the break position.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
        if (c != CharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
            if (lastCategory != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
        return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
     * Looks up a character's category (i.e., its category for breaking purposes,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
     * not its Unicode category)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
    protected int lookupCategory(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
        if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
            return charCategoryTable.elementAt((char)c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
            return supplementaryCharCategoryTable.getValue(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
     * Given a current state and a character category, looks up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
     * next state to transition to in the state table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
    protected int lookupState(int state, int category) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
        return stateTable[state * numCategories + category];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
     * Given a current state and a character category, looks up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
     * next state to transition to in the backwards state table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
    protected int lookupBackwardState(int state, int category) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
        return backwardsStateTable[state * numCategories + category];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1056
    static long getLong(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1057
        long num = buf[offset]&0xFF;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1058
        for (int i = 1; i < 8; i++) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1059
            num = num<<8 | (buf[offset+i]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1060
        }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1061
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1062
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1063
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1064
    static int getInt(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1065
        int num = buf[offset]&0xFF;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1066
        for (int i = 1; i < 4; i++) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1067
            num = num<<8 | (buf[offset+i]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1068
        }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1069
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1070
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1071
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1072
    static short getShort(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1073
        short num = (short)(buf[offset]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1074
        num = (short)(num<<8 | (buf[offset+1]&0xFF));
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1075
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1076
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1077
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
     * This class exists to work around a bug in incorrect implementations
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
     * of CharacterIterator, which incorrectly handle setIndex(endIndex).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
     * This iterator relies only on base.setIndex(n) where n is less than
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
     * endIndex.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
     * One caveat:  if the base iterator's begin and end indices change
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
     * the change will not be reflected by this wrapper.  Does that matter?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1087
    // TODO: Review this class to see if it's still required.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
    private static final class SafeCharIterator implements CharacterIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
                                                           Cloneable {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
        private CharacterIterator base;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
        private int rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
        private int rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
        private int currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
        SafeCharIterator(CharacterIterator base) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
            this.base = base;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
            this.rangeStart = base.getBeginIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
            this.rangeLimit = base.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
            this.currentIndex = base.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1103
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
        public char first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
            return setIndex(rangeStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1108
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
        public char last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
            return setIndex(rangeLimit - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1113
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
        public char current() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
            if (currentIndex < rangeStart || currentIndex >= rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1123
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
        public char next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
            currentIndex++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
            if (currentIndex >= rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
                currentIndex = rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1136
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
        public char previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            currentIndex--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
            if (currentIndex < rangeStart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
                currentIndex = rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1149
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
        public char setIndex(int i) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
            if (i < rangeStart || i > rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                throw new IllegalArgumentException("Invalid position");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
            currentIndex = i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
            return current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1159
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
        public int getBeginIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
            return rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1164
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
        public int getEndIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
            return rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1169
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
        public int getIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
            return currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1174
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
        public Object clone() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
            SafeCharIterator copy = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
                copy = (SafeCharIterator) super.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
            catch(CloneNotSupportedException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
                throw new Error("Clone not supported: " + e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
            CharacterIterator copyOfBase = (CharacterIterator) base.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
            copy.base = copyOfBase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
            return copy;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
}