jdk/src/java.base/share/classes/sun/util/locale/provider/RuleBasedBreakIterator.java
author naoto
Mon, 12 Sep 2016 09:38:32 -0700
changeset 40813 dd5aa9c67561
parent 36511 9d0388c6b336
permissions -rw-r--r--
8165605: Thai resources in jdk.localedata cause split package issue with java.base Reviewed-by: mchung, erikj
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
     2
 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 4844
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    41
package sun.util.locale.provider;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
import java.io.BufferedInputStream;
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
    44
import java.io.InputStream;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
import java.io.IOException;
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
    46
import java.lang.reflect.Module;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
import java.security.AccessController;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
import java.security.PrivilegedActionException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
import java.security.PrivilegedExceptionAction;
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    50
import java.text.BreakIterator;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
import java.text.CharacterIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
import java.text.StringCharacterIterator;
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
    53
import java.util.MissingResourceException;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
import sun.text.CompactByteArray;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
import sun.text.SupplementaryCharacterData;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * <p>There are two kinds of rules, which are separated by semicolons: <i>substitutions</i>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * and <i>regular expressions.</i></p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * <p>A substitution rule defines a name that can be used in place of an expression. It
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * consists of a name, which is a string of characters contained in angle brackets, an equals
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * sign, and an expression. (There can be no whitespace on either side of the equals sign.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * To keep its syntactic meaning intact, the expression must be enclosed in parentheses or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * square brackets. A substitution is visible after its definition, and is filled in using
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * simple textual substitution. Substitution definitions can contain other substitutions, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * long as those substitutions have been defined first. Substitutions are generally used to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * make the regular expressions (which can get quite complex) shorter and easier to read.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * They typically define either character categories or commonly-used subexpressions.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 * <p>There is one special substitution.&nbsp; If the description defines a substitution
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * called &quot;&lt;ignore&gt;&quot;, the expression must be a [] expression, and the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * expression defines a set of characters (the &quot;<em>ignore characters</em>&quot;) that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 * will be transparent to the BreakIterator.&nbsp; A sequence of characters will break the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
 * same way it would if any ignore characters it contains are taken out.&nbsp; Break
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
 * positions never occur before ignore characters.</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
 * <p>A regular expression uses a subset of the normal Unix regular-expression syntax, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
 * defines a sequence of characters to be kept together. With one significant exception, the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
 * iterator uses a longest-possible-match algorithm when matching text to regular
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
 * expressions. The iterator also treats descriptions containing multiple regular expressions
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
 * as if they were ORed together (i.e., as if they were separated by |).</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
 * <p>The special characters recognized by the regular-expression parser are as follows:</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
 * <blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
 *   <table border="1" width="100%">
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
 *       <td width="6%">*</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
 *       <td width="94%">Specifies that the expression preceding the asterisk may occur any number
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
 *       of times (including not at all).</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
 *       <td width="6%">{}</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
 *       <td width="94%">Encloses a sequence of characters that is optional.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
 *       <td width="6%">()</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
 *       <td width="94%">Encloses a sequence of characters.&nbsp; If followed by *, the sequence
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
 *       repeats.&nbsp; Otherwise, the parentheses are just a grouping device and a way to delimit
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
 *       the ends of expressions containing |.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
 *       <td width="6%">|</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
 *       <td width="94%">Separates two alternative sequences of characters.&nbsp; Either one
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
 *       sequence or the other, but not both, matches this expression.&nbsp; The | character can
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
 *       only occur inside ().</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
 *       <td width="6%">.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
 *       <td width="94%">Matches any character.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
 *       <td width="6%">*?</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
 *       <td width="94%">Specifies a non-greedy asterisk.&nbsp; *? works the same way as *, except
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
 *       when there is overlap between the last group of characters in the expression preceding the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
 *       * and the first group of characters following the *.&nbsp; When there is this kind of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
 *       overlap, * will match the longest sequence of characters that match the expression before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
 *       the *, and *? will match the shortest sequence of characters matching the expression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
 *       before the *?.&nbsp; For example, if you have &quot;xxyxyyyxyxyxxyxyxyy&quot; in the text,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
 *       &quot;x[xy]*x&quot; will match through to the last x (i.e., &quot;<strong>xxyxyyyxyxyxxyxyx</strong>yy&quot;),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
 *       but &quot;x[xy]*?x&quot; will only match the first two xes (&quot;<strong>xx</strong>yxyyyxyxyxxyxyxyy&quot;).</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
 *       <td width="6%">[]</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
 *       <td width="94%">Specifies a group of alternative characters.&nbsp; A [] expression will
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
 *       match any single character that is specified in the [] expression.&nbsp; For more on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
 *       syntax of [] expressions, see below.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
 *       <td width="6%">/</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
 *       <td width="94%">Specifies where the break position should go if text matches this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
 *       expression.&nbsp; (e.g., &quot;[a-z]&#42;/[:Zs:]*[0-9]&quot; will match if the iterator sees a run
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
 *       of letters, followed by a run of whitespace, followed by a digit, but the break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
 *       will actually go before the whitespace).&nbsp; Expressions that don't contain / put the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
 *       break position at the end of the matching text.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
 *       <td width="6%">\</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
 *       <td width="94%">Escape character.&nbsp; The \ itself is ignored, but causes the next
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
 *       character to be treated as a literal character.&nbsp; This has no effect for many
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
 *       characters, but for the characters listed above, this deprives them of their special
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
 *       meaning.&nbsp; (There are no special escape sequences for Unicode characters, or tabs and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
 *       newlines; these are all handled by a higher-level protocol.&nbsp; In a Java string,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
 *       &quot;\n&quot; will be converted to a literal newline character by the time the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
 *       regular-expression parser sees it.&nbsp; Of course, this means that \ sequences that are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
 *       visible to the regexp parser must be written as \\ when inside a Java string.)&nbsp; All
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
 *       characters in the ASCII range except for letters, digits, and control characters are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
 *       reserved characters to the parser and must be preceded by \ even if they currently don't
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
 *       mean anything.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
 *       <td width="6%">!</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
 *       <td width="94%">If ! appears at the beginning of a regular expression, it tells the regexp
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
 *       parser that this expression specifies the backwards-iteration behavior of the iterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
 *       and not its normal iteration behavior.&nbsp; This is generally only used in situations
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
 *       where the automatically-generated backwards-iteration behavior doesn't produce
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
 *       satisfactory results and must be supplemented with extra client-specified rules.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
 *       <td width="6%"><em>(all others)</em></td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
 *       <td width="94%">All other characters are treated as literal characters, which must match
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
 *       the corresponding character(s) in the text exactly.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
 *   </table>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
 * </blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
 * <p>Within a [] expression, a number of other special characters can be used to specify
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
 * groups of characters:</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
 * <blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
 *   <table border="1" width="100%">
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
 *       <td width="6%">-</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
 *       <td width="94%">Specifies a range of matching characters.&nbsp; For example
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
 *       &quot;[a-p]&quot; matches all lowercase Latin letters from a to p (inclusive).&nbsp; The -
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
 *       sign specifies ranges of continuous Unicode numeric values, not ranges of characters in a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
 *       language's alphabetical order: &quot;[a-z]&quot; doesn't include capital letters, nor does
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
 *       it include accented letters such as a-umlaut.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
 *       <td width="6%">::</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
 *       <td width="94%">A pair of colons containing a one- or two-letter code matches all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
 *       characters in the corresponding Unicode category.&nbsp; The two-letter codes are the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
 *       as the two-letter codes in the Unicode database (for example, &quot;[:Sc::Sm:]&quot;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
 *       matches all currency symbols and all math symbols).&nbsp; Specifying a one-letter code is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
 *       the same as specifying all two-letter codes that begin with that letter (for example,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
 *       &quot;[:L:]&quot; matches all letters, and is equivalent to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
 *       &quot;[:Lu::Ll::Lo::Lm::Lt:]&quot;).&nbsp; Anything other than a valid two-letter Unicode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
 *       category code or a single letter that begins a Unicode category code is illegal within
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
 *       colons.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
 *       <td width="6%">[]</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
 *       <td width="94%">[] expressions can nest.&nbsp; This has no effect, except when used in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
 *       conjunction with the ^ token.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
 *       <td width="6%">^</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
 *       <td width="94%">Excludes the character (or the characters in the [] expression) following
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
 *       it from the group of characters.&nbsp; For example, &quot;[a-z^p]&quot; matches all Latin
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
 *       lowercase letters except p.&nbsp; &quot;[:L:^[&#92;u4e00-&#92;u9fff]]&quot; matches all letters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
 *       except the Han ideographs.</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
 *     <tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
 *       <td width="6%"><em>(all others)</em></td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
 *       <td width="94%">All other characters are treated as literal characters.&nbsp; (For
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
 *       example, &quot;[aeiou]&quot; specifies just the letters a, e, i, o, and u.)</td>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
 *     </tr>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
 *   </table>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
 * </blockquote>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
 * <p>For a more complete explanation, see <a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
 * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
 * &nbsp; For examples, see the resource data (which is annotated).</p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
 * @author Richard Gillam
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
class RuleBasedBreakIterator extends BreakIterator {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
     * A token used as a character-category value to identify ignore characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
    protected static final byte IGNORE = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
     * The state number of the starting state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
    private static final short START_STATE = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
     * The state-transition value indicating "stop"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
    private static final short STOP_STATE = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
     * Magic number for the BreakIterator data file format.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    static final byte[] LABEL = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
        (byte)'B', (byte)'I', (byte)'d', (byte)'a', (byte)'t', (byte)'a',
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        (byte)'\0'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
    static final int    LABEL_LENGTH = LABEL.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
     * Version number of the dictionary that was read in.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
    static final byte supportedVersion = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
     * Header size in byte count
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
    private static final int HEADER_LENGTH = 36;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
     * An array length of indices for BMP characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
    private static final int BMP_INDICES_LENGTH = 512;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
     * Tables that indexes from character values to character category numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
    private CompactByteArray charCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
    private SupplementaryCharacterData supplementaryCharCategoryTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
     * The table of state transitions used for forward iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
    private short[] stateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
     * The table of state transitions used to sync up the iterator with the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
     * text in backwards and random-access iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
    private short[] backwardsStateTable = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
     * A list of flags indicating which states in the state table are accepting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
     * ("end") states
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
    private boolean[] endStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
     * A list of flags indicating which states in the state table are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
     * lookahead states (states which turn lookahead on and off)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    private boolean[] lookaheadStates = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
     * A table for additional data. May be used by a subclass of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
     * RuleBasedBreakIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
    private byte[] additionalData = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
     * The number of character categories (and, thus, the number of columns in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
     * the state tables)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
    private int numCategories;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
     * The character iterator through which this BreakIterator accesses the text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
    private CharacterIterator text = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
     * A CRC32 value of all data in datafile
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
    private long checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
    // constructors
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
    /**
     * Creates a RuleBasedBreakIterator whose tables are loaded from the
     * given data file, located through the given module.
     *
     * @param module   the module handed to {@code readTables} to locate the
     *                 data file
     * @param datafile the name of the data file to load
     * @throws IOException if {@code readTables} fails with an I/O error
     * @throws MissingResourceException if {@code readTables} rejects the
     *         data file
     */
    RuleBasedBreakIterator(Module module, String datafile)
            throws IOException, MissingResourceException {
        this.readTables(module, datafile);
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
     * Read datafile. The datafile's format is as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
     *   BreakIteratorData {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
     *       u1           magic[7];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
     *       u1           version;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
     *       u4           totalDataSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
     *       header_info  header;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
     *       body         value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
     * <code>totalDataSize</code> is the summation of the size of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
     * <code>header_info</code> and <code>body</code> in byte count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
     * In <code>header</code>, each field except for checksum implies the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
     * length of each field. Since <code>BMPdataLength</code> is a fixed-length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
     *  data(512 entries), its length isn't included in <code>header</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
     * <code>checksum</code> is a CRC32 value of all in <code>body</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
     *   header_info {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
     *       u4           stateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
     *       u4           backwardsStateTableLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
     *       u4           endStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
     *       u4           lookaheadStatesLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
     *       u4           BMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
     *       u4           nonBMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
     *       u4           additionalDataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
     *       u8           checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
     * Finally, <code>BMPindices</code> and <code>BMPdata</code> are set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
     * <code>charCategoryTable</code>. <code>nonBMPdata</code> is set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
     * <code>supplementaryCharCategoryTable</code>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
     * <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
     *   body {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
     *       u2           stateTable[stateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
     *       u2           backwardsStateTable[backwardsStateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
     *       u1           endStates[endStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
     *       u1           lookaheadStates[lookaheadStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
     *       u2           BMPindices[512];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
     *       u1           BMPdata[BMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
     *       u4           nonBMPdata[numNonBMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
     *       u1           additionalData[additionalDataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
     *   }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
     * </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
     */
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   374
    protected final void readTables(Module module, String datafile)
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   377
        byte[] buffer = readFile(module, datafile);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        /* Read header_info. */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   380
        int stateTableLength = getInt(buffer, 0);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   381
        int backwardsStateTableLength = getInt(buffer, 4);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   382
        int endStatesLength = getInt(buffer, 8);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   383
        int lookaheadStatesLength = getInt(buffer, 12);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   384
        int BMPdataLength = getInt(buffer, 16);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   385
        int nonBMPdataLength = getInt(buffer, 20);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   386
        int additionalDataLength = getInt(buffer, 24);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   387
        checksum = getLong(buffer, 28);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
        /* Read stateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
        stateTable = new short[stateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
        int offset = HEADER_LENGTH;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
        for (int i = 0; i < stateTableLength; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   393
           stateTable[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
        /* Read backwardsStateTable[numCategories * numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
        backwardsStateTable = new short[backwardsStateTableLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   399
           backwardsStateTable[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
        /* Read endStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
        endStates = new boolean[endStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
        for (int i = 0; i < endStatesLength; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
           endStates[i] = buffer[offset] == 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
        /* Read lookaheadStates[numRows] */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
        lookaheadStates = new boolean[lookaheadStatesLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
        for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
           lookaheadStates[i] = buffer[offset] == 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
        /* Read a category table and indices for BMP characters. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
        short[] temp1 = new short[BMP_INDICES_LENGTH];  // BMPindices
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
        for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   417
            temp1[i] = getShort(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        byte[] temp2 = new byte[BMPdataLength];  // BMPdata
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
        System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
        offset += BMPdataLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
        charCategoryTable = new CompactByteArray(temp1, temp2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
        /* Read a category table for non-BMP characters. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
        int[] temp3 = new int[nonBMPdataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
        for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   427
            temp3[i] = getInt(buffer, offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
        supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
        /* Read additional data */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
        if (additionalDataLength > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            additionalData = new byte[additionalDataLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
            System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
        /* Set numCategories */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
        numCategories = stateTable.length / endStates.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   441
    protected byte[] readFile(final Module module, final String datafile)
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
        BufferedInputStream is;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
        try {
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   446
            PrivilegedExceptionAction<BufferedInputStream> pa = () -> {
40813
dd5aa9c67561 8165605: Thai resources in jdk.localedata cause split package issue with java.base
naoto
parents: 36511
diff changeset
   447
                String pathName = "jdk.localedata".equals(module.getName()) ?
dd5aa9c67561 8165605: Thai resources in jdk.localedata cause split package issue with java.base
naoto
parents: 36511
diff changeset
   448
                     "sun/text/resources/ext/" :
dd5aa9c67561 8165605: Thai resources in jdk.localedata cause split package issue with java.base
naoto
parents: 36511
diff changeset
   449
                     "sun/text/resources/";
dd5aa9c67561 8165605: Thai resources in jdk.localedata cause split package issue with java.base
naoto
parents: 36511
diff changeset
   450
                InputStream in = module.getResourceAsStream(pathName + datafile);
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   451
                if (in == null) {
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   452
                    // Try to load the file with "java.base" module instance. Assumption
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   453
                    // here is that the fall back data files to be read should reside in
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   454
                    // java.base.
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   455
                    in = RuleBasedBreakIterator.class.getModule().getResourceAsStream("sun/text/resources/" + datafile);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
                }
36511
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   457
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   458
                return new BufferedInputStream(in);
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   459
            };
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   460
            is = AccessController.doPrivileged(pa);
9d0388c6b336 8142968: Module System implementation
alanb
parents: 26219
diff changeset
   461
        } catch (PrivilegedActionException e) {
10419
12c063b39232 7084245: Update usages of InternalError to use exception chaining
sherman
parents: 7668
diff changeset
   462
            throw new InternalError(e.toString(), e);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        int offset = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
        /* First, read magic, version, and header_info. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
        int len = LABEL_LENGTH + 5;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
        byte[] buf = new byte[len];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
        if (is.read(buf) != len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
            throw new MissingResourceException("Wrong header length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        /* Validate the magic number. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
            if (buf[offset] != LABEL[offset]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
                throw new MissingResourceException("Wrong magic number",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                                                   datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
        /* Validate the version number. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
        if (buf[offset] != supportedVersion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
            throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
        /* Read data: totalDataSize + 8(for checksum) */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   490
        len = getInt(buf, ++offset);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
        buf = new byte[len];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
        if (is.read(buf) != len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
            throw new MissingResourceException("Wrong data length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
                                               datafile, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
        is.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
        return buf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
    byte[] getAdditionalData() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
        return additionalData;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
    void setAdditionalData(byte[] b) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        additionalData = b;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
    // boilerplate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
     * Clones this iterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
     * @return A newly-constructed RuleBasedBreakIterator with the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
     * behavior as this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   518
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
    public Object clone() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
        RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
        if (text != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
            result.text = (CharacterIterator) text.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
     * Returns true if both BreakIterators are of the same class, have the same
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
     * rules, and iterate over the same text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   531
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
    public boolean equals(Object that) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
            if (that == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
                return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
            RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
            if (checksum != other.checksum) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
                return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
            if (text == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
                return other.text == null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
                return text.equals(other.text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
        catch(ClassCastException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
            return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
     * Returns text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   556
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
    public String toString() {
26219
1a19360ff122 8054714: Use StringJoiner where it makes the code cleaner
igerasim
parents: 25859
diff changeset
   558
        return "[checksum=0x" + Long.toHexString(checksum) + ']';
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
     * Compute a hashcode for this BreakIterator
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
     * @return A hash code
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   565
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
    public int hashCode() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
        return (int)checksum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
    // BreakIterator overrides
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
     * Sets the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
     * (i.e., the CharacterIterator's starting offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
     * @return The offset of the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   579
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
    public int first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
        CharacterIterator t = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
        t.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
        return t.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
     * Sets the current iteration position to the end of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
     * (i.e., the CharacterIterator's ending offset).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
     * @return The text's past-the-end offset.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   592
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
    public int last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
        CharacterIterator t = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
        // I'm not sure why, but t.last() returns the offset of the last character,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
        // rather than the past-the-end offset
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
        t.setIndex(t.getEndIndex());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
        return t.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
     * Advances the iterator either forward or backward the specified number of steps.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
     * Negative values move backward, and positive values move forward.  This is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
     * equivalent to repeatedly calling next() or previous().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
     * @param n The number of steps to move.  The sign indicates the direction
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
     * (negative is backwards, and positive is forwards).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
     * @return The character offset of the boundary position n boundaries away from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
     * the current one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   611
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
    public int next(int n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
        int result = current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
        while (n > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
            --n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
        while (n < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
            result = previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
            ++n;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
     * Advances the iterator to the next boundary position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
     * @return The position of the first boundary after this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   629
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
    public int next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
        return handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   634
    // Boundary most recently recorded by previous() or following(), reused by
    // both as a starting point for their forward scan. BreakIterator.DONE
    // (or anything below it) means no usable boundary is cached.
    private int cachedLastKnownBreak = BreakIterator.DONE;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   635
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
     * Advances the iterator backwards, to the last boundary preceding this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
     * @return The position of the last boundary position preceding this one.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   640
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
        // if we're already sitting at the beginning of the text, return DONE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
        if (current() == text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
            return BreakIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
        // set things up.  handlePrevious() will back us up to some valid
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
        // break position before the current position (we back our internal
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
        // iterator up one step to prevent handlePrevious() from returning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
        // the current position), but not necessarily the last one before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
        // where we started
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
        int start = current();
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   654
        int lastResult = cachedLastKnownBreak;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   655
        if (lastResult >= start || lastResult <= BreakIterator.DONE) {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   656
            getPrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   657
            lastResult = handlePrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   658
        } else {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   659
            //it might be better to check if handlePrevious() give us closer
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   660
            //safe value but handlePrevious() is slow too
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   661
            //So, this has to be done carefully
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   662
            text.setIndex(lastResult);
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   663
        }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
        int result = lastResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
        // iterate forward from the known break position until we pass our
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
        // starting point.  The last break position before the starting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
        // point is our return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
        while (result != BreakIterator.DONE && result < start) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
            lastResult = result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
        // set the current iteration position to be the last break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
        // before where we started, and then return that value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
        text.setIndex(lastResult);
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   677
        cachedLastKnownBreak = lastResult;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
        return lastResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
     * Returns previous character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
    private int getPrevious() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
        char c2 = text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
        if (Character.isLowSurrogate(c2) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
            text.getIndex() > text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
            char c1 = text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
            if (Character.isHighSurrogate(c1)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
                return Character.toCodePoint(c1, c2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
                text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        return (int)c2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
     * Returns current character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
    int getCurrent() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
        char c1 = text.current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
        if (Character.isHighSurrogate(c1) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
            text.getIndex() < text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
            char c2 = text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
            text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
            if (Character.isLowSurrogate(c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
                return Character.toCodePoint(c1, c2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
        return (int)c1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
     * Returns the count of next character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
    private int getCurrentCodePointCount() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
        char c1 = text.current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
        if (Character.isHighSurrogate(c1) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
            text.getIndex() < text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
            char c2 = text.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
            text.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
            if (Character.isLowSurrogate(c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
                return 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        return 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
     * Returns next character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
    int getNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
        int index = text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
        int endIndex = text.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
        if (index == endIndex ||
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   737
            (index += getCurrentCodePointCount()) >= endIndex) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
            return CharacterIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
        text.setIndex(index);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
        return getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
     * Returns the position of next character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
    private int getNextIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
        int index = text.getIndex() + getCurrentCodePointCount();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
        int endIndex = text.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
        if (index > endIndex) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
            return endIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
            return index;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
     * Throw IllegalArgumentException unless begin <= offset < end.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
    protected static final void checkOffset(int offset, CharacterIterator text) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
        if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
            throw new IllegalArgumentException("offset out of bounds");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
     * Sets the iterator to refer to the first boundary position following
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
     * the specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
     * @offset The position from which to begin searching for a break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
     * @return The position of the first break after the current position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   772
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
    public int following(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
        // Set our internal iteration position (temporarily)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        // to the position passed in.  If this is the _beginning_ position,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        // then we can just use next() to get our return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        text.setIndex(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
        if (offset == text.getBeginIndex()) {
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   783
            cachedLastKnownBreak = handleNext();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   784
            return cachedLastKnownBreak;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
        // otherwise, we have to sync up first.  Use handlePrevious() to back
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
        // us up to a known break position before the specified position (if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
        // we can determine that the specified position is a break position,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
        // we don't back up at all).  This may or may not be the last break
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
        // position at or before our starting position.  Advance forward
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
        // from here until we've passed the starting position.  The position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        // we stop on will be the first break position after the specified one.
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   794
        int result = cachedLastKnownBreak;
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   795
        if (result >= offset || result <= BreakIterator.DONE) {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   796
            result = handlePrevious();
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   797
        } else {
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   798
            //it might be better to check if handlePrevious() give us closer
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   799
            //safe value but handlePrevious() is slow too
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   800
            //So, this has to be done carefully
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   801
            text.setIndex(result);
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   802
        }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        while (result != BreakIterator.DONE && result <= offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
            result = handleNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
        }
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   806
        cachedLastKnownBreak = result;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
     * Sets the iterator to refer to the last boundary position before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
     * specified position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
     * @offset The position to begin searching for a break from.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
     * @return The position of the last boundary before the starting position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   816
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
    public int preceding(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
        // if we start by updating the current iteration position to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
        // position specified by the caller, we can just use previous()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
        // to carry out this operation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
        text.setIndex(offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
        return previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
    /**
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 13583
diff changeset
   828
     * Returns true if the specified position is a boundary position.  As a side
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
     * effect, leaves the iterator pointing to the first boundary position at
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
     * or after "offset".
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
     * @param offset the offset to check.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
     * @return True if "offset" is a boundary position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   834
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
    public boolean isBoundary(int offset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        checkOffset(offset, text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
        if (offset == text.getBeginIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
        // to check whether this is a boundary, we can use following() on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
        // position before the specified one and return true if the position we
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
        // get back is the one the user specified
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
            return following(offset - 1) == offset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
     * Returns the current iteration position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
     * @return The current iteration position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   854
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
    public int current() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
        return getText().getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
     * Return a CharacterIterator over the text being analyzed.  This version
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
     * of this method returns the actual CharacterIterator we're using internally.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
     * Changing the state of this iterator can have undefined consequences.  If
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
     * you need to change it, clone it first.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
     * @return An iterator over the text being analyzed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   866
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
    public CharacterIterator getText() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
        // The iterator is initialized pointing to no text at all, so if this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
        // function is called while we're in that state, we have to fudge an
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
        // iterator to return.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
        if (text == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
            text = new StringCharacterIterator("");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
        return text;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
     * Set the iterator to analyze a new piece of text.  This function resets
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
     * the current iteration position to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
     * @param newText An iterator over the text to analyze.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
   882
    @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
        // Test iterator to see if we need to wrap it in a SafeCharIterator.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
        // The correct behavior for CharacterIterators is to allow the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
        // position to be set to the endpoint of the iterator.  Many
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
        // CharacterIterators do not uphold this, so this is a workaround
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
        // to permit them to use this class.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
        int end = newText.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
        boolean goodIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
            newText.setIndex(end);  // some buggy iterators throw an exception here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
            goodIterator = newText.getIndex() == end;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
        catch(IllegalArgumentException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
            goodIterator = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
        if (goodIterator) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
            text = newText;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
            text = new SafeCharIterator(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
        text.first();
4844
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   906
68e39b0ed557 6868503: RuleBasedBreakIterator is inefficient
peytoia
parents: 2
diff changeset
   907
        cachedLastKnownBreak = BreakIterator.DONE;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
    // implementation
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
    //=======================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
     * This method is the actual implementation of the next() method.  All iteration
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
     * vectors through here.  This method initializes the state machine to state 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
     * and advances through the text character by character until we reach the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
     * of the text or the state machine transitions to state 0.  We update our return
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
     * value every time the state machine passes through a possible end state.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
    protected int handleNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
        // if we're already at the end of the text, return DONE.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
        if (text.getIndex() == text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
            return BreakIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
        // no matter what, we always advance at least one character forward
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
        int result = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
        int lookaheadResult = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
        // begin in state 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
        int state = START_STATE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
        int category;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
        // loop until we reach the end of the text or transition to state 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
        while (c != CharacterIterator.DONE && state != STOP_STATE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
            // look up the current character's character category (which tells us
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
            // which column in the state table to look at)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
            // if the character isn't an ignore character, look up a state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
            // transition in the state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
            if (category != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                state = lookupState(state, category);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
            // if the state we've just transitioned to is a lookahead state,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
            // (but not also an end state), save its position.  If it's
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
            // both a lookahead state and an end state, update the break position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
            // to the last saved lookup-state position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
            if (lookaheadStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
                if (endStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
                    result = lookaheadResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                    lookaheadResult = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
            // otherwise, if the state we've just transitioned to is an accepting
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
            // state, update the break position to be the current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
                if (endStates[state]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
                    result = getNextIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
            c = getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
        // if we've run off the end of the text, and the very last character took us into
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
        // a lookahead state, advance the break position to the lookahead position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
        // (the theory here is that if there are no characters at all after the lookahead
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
        // position, that always matches the lookahead criteria)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
        if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
            result = lookaheadResult;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
        text.setIndex(result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
     * This method backs the iterator back up to a "safe position" in the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
     * This is a position that we know, without any context, must be a break position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
     * The various calling methods then iterate forward from this safe position to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
     * the appropriate position to return.  (For more information, see the description
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
    protected int handlePrevious() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
        CharacterIterator text = getText();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
        int state = START_STATE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
        int category = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
        int lastCategory = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
        int c = getCurrent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
        // loop until we reach the beginning of the text or transition to state 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
        while (c != CharacterIterator.DONE && state != STOP_STATE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
            // save the last character's category and look up the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
            // character's category
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
            lastCategory = category;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
            category = lookupCategory(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
            // if the current character isn't an ignore character, look up a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
            // state transition in the backwards state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
            if (category != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
                state = lookupBackwardState(state, category);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
            // then advance one character backwards
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
            c = getPrevious();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
        // if we didn't march off the beginning of the text, we're either one or two
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
        // positions away from the real break position.  (One because of the call to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
        // previous() at the end of the loop above, and another because the character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
        // that takes us into the stop state will always be the character BEFORE
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
        // the break position.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
        if (c != CharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
            if (lastCategory != IGNORE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
                getNext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
        return text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
     * Looks up a character's category (i.e., its category for breaking purposes,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
     * not its Unicode category)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
    protected int lookupCategory(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
        if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
            return charCategoryTable.elementAt((char)c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
            return supplementaryCharCategoryTable.getValue(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
     * Given a current state and a character category, looks up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
     * next state to transition to in the state table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
    protected int lookupState(int state, int category) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
        return stateTable[state * numCategories + category];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
     * Given a current state and a character category, looks up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
     * next state to transition to in the backwards state table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
    protected int lookupBackwardState(int state, int category) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
        return backwardsStateTable[state * numCategories + category];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1064
    static long getLong(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1065
        long num = buf[offset]&0xFF;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1066
        for (int i = 1; i < 8; i++) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1067
            num = num<<8 | (buf[offset+i]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1068
        }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1069
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1070
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1071
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1072
    static int getInt(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1073
        int num = buf[offset]&0xFF;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1074
        for (int i = 1; i < 4; i++) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1075
            num = num<<8 | (buf[offset+i]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1076
        }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1077
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1078
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1079
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1080
    static short getShort(byte[] buf, int offset) {
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1081
        short num = (short)(buf[offset]&0xFF);
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1082
        num = (short)(num<<8 | (buf[offset+1]&0xFF));
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1083
        return num;
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1084
    }
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1085
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
     * This class exists to work around a bug in incorrect implementations
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
     * of CharacterIterator, which incorrectly handle setIndex(endIndex).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
     * This iterator relies only on base.setIndex(n) where n is less than
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
     * endIndex.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
     * One caveat:  if the base iterator's begin and end indices change
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
     * the change will not be reflected by this wrapper.  Does that matter?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
     */
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1095
    // TODO: Review this class to see if it's still required.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
    private static final class SafeCharIterator implements CharacterIterator,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
                                                           Cloneable {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
        private CharacterIterator base;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
        private int rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
        private int rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
        private int currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
        SafeCharIterator(CharacterIterator base) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
            this.base = base;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
            this.rangeStart = base.getBeginIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
            this.rangeLimit = base.getEndIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
            this.currentIndex = base.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1111
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
        public char first() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
            return setIndex(rangeStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1116
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
        public char last() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
            return setIndex(rangeLimit - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1121
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
        public char current() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
            if (currentIndex < rangeStart || currentIndex >= rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1131
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
        public char next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
            currentIndex++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
            if (currentIndex >= rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
                currentIndex = rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1144
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
        public char previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
            currentIndex--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
            if (currentIndex < rangeStart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                currentIndex = rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                return base.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1157
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
        public char setIndex(int i) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
            if (i < rangeStart || i > rangeLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                throw new IllegalArgumentException("Invalid position");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
            currentIndex = i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
            return current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1167
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
        public int getBeginIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
            return rangeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1172
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
        public int getEndIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
            return rangeLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1177
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
        public int getIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
            return currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
13583
dc0017b1a452 6336885: RFE: Locale Data Deployment Enhancements
naoto
parents: 12848
diff changeset
  1182
        @Override
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
        public Object clone() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
            SafeCharIterator copy = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
                copy = (SafeCharIterator) super.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
            catch(CloneNotSupportedException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
                throw new Error("Clone not supported: " + e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
            CharacterIterator copyOfBase = (CharacterIterator) base.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
            copy.base = copyOfBase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
            return copy;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
}