jdk/src/java.base/share/classes/sun/util/locale/provider/RuleBasedBreakIterator.java
changeset 42560 95af45781076
parent 42559 f71b844f33d1
parent 41945 31f5023200d4
child 42561 84b1f0f39cb0
equal deleted inserted replaced
42559:f71b844f33d1 42560:95af45781076
     1 /*
       
     2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 /*
       
    27  *
       
    28  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
       
    29  * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
       
    30  *
       
    31  * The original version of this source code and documentation
       
    32  * is copyrighted and owned by Taligent, Inc., a wholly-owned
       
    33  * subsidiary of IBM. These materials are provided under terms
       
    34  * of a License Agreement between Taligent and Sun. This technology
       
    35  * is protected by multiple US and International patents.
       
    36  *
       
    37  * This notice and attribution to Taligent may not be removed.
       
    38  * Taligent is a registered trademark of Taligent, Inc.
       
    39  */
       
    40 
       
    41 package sun.util.locale.provider;
       
    42 
       
    43 import java.io.BufferedInputStream;
       
    44 import java.io.InputStream;
       
    45 import java.io.IOException;
       
    46 import java.lang.reflect.Module;
       
    47 import java.security.AccessController;
       
    48 import java.security.PrivilegedActionException;
       
    49 import java.security.PrivilegedExceptionAction;
       
    50 import java.text.BreakIterator;
       
    51 import java.text.CharacterIterator;
       
    52 import java.text.StringCharacterIterator;
       
    53 import java.util.MissingResourceException;
       
    54 import sun.text.CompactByteArray;
       
    55 import sun.text.SupplementaryCharacterData;
       
    56 
       
    57 /**
       
    58  * <p>A subclass of BreakIterator whose behavior is specified using a list of rules.</p>
       
    59  *
       
    60  * <p>There are two kinds of rules, which are separated by semicolons: <i>substitutions</i>
       
    61  * and <i>regular expressions.</i></p>
       
    62  *
       
    63  * <p>A substitution rule defines a name that can be used in place of an expression. It
       
    64  * consists of a name, which is a string of characters contained in angle brackets, an equals
       
    65  * sign, and an expression. (There can be no whitespace on either side of the equals sign.)
       
    66  * To keep its syntactic meaning intact, the expression must be enclosed in parentheses or
       
    67  * square brackets. A substitution is visible after its definition, and is filled in using
       
    68  * simple textual substitution. Substitution definitions can contain other substitutions, as
       
    69  * long as those substitutions have been defined first. Substitutions are generally used to
       
    70  * make the regular expressions (which can get quite complex) shorter and easier to read.
       
    71  * They typically define either character categories or commonly-used subexpressions.</p>
       
    72  *
       
    73  * <p>There is one special substitution.&nbsp; If the description defines a substitution
       
    74  * called &quot;&lt;ignore&gt;&quot;, the expression must be a [] expression, and the
       
    75  * expression defines a set of characters (the &quot;<em>ignore characters</em>&quot;) that
       
    76  * will be transparent to the BreakIterator.&nbsp; A sequence of characters will break the
       
    77  * same way it would if any ignore characters it contains are taken out.&nbsp; Break
       
    78  * positions never occur before ignore characters.</p>
       
    79  *
       
    80  * <p>A regular expression uses a subset of the normal Unix regular-expression syntax, and
       
    81  * defines a sequence of characters to be kept together. With one significant exception, the
       
    82  * iterator uses a longest-possible-match algorithm when matching text to regular
       
    83  * expressions. The iterator also treats descriptions containing multiple regular expressions
       
    84  * as if they were ORed together (i.e., as if they were separated by |).</p>
       
    85  *
       
    86  * <p>The special characters recognized by the regular-expression parser are as follows:</p>
       
    87  *
       
    88  * <blockquote>
       
    89  *   <table border="1" width="100%">
       
    90  *     <tr>
       
    91  *       <td width="6%">*</td>
       
    92  *       <td width="94%">Specifies that the expression preceding the asterisk may occur any number
       
    93  *       of times (including not at all).</td>
       
    94  *     </tr>
       
    95  *     <tr>
       
    96  *       <td width="6%">{}</td>
       
    97  *       <td width="94%">Encloses a sequence of characters that is optional.</td>
       
    98  *     </tr>
       
    99  *     <tr>
       
   100  *       <td width="6%">()</td>
       
   101  *       <td width="94%">Encloses a sequence of characters.&nbsp; If followed by *, the sequence
       
   102  *       repeats.&nbsp; Otherwise, the parentheses are just a grouping device and a way to delimit
       
   103  *       the ends of expressions containing |.</td>
       
   104  *     </tr>
       
   105  *     <tr>
       
   106  *       <td width="6%">|</td>
       
   107  *       <td width="94%">Separates two alternative sequences of characters.&nbsp; Either one
       
   108  *       sequence or the other, but not both, matches this expression.&nbsp; The | character can
       
   109  *       only occur inside ().</td>
       
   110  *     </tr>
       
   111  *     <tr>
       
   112  *       <td width="6%">.</td>
       
   113  *       <td width="94%">Matches any character.</td>
       
   114  *     </tr>
       
   115  *     <tr>
       
   116  *       <td width="6%">*?</td>
       
   117  *       <td width="94%">Specifies a non-greedy asterisk.&nbsp; *? works the same way as *, except
       
   118  *       when there is overlap between the last group of characters in the expression preceding the
       
   119  *       * and the first group of characters following the *.&nbsp; When there is this kind of
       
   120  *       overlap, * will match the longest sequence of characters that match the expression before
       
   121  *       the *, and *? will match the shortest sequence of characters matching the expression
       
   122  *       before the *?.&nbsp; For example, if you have &quot;xxyxyyyxyxyxxyxyxyy&quot; in the text,
       
   123  *       &quot;x[xy]*x&quot; will match through to the last x (i.e., &quot;<strong>xxyxyyyxyxyxxyxyx</strong>yy&quot;),
       
   124  *       but &quot;x[xy]*?x&quot; will only match the first two xes (&quot;<strong>xx</strong>yxyyyxyxyxxyxyxyy&quot;).</td>
       
   125  *     </tr>
       
   126  *     <tr>
       
   127  *       <td width="6%">[]</td>
       
   128  *       <td width="94%">Specifies a group of alternative characters.&nbsp; A [] expression will
       
   129  *       match any single character that is specified in the [] expression.&nbsp; For more on the
       
   130  *       syntax of [] expressions, see below.</td>
       
   131  *     </tr>
       
   132  *     <tr>
       
   133  *       <td width="6%">/</td>
       
   134  *       <td width="94%">Specifies where the break position should go if text matches this
       
   135  *       expression.&nbsp; (e.g., &quot;[a-z]&#42;/[:Zs:]*[0-9]&quot; will match if the iterator sees a run
       
   136  *       of letters, followed by a run of whitespace, followed by a digit, but the break position
       
   137  *       will actually go before the whitespace).&nbsp; Expressions that don't contain / put the
       
   138  *       break position at the end of the matching text.</td>
       
   139  *     </tr>
       
   140  *     <tr>
       
   141  *       <td width="6%">\</td>
       
   142  *       <td width="94%">Escape character.&nbsp; The \ itself is ignored, but causes the next
       
   143  *       character to be treated as literal character.&nbsp; This has no effect for many
       
   144  *       characters, but for the characters listed above, this deprives them of their special
       
   145  *       meaning.&nbsp; (There are no special escape sequences for Unicode characters, or tabs and
       
   146  *       newlines; these are all handled by a higher-level protocol.&nbsp; In a Java string,
       
   147  *       &quot;\n&quot; will be converted to a literal newline character by the time the
       
   148  *       regular-expression parser sees it.&nbsp; Of course, this means that \ sequences that are
       
   149  *       visible to the regexp parser must be written as \\ when inside a Java string.)&nbsp; All
       
   150  *       characters in the ASCII range except for letters, digits, and control characters are
       
   151  *       reserved characters to the parser and must be preceded by \ even if they currently don't
       
   152  *       mean anything.</td>
       
   153  *     </tr>
       
   154  *     <tr>
       
   155  *       <td width="6%">!</td>
       
   156  *       <td width="94%">If ! appears at the beginning of a regular expression, it tells the regexp
       
   157  *       parser that this expression specifies the backwards-iteration behavior of the iterator,
       
   158  *       and not its normal iteration behavior.&nbsp; This is generally only used in situations
       
   159  *       where the automatically-generated backwards-iteration behavior doesn't produce
       
   160  *       satisfactory results and must be supplemented with extra client-specified rules.</td>
       
   161  *     </tr>
       
   162  *     <tr>
       
   163  *       <td width="6%"><em>(all others)</em></td>
       
   164  *       <td width="94%">All other characters are treated as literal characters, which must match
       
   165  *       the corresponding character(s) in the text exactly.</td>
       
   166  *     </tr>
       
   167  *   </table>
       
   168  * </blockquote>
       
   169  *
       
   170  * <p>Within a [] expression, a number of other special characters can be used to specify
       
   171  * groups of characters:</p>
       
   172  *
       
   173  * <blockquote>
       
   174  *   <table border="1" width="100%">
       
   175  *     <tr>
       
   176  *       <td width="6%">-</td>
       
   177  *       <td width="94%">Specifies a range of matching characters.&nbsp; For example
       
   178  *       &quot;[a-p]&quot; matches all lowercase Latin letters from a to p (inclusive).&nbsp; The -
       
   179  *       sign specifies ranges of continuous Unicode numeric values, not ranges of characters in a
       
   180  *       language's alphabetical order: &quot;[a-z]&quot; doesn't include capital letters, nor does
       
   181  *       it include accented letters such as a-umlaut.</td>
       
   182  *     </tr>
       
   183  *     <tr>
       
   184  *       <td width="6%">::</td>
       
   185  *       <td width="94%">A pair of colons containing a one- or two-letter code matches all
       
   186  *       characters in the corresponding Unicode category.&nbsp; The two-letter codes are the same
       
   187  *       as the two-letter codes in the Unicode database (for example, &quot;[:Sc::Sm:]&quot;
       
   188  *       matches all currency symbols and all math symbols).&nbsp; Specifying a one-letter code is
       
   189  *       the same as specifying all two-letter codes that begin with that letter (for example,
       
   190  *       &quot;[:L:]&quot; matches all letters, and is equivalent to
       
   191  *       &quot;[:Lu::Ll::Lo::Lm::Lt:]&quot;).&nbsp; Anything other than a valid two-letter Unicode
       
   192  *       category code or a single letter that begins a Unicode category code is illegal within
       
   193  *       colons.</td>
       
   194  *     </tr>
       
   195  *     <tr>
       
   196  *       <td width="6%">[]</td>
       
   197  *       <td width="94%">[] expressions can nest.&nbsp; This has no effect, except when used in
       
   198  *       conjunction with the ^ token.</td>
       
   199  *     </tr>
       
   200  *     <tr>
       
   201  *       <td width="6%">^</td>
       
   202  *       <td width="94%">Excludes the character (or the characters in the [] expression) following
       
   203  *       it from the group of characters.&nbsp; For example, &quot;[a-z^p]&quot; matches all Latin
       
   204  *       lowercase letters except p.&nbsp; &quot;[:L:^[&#92;u4e00-&#92;u9fff]]&quot; matches all letters
       
   205  *       except the Han ideographs.</td>
       
   206  *     </tr>
       
   207  *     <tr>
       
   208  *       <td width="6%"><em>(all others)</em></td>
       
   209  *       <td width="94%">All other characters are treated as literal characters.&nbsp; (For
       
   210  *       example, &quot;[aeiou]&quot; specifies just the letters a, e, i, o, and u.)</td>
       
   211  *     </tr>
       
   212  *   </table>
       
   213  * </blockquote>
       
   214  *
       
   215  * <p>For a more complete explanation, see <a
       
   216  * href="http://www.ibm.com/java/education/boundaries/boundaries.html">http://www.ibm.com/java/education/boundaries/boundaries.html</a>.
       
   217  * &nbsp; For examples, see the resource data (which is annotated).</p>
       
   218  *
       
   219  * @author Richard Gillam
       
   220  */
       
   221 class RuleBasedBreakIterator extends BreakIterator {
       
   222 
       
   223     /**
       
   224      * A token used as a character-category value to identify ignore characters
       
   225      */
       
   226     protected static final byte IGNORE = -1;
       
   227 
       
   228     /**
       
   229      * The state number of the starting state
       
   230      */
       
   231     private static final short START_STATE = 1;
       
   232 
       
   233     /**
       
   234      * The state-transition value indicating "stop"
       
   235      */
       
   236     private static final short STOP_STATE = 0;
       
   237 
       
   238     /**
       
   239      * Magic number for the BreakIterator data file format.
       
   240      */
       
   241     static final byte[] LABEL = {
       
   242         (byte)'B', (byte)'I', (byte)'d', (byte)'a', (byte)'t', (byte)'a',
       
   243         (byte)'\0'
       
   244     };
       
   245     static final int    LABEL_LENGTH = LABEL.length;
       
   246 
       
   247     /**
       
   248      * Version number of the data file format that this class supports.
       
   249      */
       
   250     static final byte supportedVersion = 1;
       
   251 
       
   252     /**
       
   253      * Header size in byte count
       
   254      */
       
   255     private static final int HEADER_LENGTH = 36;
       
   256 
       
   257     /**
       
   258      * An array length of indices for BMP characters
       
   259      */
       
   260     private static final int BMP_INDICES_LENGTH = 512;
       
   261 
       
   262     /**
       
   263      * Tables that map character values to character category numbers
       
   264      */
       
   265     private CompactByteArray charCategoryTable = null;
       
   266     private SupplementaryCharacterData supplementaryCharCategoryTable = null;
       
   267 
       
   268     /**
       
   269      * The table of state transitions used for forward iteration
       
   270      */
       
   271     private short[] stateTable = null;
       
   272 
       
   273     /**
       
   274      * The table of state transitions used to sync up the iterator with the
       
   275      * text in backwards and random-access iteration
       
   276      */
       
   277     private short[] backwardsStateTable = null;
       
   278 
       
   279     /**
       
   280      * A list of flags indicating which states in the state table are accepting
       
   281      * ("end") states
       
   282      */
       
   283     private boolean[] endStates = null;
       
   284 
       
   285     /**
       
   286      * A list of flags indicating which states in the state table are
       
   287      * lookahead states (states which turn lookahead on and off)
       
   288      */
       
   289     private boolean[] lookaheadStates = null;
       
   290 
       
   291     /**
       
   292      * A table for additional data. May be used by a subclass of
       
   293      * RuleBasedBreakIterator.
       
   294      */
       
   295     private byte[] additionalData = null;
       
   296 
       
   297     /**
       
   298      * The number of character categories (and, thus, the number of columns in
       
   299      * the state tables)
       
   300      */
       
   301     private int numCategories;
       
   302 
       
   303     /**
       
   304      * The character iterator through which this BreakIterator accesses the text
       
   305      */
       
   306     private CharacterIterator text = null;
       
   307 
       
   308     /**
       
   309      * A CRC32 value of all data in datafile
       
   310      */
       
   311     private long checksum;
       
   312 
       
   313     //=======================================================================
       
   314     // constructors
       
   315     //=======================================================================
       
   316 
       
   317     /**
       
   318      * Constructs a RuleBasedBreakIterator according to the module and the datafile
       
   319      * provided.
       
   320      */
       
   321     RuleBasedBreakIterator(Module module, String datafile)
       
   322         throws IOException, MissingResourceException {
       
   323         readTables(module, datafile);
       
   324     }
       
   325 
       
   326     /**
       
   327      * Read datafile. The datafile's format is as follows:
       
   328      * <pre>
       
   329      *   BreakIteratorData {
       
   330      *       u1           magic[7];
       
   331      *       u1           version;
       
   332      *       u4           totalDataSize;
       
   333      *       header_info  header;
       
   334      *       body         value;
       
   335      *   }
       
   336      * </pre>
       
   337      * <code>totalDataSize</code> is the summation of the size of
       
   338      * <code>header_info</code> and <code>body</code> in byte count.
       
   339      * <p>
       
   340      * In <code>header</code>, each field except for checksum implies the
       
   341      * length of each field. Since <code>BMPdataLength</code> is a fixed-length
       
   342      *  data(512 entries), its length isn't included in <code>header</code>.
       
   343      * <code>checksum</code> is a CRC32 value of all in <code>body</code>.
       
   344      * <pre>
       
   345      *   header_info {
       
   346      *       u4           stateTableLength;
       
   347      *       u4           backwardsStateTableLength;
       
   348      *       u4           endStatesLength;
       
   349      *       u4           lookaheadStatesLength;
       
   350      *       u4           BMPdataLength;
       
   351      *       u4           nonBMPdataLength;
       
   352      *       u4           additionalDataLength;
       
   353      *       u8           checksum;
       
   354      *   }
       
   355      * </pre>
       
   356      * <p>
       
   357      *
       
   358      * Finally, <code>BMPindices</code> and <code>BMPdata</code> are set to
       
   359      * <code>charCategoryTable</code>. <code>nonBMPdata</code> is set to
       
   360      * <code>supplementaryCharCategoryTable</code>.
       
   361      * <pre>
       
   362      *   body {
       
   363      *       u2           stateTable[stateTableLength];
       
   364      *       u2           backwardsStateTable[backwardsStateTableLength];
       
   365      *       u1           endStates[endStatesLength];
       
   366      *       u1           lookaheadStates[lookaheadStatesLength];
       
   367      *       u2           BMPindices[512];
       
   368      *       u1           BMPdata[BMPdataLength];
       
   369      *       u4           nonBMPdata[numNonBMPdataLength];
       
   370      *       u1           additionalData[additionalDataLength];
       
   371      *   }
       
   372      * </pre>
       
   373      */
       
   374     protected final void readTables(Module module, String datafile)
       
   375         throws IOException, MissingResourceException {
       
   376 
       
   377         byte[] buffer = readFile(module, datafile);
       
   378 
       
   379         /* Read header_info. */
       
   380         int stateTableLength = getInt(buffer, 0);
       
   381         int backwardsStateTableLength = getInt(buffer, 4);
       
   382         int endStatesLength = getInt(buffer, 8);
       
   383         int lookaheadStatesLength = getInt(buffer, 12);
       
   384         int BMPdataLength = getInt(buffer, 16);
       
   385         int nonBMPdataLength = getInt(buffer, 20);
       
   386         int additionalDataLength = getInt(buffer, 24);
       
   387         checksum = getLong(buffer, 28);
       
   388 
       
   389         /* Read stateTable[numCategories * numRows] */
       
   390         stateTable = new short[stateTableLength];
       
   391         int offset = HEADER_LENGTH;
       
   392         for (int i = 0; i < stateTableLength; i++, offset+=2) {
       
   393            stateTable[i] = getShort(buffer, offset);
       
   394         }
       
   395 
       
   396         /* Read backwardsStateTable[numCategories * numRows] */
       
   397         backwardsStateTable = new short[backwardsStateTableLength];
       
   398         for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
       
   399            backwardsStateTable[i] = getShort(buffer, offset);
       
   400         }
       
   401 
       
   402         /* Read endStates[numRows] */
       
   403         endStates = new boolean[endStatesLength];
       
   404         for (int i = 0; i < endStatesLength; i++, offset++) {
       
   405            endStates[i] = buffer[offset] == 1;
       
   406         }
       
   407 
       
   408         /* Read lookaheadStates[numRows] */
       
   409         lookaheadStates = new boolean[lookaheadStatesLength];
       
   410         for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
       
   411            lookaheadStates[i] = buffer[offset] == 1;
       
   412         }
       
   413 
       
   414         /* Read a category table and indices for BMP characters. */
       
   415         short[] temp1 = new short[BMP_INDICES_LENGTH];  // BMPindices
       
   416         for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
       
   417             temp1[i] = getShort(buffer, offset);
       
   418         }
       
   419         byte[] temp2 = new byte[BMPdataLength];  // BMPdata
       
   420         System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
       
   421         offset += BMPdataLength;
       
   422         charCategoryTable = new CompactByteArray(temp1, temp2);
       
   423 
       
   424         /* Read a category table for non-BMP characters. */
       
   425         int[] temp3 = new int[nonBMPdataLength];
       
   426         for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
       
   427             temp3[i] = getInt(buffer, offset);
       
   428         }
       
   429         supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);
       
   430 
       
   431         /* Read additional data */
       
   432         if (additionalDataLength > 0) {
       
   433             additionalData = new byte[additionalDataLength];
       
   434             System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
       
   435         }
       
   436 
       
   437         /* Set numCategories */
       
   438         numCategories = stateTable.length / endStates.length;
       
   439     }
       
   440 
       
   441     protected byte[] readFile(final Module module, final String datafile)
       
   442         throws IOException, MissingResourceException {
       
   443 
       
   444         BufferedInputStream is;
       
   445         try {
       
   446             PrivilegedExceptionAction<BufferedInputStream> pa = () -> {
       
   447                 String pathName = "jdk.localedata".equals(module.getName()) ?
       
   448                      "sun/text/resources/ext/" :
       
   449                      "sun/text/resources/";
       
   450                 InputStream in = module.getResourceAsStream(pathName + datafile);
       
   451                 if (in == null) {
       
   452                     // Try to load the file with "java.base" module instance. Assumption
       
   453                     // here is that the fall back data files to be read should reside in
       
   454                     // java.base.
       
   455                     in = RuleBasedBreakIterator.class.getModule().getResourceAsStream("sun/text/resources/" + datafile);
       
   456                 }
       
   457 
       
   458                 return new BufferedInputStream(in);
       
   459             };
       
   460             is = AccessController.doPrivileged(pa);
       
   461         } catch (PrivilegedActionException e) {
       
   462             throw new InternalError(e.toString(), e);
       
   463         }
       
   464 
       
   465         int offset = 0;
       
   466 
       
   467         /* First, read magic, version, and header_info. */
       
   468         int len = LABEL_LENGTH + 5;
       
   469         byte[] buf = new byte[len];
       
   470         if (is.read(buf) != len) {
       
   471             throw new MissingResourceException("Wrong header length",
       
   472                                                datafile, "");
       
   473         }
       
   474 
       
   475         /* Validate the magic number. */
       
   476         for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
       
   477             if (buf[offset] != LABEL[offset]) {
       
   478                 throw new MissingResourceException("Wrong magic number",
       
   479                                                    datafile, "");
       
   480             }
       
   481         }
       
   482 
       
   483         /* Validate the version number. */
       
   484         if (buf[offset] != supportedVersion) {
       
   485             throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
       
   486                                                datafile, "");
       
   487         }
       
   488 
       
   489         /* Read data: totalDataSize + 8(for checksum) */
       
   490         len = getInt(buf, ++offset);
       
   491         buf = new byte[len];
       
   492         if (is.read(buf) != len) {
       
   493             throw new MissingResourceException("Wrong data length",
       
   494                                                datafile, "");
       
   495         }
       
   496 
       
   497         is.close();
       
   498 
       
   499         return buf;
       
   500     }
       
   501 
       
   502     byte[] getAdditionalData() {
       
   503         return additionalData;
       
   504     }
       
   505 
       
   506     void setAdditionalData(byte[] b) {
       
   507         additionalData = b;
       
   508     }
       
   509 
       
   510     //=======================================================================
       
   511     // boilerplate
       
   512     //=======================================================================
       
   513     /**
       
   514      * Clones this iterator.
       
   515      * @return A newly-constructed RuleBasedBreakIterator with the same
       
   516      * behavior as this one.
       
   517      */
       
   518     @Override
       
   519     public Object clone() {
       
   520         RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone();
       
   521         if (text != null) {
       
   522             result.text = (CharacterIterator) text.clone();
       
   523         }
       
   524         return result;
       
   525     }
       
   526 
       
   527     /**
       
   528      * Returns true if both BreakIterators are of the same class, have the same
       
   529      * rules, and iterate over the same text.
       
   530      */
       
   531     @Override
       
   532     public boolean equals(Object that) {
       
   533         try {
       
   534             if (that == null) {
       
   535                 return false;
       
   536             }
       
   537 
       
   538             RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
       
   539             if (checksum != other.checksum) {
       
   540                 return false;
       
   541             }
       
   542             if (text == null) {
       
   543                 return other.text == null;
       
   544             } else {
       
   545                 return text.equals(other.text);
       
   546             }
       
   547         }
       
   548         catch(ClassCastException e) {
       
   549             return false;
       
   550         }
       
   551     }
       
   552 
       
   553     /**
       
   554      * Returns text
       
   555      */
       
   556     @Override
       
   557     public String toString() {
       
   558         return "[checksum=0x" + Long.toHexString(checksum) + ']';
       
   559     }
       
   560 
       
   561     /**
       
   562      * Compute a hashcode for this BreakIterator
       
   563      * @return A hash code
       
   564      */
       
   565     @Override
       
   566     public int hashCode() {
       
   567         return (int)checksum;
       
   568     }
       
   569 
       
   570     //=======================================================================
       
   571     // BreakIterator overrides
       
   572     //=======================================================================
       
   573 
       
   574     /**
       
   575      * Sets the current iteration position to the beginning of the text.
       
   576      * (i.e., the CharacterIterator's starting offset).
       
   577      * @return The offset of the beginning of the text.
       
   578      */
       
   579     @Override
       
   580     public int first() {
       
   581         CharacterIterator t = getText();
       
   582 
       
   583         t.first();
       
   584         return t.getIndex();
       
   585     }
       
   586 
       
   587     /**
       
   588      * Sets the current iteration position to the end of the text.
       
   589      * (i.e., the CharacterIterator's ending offset).
       
   590      * @return The text's past-the-end offset.
       
   591      */
       
   592     @Override
       
   593     public int last() {
       
   594         CharacterIterator t = getText();
       
   595 
       
   596         // I'm not sure why, but t.last() returns the offset of the last character,
       
   597         // rather than the past-the-end offset
       
   598         t.setIndex(t.getEndIndex());
       
   599         return t.getIndex();
       
   600     }
       
   601 
       
   602     /**
       
   603      * Advances the iterator either forward or backward the specified number of steps.
       
   604      * Negative values move backward, and positive values move forward.  This is
       
   605      * equivalent to repeatedly calling next() or previous().
       
   606      * @param n The number of steps to move.  The sign indicates the direction
       
   607      * (negative is backwards, and positive is forwards).
       
   608      * @return The character offset of the boundary position n boundaries away from
       
   609      * the current one.
       
   610      */
       
   611     @Override
       
   612     public int next(int n) {
       
   613         int result = current();
       
   614         while (n > 0) {
       
   615             result = handleNext();
       
   616             --n;
       
   617         }
       
   618         while (n < 0) {
       
   619             result = previous();
       
   620             ++n;
       
   621         }
       
   622         return result;
       
   623     }
       
   624 
       
   625     /**
       
   626      * Advances the iterator to the next boundary position.
       
   627      * @return The position of the first boundary after this one.
       
   628      */
       
   629     @Override
       
   630     public int next() {
       
   631         return handleNext();
       
   632     }
       
   633 
       
   634     private int cachedLastKnownBreak = BreakIterator.DONE;
       
   635 
       
   636     /**
       
   637      * Advances the iterator backwards, to the last boundary preceding this one.
       
   638      * @return The position of the last boundary position preceding this one.
       
   639      */
       
   640     @Override
       
   641     public int previous() {
       
   642         // if we're already sitting at the beginning of the text, return DONE
       
   643         CharacterIterator text = getText();
       
   644         if (current() == text.getBeginIndex()) {
       
   645             return BreakIterator.DONE;
       
   646         }
       
   647 
       
   648         // set things up.  handlePrevious() will back us up to some valid
       
   649         // break position before the current position (we back our internal
       
   650         // iterator up one step to prevent handlePrevious() from returning
       
   651         // the current position), but not necessarily the last one before
       
   652         // where we started
       
   653         int start = current();
       
   654         int lastResult = cachedLastKnownBreak;
       
   655         if (lastResult >= start || lastResult <= BreakIterator.DONE) {
       
   656             getPrevious();
       
   657             lastResult = handlePrevious();
       
   658         } else {
       
   659             //it might be better to check if handlePrevious() give us closer
       
   660             //safe value but handlePrevious() is slow too
       
   661             //So, this has to be done carefully
       
   662             text.setIndex(lastResult);
       
   663         }
       
   664         int result = lastResult;
       
   665 
       
   666         // iterate forward from the known break position until we pass our
       
   667         // starting point.  The last break position before the starting
       
   668         // point is our return value
       
   669         while (result != BreakIterator.DONE && result < start) {
       
   670             lastResult = result;
       
   671             result = handleNext();
       
   672         }
       
   673 
       
   674         // set the current iteration position to be the last break position
       
   675         // before where we started, and then return that value
       
   676         text.setIndex(lastResult);
       
   677         cachedLastKnownBreak = lastResult;
       
   678         return lastResult;
       
   679     }
       
   680 
       
   681     /**
       
   682      * Returns previous character
       
   683      */
       
   684     private int getPrevious() {
       
   685         char c2 = text.previous();
       
   686         if (Character.isLowSurrogate(c2) &&
       
   687             text.getIndex() > text.getBeginIndex()) {
       
   688             char c1 = text.previous();
       
   689             if (Character.isHighSurrogate(c1)) {
       
   690                 return Character.toCodePoint(c1, c2);
       
   691             } else {
       
   692                 text.next();
       
   693             }
       
   694         }
       
   695         return (int)c2;
       
   696     }
       
   697 
       
   698     /**
       
   699      * Returns current character
       
   700      */
       
   701     int getCurrent() {
       
   702         char c1 = text.current();
       
   703         if (Character.isHighSurrogate(c1) &&
       
   704             text.getIndex() < text.getEndIndex()) {
       
   705             char c2 = text.next();
       
   706             text.previous();
       
   707             if (Character.isLowSurrogate(c2)) {
       
   708                 return Character.toCodePoint(c1, c2);
       
   709             }
       
   710         }
       
   711         return (int)c1;
       
   712     }
       
   713 
       
   714     /**
       
   715      * Returns the count of next character.
       
   716      */
       
   717     private int getCurrentCodePointCount() {
       
   718         char c1 = text.current();
       
   719         if (Character.isHighSurrogate(c1) &&
       
   720             text.getIndex() < text.getEndIndex()) {
       
   721             char c2 = text.next();
       
   722             text.previous();
       
   723             if (Character.isLowSurrogate(c2)) {
       
   724                 return 2;
       
   725             }
       
   726         }
       
   727         return 1;
       
   728     }
       
   729 
       
   730     /**
       
   731      * Returns next character
       
   732      */
       
   733     int getNext() {
       
   734         int index = text.getIndex();
       
   735         int endIndex = text.getEndIndex();
       
   736         if (index == endIndex ||
       
   737             (index += getCurrentCodePointCount()) >= endIndex) {
       
   738             return CharacterIterator.DONE;
       
   739         }
       
   740         text.setIndex(index);
       
   741         return getCurrent();
       
   742     }
       
   743 
       
   744     /**
       
   745      * Returns the position of next character.
       
   746      */
       
   747     private int getNextIndex() {
       
   748         int index = text.getIndex() + getCurrentCodePointCount();
       
   749         int endIndex = text.getEndIndex();
       
   750         if (index > endIndex) {
       
   751             return endIndex;
       
   752         } else {
       
   753             return index;
       
   754         }
       
   755     }
       
   756 
       
   757     /**
       
   758      * Throw IllegalArgumentException unless begin <= offset < end.
       
   759      */
       
   760     protected static final void checkOffset(int offset, CharacterIterator text) {
       
   761         if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
       
   762             throw new IllegalArgumentException("offset out of bounds");
       
   763         }
       
   764     }
       
   765 
       
   766     /**
       
   767      * Sets the iterator to refer to the first boundary position following
       
   768      * the specified position.
       
   769      * @offset The position from which to begin searching for a break position.
       
   770      * @return The position of the first break after the current position.
       
   771      */
       
   772     @Override
       
   773     public int following(int offset) {
       
   774 
       
   775         CharacterIterator text = getText();
       
   776         checkOffset(offset, text);
       
   777 
       
   778         // Set our internal iteration position (temporarily)
       
   779         // to the position passed in.  If this is the _beginning_ position,
       
   780         // then we can just use next() to get our return value
       
   781         text.setIndex(offset);
       
   782         if (offset == text.getBeginIndex()) {
       
   783             cachedLastKnownBreak = handleNext();
       
   784             return cachedLastKnownBreak;
       
   785         }
       
   786 
       
   787         // otherwise, we have to sync up first.  Use handlePrevious() to back
       
   788         // us up to a known break position before the specified position (if
       
   789         // we can determine that the specified position is a break position,
       
   790         // we don't back up at all).  This may or may not be the last break
       
   791         // position at or before our starting position.  Advance forward
       
   792         // from here until we've passed the starting position.  The position
       
   793         // we stop on will be the first break position after the specified one.
       
   794         int result = cachedLastKnownBreak;
       
   795         if (result >= offset || result <= BreakIterator.DONE) {
       
   796             result = handlePrevious();
       
   797         } else {
       
   798             //it might be better to check if handlePrevious() give us closer
       
   799             //safe value but handlePrevious() is slow too
       
   800             //So, this has to be done carefully
       
   801             text.setIndex(result);
       
   802         }
       
   803         while (result != BreakIterator.DONE && result <= offset) {
       
   804             result = handleNext();
       
   805         }
       
   806         cachedLastKnownBreak = result;
       
   807         return result;
       
   808     }
       
   809 
       
   810     /**
       
   811      * Sets the iterator to refer to the last boundary position before the
       
   812      * specified position.
       
   813      * @offset The position to begin searching for a break from.
       
   814      * @return The position of the last boundary before the starting position.
       
   815      */
       
   816     @Override
       
   817     public int preceding(int offset) {
       
   818         // if we start by updating the current iteration position to the
       
   819         // position specified by the caller, we can just use previous()
       
   820         // to carry out this operation
       
   821         CharacterIterator text = getText();
       
   822         checkOffset(offset, text);
       
   823         text.setIndex(offset);
       
   824         return previous();
       
   825     }
       
   826 
       
   827     /**
       
   828      * Returns true if the specified position is a boundary position.  As a side
       
   829      * effect, leaves the iterator pointing to the first boundary position at
       
   830      * or after "offset".
       
   831      * @param offset the offset to check.
       
   832      * @return True if "offset" is a boundary position.
       
   833      */
       
   834     @Override
       
   835     public boolean isBoundary(int offset) {
       
   836         CharacterIterator text = getText();
       
   837         checkOffset(offset, text);
       
   838         if (offset == text.getBeginIndex()) {
       
   839             return true;
       
   840         }
       
   841 
       
   842         // to check whether this is a boundary, we can use following() on the
       
   843         // position before the specified one and return true if the position we
       
   844         // get back is the one the user specified
       
   845         else {
       
   846             return following(offset - 1) == offset;
       
   847         }
       
   848     }
       
   849 
       
   850     /**
       
   851      * Returns the current iteration position.
       
   852      * @return The current iteration position.
       
   853      */
       
   854     @Override
       
   855     public int current() {
       
   856         return getText().getIndex();
       
   857     }
       
   858 
       
   859     /**
       
   860      * Return a CharacterIterator over the text being analyzed.  This version
       
   861      * of this method returns the actual CharacterIterator we're using internally.
       
   862      * Changing the state of this iterator can have undefined consequences.  If
       
   863      * you need to change it, clone it first.
       
   864      * @return An iterator over the text being analyzed.
       
   865      */
       
   866     @Override
       
   867     public CharacterIterator getText() {
       
   868         // The iterator is initialized pointing to no text at all, so if this
       
   869         // function is called while we're in that state, we have to fudge an
       
   870         // iterator to return.
       
   871         if (text == null) {
       
   872             text = new StringCharacterIterator("");
       
   873         }
       
   874         return text;
       
   875     }
       
   876 
       
   877     /**
       
   878      * Set the iterator to analyze a new piece of text.  This function resets
       
   879      * the current iteration position to the beginning of the text.
       
   880      * @param newText An iterator over the text to analyze.
       
   881      */
       
   882     @Override
       
   883     public void setText(CharacterIterator newText) {
       
   884         // Test iterator to see if we need to wrap it in a SafeCharIterator.
       
   885         // The correct behavior for CharacterIterators is to allow the
       
   886         // position to be set to the endpoint of the iterator.  Many
       
   887         // CharacterIterators do not uphold this, so this is a workaround
       
   888         // to permit them to use this class.
       
   889         int end = newText.getEndIndex();
       
   890         boolean goodIterator;
       
   891         try {
       
   892             newText.setIndex(end);  // some buggy iterators throw an exception here
       
   893             goodIterator = newText.getIndex() == end;
       
   894         }
       
   895         catch(IllegalArgumentException e) {
       
   896             goodIterator = false;
       
   897         }
       
   898 
       
   899         if (goodIterator) {
       
   900             text = newText;
       
   901         }
       
   902         else {
       
   903             text = new SafeCharIterator(newText);
       
   904         }
       
   905         text.first();
       
   906 
       
   907         cachedLastKnownBreak = BreakIterator.DONE;
       
   908     }
       
   909 
       
   910 
       
   911     //=======================================================================
       
   912     // implementation
       
   913     //=======================================================================
       
   914 
       
   915     /**
       
   916      * This method is the actual implementation of the next() method.  All iteration
       
   917      * vectors through here.  This method initializes the state machine to state 1
       
   918      * and advances through the text character by character until we reach the end
       
   919      * of the text or the state machine transitions to state 0.  We update our return
       
   920      * value every time the state machine passes through a possible end state.
       
   921      */
       
   922     protected int handleNext() {
       
   923         // if we're already at the end of the text, return DONE.
       
   924         CharacterIterator text = getText();
       
   925         if (text.getIndex() == text.getEndIndex()) {
       
   926             return BreakIterator.DONE;
       
   927         }
       
   928 
       
   929         // no matter what, we always advance at least one character forward
       
   930         int result = getNextIndex();
       
   931         int lookaheadResult = 0;
       
   932 
       
   933         // begin in state 1
       
   934         int state = START_STATE;
       
   935         int category;
       
   936         int c = getCurrent();
       
   937 
       
   938         // loop until we reach the end of the text or transition to state 0
       
   939         while (c != CharacterIterator.DONE && state != STOP_STATE) {
       
   940 
       
   941             // look up the current character's character category (which tells us
       
   942             // which column in the state table to look at)
       
   943             category = lookupCategory(c);
       
   944 
       
   945             // if the character isn't an ignore character, look up a state
       
   946             // transition in the state table
       
   947             if (category != IGNORE) {
       
   948                 state = lookupState(state, category);
       
   949             }
       
   950 
       
   951             // if the state we've just transitioned to is a lookahead state,
       
   952             // (but not also an end state), save its position.  If it's
       
   953             // both a lookahead state and an end state, update the break position
       
   954             // to the last saved lookup-state position
       
   955             if (lookaheadStates[state]) {
       
   956                 if (endStates[state]) {
       
   957                     result = lookaheadResult;
       
   958                 }
       
   959                 else {
       
   960                     lookaheadResult = getNextIndex();
       
   961                 }
       
   962             }
       
   963 
       
   964             // otherwise, if the state we've just transitioned to is an accepting
       
   965             // state, update the break position to be the current iteration position
       
   966             else {
       
   967                 if (endStates[state]) {
       
   968                     result = getNextIndex();
       
   969                 }
       
   970             }
       
   971 
       
   972             c = getNext();
       
   973         }
       
   974 
       
   975         // if we've run off the end of the text, and the very last character took us into
       
   976         // a lookahead state, advance the break position to the lookahead position
       
   977         // (the theory here is that if there are no characters at all after the lookahead
       
   978         // position, that always matches the lookahead criteria)
       
   979         if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
       
   980             result = lookaheadResult;
       
   981         }
       
   982 
       
   983         text.setIndex(result);
       
   984         return result;
       
   985     }
       
   986 
       
   987     /**
       
   988      * This method backs the iterator back up to a "safe position" in the text.
       
   989      * This is a position that we know, without any context, must be a break position.
       
   990      * The various calling methods then iterate forward from this safe position to
       
   991      * the appropriate position to return.  (For more information, see the description
       
   992      * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
       
   993      */
       
   994     protected int handlePrevious() {
       
   995         CharacterIterator text = getText();
       
   996         int state = START_STATE;
       
   997         int category = 0;
       
   998         int lastCategory = 0;
       
   999         int c = getCurrent();
       
  1000 
       
  1001         // loop until we reach the beginning of the text or transition to state 0
       
  1002         while (c != CharacterIterator.DONE && state != STOP_STATE) {
       
  1003 
       
  1004             // save the last character's category and look up the current
       
  1005             // character's category
       
  1006             lastCategory = category;
       
  1007             category = lookupCategory(c);
       
  1008 
       
  1009             // if the current character isn't an ignore character, look up a
       
  1010             // state transition in the backwards state table
       
  1011             if (category != IGNORE) {
       
  1012                 state = lookupBackwardState(state, category);
       
  1013             }
       
  1014 
       
  1015             // then advance one character backwards
       
  1016             c = getPrevious();
       
  1017         }
       
  1018 
       
  1019         // if we didn't march off the beginning of the text, we're either one or two
       
  1020         // positions away from the real break position.  (One because of the call to
       
  1021         // previous() at the end of the loop above, and another because the character
       
  1022         // that takes us into the stop state will always be the character BEFORE
       
  1023         // the break position.)
       
  1024         if (c != CharacterIterator.DONE) {
       
  1025             if (lastCategory != IGNORE) {
       
  1026                 getNext();
       
  1027                 getNext();
       
  1028             }
       
  1029             else {
       
  1030                 getNext();
       
  1031             }
       
  1032         }
       
  1033         return text.getIndex();
       
  1034     }
       
  1035 
       
  1036     /**
       
  1037      * Looks up a character's category (i.e., its category for breaking purposes,
       
  1038      * not its Unicode category)
       
  1039      */
       
  1040     protected int lookupCategory(int c) {
       
  1041         if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
       
  1042             return charCategoryTable.elementAt((char)c);
       
  1043         } else {
       
  1044             return supplementaryCharCategoryTable.getValue(c);
       
  1045         }
       
  1046     }
       
  1047 
       
  1048     /**
       
  1049      * Given a current state and a character category, looks up the
       
  1050      * next state to transition to in the state table.
       
  1051      */
       
  1052     protected int lookupState(int state, int category) {
       
  1053         return stateTable[state * numCategories + category];
       
  1054     }
       
  1055 
       
  1056     /**
       
  1057      * Given a current state and a character category, looks up the
       
  1058      * next state to transition to in the backwards state table.
       
  1059      */
       
  1060     protected int lookupBackwardState(int state, int category) {
       
  1061         return backwardsStateTable[state * numCategories + category];
       
  1062     }
       
  1063 
       
  1064     static long getLong(byte[] buf, int offset) {
       
  1065         long num = buf[offset]&0xFF;
       
  1066         for (int i = 1; i < 8; i++) {
       
  1067             num = num<<8 | (buf[offset+i]&0xFF);
       
  1068         }
       
  1069         return num;
       
  1070     }
       
  1071 
       
  1072     static int getInt(byte[] buf, int offset) {
       
  1073         int num = buf[offset]&0xFF;
       
  1074         for (int i = 1; i < 4; i++) {
       
  1075             num = num<<8 | (buf[offset+i]&0xFF);
       
  1076         }
       
  1077         return num;
       
  1078     }
       
  1079 
       
  1080     static short getShort(byte[] buf, int offset) {
       
  1081         short num = (short)(buf[offset]&0xFF);
       
  1082         num = (short)(num<<8 | (buf[offset+1]&0xFF));
       
  1083         return num;
       
  1084     }
       
  1085 
       
  1086     /*
       
  1087      * This class exists to work around a bug in incorrect implementations
       
  1088      * of CharacterIterator, which incorrectly handle setIndex(endIndex).
       
  1089      * This iterator relies only on base.setIndex(n) where n is less than
       
  1090      * endIndex.
       
  1091      *
       
  1092      * One caveat:  if the base iterator's begin and end indices change
       
  1093      * the change will not be reflected by this wrapper.  Does that matter?
       
  1094      */
       
  1095     // TODO: Review this class to see if it's still required.
       
  1096     private static final class SafeCharIterator implements CharacterIterator,
       
  1097                                                            Cloneable {
       
  1098 
       
  1099         private CharacterIterator base;
       
  1100         private int rangeStart;
       
  1101         private int rangeLimit;
       
  1102         private int currentIndex;
       
  1103 
       
  1104         SafeCharIterator(CharacterIterator base) {
       
  1105             this.base = base;
       
  1106             this.rangeStart = base.getBeginIndex();
       
  1107             this.rangeLimit = base.getEndIndex();
       
  1108             this.currentIndex = base.getIndex();
       
  1109         }
       
  1110 
       
  1111         @Override
       
  1112         public char first() {
       
  1113             return setIndex(rangeStart);
       
  1114         }
       
  1115 
       
  1116         @Override
       
  1117         public char last() {
       
  1118             return setIndex(rangeLimit - 1);
       
  1119         }
       
  1120 
       
  1121         @Override
       
  1122         public char current() {
       
  1123             if (currentIndex < rangeStart || currentIndex >= rangeLimit) {
       
  1124                 return DONE;
       
  1125             }
       
  1126             else {
       
  1127                 return base.setIndex(currentIndex);
       
  1128             }
       
  1129         }
       
  1130 
       
  1131         @Override
       
  1132         public char next() {
       
  1133 
       
  1134             currentIndex++;
       
  1135             if (currentIndex >= rangeLimit) {
       
  1136                 currentIndex = rangeLimit;
       
  1137                 return DONE;
       
  1138             }
       
  1139             else {
       
  1140                 return base.setIndex(currentIndex);
       
  1141             }
       
  1142         }
       
  1143 
       
  1144         @Override
       
  1145         public char previous() {
       
  1146 
       
  1147             currentIndex--;
       
  1148             if (currentIndex < rangeStart) {
       
  1149                 currentIndex = rangeStart;
       
  1150                 return DONE;
       
  1151             }
       
  1152             else {
       
  1153                 return base.setIndex(currentIndex);
       
  1154             }
       
  1155         }
       
  1156 
       
  1157         @Override
       
  1158         public char setIndex(int i) {
       
  1159 
       
  1160             if (i < rangeStart || i > rangeLimit) {
       
  1161                 throw new IllegalArgumentException("Invalid position");
       
  1162             }
       
  1163             currentIndex = i;
       
  1164             return current();
       
  1165         }
       
  1166 
       
  1167         @Override
       
  1168         public int getBeginIndex() {
       
  1169             return rangeStart;
       
  1170         }
       
  1171 
       
  1172         @Override
       
  1173         public int getEndIndex() {
       
  1174             return rangeLimit;
       
  1175         }
       
  1176 
       
  1177         @Override
       
  1178         public int getIndex() {
       
  1179             return currentIndex;
       
  1180         }
       
  1181 
       
  1182         @Override
       
  1183         public Object clone() {
       
  1184 
       
  1185             SafeCharIterator copy = null;
       
  1186             try {
       
  1187                 copy = (SafeCharIterator) super.clone();
       
  1188             }
       
  1189             catch(CloneNotSupportedException e) {
       
  1190                 throw new Error("Clone not supported: " + e);
       
  1191             }
       
  1192 
       
  1193             CharacterIterator copyOfBase = (CharacterIterator) base.clone();
       
  1194             copy.base = copyOfBase;
       
  1195             return copy;
       
  1196         }
       
  1197     }
       
  1198 }