jdk/src/share/classes/java/text/BreakDictionary.java
author alanb
Thu, 18 Aug 2011 16:47:20 +0100
changeset 10347 1c9efe1ec7d3
parent 5506 202f599c92aa
child 10419 12c063b39232
permissions -rw-r--r--
7015589: (spec) BufferedWriter.close leaves stream open if close of underlying Writer fails Reviewed-by: forax, mduigou
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     2
 * Copyright (c) 1999, 2003, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * The original version of this source code and documentation
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * is protected by multiple US and International patents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
package java.text;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
import java.security.AccessController;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
import java.security.PrivilegedActionException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
import java.security.PrivilegedExceptionAction;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
import java.util.MissingResourceException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
import sun.text.CompactByteArray;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
import sun.text.SupplementaryCharacterData;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * This is the class that represents the list of known words used by
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * DictionaryBasedBreakIterator.  The conceptual data structure used
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * here is a trie: there is a node hanging off the root node for every
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * letter that can start a word.  Each of these nodes has a node hanging
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * off of it for every letter that can be the second letter of a word
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * if this node is the first letter, and so on.  The trie is represented
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * as a two-dimensional array that can be treated as a table of state
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * transitions.  Indexes are used to compress this array, taking
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * advantage of the fact that this array will always be very sparse.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
class BreakDictionary {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
    // data members
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
      * The version of the dictionary that was read in.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
      */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
    private static int supportedVersion = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
     * Maps from characters to column numbers.  The main use of this is to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
     * avoid making room in the array for empty columns.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
    private CompactByteArray columnMap = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
    private SupplementaryCharacterData supplementaryCharColumnMap = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
     * The number of actual columns in the table
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
    private int numCols;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
     * Columns are organized into groups of 32.  This says how many
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
     * column groups.  (We could calculate this, but we store the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
     * value to avoid having to repeatedly calculate it.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    private int numColGroups;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     * The actual compressed state table.  Each conceptual row represents
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
     * a state, and the cells in it contain the row numbers of the states
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
     * to transition to for each possible letter.  0 is used to indicate
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
     * an illegal combination of letters (i.e., the error state).  The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
     * table is compressed by eliminating all the unpopulated (i.e., zero)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
     * cells.  Multiple conceptual rows can then be doubled up in a single
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * physical row by sliding them up and possibly shifting them to one
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     * side or the other so the populated cells don't collide.  Indexes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     * are used to identify unpopulated cells and to locate populated cells.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
    private short[] table = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     * This index maps logical row numbers to physical row numbers
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    private short[] rowIndex = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     * A bitmap is used to tell which cells in the comceptual table are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     * populated.  This array contains all the unique bit combinations
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     * in that bitmap.  If the table is more than 32 columns wide,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * successive entries in this array are used for a single row.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
    private int[] rowIndexFlags = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
     * This index maps from a logical row number into the bitmap table above.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
     * (This keeps us from storing duplicate bitmap combinations.)  Since there
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
     * are a lot of rows with only one populated cell, instead of wasting space
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
     * in the bitmap table, we just store a negative number in this index for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
     * rows with one populated cell.  The absolute value of that number is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
     * the column number of the populated cell.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
    private short[] rowIndexFlagsIndex = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
     * For each logical row, this index contains a constant that is added to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
     * the logical column number to get the physical column number
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
    private byte[] rowIndexShifts = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
    // deserialization
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
    public BreakDictionary(String dictionaryName)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
        readDictionaryFile(dictionaryName);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
    private void readDictionaryFile(final String dictionaryName)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
        throws IOException, MissingResourceException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
        BufferedInputStream in;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
            in = (BufferedInputStream)AccessController.doPrivileged(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
                new PrivilegedExceptionAction() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
                    public Object run() throws Exception {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
                        return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + dictionaryName));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
            );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
        catch (PrivilegedActionException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
            throw new InternalError(e.toString());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
        byte[] buf = new byte[8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
        if (in.read(buf) != 8) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
            throw new MissingResourceException("Wrong data length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
                                               dictionaryName, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
        // check vesion
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
        int version = BreakIterator.getInt(buf, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
        if (version != supportedVersion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
            throw new MissingResourceException("Dictionary version(" + version + ") is unsupported",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
                                                           dictionaryName, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
        // get data size
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        int len = BreakIterator.getInt(buf, 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        buf = new byte[len];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
        if (in.read(buf) != len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
            throw new MissingResourceException("Wrong data length",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
                                               dictionaryName, "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
        // close the stream
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        int l;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        int offset = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
        // read in the column map for BMP characteres (this is serialized in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        // its internal form: an index array followed by a data array)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
        short[] temp = new short[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
        for (int i = 0; i < l; i++, offset+=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
            temp[i] = BreakIterator.getShort(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
        byte[] temp2 = new byte[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
        for (int i = 0; i < l; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
            temp2[i] = buf[offset];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
        columnMap = new CompactByteArray(temp, temp2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
        // read in numCols and numColGroups
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
        numCols = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        numColGroups = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
        // read in the row-number index
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
        rowIndex = new short[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        for (int i = 0; i < l; i++, offset+=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
            rowIndex[i] = BreakIterator.getShort(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        // load in the populated-cells bitmap: index first, then bitmap list
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
        rowIndexFlagsIndex = new short[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        for (int i = 0; i < l; i++, offset+=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
            rowIndexFlagsIndex[i] = BreakIterator.getShort(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
        rowIndexFlags = new int[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
        for (int i = 0; i < l; i++, offset+=4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
            rowIndexFlags[i] = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
        // load in the row-shift index
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
        rowIndexShifts = new byte[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
        for (int i = 0; i < l; i++, offset++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
            rowIndexShifts[i] = buf[offset];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
        // load in the actual state table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
        table = new short[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        for (int i = 0; i < l; i++, offset+=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
            table[i] = BreakIterator.getShort(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
        // finally, prepare the column map for supplementary characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
        l = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
        offset += 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
        int[] temp3 = new int[l];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
        for (int i = 0; i < l; i++, offset+=4) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
            temp3[i] = BreakIterator.getInt(buf, offset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        supplementaryCharColumnMap = new SupplementaryCharacterData(temp3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    // access to the words
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
    //=========================================================================
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
     * Uses the column map to map the character to a column number, then
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
     * passes the row and column number to getNextState()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
     * @param row The current state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
     * @param ch The character whose column we're interested in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
     * @return The new state to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
    public final short getNextStateFromCharacter(int row, int ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
        int col;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
            col = columnMap.elementAt((char)ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
            col = supplementaryCharColumnMap.getValue(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
        return getNextState(row, col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
     * Returns the value in the cell with the specified (logical) row and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
     * column numbers.  In DictionaryBasedBreakIterator, the row number is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
     * a state number, the column number is an input, and the return value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
     * is the row number of the new state to transition to.  (0 is the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
     * "error" state, and -1 is the "end of word" state in a dictionary)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
     * @param row The row number of the current state
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
     * @param col The column number of the input character (0 means "not a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
     * dictionary character")
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
     * @return The row number of the new state to transition to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    public final short getNextState(int row, int col) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
        if (cellIsPopulated(row, col)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
            // we map from logical to physical row number by looking up the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            // mapping in rowIndex; we map from logical column number to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            // physical column number by looking up a shift value for this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
            // logical row and offsetting the logical column number by
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
            // the shift amount.  Then we can use internalAt() to actually
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
            // get the value out of the table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            return internalAt(rowIndex[row], col + rowIndexShifts[row]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
     * Given (logical) row and column numbers, returns true if the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
     * cell in that position is populated
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
    private final boolean cellIsPopulated(int row, int col) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
        // look up the entry in the bitmap index for the specified row.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
        // If it's a negative number, it's the column number of the only
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
        // populated cell in the row
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        if (rowIndexFlagsIndex[row] < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
            return col == -rowIndexFlagsIndex[row];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        // if it's a positive number, it's the offset of an entry in the bitmap
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
        // list.  If the table is more than 32 columns wide, the bitmap is stored
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
        // successive entries in the bitmap list, so we have to divide the column
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
        // number by 32 and offset the number we got out of the index by the result.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
        // Once we have the appropriate piece of the bitmap, test the appropriate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
        // bit and return the result.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
            int flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
            return (flags & (1 << (col & 0x1f))) != 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
     * Implementation of getNextState() when we know the specified cell is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
     * populated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
     * @param row The PHYSICAL row number of the cell
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
     * @param col The PHYSICAL column number of the cell
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
     * @return The value stored in the cell
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
    private final short internalAt(int row, int col) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
        // the table is a one-dimensional array, so this just does the math necessary
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        // to treat it as a two-dimensional array (we don't just use a two-dimensional
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        // array because two-dimensional arrays are inefficient in Java)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
        return table[row * numCols + col];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
}