jdk/src/share/classes/sun/text/normalizer/NormalizerBase.java
author sherman
Tue, 30 Aug 2011 11:53:11 -0700
changeset 10419 12c063b39232
parent 5506 202f599c92aa
child 11136 f0f53bbe5bd1
permissions -rw-r--r--
7084245: Update usages of InternalError to use exception chaining Summary: to use new InternalError constructor with cause chainning Reviewed-by: alanb, ksrini, xuelei, neugens Contributed-by: sebastian.sickelmann@gmx.de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
     2
 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 3101
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
 *******************************************************************************
2497
903fd9d785ef 6404304: RFE: Unicode 5.1 support
peytoia
parents: 2
diff changeset
    27
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
 *                                                                             *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * The original version of this source code and documentation is copyrighted   *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 * and owned by IBM, These materials are provided under terms of a License     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * Agreement between IBM and Sun. This technology is protected by multiple     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * US and International patents. This notice and attribution to IBM may not    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * to removed.                                                                 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 *******************************************************************************
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
package sun.text.normalizer;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
import java.text.CharacterIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
import java.text.Normalizer;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * Unicode Normalization
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * <h2>Unicode normalization API</h2>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * <code>normalize</code> transforms Unicode text into an equivalent composed or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * decomposed form, allowing for easier sorting and searching of text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * <code>normalize</code> supports the standard normalization forms described in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * Characters with accents or other adornments can be encoded in
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * several different ways in Unicode.  For example, take the character A-acute.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * In Unicode, this can be encoded as a single character (the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * "composed" form):
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * </p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * or as two separate characters (the "decomposed" form):
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 *      0041    LATIN CAPITAL LETTER A
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 *      0301    COMBINING ACUTE ACCENT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * </p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * To a user of your program, however, both of these sequences should be
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * treated as the same "user-level" character "A with acute accent".  When you
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * are searching or comparing text, you must ensure that these two sequences are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * treated equivalently.  In addition, you must handle characters with more than
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 * one accent.  Sometimes the order of a character's combining accents is
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * significant, while in other cases accent sequences in different orders are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * really equivalent.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
 * Similarly, the string "ffi" can be encoded as three separate letters:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
 *      0066    LATIN SMALL LETTER F
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
 *      0066    LATIN SMALL LETTER F
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
 *      0069    LATIN SMALL LETTER I
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
 * </p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
 * or as the single character
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
 *      FB03    LATIN SMALL LIGATURE FFI
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
 * </p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
 * The ffi ligature is not a distinct semantic character, and strictly speaking
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
 * it shouldn't be in Unicode at all, but it was included for compatibility
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
 * with existing character sets that already provided it.  The Unicode standard
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
 * identifies such characters by giving them "compatibility" decompositions
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
 * into the corresponding semantic characters.  When sorting and searching, you
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
 * will often want to use these mappings.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
 * <code>normalize</code> helps solve these problems by transforming text into
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
 * the canonical composed and decomposed forms as shown in the first example
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
 * above. In addition, you can have it perform compatibility decompositions so
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
 * that you can treat compatibility characters the same as their equivalents.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
 * Finally, <code>normalize</code> rearranges accents into the proper canonical
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
 * order, so that you do not have to worry about accent rearrangement on your
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
 * own.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
 * Form FCD, "Fast C or D", is also designed for collation.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
 * It allows working on strings that are not necessarily normalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
 * with an algorithm (like in collation) that works under "canonical closure",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
 * i.e., it treats precomposed characters and their decomposed equivalents the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
 * same.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
 * It is not a normalization form because it does not provide for uniqueness of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
 * representation. Multiple strings may be canonically equivalent (their NFDs
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
 * are identical) and may all conform to FCD without being identical themselves.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
 * The form is defined such that the "raw decomposition", the recursive
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
 * canonical decomposition of each character, results in a string that is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
 * canonically ordered. This means that precomposed characters are allowed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
 * as long as their decompositions do not need canonical reordering.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
 * Its advantage for a process like collation is that all NFD and most NFC texts
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
 * - and many unnormalized texts - already conform to FCD and do not need to be
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
 * normalized (NFD) for such a process. The FCD quick check will return YES for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
 * most strings in practice.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
 * normalize(FCD) may be implemented with NFD.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
 * For more details on FCD see the collation design document:
2497
903fd9d785ef 6404304: RFE: Unicode 5.1 support
peytoia
parents: 2
diff changeset
   129
 * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
 * ICU collation performs either NFD or FCD normalization automatically if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
 * normalization is turned on for the collator object. Beyond collation and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
 * string search, normalized strings may be useful for string equivalence
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
 * comparisons, transliteration/transcription, unique representations, etc.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
 * The W3C generally recommends exchanging texts in NFC.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
 * Note also that most legacy character encodings use only precomposed forms and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
 * often do not encode any combining marks by themselves. For conversion to such
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
 * character encodings the Unicode text needs to be normalized to NFC.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
 * For more usage examples, see the Unicode Standard Annex.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
 * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
public final class NormalizerBase implements Cloneable {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    // Private data
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    private char[] buffer = new char[100];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
    private int bufferStart = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
    private int bufferPos   = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    private int bufferLimit = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    // The input text and our position in it
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
    private UCharacterIterator  text;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    private Mode                mode = NFC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
    private int                 options = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
    private int                 currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
    private int                 nextIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
     * Options bit set value to select Unicode 3.2 normalization
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
     * (except NormalizationCorrections).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
     * At most one Unicode version can be selected at a time.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
     * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
    public static final int UNICODE_3_2=0x20;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
     * Constant indicating that the end of the iteration has been reached.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
     * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
    public static final int DONE = UCharacterIterator.DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
     * Constants for normalization modes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
    public static class Mode {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
        private int modeValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        private Mode(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
            modeValue = value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
         * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
                                UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
            int srcLen = (srcLimit - srcStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
            int destLen = (destLimit - destStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
            if( srcLen > destLen ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
                return srcLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
            System.arraycopy(src,srcStart,dest,destStart,srcLen);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
            return srcLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
         * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
                                int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
            return normalize(   src, srcStart, srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
                                dest,destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
                                NormalizerImpl.getNX(options)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
                                );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
         * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
        protected String normalize(String src, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
            return src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
         * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
        protected int getMinC() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
            return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
         * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
        protected int getMask() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
            return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
         * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
        protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
            return null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
         * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
        protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
            return null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
         * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
        protected QuickCheckResult quickCheck(char[] src,int start, int limit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
                                              boolean allowMaybe,UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
            if(allowMaybe) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
                return MAYBE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
            return NO;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
        /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
         * This method is used for method dispatch
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
         * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
        protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
     * No decomposition/composition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
    public static final Mode NONE = new Mode(1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
     * Canonical decomposition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
    public static final Mode NFD = new NFDMode(2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
    private static final class NFDMode extends Mode {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
        private NFDMode(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            super(value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
                                UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
            int[] trailCC = new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            return NormalizerImpl.decompose(src,  srcStart,srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
                                            dest, destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
                                            false, trailCC,nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
        protected String normalize( String src, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
            return decompose(src,false,options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
        protected int getMinC() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
            return NormalizerImpl.MIN_WITH_LEAD_CC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
        protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
            return new IsPrevNFDSafe();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
            return new IsNextNFDSafe();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
        protected int getMask() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
        protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                                              int limit,boolean allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
                                              UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
            return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
                                             src, start,limit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
                                             NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
                                                                              NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
                                                                              ),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
                                             NormalizerImpl.QC_NFD,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                                             0,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
                                             allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
                                             nx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
                                             );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
            return NormalizerImpl.isNFSkippable(c,this,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
                                                (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
                                                );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
     * Compatibility decomposition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
    public static final Mode NFKD = new NFKDMode(3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
    private static final class NFKDMode extends Mode {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        private NFKDMode(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            super(value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
                                UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
            int[] trailCC = new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
            return NormalizerImpl.decompose(src,  srcStart,srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
                                            dest, destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
                                            true, trailCC, nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        protected String normalize( String src, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
            return decompose(src,true,options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
        protected int getMinC() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
            return NormalizerImpl.MIN_WITH_LEAD_CC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
            return new IsPrevNFDSafe();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
        protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
            return new IsNextNFDSafe();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
        protected int getMask() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
        protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
                                              int limit,boolean allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
                                              UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
            return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
                                             src,start,limit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
                                             NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
                                                                              NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
                                                                              ),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
                                             NormalizerImpl.QC_NFKD,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
                                             NormalizerImpl.OPTIONS_COMPAT,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
                                             allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
                                             nx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
                                             );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
        protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
            return NormalizerImpl.isNFSkippable(c, this,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
                                                (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
                                                );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
     * Canonical decomposition followed by canonical composition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
    public static final Mode NFC = new NFCMode(4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
    private static final class NFCMode extends Mode{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
        private NFCMode(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
            super(value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
                                UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
            return NormalizerImpl.compose( src, srcStart, srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
                                           dest,destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
                                           0, nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
        protected String normalize( String src, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
            return compose(src, false, options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
        protected int getMinC() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
            return NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
                                                    NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                                                    );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
        protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
            return new IsPrevTrueStarter();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
        protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
            return new IsNextTrueStarter();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
        protected int getMask() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
        protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
                                              int limit,boolean allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
                                              UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
            return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                                             src,start,limit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                                             NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                                                                              NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                                                                              ),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                                             NormalizerImpl.QC_NFC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                                             0,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                                             allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                                             nx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
                                             );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
        protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
            return NormalizerImpl.isNFSkippable(c,this,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
                                                ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
                                                  (NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
                                                  )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                                                );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
     * Compatibility decomposition followed by canonical composition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
    public static final Mode NFKC =new NFKCMode(5);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
    private static final class NFKCMode extends Mode{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
        private NFKCMode(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
            super(value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
                                char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
                                UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
            return NormalizerImpl.compose(src,  srcStart,srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                                          dest, destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
                                          NormalizerImpl.OPTIONS_COMPAT, nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
        protected String normalize( String src, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
            return compose(src, true, options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
        protected int getMinC() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
            return NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
                                                    NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
                                                    );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
        protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
            return new IsPrevTrueStarter();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
        protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
            return new IsNextTrueStarter();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
        protected int getMask() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
            return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
        protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
                                              int limit,boolean allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
                                              UnicodeSet nx) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
            return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
                                             src,start,limit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
                                             NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
                                                                              NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
                                                                              ),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
                                             NormalizerImpl.QC_NFKC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
                                             NormalizerImpl.OPTIONS_COMPAT,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
                                             allowMaybe,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
                                             nx
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
                                             );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
        protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
            return NormalizerImpl.isNFSkippable(c, this,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
                                                ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
                                                  (NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
                                                  )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
                                                );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
     * Result values for quickCheck().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
     * For details see Unicode Technical Report 15.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
    public static final class QuickCheckResult{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
        private int resultValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
        private QuickCheckResult(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
            resultValue=value;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
     * Indicates that string is not in the normalized format
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
    public static final QuickCheckResult NO = new QuickCheckResult(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
     * Indicates that string is in the normalized format
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
    public static final QuickCheckResult YES = new QuickCheckResult(1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
     * Indicates it cannot be determined if string is in the normalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
     * format without further thorough checks.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
    public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
    // Constructors
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
    /**
     * Creates a new {@code Normalizer} object for iterating over the
     * normalized form of a given string.
     * <p>
     * The {@code options} parameter specifies which optional
     * {@code Normalizer} features are to be enabled for this object.
     *
     * @param str  The string to be normalized.  The normalization
     *             will start at the beginning of the string. It is wrapped
     *             with {@code UCharacterIterator.getInstance}.
     *
     * @param mode The normalization mode.
     *
     * @param opt  Any optional features to be enabled.
     *             Currently the only available option is {@link #UNICODE_3_2}.
     *             If you want the default behavior corresponding to one of the
     *             standard Unicode Normalization Forms, use 0 for this argument.
     * @stable ICU 2.6
     */
    public NormalizerBase(String str, Mode mode, int opt) {
        text = UCharacterIterator.getInstance(str);
        options = opt;
        this.mode = mode;
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
    /**
     * Creates a new {@code Normalizer} object for iterating over the
     * normalized form of the given text. Equivalent to calling the
     * three-argument constructor with {@code UNICODE_LATEST} as the options
     * value.
     *
     * @param iter  The input text to be normalized.  The normalization
     *              will start at the beginning of the string.
     *
     * @param mode  The normalization mode.
     */
    public NormalizerBase(CharacterIterator iter, Mode mode) {
        this(iter, mode, UNICODE_LATEST);
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
     * Creates a new <tt>Normalizer</tt> object for iterating over the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
     * normalized form of the given text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
     * @param iter  The input text to be normalized.  The normalization
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
     *              will start at the beginning of the string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
     * @param mode  The normalization mode.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
     * @param opt Any optional features to be enabled.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
     *            Currently the only available option is {@link #UNICODE_3_2}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
     *            If you want the default behavior corresponding to one of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
     *            standard Unicode Normalization Forms, use 0 for this argument.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
     * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
    public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
        this.text = UCharacterIterator.getInstance(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
                                                   (CharacterIterator)iter.clone()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
                                                   );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
        this.mode = mode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
        this.options = opt;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
     * Clones this <tt>Normalizer</tt> object.  All properties of this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
     * object are duplicated in the new object, including the cloning of any
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
     * {@link CharacterIterator} that was passed in to the constructor
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
     * or to {@link #setText(CharacterIterator) setText}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
     * However, the text storage underlying
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
     * the <tt>CharacterIterator</tt> is not duplicated unless the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
     * iterator's <tt>clone</tt> method does so.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
    public Object clone() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
            NormalizerBase copy = (NormalizerBase) super.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
            copy.text = (UCharacterIterator) text.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
            //clone the internal buffer
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
            if (buffer != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
                copy.buffer = new char[buffer.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
                System.arraycopy(buffer,0,copy.buffer,0,buffer.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
            return copy;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
        catch (CloneNotSupportedException e) {
10419
12c063b39232 7084245: Update usages of InternalError to use exception chaining
sherman
parents: 5506
diff changeset
   639
            throw new InternalError(e.toString(), e);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
    //--------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
    // Static Utility methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
    //--------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
     * Compose a string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
     * The string will be composed according to the specified mode.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
     * @param str        The string to compose.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
     * @param compat     If true the string will be composed according to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
     *                    NFKC rules and if false will be composed according to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
     *                    NFC rules.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
     * @param options    The only recognized option is UNICODE_3_2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
     * @return String    The composed string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
     * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
    public static String compose(String str, boolean compat, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
        char[] dest, src;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
        if (options == UNICODE_3_2_0_ORIGINAL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
            String mappedStr = NormalizerImpl.convert(str);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
            dest = new char[mappedStr.length()*MAX_BUF_SIZE_COMPOSE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
            src = mappedStr.toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
            dest = new char[str.length()*MAX_BUF_SIZE_COMPOSE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
            src = str.toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
        int destSize=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
        UnicodeSet nx = NormalizerImpl.getNX(options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
        /* reset options bits that should only be set here or inside compose() */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
        options&=~(NormalizerImpl.OPTIONS_SETS_MASK|NormalizerImpl.OPTIONS_COMPAT|NormalizerImpl.OPTIONS_COMPOSE_CONTIGUOUS);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
        if(compat) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
            options|=NormalizerImpl.OPTIONS_COMPAT;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
        for(;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
            destSize=NormalizerImpl.compose(src,0,src.length,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                                            dest,0,dest.length,options,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
                                            nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
            if(destSize<=dest.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
                return new String(dest,0,destSize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
                dest = new char[destSize];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
    // compose() sizes its initial destination buffer as input length times this factor.
    private static final int MAX_BUF_SIZE_COMPOSE = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
    // decompose() sizes its initial destination buffer as input length times this factor.
    private static final int MAX_BUF_SIZE_DECOMPOSE = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
     * Decompose a string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
     * The string will be decomposed according to the specified mode.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
     * @param str       The string to decompose.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
     * @param compat    If true the string will be decomposed according to NFKD
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
     *                   rules and if false will be decomposed according to NFD
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
     *                   rules.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
     * @return String   The decomposed string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
    public static String decompose(String str, boolean compat) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
        return decompose(str,compat,UNICODE_LATEST);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
     * Decompose a string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
     * The string will be decomposed according to the specified mode.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
     * @param str     The string to decompose.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
     * @param compat  If true the string will be decomposed according to NFKD
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
     *                 rules and if false will be decomposed according to NFD
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
     *                 rules.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
     * @param options The normalization options, ORed together (0 for no options).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
     * @return String The decomposed string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
     * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
    public static String decompose(String str, boolean compat, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
        int[] trailCC = new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
        int destSize=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
        UnicodeSet nx = NormalizerImpl.getNX(options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
        char[] dest;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        if (options == UNICODE_3_2_0_ORIGINAL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
            String mappedStr = NormalizerImpl.convert(str);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
            dest = new char[mappedStr.length()*MAX_BUF_SIZE_DECOMPOSE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
            for(;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
                destSize=NormalizerImpl.decompose(mappedStr.toCharArray(),0,mappedStr.length(),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
                                                  dest,0,dest.length,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
                                                  compat,trailCC, nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
                if(destSize<=dest.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
                    return new String(dest,0,destSize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
                    dest = new char[destSize];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
            dest = new char[str.length()*MAX_BUF_SIZE_DECOMPOSE];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
            for(;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
                destSize=NormalizerImpl.decompose(str.toCharArray(),0,str.length(),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
                                                  dest,0,dest.length,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
                                                  compat,trailCC, nx);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
                if(destSize<=dest.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
                    return new String(dest,0,destSize);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
                    dest = new char[destSize];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
     * Normalize a string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
     * The string will be normalized according to the specified normalization
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
     * mode and options.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
     * @param src       The char array to normalize.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
     * @param srcStart  Start index of the source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
     * @param srcLimit  Limit index of the source
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
     * @param dest      The char buffer to fill in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
     * @param destStart Start index of the destination buffer
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
     * @param destLimit End index of the destination buffer
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
     * @param mode      The normalization mode; one of Normalizer.NONE,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
     *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
     *                   Normalizer.NFKD, Normalizer.DEFAULT
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
     * @param options The normalization options, ORed together (0 for no options).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
     * @return int      The total buffer size needed;if greater than length of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
     *                   result, the output was truncated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
     * @exception       IndexOutOfBoundsException if the target capacity is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
     *                   less than the required length
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
     * @stable ICU 2.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
    public static int normalize(char[] src,int srcStart, int srcLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
                                char[] dest,int destStart, int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
                                Mode  mode, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        int length = mode.normalize(src,srcStart,srcLimit,dest,destStart,destLimit, options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
        if(length<=(destLimit-destStart)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
            return length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
            throw new IndexOutOfBoundsException(Integer.toString(length));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
    // Iteration API
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
     * Return the current character in the normalized text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
     * @return The codepoint as an int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
    public int current() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        if(bufferPos<bufferLimit || nextNormalize()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
            return getCodePointAt(bufferPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
            return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
     * Return the next character in the normalized text and advance
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
     * the iteration position by one.  If the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
     * of the text has already been reached, {@link #DONE} is returned.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
     * @return The codepoint as an int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
    public int next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
        if(bufferPos<bufferLimit ||  nextNormalize()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
            int c=getCodePointAt(bufferPos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
            bufferPos+=(c>0xFFFF) ? 2 : 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
            return c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
            return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
     * Return the previous character in the normalized text and decrement
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
     * the iteration position by one.  If the beginning
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
     * of the text has already been reached, {@link #DONE} is returned.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
     * @return The codepoint as an int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
    public int previous() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
        if(bufferPos>0 || previousNormalize()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
            int c=getCodePointAt(bufferPos-1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
            bufferPos-=(c>0xFFFF) ? 2 : 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
            return c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
            return DONE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
     * Reset the index to the beginning of the text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
     * This is equivalent to <tt>setIndexOnly(0)</tt>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
    public void reset() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
        text.setIndex(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
        currentIndex=nextIndex=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
        clearBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
     * Set the iteration position in the input text that is being normalized,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
     * without any immediate normalization.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
     * After setIndexOnly(), getIndex() will return the same index that is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
     * specified here.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
     * @param index the desired index in the input text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
    public void setIndexOnly(int index) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
        text.setIndex(index);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
        currentIndex=nextIndex=index; // validates index
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
        clearBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
     * Set the iteration position in the input text that is being normalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
     * and return the first normalized character at that position.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
     * <b>Note:</b> This method sets the position in the <em>input</em> text,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
     * while {@link #next} and {@link #previous} iterate through characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
     * in the normalized <em>output</em>.  This means that there is not
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
     * necessarily a one-to-one correspondence between characters returned
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
     * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
     * returned from <tt>setIndex</tt> and {@link #getIndex}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
     * @param index the desired index in the input text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
     * @return   the first normalized character that is the result of iterating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
     *            forward starting at the given index.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
     * @throws IllegalArgumentException if the given index is less than
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
     *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
     * @return The codepoint as an int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
     * @deprecated ICU 3.2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
     * @obsolete ICU 3.2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
     public int setIndex(int index) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
         setIndexOnly(index);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
         return current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
     }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
     * Retrieve the index of the start of the input text. This is the begin
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
     * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
     * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
     * @deprecated ICU 2.2. Use startIndex() instead.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
     * @return The begin index of the input text, which is always 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
     * @see #startIndex
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
    public int getBeginIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
        return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
     * Retrieve the index of the end of the input text.  This is the end index
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
     * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
     * over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
     * @deprecated ICU 2.2. Use endIndex() instead.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
     * @return The end index of the input text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
     * @see #endIndex
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
    public int getEndIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
        return endIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
     * Retrieve the current iteration position in the input text that is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
     * being normalized.  This method is useful in applications such as
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
     * searching, where you need to be able to determine the position in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
     * the input text that corresponds to a given normalized output character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
     * <b>Note:</b> This method returns the position in the <em>input</em>, while
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
     * {@link #next} and {@link #previous} iterate through characters in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
     * <em>output</em>.  This means that there is not necessarily a one-to-one
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
     * correspondence between characters returned by <tt>next</tt> and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
     * <tt>previous</tt> and the indices passed to and returned from
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
     * <tt>setIndex</tt> and {@link #getIndex}.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
     * @return The current iteration position
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
    public int getIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
        if(bufferPos<bufferLimit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
            return currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
            return nextIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
     * Retrieve the index of the end of the input text.  This is the end index
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
     * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
     * over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
     * @return The end index of the input text
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
    public int endIndex() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
        return text.getLength();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
    // Property access methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
     * Set the normalization mode for this object.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
     * <b>Note:</b>If the normalization mode is changed while iterating
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
     * over a string, calls to {@link #next} and {@link #previous} may
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
     * return previously buffered characters in the old normalization mode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
     * until the iteration is able to re-sync at the next base character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
     * It is safest to call {@link #setText setText()}, {@link #first},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
     * {@link #last}, etc. after calling <tt>setMode</tt>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
     * @param newMode the new mode for this <tt>Normalizer</tt>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
     * The supported modes are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
     * <ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
     *  <li>{@link #COMPOSE}        - Unicode canonical decomposition
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
     *                                  followed by canonical composition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
     *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
     *                                  followed by canonical composition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
     *  <li>{@link #DECOMP}         - Unicode canonical decomposition
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
     *  <li>{@link #DECOMP_COMPAT}  - Unicode compatibility decomposition.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
     *  <li>{@link #NO_OP}          - Do nothing but return characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
     *                                  from the underlying input text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
     * </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
     * @see #getMode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
    public void setMode(Mode newMode) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
        mode = newMode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
     * Return the basic operation performed by this <tt>Normalizer</tt>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
     * @see #setMode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
    public Mode getMode() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
        return mode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
     * Set the input text over which this <tt>Normalizer</tt> will iterate.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
     * The iteration position is set to the beginning of the input text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
     * @param newText   The new string to be normalized.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
    public void setText(String newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
        if (newIter == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
            throw new InternalError("Could not create a new UCharacterIterator");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
        text = newIter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
        reset();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
     * Set the input text over which this <tt>Normalizer</tt> will iterate.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
     * The iteration position is set to the beginning of the input text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
     * @param newText   The new text (a <tt>CharacterIterator</tt>) to be normalized.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
     * @stable ICU 2.8
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
    public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
        UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
        if (newIter == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
            throw new InternalError("Could not create a new UCharacterIterator");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
        text = newIter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
        currentIndex=nextIndex=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
        clearBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
    // Private utility methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
    //-------------------------------------------------------------------------
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
    /* backward iteration --------------------------------------------------- */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
     * read backwards and get norm32
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
     * return 0 if the character is <minC
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
     * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
     * surrogate but read second!)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
    private static  long getPrevNorm32(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
                                       int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
                                       int/*unsigned*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
                                       char[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
        long norm32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
        int ch=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
        /* need src.hasPrevious() */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
        if((ch=src.previous()) == UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
        chars[0]=(char)ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
        chars[1]=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
        /* check for a surrogate before getting norm32 to see if we need to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
         * predecrement further */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
        if(chars[0]<minC) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
        } else if(!UTF16.isSurrogate(chars[0])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
            return NormalizerImpl.getNorm32(chars[0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
        } else if(UTF16.isLeadSurrogate(chars[0]) || (src.getIndex()==0)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
            /* unpaired surrogate */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
            chars[1]=(char)src.current();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
        } else if(UTF16.isLeadSurrogate(chars[1]=(char)src.previous())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
            norm32=NormalizerImpl.getNorm32(chars[1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
            if((norm32&mask)==0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
                /* all surrogate pairs with this lead surrogate have irrelevant
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
                 * data */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
                return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
                /* norm32 must be a surrogate special */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
                return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
            /* unpaired second surrogate, undo the c2=src.previous() movement */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
            src.moveIndex( 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
    private interface IsPrevBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
        public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
                                      int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
                                      int/*unsigned*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
                                      char[] chars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
    private static final class IsPrevNFDSafe implements IsPrevBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
         * for NF*D:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
         * read backwards and check if the lead combining class is 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
         * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
         * surrogate but read second!)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
        public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
                                      int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
                                      int/*unsigned*/ ccOrQCMask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
                                      char[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
            return NormalizerImpl.isNFDSafe(getPrevNorm32(src, minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
                                                          ccOrQCMask, chars),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
                                            ccOrQCMask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
                                            ccOrQCMask& NormalizerImpl.QC_MASK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
    private static final class IsPrevTrueStarter implements IsPrevBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
         * read backwards and check if the character is (or its decomposition
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
         * begins with) a "true starter" (cc==0 and NF*C_YES)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
         * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
         * surrogate but read second!)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
         */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
        public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
                                      int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
                                      int/*unsigned*/ ccOrQCMask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
                                      char[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
            long norm32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
            int/*unsigned*/ decompQCMask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
            decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
            norm32=getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
            return NormalizerImpl.isTrueStarter(norm32,ccOrQCMask,decompQCMask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
    private static int findPreviousIterationBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
                                                     IsPrevBoundary obj,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
                                                     int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
                                                     int/*mask*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
                                                     char[] buffer,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
                                                     int[] startIndex) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
        char[] chars=new char[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
        boolean isBoundary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
        /* fill the buffer from the end backwards */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
        startIndex[0] = buffer.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
        chars[0]=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
        while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            isBoundary=obj.isPrevBoundary(src, minC, mask, chars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
            /* always write this character to the front of the buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
            /* make sure there is enough space in the buffer */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
            if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
                // grow the buffer
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
                char[] newBuf = new char[buffer.length*2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
                /* move the current buffer contents up */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
                System.arraycopy(buffer,startIndex[0],newBuf,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                                 newBuf.length-(buffer.length-startIndex[0]),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                                 buffer.length-startIndex[0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
                //adjust the startIndex
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
                startIndex[0]+=newBuf.length-buffer.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
                buffer=newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
                newBuf=null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
            buffer[--startIndex[0]]=chars[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
            if(chars[1]!=0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                buffer[--startIndex[0]]=chars[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
            /* stop if this just-copied character is a boundary */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
            if(isBoundary) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
        /* return the length of the buffer contents */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
        return buffer.length-startIndex[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
    private static int previous(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
                                char[] dest, int destStart, int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
                                Mode mode,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
                                boolean doNormalize,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
                                boolean[] pNeededToNormalize,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
                                int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
        IsPrevBoundary isPreviousBoundary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
        int destLength, bufferLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
        int/*unsigned*/ mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
        int c,c2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
        char minC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
        int destCapacity = destLimit-destStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
        destLength=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
        if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
            pNeededToNormalize[0]=false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
        minC = (char)mode.getMinC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
        mask = mode.getMask();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
        isPreviousBoundary = mode.getPrevBoundary();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
        if(isPreviousBoundary==null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
            destLength=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
            if((c=src.previous())>=0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
                destLength=1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
                if(UTF16.isTrailSurrogate((char)c)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
                    c2= src.previous();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
                    if(c2!= UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
                        if(UTF16.isLeadSurrogate((char)c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
                            if(destCapacity>=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
                                dest[1]=(char)c; // trail surrogate
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                                destLength=2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
                            // lead surrogate to be written below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
                            c=c2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
                            src.moveIndex(1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
                if(destCapacity>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
                    dest[0]=(char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
            return destLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
        char[] buffer = new char[100];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
        int[] startIndex= new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
        bufferLength=findPreviousIterationBoundary(src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
                                                   isPreviousBoundary,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
                                                   minC, mask,buffer,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
                                                   startIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
        if(bufferLength>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
            if(doNormalize) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
                destLength=NormalizerBase.normalize(buffer,startIndex[0],
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
                                                startIndex[0]+bufferLength,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
                                                dest, destStart,destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
                                                mode, options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
                if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
                    pNeededToNormalize[0]=(boolean)(destLength!=bufferLength ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
                                                    Utility.arrayRegionMatches(
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
                                                                               buffer,0,dest,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
                                                                               destStart,destLimit
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
                                                                               ));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
                /* just copy the source characters */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                if(destCapacity>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
                    System.arraycopy(buffer,startIndex[0],dest,0,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
                                     (bufferLength<destCapacity) ?
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
                                     bufferLength : destCapacity
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
                                     );
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
        return destLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
    /* forward iteration ---------------------------------------------------- */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
     * read forward and check if the character is a next-iteration boundary
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
     * if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
    private interface IsNextBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
        boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
                               int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
                               int/*unsigned*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                               int[] chars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
     * read forward and get norm32
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
     * return 0 if the character is <minC
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
     * if c2!=0 then (c2, c) is a surrogate pair
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
     * always reads complete characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
    private static long /*unsigned*/ getNextNorm32(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
                                                   int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
                                                   int/*unsigned*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
                                                   int[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
        long norm32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
        /* need src.hasNext() to be true */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
        chars[0]=src.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
        chars[1]=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
        if(chars[0]<minC) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
        norm32=NormalizerImpl.getNorm32((char)chars[0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
        if(UTF16.isLeadSurrogate((char)chars[0])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
            if(src.current()!=UCharacterIterator.DONE &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
               UTF16.isTrailSurrogate((char)(chars[1]=src.current()))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                src.moveIndex(1); /* skip the c2 surrogate */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                if((norm32&mask)==0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
                    /* irrelevant data */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
                    return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                    /* norm32 must be a surrogate special */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                    return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
                /* unmatched surrogate */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
        return norm32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
     * for NF*D:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
     * read forward and check if the lead combining class is 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
     * if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
    private static final class IsNextNFDSafe implements IsNextBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
        public boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
                                      int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
                                      int/*unsigned*/ ccOrQCMask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
                                      int[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
            return NormalizerImpl.isNFDSafe(getNextNorm32(src,minC,ccOrQCMask,chars),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
                                            ccOrQCMask, ccOrQCMask&NormalizerImpl.QC_MASK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
     * for NF*C:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
     * read forward and check if the character is (or its decomposition begins
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
     * with) a "true starter" (cc==0 and NF*C_YES)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
     * if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
    private static final class IsNextTrueStarter implements IsNextBoundary{
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
        public boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
                                      int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
                                      int/*unsigned*/ ccOrQCMask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
                                      int[] chars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
            long norm32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
            int/*unsigned*/ decompQCMask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
            decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
            norm32=getNextNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
            return NormalizerImpl.isTrueStarter(norm32, ccOrQCMask, decompQCMask);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
    private static int findNextIterationBoundary(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
                                                 IsNextBoundary obj,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
                                                 int/*unsigned*/ minC,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
                                                 int/*unsigned*/ mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
                                                 char[] buffer) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
        if(src.current()==UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
            return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
        /* get one character and ignore its properties */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
        int[] chars = new int[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
        chars[0]=src.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
        buffer[0]=(char)chars[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
        int bufferIndex = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
        if(UTF16.isLeadSurrogate((char)chars[0])&&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
           src.current()!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
            if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
                buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
                src.moveIndex(-1); /* back out the non-trail-surrogate */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
        /* get all following characters until we see a boundary */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
        /* checking hasNext() instead of c!=DONE on the off-chance that U+ffff
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
         * is part of the string */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
        while( src.current()!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
            if(obj.isNextBoundary(src, minC, mask, chars)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
                /* back out the latest movement to stop at the boundary */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                src.moveIndex(chars[1]==0 ? -1 : -2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                if(bufferIndex+(chars[1]==0 ? 1 : 2)<=buffer.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
                    buffer[bufferIndex++]=(char)chars[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                    if(chars[1]!=0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
                        buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
                    char[] newBuf = new char[buffer.length*2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
                    System.arraycopy(buffer,0,newBuf,0,bufferIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
                    buffer = newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
                    buffer[bufferIndex++]=(char)chars[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
                    if(chars[1]!=0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                        buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
        /* return the length of the buffer contents */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
        return bufferIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
    private static int next(UCharacterIterator src,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
                            char[] dest, int destStart, int destLimit,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                            NormalizerBase.Mode mode,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                            boolean doNormalize,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
                            boolean[] pNeededToNormalize,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                            int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
        IsNextBoundary isNextBoundary;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
        int /*unsigned*/ mask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
        int /*unsigned*/ bufferLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
        int c,c2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
        char minC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
        int destCapacity = destLimit - destStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
        int destLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
        if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
            pNeededToNormalize[0]=false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
        minC = (char)mode.getMinC();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
        mask = mode.getMask();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
        isNextBoundary = mode.getNextBoundary();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
        if(isNextBoundary==null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
            destLength=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
            c=src.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
            if(c!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                destLength=1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
                if(UTF16.isLeadSurrogate((char)c)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
                    c2= src.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                    if(c2!= UCharacterIterator.DONE) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                        if(UTF16.isTrailSurrogate((char)c2)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
                            if(destCapacity>=2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
                                dest[1]=(char)c2; // trail surrogate
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                                destLength=2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
                            // lead surrogate to be written below
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
                            src.moveIndex(-1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
                if(destCapacity>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
                    dest[0]=(char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
            return destLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
        char[] buffer=new char[100];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
        int[] startIndex = new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
        bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
                                               buffer);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
        if(bufferLength>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
            if(doNormalize) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
                destLength=mode.normalize(buffer,startIndex[0],bufferLength,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
                                          dest,destStart,destLimit, options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
                if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
                    pNeededToNormalize[0]=(boolean)(destLength!=bufferLength ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
                                                    Utility.arrayRegionMatches(buffer,startIndex[0],
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
                                                                               dest,destStart,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
                                                                               destLength));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                /* just copy the source characters */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
                if(destCapacity>0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
                    System.arraycopy(buffer,0,dest,destStart,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
                                     Math.min(bufferLength,destCapacity)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
                                     );
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
        return destLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
    private void clearBuffer() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
        bufferLimit=bufferStart=bufferPos=0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
    private boolean nextNormalize() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
        clearBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
        currentIndex=nextIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
        text.setIndex(nextIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
        bufferLimit=next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
        nextIndex=text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
        return (bufferLimit>0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
    private boolean previousNormalize() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
        clearBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
        nextIndex=currentIndex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
        text.setIndex(currentIndex);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
        bufferLimit=previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
        currentIndex=text.getIndex();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
        bufferPos = bufferLimit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
        return bufferLimit>0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
    private int getCodePointAt(int index) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
        if( UTF16.isSurrogate(buffer[index])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
            if(UTF16.isLeadSurrogate(buffer[index])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
                if((index+1)<bufferLimit &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
                   UTF16.isTrailSurrogate(buffer[index+1])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
                    return UCharacterProperty.getRawSupplementary(
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
                                                                  buffer[index],
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
                                                                  buffer[index+1]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
                                                                  );
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
            }else if(UTF16.isTrailSurrogate(buffer[index])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
                if(index>0 && UTF16.isLeadSurrogate(buffer[index-1])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
                    return UCharacterProperty.getRawSupplementary(
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
                                                                  buffer[index-1],
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
                                                                  buffer[index]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
                                                                  );
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
        return buffer[index];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
     * Internal API
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
     * @internal
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
    public static boolean isNFSkippable(int c, Mode mode) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
        return mode.isNFSkippable(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
    // Options
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
     * Default option for Unicode 3.2.0 normalization.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
     * Corrigendum 4 was fixed in Unicode 3.2.0 but isn't supported in
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
     * IDNA/StringPrep.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
     * The public review issue #29 was fixed in Unicode 4.1.0. Corrigendum 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
     * allowed Unicode 3.2 to 4.0.1 to apply the fix for PRI #29, but it isn't
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
     * supported by IDNA/StringPrep as well as Corrigendum 4.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
    public static final int UNICODE_3_2_0_ORIGINAL =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
                               UNICODE_3_2 |
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
                               NormalizerImpl.WITHOUT_CORRIGENDUM4_CORRECTIONS |
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
                               NormalizerImpl.BEFORE_PRI_29;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
     * Default option for the latest Unicode normalization. This option is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
     * provided mainly for testing.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
     * The value zero means that normalization is done with the fixes for
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
     *   - Corrigendum 4 (Five CJK Canonical Mapping Errors)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
     *   - Corrigendum 5 (Normalization Idempotency)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
    public static final int UNICODE_LATEST = 0x00; // no option bits set
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
    // public constructor and methods for java.text.Normalizer and
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
    // sun.text.Normalizer
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
    /**
     * Creates a new {@code Normalizer} object for iterating over the
     * normalized form of a given string. Equivalent to the three-argument
     * constructor called with {@link #UNICODE_LATEST} as the options.
     *
     * @param str  the string to be normalized; normalization starts at the
     *             beginning of the string
     * @param mode the normalization mode
     */
    public NormalizerBase(String str, Mode mode) {
        this(str, mode, UNICODE_LATEST);
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
     * Normalizes a <code>String</code> using the given normalization form.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
     * @param str      the input string to be normalized.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
     * @param form     the normalization form
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
    public static String normalize(String str, Normalizer.Form form) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
        return normalize(str, form, UNICODE_LATEST);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
     * Normalizes a <code>String</code> using the given normalization form.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
     * @param str      the input string to be normalized.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
     * @param form     the normalization form
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
     * @param options   the optional features to be enabled.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
    public static String normalize(String str, Normalizer.Form form, int options) {
3101
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1601
        int len = str.length();
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1602
        boolean asciiOnly = true;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1603
        if (len < 80) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1604
            for (int i = 0; i < len; i++) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1605
                if (str.charAt(i) > 127) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1606
                    asciiOnly = false;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1607
                    break;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1608
                }
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1609
            }
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1610
        } else {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1611
            char[] a = str.toCharArray();
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1612
            for (int i = 0; i < len; i++) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1613
                if (a[i] > 127) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1614
                    asciiOnly = false;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1615
                    break;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1616
                }
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1617
            }
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1618
        }
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1619
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
        switch (form) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
        case NFC :
3101
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1622
            return asciiOnly ? str : NFC.normalize(str, options);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
        case NFD :
3101
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1624
            return asciiOnly ? str : NFD.normalize(str, options);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
        case NFKC :
3101
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1626
            return asciiOnly ? str : NFKC.normalize(str, options);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
        case NFKD :
3101
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses
peytoia
parents: 2497
diff changeset
  1628
            return asciiOnly ? str : NFKD.normalize(str, options);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
        throw new IllegalArgumentException("Unexpected normalization form: " +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
                                           form);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
     * Test if a string is in a given normalization form.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
     * This is semantically equivalent to source.equals(normalize(source, mode)).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
     * Unlike quickCheck(), this function returns a definitive result,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
     * never a "maybe".
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
     * For NFD, NFKD, and FCD, both functions work exactly the same.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
     * For NFC and NFKC where quickCheck may return "maybe", this function will
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
     * perform further tests to arrive at a true/false result.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
     * @param str       the input string to be checked to see if it is normalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
     * @param form      the normalization form
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
     * @param options   the optional features to be enabled.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
    public static boolean isNormalized(String str, Normalizer.Form form) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
        return isNormalized(str, form, UNICODE_LATEST);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
     * Test if a string is in a given normalization form.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
     * This is semantically equivalent to source.equals(normalize(source, mode)).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
     * Unlike quickCheck(), this function returns a definitive result,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
     * never a "maybe".
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
     * For NFD, NFKD, and FCD, both functions work exactly the same.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
     * For NFC and NFKC where quickCheck may return "maybe", this function will
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
     * perform further tests to arrive at a true/false result.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
     * @param str       the input string to be checked to see if it is normalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
     * @param form      the normalization form
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
     * @param options   the optional features to be enabled.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
    public static boolean isNormalized(String str, Normalizer.Form form, int options) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
        switch (form) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
        case NFC:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
            return (NFC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
        case NFD:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
            return (NFD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
        case NFKC:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
            return (NFKC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
        case NFKD:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
            return (NFKD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
        throw new IllegalArgumentException("Unexpected normalization form: " +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                                           form);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1679
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
}