src/java.base/share/classes/sun/text/normalizer/Normalizer2.java
author rgoel
Mon, 11 Mar 2019 17:34:23 +0530
changeset 54054 1def2d745747
parent 50045 d9d55f64d136
permissions -rw-r--r--
8220414: Correct copyright headers in Norm2AllModes.java and Normalizer2.java Summary: Updated copyright headers Reviewed-by: alanb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
31680
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     1
/*
54054
1def2d745747 8220414: Correct copyright headers in Norm2AllModes.java and Normalizer2.java
rgoel
parents: 50045
diff changeset
     2
 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
31680
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     4
 *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    10
 *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    15
 * accompanied this code).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    16
 *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    20
 *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    23
 * questions.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    24
 */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    25
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    26
/*
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    27
 *******************************************************************************
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    28
 *   Copyright (C) 2009-2014, International Business Machines
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    29
 *   Corporation and others.  All Rights Reserved.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    30
 *******************************************************************************
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    31
 */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    32
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    33
package sun.text.normalizer;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    34
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    35
/**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    36
 * Unicode normalization functionality for standard Unicode normalization or
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    37
 * for using custom mapping tables.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    38
 * All instances of this class are unmodifiable/immutable.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    39
 * The Normalizer2 class is not intended for public subclassing.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    40
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    41
 * The primary functions are to produce a normalized string and to detect whether
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    42
 * a string is already normalized.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    43
 * The most commonly used normalization forms are those defined in
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    44
 * http://www.unicode.org/unicode/reports/tr15/
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    45
 * However, this API supports additional normalization forms for specialized purposes.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    46
 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    47
 * and can be used in implementations of UTS #46.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    48
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    49
 * Not only are the standard compose and decompose modes supplied,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    50
 * but additional modes are provided as documented in the Mode enum.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    51
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    52
 * Some of the functions in this class identify normalization boundaries.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    53
 * At a normalization boundary, the portions of the string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    54
 * before it and starting from it do not interact and can be handled independently.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    55
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    56
 * The spanQuickCheckYes() stops at a normalization boundary.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    57
 * When the goal is a normalized string, then the text before the boundary
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    58
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    59
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    60
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    61
 * a character is guaranteed to be at a normalization boundary,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    62
 * regardless of context.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    63
 * This is used for moving from one normalization boundary to the next
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    64
 * or preceding boundary, and for performing iterative normalization.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    65
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    66
 * Iterative normalization is useful when only a small portion of a
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    67
 * longer string needs to be processed.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    68
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    69
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    70
 * (to process only the substring for which sort key bytes are computed).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    71
 * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    72
 * The set of normalization boundaries returned by these functions may not be
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    73
 * complete: There may be more boundaries that could be returned.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    74
 * Different functions may return different boundaries.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    75
 * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    76
 * @author Markus W. Scherer
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    77
 */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    78
abstract class Normalizer2 {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    79
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    80
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    81
     * Returns a Normalizer2 instance for Unicode NFC normalization.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    82
     * Same as getInstance(null, "nfc", Mode.COMPOSE).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    83
     * Returns an unmodifiable singleton instance.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    84
     * @return the requested Normalizer2, if successful
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    85
     * @stable ICU 49
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    86
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    87
    public static Normalizer2 getNFCInstance() {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    88
        return Norm2AllModes.getNFCInstance().comp;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    89
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    90
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    91
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    92
     * Returns a Normalizer2 instance for Unicode NFD normalization.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    93
     * Same as getInstance(null, "nfc", Mode.DECOMPOSE).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    94
     * Returns an unmodifiable singleton instance.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    95
     * @return the requested Normalizer2, if successful
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    96
     * @stable ICU 49
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    97
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    98
    public static Normalizer2 getNFDInstance() {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
    99
        return Norm2AllModes.getNFCInstance().decomp;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   100
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   101
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   102
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   103
     * Returns a Normalizer2 instance for Unicode NFKC normalization.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   104
     * Same as getInstance(null, "nfkc", Mode.COMPOSE).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   105
     * Returns an unmodifiable singleton instance.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   106
     * @return the requested Normalizer2, if successful
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   107
     * @stable ICU 49
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   108
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   109
    public static Normalizer2 getNFKCInstance() {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   110
        return Norm2AllModes.getNFKCInstance().comp;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   111
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   112
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   113
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   114
     * Returns a Normalizer2 instance for Unicode NFKD normalization.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   115
     * Same as getInstance(null, "nfkc", Mode.DECOMPOSE).
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   116
     * Returns an unmodifiable singleton instance.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   117
     * @return the requested Normalizer2, if successful
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   118
     * @stable ICU 49
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   119
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   120
    public static Normalizer2 getNFKDInstance() {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   121
        return Norm2AllModes.getNFKCInstance().decomp;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   122
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   123
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   124
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   125
     * Returns the normalized form of the source string.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   126
     * @param src source string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   127
     * @return normalized src
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   128
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   129
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   130
    public String normalize(CharSequence src) {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   131
        if(src instanceof String) {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   132
            // Fastpath: Do not construct a new String if the src is a String
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   133
            // and is already normalized.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   134
            int spanLength=spanQuickCheckYes(src);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   135
            if(spanLength==src.length()) {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   136
                return (String)src;
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   137
            }
50045
d9d55f64d136 8191410: Unicode 10
rgoel
parents: 47216
diff changeset
   138
            if (spanLength != 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents: 47216
diff changeset
   139
                StringBuilder sb=new StringBuilder(src.length()).append(src, 0, spanLength);
d9d55f64d136 8191410: Unicode 10
rgoel
parents: 47216
diff changeset
   140
                return normalizeSecondAndAppend(sb, src.subSequence(spanLength, src.length())).toString();
d9d55f64d136 8191410: Unicode 10
rgoel
parents: 47216
diff changeset
   141
            }
31680
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   142
        }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   143
        return normalize(src, new StringBuilder(src.length())).toString();
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   144
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   145
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   146
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   147
     * Writes the normalized form of the source string to the destination string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   148
     * (replacing its contents) and returns the destination string.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   149
     * The source and destination strings must be different objects.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   150
     * @param src source string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   151
     * @param dest destination string; its contents are replaced with normalized src
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   152
     * @return dest
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   153
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   154
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   155
    // Abstract: concrete normalizers supply the implementation. The
    // String-returning normalize(CharSequence) overload falls back to this
    // method, passing a fresh StringBuilder presized to src.length(), whenever
    // its String fast path does not produce a result.
    public abstract StringBuilder normalize(CharSequence src, StringBuilder dest);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   156
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   157
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   158
     * Writes the normalized form of the source string to the destination Appendable
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   159
     * and returns the destination Appendable.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   160
     * The source and destination strings must be different objects.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   161
     *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   162
     * <p>Any {@link java.io.IOException} is wrapped into a {@link com.ibm.icu.util.ICUUncheckedIOException}.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   163
     *
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   164
     * @param src source string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   165
     * @param dest destination Appendable; gets normalized src appended
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   166
     * @return dest
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   167
     * @stable ICU 4.6
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   168
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   169
    // Abstract: concrete normalizers supply the implementation.
    // NOTE(review): the Javadoc for this method links to
    // com.ibm.icu.util.ICUUncheckedIOException, an ICU4J type that is not
    // referenced anywhere else in the visible part of this file; confirm which
    // exception the implementations in this package actually throw.
    public abstract Appendable normalize(CharSequence src, Appendable dest);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   170
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   171
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   172
     * Appends the normalized form of the second string to the first string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   173
     * (merging them at the boundary) and returns the first string.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   174
     * The result is normalized if the first string was normalized.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   175
     * The first and second strings must be different objects.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   176
     * @param first string, should be normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   177
     * @param second string, will be normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   178
     * @return first
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   179
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   180
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   181
    // Abstract: concrete normalizers supply the implementation.
    // normalize(CharSequence) calls this with `first` holding the prefix that
    // passed spanQuickCheckYes() and `second` being the rest of the input.
    public abstract StringBuilder normalizeSecondAndAppend(
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   182
            StringBuilder first, CharSequence second);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   183
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   184
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   185
     * Appends the second string to the first string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   186
     * (merging them at the boundary) and returns the first string.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   187
     * The result is normalized if both the strings were normalized.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   188
     * The first and second strings must be different objects.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   189
     * @param first string, should be normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   190
     * @param second string, should be normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   191
     * @return first
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   192
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   193
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   194
    // Abstract: concrete normalizers supply the implementation. Counterpart of
    // normalizeSecondAndAppend() for a `second` argument that, per the @param
    // text, is expected to be normalized already.
    public abstract StringBuilder append(StringBuilder first, CharSequence second);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   195
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   196
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   197
     * Gets the decomposition mapping of c.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   198
     * Roughly equivalent to normalizing the String form of c
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   199
     * on a DECOMPOSE Normalizer2 instance, but much faster, and except that this function
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   200
     * returns null if c does not have a decomposition mapping in this instance's data.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   201
     * This function is independent of the mode of the Normalizer2.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   202
     * @param c code point
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   203
     * @return c's decomposition mapping, if any; otherwise null
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   204
     * @stable ICU 4.6
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   205
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   206
    // Abstract: concrete normalizers supply the implementation. Per the
    // documented contract, a code point without a mapping is reported as null
    // (not as an empty String), so callers need a null check.
    public abstract String getDecomposition(int c);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   207
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   208
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   209
     * Gets the combining class of c.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   210
     * The default implementation returns 0
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   211
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   212
     * @param c code point
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   213
     * @return c's combining class
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   214
     * @stable ICU 49
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   215
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   216
    public int getCombiningClass(int c) { return 0; }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   217
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   218
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   219
     * Tests if the string is normalized.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   220
     * Internally, in cases where the quickCheck() method would return "maybe"
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   221
     * (which is only possible for the two COMPOSE modes) this method
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   222
     * resolves to "yes" or "no" to provide a definitive result,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   223
     * at the cost of doing more work in those cases.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   224
     * @param s input string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   225
     * @return true if s is normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   226
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   227
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   228
    // Abstract: this class provides no default; concrete normalizers supply
    // the implementation.
    public abstract boolean isNormalized(CharSequence s);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   229
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   230
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   231
     * Returns the end of the normalized substring of the input string.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   232
     * In other words, with <code>end=spanQuickCheckYes(s);</code>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   233
     * the substring <code>s.subSequence(0, end)</code>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   234
     * will pass the quick check with a "yes" result.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   235
     * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   236
     * The returned end index is usually one or more characters before the
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   237
     * "no" or "maybe" character: The end index is at a normalization boundary.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   238
     * (See the class documentation for more about normalization boundaries.)
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   239
     * <p>
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   240
     * When the goal is a normalized string and most input strings are expected
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   241
     * to be normalized already, then call this method,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   242
     * and if the returned end index marks a prefix shorter than the input string,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   243
     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   244
     * @param s input string
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   245
     * @return the end index of the "yes" span
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   246
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   247
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   248
    // Implementer note: the returned index must lie on a normalization boundary,
    // so it may stop short of the first character that fails the quick check.
    public abstract int spanQuickCheckYes(CharSequence s);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   249
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   250
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   251
     * Tests if the character always has a normalization boundary before it,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   252
     * regardless of context.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   253
     * If true, then during normalization the character does not interact with
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   254
     * preceding characters.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   255
     * In other words, a string containing this character can be normalized
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   256
     * by processing portions before this character and starting from this
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   257
     * character independently.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   258
     * This is used for iterative normalization. See the class documentation for details.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   259
     * @param c character to test
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   260
     * @return true if c has a normalization boundary before it
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   261
     * @stable ICU 4.4
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   262
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   263
    // Implementer note: the result must hold regardless of context, i.e. whatever text precedes c.
    public abstract boolean hasBoundaryBefore(int c);
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   264
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   265
    /**
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   266
     * Sole constructor.  (For invocation by subclass constructors,
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   267
     * typically implicit.)
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   268
     * @internal
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   269
     * Deprecated: This API is ICU internal only.
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   270
     */
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   271
    // No-op body; protected, so it is reachable only from subclasses (via super()) and same-package code.
    protected Normalizer2() {
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   272
    }
88c53c2293b4 8032446: Support Unicode 7.0.0 in JDK 9
peytoia
parents:
diff changeset
   273
}