jdk-sandbox: jdk/src/share/classes/sun/text/normalizer/NormalizerBase.java@12c063b39232 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	2	* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
2 90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	7	* published by the Free Software Foundation. Oracle designates this
2 90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	9	* by Oracle in the LICENSE file that accompanied this code.
2 90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	22	* or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 3101 diff changeset	23	* questions.
2 90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25	/*
90ce3da70b43 Initial load duke parents: diff changeset	26	*******************************************************************************
2497 903fd9d785ef 6404304: RFE: Unicode 5.1 support peytoia parents: 2 diff changeset	27	* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
2 90ce3da70b43 Initial load duke parents: diff changeset	28	* *
90ce3da70b43 Initial load duke parents: diff changeset	29	* The original version of this source code and documentation is copyrighted *
90ce3da70b43 Initial load duke parents: diff changeset	30	* and owned by IBM, These materials are provided under terms of a License *
90ce3da70b43 Initial load duke parents: diff changeset	31	* Agreement between IBM and Sun. This technology is protected by multiple *
90ce3da70b43 Initial load duke parents: diff changeset	32	* US and International patents. This notice and attribution to IBM may not *
90ce3da70b43 Initial load duke parents: diff changeset	33	* to removed. *
90ce3da70b43 Initial load duke parents: diff changeset	34	*******************************************************************************
90ce3da70b43 Initial load duke parents: diff changeset	35	*/
90ce3da70b43 Initial load duke parents: diff changeset	36
90ce3da70b43 Initial load duke parents: diff changeset	37	package sun.text.normalizer;
90ce3da70b43 Initial load duke parents: diff changeset	38
90ce3da70b43 Initial load duke parents: diff changeset	39	import java.text.CharacterIterator;
90ce3da70b43 Initial load duke parents: diff changeset	40	import java.text.Normalizer;
90ce3da70b43 Initial load duke parents: diff changeset	41
90ce3da70b43 Initial load duke parents: diff changeset	42	/**
90ce3da70b43 Initial load duke parents: diff changeset	43	* Unicode Normalization
90ce3da70b43 Initial load duke parents: diff changeset	44	*
90ce3da70b43 Initial load duke parents: diff changeset	45	* <h2>Unicode normalization API</h2>
90ce3da70b43 Initial load duke parents: diff changeset	46	*
90ce3da70b43 Initial load duke parents: diff changeset	47	* <code>normalize</code> transforms Unicode text into an equivalent composed or
90ce3da70b43 Initial load duke parents: diff changeset	48	* decomposed form, allowing for easier sorting and searching of text.
90ce3da70b43 Initial load duke parents: diff changeset	49	* <code>normalize</code> supports the standard normalization forms described in
90ce3da70b43 Initial load duke parents: diff changeset	50	* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
90ce3da70b43 Initial load duke parents: diff changeset	51	* Unicode Standard Annex #15 — Unicode Normalization Forms</a>.
90ce3da70b43 Initial load duke parents: diff changeset	52	*
90ce3da70b43 Initial load duke parents: diff changeset	53	* Characters with accents or other adornments can be encoded in
90ce3da70b43 Initial load duke parents: diff changeset	54	* several different ways in Unicode. For example, take the character A-acute.
90ce3da70b43 Initial load duke parents: diff changeset	55	* In Unicode, this can be encoded as a single character (the
90ce3da70b43 Initial load duke parents: diff changeset	56	* "composed" form):
90ce3da70b43 Initial load duke parents: diff changeset	57	*
90ce3da70b43 Initial load duke parents: diff changeset	58	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	59	* 00C1 LATIN CAPITAL LETTER A WITH ACUTE
90ce3da70b43 Initial load duke parents: diff changeset	60	* </p>
90ce3da70b43 Initial load duke parents: diff changeset	61	*
90ce3da70b43 Initial load duke parents: diff changeset	62	* or as two separate characters (the "decomposed" form):
90ce3da70b43 Initial load duke parents: diff changeset	63	*
90ce3da70b43 Initial load duke parents: diff changeset	64	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	65	* 0041 LATIN CAPITAL LETTER A
90ce3da70b43 Initial load duke parents: diff changeset	66	* 0301 COMBINING ACUTE ACCENT
90ce3da70b43 Initial load duke parents: diff changeset	67	* </p>
90ce3da70b43 Initial load duke parents: diff changeset	68	*
90ce3da70b43 Initial load duke parents: diff changeset	69	* To a user of your program, however, both of these sequences should be
90ce3da70b43 Initial load duke parents: diff changeset	70	* treated as the same "user-level" character "A with acute accent". When you
90ce3da70b43 Initial load duke parents: diff changeset	71	* are searching or comparing text, you must ensure that these two sequences are
90ce3da70b43 Initial load duke parents: diff changeset	72	* treated equivalently. In addition, you must handle characters with more than
90ce3da70b43 Initial load duke parents: diff changeset	73	* one accent. Sometimes the order of a character's combining accents is
90ce3da70b43 Initial load duke parents: diff changeset	74	* significant, while in other cases accent sequences in different orders are
90ce3da70b43 Initial load duke parents: diff changeset	75	* really equivalent.
90ce3da70b43 Initial load duke parents: diff changeset	76	*
90ce3da70b43 Initial load duke parents: diff changeset	77	* Similarly, the string "ffi" can be encoded as three separate letters:
90ce3da70b43 Initial load duke parents: diff changeset	78	*
90ce3da70b43 Initial load duke parents: diff changeset	79	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	80	* 0066 LATIN SMALL LETTER F
90ce3da70b43 Initial load duke parents: diff changeset	81	* 0066 LATIN SMALL LETTER F
90ce3da70b43 Initial load duke parents: diff changeset	82	* 0069 LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	83	* </p>
90ce3da70b43 Initial load duke parents: diff changeset	84	*
90ce3da70b43 Initial load duke parents: diff changeset	85	* or as the single character
90ce3da70b43 Initial load duke parents: diff changeset	86	*
90ce3da70b43 Initial load duke parents: diff changeset	87	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	88	* FB03 LATIN SMALL LIGATURE FFI
90ce3da70b43 Initial load duke parents: diff changeset	89	* </p>
90ce3da70b43 Initial load duke parents: diff changeset	90	*
90ce3da70b43 Initial load duke parents: diff changeset	91	* The ffi ligature is not a distinct semantic character, and strictly speaking
90ce3da70b43 Initial load duke parents: diff changeset	92	* it shouldn't be in Unicode at all, but it was included for compatibility
90ce3da70b43 Initial load duke parents: diff changeset	93	* with existing character sets that already provided it. The Unicode standard
90ce3da70b43 Initial load duke parents: diff changeset	94	* identifies such characters by giving them "compatibility" decompositions
90ce3da70b43 Initial load duke parents: diff changeset	95	* into the corresponding semantic characters. When sorting and searching, you
90ce3da70b43 Initial load duke parents: diff changeset	96	* will often want to use these mappings.
90ce3da70b43 Initial load duke parents: diff changeset	97	*
90ce3da70b43 Initial load duke parents: diff changeset	98	* <code>normalize</code> helps solve these problems by transforming text into
90ce3da70b43 Initial load duke parents: diff changeset	99	* the canonical composed and decomposed forms as shown in the first example
90ce3da70b43 Initial load duke parents: diff changeset	100	* above. In addition, you can have it perform compatibility decompositions so
90ce3da70b43 Initial load duke parents: diff changeset	101	* that you can treat compatibility characters the same as their equivalents.
90ce3da70b43 Initial load duke parents: diff changeset	102	* Finally, <code>normalize</code> rearranges accents into the proper canonical
90ce3da70b43 Initial load duke parents: diff changeset	103	* order, so that you do not have to worry about accent rearrangement on your
90ce3da70b43 Initial load duke parents: diff changeset	104	* own.
90ce3da70b43 Initial load duke parents: diff changeset	105	*
90ce3da70b43 Initial load duke parents: diff changeset	106	* Form FCD, "Fast C or D", is also designed for collation.
90ce3da70b43 Initial load duke parents: diff changeset	107	* It allows working on strings that are not necessarily normalized
90ce3da70b43 Initial load duke parents: diff changeset	108	* with an algorithm (like in collation) that works under "canonical closure",
90ce3da70b43 Initial load duke parents: diff changeset	109	* i.e., it treats precomposed characters and their decomposed equivalents the
90ce3da70b43 Initial load duke parents: diff changeset	110	* same.
90ce3da70b43 Initial load duke parents: diff changeset	111	*
90ce3da70b43 Initial load duke parents: diff changeset	112	* It is not a normalization form because it does not provide for uniqueness of
90ce3da70b43 Initial load duke parents: diff changeset	113	* representation. Multiple strings may be canonically equivalent (their NFDs
90ce3da70b43 Initial load duke parents: diff changeset	114	* are identical) and may all conform to FCD without being identical themselves.
90ce3da70b43 Initial load duke parents: diff changeset	115	*
90ce3da70b43 Initial load duke parents: diff changeset	116	* The form is defined such that the "raw decomposition", the recursive
90ce3da70b43 Initial load duke parents: diff changeset	117	* canonical decomposition of each character, results in a string that is
90ce3da70b43 Initial load duke parents: diff changeset	118	* canonically ordered. This means that precomposed characters are allowed
90ce3da70b43 Initial load duke parents: diff changeset	119	* as long as their decompositions do not need canonical reordering.
90ce3da70b43 Initial load duke parents: diff changeset	120	*
90ce3da70b43 Initial load duke parents: diff changeset	121	* Its advantage for a process like collation is that all NFD and most NFC texts
90ce3da70b43 Initial load duke parents: diff changeset	122	* - and many unnormalized texts - already conform to FCD and do not need to be
90ce3da70b43 Initial load duke parents: diff changeset	123	* normalized (NFD) for such a process. The FCD quick check will return YES for
90ce3da70b43 Initial load duke parents: diff changeset	124	* most strings in practice.
90ce3da70b43 Initial load duke parents: diff changeset	125	*
90ce3da70b43 Initial load duke parents: diff changeset	126	* normalize(FCD) may be implemented with NFD.
90ce3da70b43 Initial load duke parents: diff changeset	127	*
90ce3da70b43 Initial load duke parents: diff changeset	128	* For more details on FCD see the collation design document:
2497 903fd9d785ef 6404304: RFE: Unicode 5.1 support peytoia parents: 2 diff changeset	129	* http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
2 90ce3da70b43 Initial load duke parents: diff changeset	130	*
90ce3da70b43 Initial load duke parents: diff changeset	131	* ICU collation performs either NFD or FCD normalization automatically if
90ce3da70b43 Initial load duke parents: diff changeset	132	* normalization is turned on for the collator object. Beyond collation and
90ce3da70b43 Initial load duke parents: diff changeset	133	* string search, normalized strings may be useful for string equivalence
90ce3da70b43 Initial load duke parents: diff changeset	134	* comparisons, transliteration/transcription, unique representations, etc.
90ce3da70b43 Initial load duke parents: diff changeset	135	*
90ce3da70b43 Initial load duke parents: diff changeset	136	* The W3C generally recommends exchanging texts in NFC.
90ce3da70b43 Initial load duke parents: diff changeset	137	* Note also that most legacy character encodings use only precomposed forms and
90ce3da70b43 Initial load duke parents: diff changeset	138	* often do not encode any combining marks by themselves. For conversion to such
90ce3da70b43 Initial load duke parents: diff changeset	139	* character encodings the Unicode text needs to be normalized to NFC.
90ce3da70b43 Initial load duke parents: diff changeset	140	* For more usage examples, see the Unicode Standard Annex.
90ce3da70b43 Initial load duke parents: diff changeset	141	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	142	*/
90ce3da70b43 Initial load duke parents: diff changeset	143
90ce3da70b43 Initial load duke parents: diff changeset	144	public final class NormalizerBase implements Cloneable {
90ce3da70b43 Initial load duke parents: diff changeset	145
90ce3da70b43 Initial load duke parents: diff changeset	146	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	147	// Private data
90ce3da70b43 Initial load duke parents: diff changeset	148	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	149	private char[] buffer = new char[100];
90ce3da70b43 Initial load duke parents: diff changeset	150	private int bufferStart = 0;
90ce3da70b43 Initial load duke parents: diff changeset	151	private int bufferPos = 0;
90ce3da70b43 Initial load duke parents: diff changeset	152	private int bufferLimit = 0;
90ce3da70b43 Initial load duke parents: diff changeset	153
90ce3da70b43 Initial load duke parents: diff changeset	154	// The input text and our position in it
90ce3da70b43 Initial load duke parents: diff changeset	155	private UCharacterIterator text;
90ce3da70b43 Initial load duke parents: diff changeset	156	private Mode mode = NFC;
90ce3da70b43 Initial load duke parents: diff changeset	157	private int options = 0;
90ce3da70b43 Initial load duke parents: diff changeset	158	private int currentIndex;
90ce3da70b43 Initial load duke parents: diff changeset	159	private int nextIndex;
90ce3da70b43 Initial load duke parents: diff changeset	160
90ce3da70b43 Initial load duke parents: diff changeset	161	/**
90ce3da70b43 Initial load duke parents: diff changeset	162	* Options bit set value to select Unicode 3.2 normalization
90ce3da70b43 Initial load duke parents: diff changeset	163	* (except NormalizationCorrections).
90ce3da70b43 Initial load duke parents: diff changeset	164	* At most one Unicode version can be selected at a time.
90ce3da70b43 Initial load duke parents: diff changeset	165	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	166	*/
90ce3da70b43 Initial load duke parents: diff changeset	167	public static final int UNICODE_3_2=0x20;
90ce3da70b43 Initial load duke parents: diff changeset	168
90ce3da70b43 Initial load duke parents: diff changeset	169	/**
90ce3da70b43 Initial load duke parents: diff changeset	170	* Constant indicating that the end of the iteration has been reached.
90ce3da70b43 Initial load duke parents: diff changeset	171	* This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
90ce3da70b43 Initial load duke parents: diff changeset	172	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	173	*/
90ce3da70b43 Initial load duke parents: diff changeset	174	public static final int DONE = UCharacterIterator.DONE;
90ce3da70b43 Initial load duke parents: diff changeset	175
90ce3da70b43 Initial load duke parents: diff changeset	176	/**
90ce3da70b43 Initial load duke parents: diff changeset	177	* Constants for normalization modes.
90ce3da70b43 Initial load duke parents: diff changeset	178	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	179	*/
90ce3da70b43 Initial load duke parents: diff changeset	180	public static class Mode {
90ce3da70b43 Initial load duke parents: diff changeset	181	private int modeValue;
90ce3da70b43 Initial load duke parents: diff changeset	182	private Mode(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	183	modeValue = value;
90ce3da70b43 Initial load duke parents: diff changeset	184	}
90ce3da70b43 Initial load duke parents: diff changeset	185
90ce3da70b43 Initial load duke parents: diff changeset	186	/**
90ce3da70b43 Initial load duke parents: diff changeset	187	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	188	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	189	*/
90ce3da70b43 Initial load duke parents: diff changeset	190	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	191	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	192	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	193	int srcLen = (srcLimit - srcStart);
90ce3da70b43 Initial load duke parents: diff changeset	194	int destLen = (destLimit - destStart);
90ce3da70b43 Initial load duke parents: diff changeset	195	if( srcLen > destLen ) {
90ce3da70b43 Initial load duke parents: diff changeset	196	return srcLen;
90ce3da70b43 Initial load duke parents: diff changeset	197	}
90ce3da70b43 Initial load duke parents: diff changeset	198	System.arraycopy(src,srcStart,dest,destStart,srcLen);
90ce3da70b43 Initial load duke parents: diff changeset	199	return srcLen;
90ce3da70b43 Initial load duke parents: diff changeset	200	}
90ce3da70b43 Initial load duke parents: diff changeset	201
90ce3da70b43 Initial load duke parents: diff changeset	202	/**
90ce3da70b43 Initial load duke parents: diff changeset	203	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	204	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	205	*/
90ce3da70b43 Initial load duke parents: diff changeset	206	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	207	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	208	int options) {
90ce3da70b43 Initial load duke parents: diff changeset	209	return normalize( src, srcStart, srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	210	dest,destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	211	NormalizerImpl.getNX(options)
90ce3da70b43 Initial load duke parents: diff changeset	212	);
90ce3da70b43 Initial load duke parents: diff changeset	213	}
90ce3da70b43 Initial load duke parents: diff changeset	214
90ce3da70b43 Initial load duke parents: diff changeset	215	/**
90ce3da70b43 Initial load duke parents: diff changeset	216	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	217	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	218	*/
90ce3da70b43 Initial load duke parents: diff changeset	219	protected String normalize(String src, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	220	return src;
90ce3da70b43 Initial load duke parents: diff changeset	221	}
90ce3da70b43 Initial load duke parents: diff changeset	222
90ce3da70b43 Initial load duke parents: diff changeset	223	/**
90ce3da70b43 Initial load duke parents: diff changeset	224	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	225	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	226	*/
90ce3da70b43 Initial load duke parents: diff changeset	227	protected int getMinC() {
90ce3da70b43 Initial load duke parents: diff changeset	228	return -1;
90ce3da70b43 Initial load duke parents: diff changeset	229	}
90ce3da70b43 Initial load duke parents: diff changeset	230
90ce3da70b43 Initial load duke parents: diff changeset	231	/**
90ce3da70b43 Initial load duke parents: diff changeset	232	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	233	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	234	*/
90ce3da70b43 Initial load duke parents: diff changeset	235	protected int getMask() {
90ce3da70b43 Initial load duke parents: diff changeset	236	return -1;
90ce3da70b43 Initial load duke parents: diff changeset	237	}
90ce3da70b43 Initial load duke parents: diff changeset	238
90ce3da70b43 Initial load duke parents: diff changeset	239	/**
90ce3da70b43 Initial load duke parents: diff changeset	240	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	241	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	242	*/
90ce3da70b43 Initial load duke parents: diff changeset	243	protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	244	return null;
90ce3da70b43 Initial load duke parents: diff changeset	245	}
90ce3da70b43 Initial load duke parents: diff changeset	246
90ce3da70b43 Initial load duke parents: diff changeset	247	/**
90ce3da70b43 Initial load duke parents: diff changeset	248	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	249	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	250	*/
90ce3da70b43 Initial load duke parents: diff changeset	251	protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	252	return null;
90ce3da70b43 Initial load duke parents: diff changeset	253	}
90ce3da70b43 Initial load duke parents: diff changeset	254
90ce3da70b43 Initial load duke parents: diff changeset	255	/**
90ce3da70b43 Initial load duke parents: diff changeset	256	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	257	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	258	*/
90ce3da70b43 Initial load duke parents: diff changeset	259	protected QuickCheckResult quickCheck(char[] src,int start, int limit,
90ce3da70b43 Initial load duke parents: diff changeset	260	boolean allowMaybe,UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	261	if(allowMaybe) {
90ce3da70b43 Initial load duke parents: diff changeset	262	return MAYBE;
90ce3da70b43 Initial load duke parents: diff changeset	263	}
90ce3da70b43 Initial load duke parents: diff changeset	264	return NO;
90ce3da70b43 Initial load duke parents: diff changeset	265	}
90ce3da70b43 Initial load duke parents: diff changeset	266
90ce3da70b43 Initial load duke parents: diff changeset	267	/**
90ce3da70b43 Initial load duke parents: diff changeset	268	* This method is used for method dispatch
90ce3da70b43 Initial load duke parents: diff changeset	269	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	270	*/
90ce3da70b43 Initial load duke parents: diff changeset	271	protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	272	return true;
90ce3da70b43 Initial load duke parents: diff changeset	273	}
90ce3da70b43 Initial load duke parents: diff changeset	274	}
90ce3da70b43 Initial load duke parents: diff changeset	275
90ce3da70b43 Initial load duke parents: diff changeset	276	/**
90ce3da70b43 Initial load duke parents: diff changeset	277	* No decomposition/composition.
90ce3da70b43 Initial load duke parents: diff changeset	278	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	279	*/
90ce3da70b43 Initial load duke parents: diff changeset	280	public static final Mode NONE = new Mode(1);
90ce3da70b43 Initial load duke parents: diff changeset	281
90ce3da70b43 Initial load duke parents: diff changeset	282	/**
90ce3da70b43 Initial load duke parents: diff changeset	283	* Canonical decomposition.
90ce3da70b43 Initial load duke parents: diff changeset	284	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	285	*/
90ce3da70b43 Initial load duke parents: diff changeset	286	public static final Mode NFD = new NFDMode(2);
90ce3da70b43 Initial load duke parents: diff changeset	287
90ce3da70b43 Initial load duke parents: diff changeset	288	private static final class NFDMode extends Mode {
90ce3da70b43 Initial load duke parents: diff changeset	289	private NFDMode(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	290	super(value);
90ce3da70b43 Initial load duke parents: diff changeset	291	}
90ce3da70b43 Initial load duke parents: diff changeset	292
90ce3da70b43 Initial load duke parents: diff changeset	293	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	294	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	295	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	296	int[] trailCC = new int[1];
90ce3da70b43 Initial load duke parents: diff changeset	297	return NormalizerImpl.decompose(src, srcStart,srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	298	dest, destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	299	false, trailCC,nx);
90ce3da70b43 Initial load duke parents: diff changeset	300	}
90ce3da70b43 Initial load duke parents: diff changeset	301
90ce3da70b43 Initial load duke parents: diff changeset	302	protected String normalize( String src, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	303	return decompose(src,false,options);
90ce3da70b43 Initial load duke parents: diff changeset	304	}
90ce3da70b43 Initial load duke parents: diff changeset	305
90ce3da70b43 Initial load duke parents: diff changeset	306	protected int getMinC() {
90ce3da70b43 Initial load duke parents: diff changeset	307	return NormalizerImpl.MIN_WITH_LEAD_CC;
90ce3da70b43 Initial load duke parents: diff changeset	308	}
90ce3da70b43 Initial load duke parents: diff changeset	309
90ce3da70b43 Initial load duke parents: diff changeset	310	protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	311	return new IsPrevNFDSafe();
90ce3da70b43 Initial load duke parents: diff changeset	312	}
90ce3da70b43 Initial load duke parents: diff changeset	313
90ce3da70b43 Initial load duke parents: diff changeset	314	protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	315	return new IsNextNFDSafe();
90ce3da70b43 Initial load duke parents: diff changeset	316	}
90ce3da70b43 Initial load duke parents: diff changeset	317
90ce3da70b43 Initial load duke parents: diff changeset	318	protected int getMask() {
90ce3da70b43 Initial load duke parents: diff changeset	319	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFD);
90ce3da70b43 Initial load duke parents: diff changeset	320	}
90ce3da70b43 Initial load duke parents: diff changeset	321
90ce3da70b43 Initial load duke parents: diff changeset	322	protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load duke parents: diff changeset	323	int limit,boolean allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	324	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	325	return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load duke parents: diff changeset	326	src, start,limit,
90ce3da70b43 Initial load duke parents: diff changeset	327	NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	328	NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	329	),
90ce3da70b43 Initial load duke parents: diff changeset	330	NormalizerImpl.QC_NFD,
90ce3da70b43 Initial load duke parents: diff changeset	331	0,
90ce3da70b43 Initial load duke parents: diff changeset	332	allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	333	nx
90ce3da70b43 Initial load duke parents: diff changeset	334	);
90ce3da70b43 Initial load duke parents: diff changeset	335	}
90ce3da70b43 Initial load duke parents: diff changeset	336
90ce3da70b43 Initial load duke parents: diff changeset	337	protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	338	return NormalizerImpl.isNFSkippable(c,this,
90ce3da70b43 Initial load duke parents: diff changeset	339	(NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFD)
90ce3da70b43 Initial load duke parents: diff changeset	340	);
90ce3da70b43 Initial load duke parents: diff changeset	341	}
90ce3da70b43 Initial load duke parents: diff changeset	342	}
90ce3da70b43 Initial load duke parents: diff changeset	343
90ce3da70b43 Initial load duke parents: diff changeset	344	/**
90ce3da70b43 Initial load duke parents: diff changeset	345	* Compatibility decomposition.
90ce3da70b43 Initial load duke parents: diff changeset	346	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	347	*/
90ce3da70b43 Initial load duke parents: diff changeset	348	public static final Mode NFKD = new NFKDMode(3);
90ce3da70b43 Initial load duke parents: diff changeset	349
90ce3da70b43 Initial load duke parents: diff changeset	350	private static final class NFKDMode extends Mode {
90ce3da70b43 Initial load duke parents: diff changeset	351	private NFKDMode(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	352	super(value);
90ce3da70b43 Initial load duke parents: diff changeset	353	}
90ce3da70b43 Initial load duke parents: diff changeset	354
90ce3da70b43 Initial load duke parents: diff changeset	355	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	356	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	357	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	358	int[] trailCC = new int[1];
90ce3da70b43 Initial load duke parents: diff changeset	359	return NormalizerImpl.decompose(src, srcStart,srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	360	dest, destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	361	true, trailCC, nx);
90ce3da70b43 Initial load duke parents: diff changeset	362	}
90ce3da70b43 Initial load duke parents: diff changeset	363
90ce3da70b43 Initial load duke parents: diff changeset	364	protected String normalize( String src, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	365	return decompose(src,true,options);
90ce3da70b43 Initial load duke parents: diff changeset	366	}
90ce3da70b43 Initial load duke parents: diff changeset	367
90ce3da70b43 Initial load duke parents: diff changeset	368	protected int getMinC() {
90ce3da70b43 Initial load duke parents: diff changeset	369	return NormalizerImpl.MIN_WITH_LEAD_CC;
90ce3da70b43 Initial load duke parents: diff changeset	370	}
90ce3da70b43 Initial load duke parents: diff changeset	371
90ce3da70b43 Initial load duke parents: diff changeset	372	protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	373	return new IsPrevNFDSafe();
90ce3da70b43 Initial load duke parents: diff changeset	374	}
90ce3da70b43 Initial load duke parents: diff changeset	375
90ce3da70b43 Initial load duke parents: diff changeset	376	protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	377	return new IsNextNFDSafe();
90ce3da70b43 Initial load duke parents: diff changeset	378	}
90ce3da70b43 Initial load duke parents: diff changeset	379
90ce3da70b43 Initial load duke parents: diff changeset	380	protected int getMask() {
90ce3da70b43 Initial load duke parents: diff changeset	381	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKD);
90ce3da70b43 Initial load duke parents: diff changeset	382	}
90ce3da70b43 Initial load duke parents: diff changeset	383
90ce3da70b43 Initial load duke parents: diff changeset	384	protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load duke parents: diff changeset	385	int limit,boolean allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	386	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	387	return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load duke parents: diff changeset	388	src,start,limit,
90ce3da70b43 Initial load duke parents: diff changeset	389	NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	390	NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	391	),
90ce3da70b43 Initial load duke parents: diff changeset	392	NormalizerImpl.QC_NFKD,
90ce3da70b43 Initial load duke parents: diff changeset	393	NormalizerImpl.OPTIONS_COMPAT,
90ce3da70b43 Initial load duke parents: diff changeset	394	allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	395	nx
90ce3da70b43 Initial load duke parents: diff changeset	396	);
90ce3da70b43 Initial load duke parents: diff changeset	397	}
90ce3da70b43 Initial load duke parents: diff changeset	398
90ce3da70b43 Initial load duke parents: diff changeset	399	protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	400	return NormalizerImpl.isNFSkippable(c, this,
90ce3da70b43 Initial load duke parents: diff changeset	401	(NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKD)
90ce3da70b43 Initial load duke parents: diff changeset	402	);
90ce3da70b43 Initial load duke parents: diff changeset	403	}
90ce3da70b43 Initial load duke parents: diff changeset	404	}
90ce3da70b43 Initial load duke parents: diff changeset	405
90ce3da70b43 Initial load duke parents: diff changeset	406	/**
90ce3da70b43 Initial load duke parents: diff changeset	407	* Canonical decomposition followed by canonical composition.
90ce3da70b43 Initial load duke parents: diff changeset	408	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	409	*/
90ce3da70b43 Initial load duke parents: diff changeset	410	public static final Mode NFC = new NFCMode(4);
90ce3da70b43 Initial load duke parents: diff changeset	411
90ce3da70b43 Initial load duke parents: diff changeset	412	private static final class NFCMode extends Mode{
90ce3da70b43 Initial load duke parents: diff changeset	413	private NFCMode(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	414	super(value);
90ce3da70b43 Initial load duke parents: diff changeset	415	}
90ce3da70b43 Initial load duke parents: diff changeset	416	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	417	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	418	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	419	return NormalizerImpl.compose( src, srcStart, srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	420	dest,destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	421	0, nx);
90ce3da70b43 Initial load duke parents: diff changeset	422	}
90ce3da70b43 Initial load duke parents: diff changeset	423
90ce3da70b43 Initial load duke parents: diff changeset	424	protected String normalize( String src, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	425	return compose(src, false, options);
90ce3da70b43 Initial load duke parents: diff changeset	426	}
90ce3da70b43 Initial load duke parents: diff changeset	427
90ce3da70b43 Initial load duke parents: diff changeset	428	protected int getMinC() {
90ce3da70b43 Initial load duke parents: diff changeset	429	return NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	430	NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	431	);
90ce3da70b43 Initial load duke parents: diff changeset	432	}
90ce3da70b43 Initial load duke parents: diff changeset	433	protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	434	return new IsPrevTrueStarter();
90ce3da70b43 Initial load duke parents: diff changeset	435	}
90ce3da70b43 Initial load duke parents: diff changeset	436	protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	437	return new IsNextTrueStarter();
90ce3da70b43 Initial load duke parents: diff changeset	438	}
90ce3da70b43 Initial load duke parents: diff changeset	439	protected int getMask() {
90ce3da70b43 Initial load duke parents: diff changeset	440	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFC);
90ce3da70b43 Initial load duke parents: diff changeset	441	}
90ce3da70b43 Initial load duke parents: diff changeset	442	protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load duke parents: diff changeset	443	int limit,boolean allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	444	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	445	return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load duke parents: diff changeset	446	src,start,limit,
90ce3da70b43 Initial load duke parents: diff changeset	447	NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	448	NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	449	),
90ce3da70b43 Initial load duke parents: diff changeset	450	NormalizerImpl.QC_NFC,
90ce3da70b43 Initial load duke parents: diff changeset	451	0,
90ce3da70b43 Initial load duke parents: diff changeset	452	allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	453	nx
90ce3da70b43 Initial load duke parents: diff changeset	454	);
90ce3da70b43 Initial load duke parents: diff changeset	455	}
90ce3da70b43 Initial load duke parents: diff changeset	456	protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	457	return NormalizerImpl.isNFSkippable(c,this,
90ce3da70b43 Initial load duke parents: diff changeset	458	( NormalizerImpl.CC_MASK\|NormalizerImpl.COMBINES_ANY\|
90ce3da70b43 Initial load duke parents: diff changeset	459	(NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
90ce3da70b43 Initial load duke parents: diff changeset	460	)
90ce3da70b43 Initial load duke parents: diff changeset	461	);
90ce3da70b43 Initial load duke parents: diff changeset	462	}
90ce3da70b43 Initial load duke parents: diff changeset	463	};
90ce3da70b43 Initial load duke parents: diff changeset	464
90ce3da70b43 Initial load duke parents: diff changeset	465	/**
90ce3da70b43 Initial load duke parents: diff changeset	466	* Compatibility decomposition followed by canonical composition.
90ce3da70b43 Initial load duke parents: diff changeset	467	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	468	*/
90ce3da70b43 Initial load duke parents: diff changeset	469	public static final Mode NFKC =new NFKCMode(5);
90ce3da70b43 Initial load duke parents: diff changeset	470
90ce3da70b43 Initial load duke parents: diff changeset	471	private static final class NFKCMode extends Mode{
90ce3da70b43 Initial load duke parents: diff changeset	472	private NFKCMode(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	473	super(value);
90ce3da70b43 Initial load duke parents: diff changeset	474	}
90ce3da70b43 Initial load duke parents: diff changeset	475	protected int normalize(char[] src, int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	476	char[] dest,int destStart,int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	477	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	478	return NormalizerImpl.compose(src, srcStart,srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	479	dest, destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	480	NormalizerImpl.OPTIONS_COMPAT, nx);
90ce3da70b43 Initial load duke parents: diff changeset	481	}
90ce3da70b43 Initial load duke parents: diff changeset	482
90ce3da70b43 Initial load duke parents: diff changeset	483	protected String normalize( String src, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	484	return compose(src, true, options);
90ce3da70b43 Initial load duke parents: diff changeset	485	}
90ce3da70b43 Initial load duke parents: diff changeset	486	protected int getMinC() {
90ce3da70b43 Initial load duke parents: diff changeset	487	return NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	488	NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	489	);
90ce3da70b43 Initial load duke parents: diff changeset	490	}
90ce3da70b43 Initial load duke parents: diff changeset	491	protected IsPrevBoundary getPrevBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	492	return new IsPrevTrueStarter();
90ce3da70b43 Initial load duke parents: diff changeset	493	}
90ce3da70b43 Initial load duke parents: diff changeset	494	protected IsNextBoundary getNextBoundary() {
90ce3da70b43 Initial load duke parents: diff changeset	495	return new IsNextTrueStarter();
90ce3da70b43 Initial load duke parents: diff changeset	496	}
90ce3da70b43 Initial load duke parents: diff changeset	497	protected int getMask() {
90ce3da70b43 Initial load duke parents: diff changeset	498	return (NormalizerImpl.CC_MASK\|NormalizerImpl.QC_NFKC);
90ce3da70b43 Initial load duke parents: diff changeset	499	}
90ce3da70b43 Initial load duke parents: diff changeset	500	protected QuickCheckResult quickCheck(char[] src,int start,
90ce3da70b43 Initial load duke parents: diff changeset	501	int limit,boolean allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	502	UnicodeSet nx) {
90ce3da70b43 Initial load duke parents: diff changeset	503	return NormalizerImpl.quickCheck(
90ce3da70b43 Initial load duke parents: diff changeset	504	src,start,limit,
90ce3da70b43 Initial load duke parents: diff changeset	505	NormalizerImpl.getFromIndexesArr(
90ce3da70b43 Initial load duke parents: diff changeset	506	NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
90ce3da70b43 Initial load duke parents: diff changeset	507	),
90ce3da70b43 Initial load duke parents: diff changeset	508	NormalizerImpl.QC_NFKC,
90ce3da70b43 Initial load duke parents: diff changeset	509	NormalizerImpl.OPTIONS_COMPAT,
90ce3da70b43 Initial load duke parents: diff changeset	510	allowMaybe,
90ce3da70b43 Initial load duke parents: diff changeset	511	nx
90ce3da70b43 Initial load duke parents: diff changeset	512	);
90ce3da70b43 Initial load duke parents: diff changeset	513	}
90ce3da70b43 Initial load duke parents: diff changeset	514	protected boolean isNFSkippable(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	515	return NormalizerImpl.isNFSkippable(c, this,
90ce3da70b43 Initial load duke parents: diff changeset	516	( NormalizerImpl.CC_MASK\|NormalizerImpl.COMBINES_ANY\|
90ce3da70b43 Initial load duke parents: diff changeset	517	(NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
90ce3da70b43 Initial load duke parents: diff changeset	518	)
90ce3da70b43 Initial load duke parents: diff changeset	519	);
90ce3da70b43 Initial load duke parents: diff changeset	520	}
90ce3da70b43 Initial load duke parents: diff changeset	521	};
90ce3da70b43 Initial load duke parents: diff changeset	522
90ce3da70b43 Initial load duke parents: diff changeset	523	/**
90ce3da70b43 Initial load duke parents: diff changeset	524	* Result values for quickCheck().
90ce3da70b43 Initial load duke parents: diff changeset	525	* For details see Unicode Technical Report 15.
90ce3da70b43 Initial load duke parents: diff changeset	526	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	527	*/
90ce3da70b43 Initial load duke parents: diff changeset	528	public static final class QuickCheckResult{
90ce3da70b43 Initial load duke parents: diff changeset	529	private int resultValue;
90ce3da70b43 Initial load duke parents: diff changeset	530	private QuickCheckResult(int value) {
90ce3da70b43 Initial load duke parents: diff changeset	531	resultValue=value;
90ce3da70b43 Initial load duke parents: diff changeset	532	}
90ce3da70b43 Initial load duke parents: diff changeset	533	}
90ce3da70b43 Initial load duke parents: diff changeset	534	/**
90ce3da70b43 Initial load duke parents: diff changeset	535	* Indicates that string is not in the normalized format
90ce3da70b43 Initial load duke parents: diff changeset	536	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	537	*/
90ce3da70b43 Initial load duke parents: diff changeset	538	public static final QuickCheckResult NO = new QuickCheckResult(0);
90ce3da70b43 Initial load duke parents: diff changeset	539
90ce3da70b43 Initial load duke parents: diff changeset	540	/**
90ce3da70b43 Initial load duke parents: diff changeset	541	* Indicates that string is in the normalized format
90ce3da70b43 Initial load duke parents: diff changeset	542	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	543	*/
90ce3da70b43 Initial load duke parents: diff changeset	544	public static final QuickCheckResult YES = new QuickCheckResult(1);
90ce3da70b43 Initial load duke parents: diff changeset	545
90ce3da70b43 Initial load duke parents: diff changeset	546	/**
90ce3da70b43 Initial load duke parents: diff changeset	547	* Indicates it cannot be determined if string is in the normalized
90ce3da70b43 Initial load duke parents: diff changeset	548	* format without further thorough checks.
90ce3da70b43 Initial load duke parents: diff changeset	549	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	550	*/
90ce3da70b43 Initial load duke parents: diff changeset	551	public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
90ce3da70b43 Initial load duke parents: diff changeset	552
90ce3da70b43 Initial load duke parents: diff changeset	553	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	554	// Constructors
90ce3da70b43 Initial load duke parents: diff changeset	555	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	556
/**
 * Creates a new {@code Normalizer} object for iterating over the
 * normalized form of a given string.
 * <p>
 * The {@code opt} parameter specifies which optional
 * {@code Normalizer} features are to be enabled for this object.
 *
 * @param str  The string to be normalized.  The normalization
 *             will start at the beginning of the string.
 *
 * @param mode The normalization mode.
 *
 * @param opt  Any optional features to be enabled.
 *             Currently the only available option is {@link #UNICODE_3_2}.
 *             If you want the default behavior corresponding to one of the
 *             standard Unicode Normalization Forms, use 0 for this argument.
 * @stable ICU 2.6
 */
public NormalizerBase(String str, Mode mode, int opt) {
    this.mode = mode;
    this.options = opt;
    // Wrap the string in the iterator type used internally.
    this.text = UCharacterIterator.getInstance(str);
}
90ce3da70b43 Initial load duke parents: diff changeset	580
/**
 * Creates a new {@code Normalizer} object for iterating over the
 * normalized form of the given text, using {@code UNICODE_LATEST}
 * as the options value.
 *
 * @param iter The input text to be normalized.  The normalization
 *             will start at the beginning of the string.
 *
 * @param mode The normalization mode.
 */
public NormalizerBase(CharacterIterator iter, Mode mode) {
    this(
        iter,
        mode,
        UNICODE_LATEST
    );
}
90ce3da70b43 Initial load duke parents: diff changeset	593
90ce3da70b43 Initial load duke parents: diff changeset	594	/**
90ce3da70b43 Initial load duke parents: diff changeset	595	* Creates a new <tt>Normalizer</tt> object for iterating over the
90ce3da70b43 Initial load duke parents: diff changeset	596	* normalized form of the given text.
90ce3da70b43 Initial load duke parents: diff changeset	597	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	598	* @param iter The input text to be normalized. The normalization
90ce3da70b43 Initial load duke parents: diff changeset	599	* will start at the beginning of the string.
90ce3da70b43 Initial load duke parents: diff changeset	600	*
90ce3da70b43 Initial load duke parents: diff changeset	601	* @param mode The normalization mode.
90ce3da70b43 Initial load duke parents: diff changeset	602	*
90ce3da70b43 Initial load duke parents: diff changeset	603	* @param opt Any optional features to be enabled.
90ce3da70b43 Initial load duke parents: diff changeset	604	* Currently the only available option is {@link #UNICODE_3_2}.
90ce3da70b43 Initial load duke parents: diff changeset	605	* If you want the default behavior corresponding to one of the
90ce3da70b43 Initial load duke parents: diff changeset	606	* standard Unicode Normalization Forms, use 0 for this argument.
90ce3da70b43 Initial load duke parents: diff changeset	607	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	608	*/
90ce3da70b43 Initial load duke parents: diff changeset	609	public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
90ce3da70b43 Initial load duke parents: diff changeset	610	this.text = UCharacterIterator.getInstance(
90ce3da70b43 Initial load duke parents: diff changeset	611	(CharacterIterator)iter.clone()
90ce3da70b43 Initial load duke parents: diff changeset	612	);
90ce3da70b43 Initial load duke parents: diff changeset	613	this.mode = mode;
90ce3da70b43 Initial load duke parents: diff changeset	614	this.options = opt;
90ce3da70b43 Initial load duke parents: diff changeset	615	}
90ce3da70b43 Initial load duke parents: diff changeset	616
90ce3da70b43 Initial load duke parents: diff changeset	617	/**
90ce3da70b43 Initial load duke parents: diff changeset	618	* Clones this <tt>Normalizer</tt> object. All properties of this
90ce3da70b43 Initial load duke parents: diff changeset	619	* object are duplicated in the new object, including the cloning of any
90ce3da70b43 Initial load duke parents: diff changeset	620	* {@link CharacterIterator} that was passed in to the constructor
90ce3da70b43 Initial load duke parents: diff changeset	621	* or to {@link #setText(CharacterIterator) setText}.
90ce3da70b43 Initial load duke parents: diff changeset	622	* However, the text storage underlying
90ce3da70b43 Initial load duke parents: diff changeset	623	* the <tt>CharacterIterator</tt> is not duplicated unless the
90ce3da70b43 Initial load duke parents: diff changeset	624	* iterator's <tt>clone</tt> method does so.
90ce3da70b43 Initial load duke parents: diff changeset	625	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	626	*/
90ce3da70b43 Initial load duke parents: diff changeset	627	public Object clone() {
90ce3da70b43 Initial load duke parents: diff changeset	628	try {
90ce3da70b43 Initial load duke parents: diff changeset	629	NormalizerBase copy = (NormalizerBase) super.clone();
90ce3da70b43 Initial load duke parents: diff changeset	630	copy.text = (UCharacterIterator) text.clone();
90ce3da70b43 Initial load duke parents: diff changeset	631	//clone the internal buffer
90ce3da70b43 Initial load duke parents: diff changeset	632	if (buffer != null) {
90ce3da70b43 Initial load duke parents: diff changeset	633	copy.buffer = new char[buffer.length];
90ce3da70b43 Initial load duke parents: diff changeset	634	System.arraycopy(buffer,0,copy.buffer,0,buffer.length);
90ce3da70b43 Initial load duke parents: diff changeset	635	}
90ce3da70b43 Initial load duke parents: diff changeset	636	return copy;
90ce3da70b43 Initial load duke parents: diff changeset	637	}
90ce3da70b43 Initial load duke parents: diff changeset	638	catch (CloneNotSupportedException e) {
10419 12c063b39232 7084245: Update usages of InternalError to use exception chaining sherman parents: 5506 diff changeset	639	throw new InternalError(e.toString(), e);
2 90ce3da70b43 Initial load duke parents: diff changeset	640	}
90ce3da70b43 Initial load duke parents: diff changeset	641	}
90ce3da70b43 Initial load duke parents: diff changeset	642
90ce3da70b43 Initial load duke parents: diff changeset	643	//--------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	644	// Static Utility methods
90ce3da70b43 Initial load duke parents: diff changeset	645	//--------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	646
90ce3da70b43 Initial load duke parents: diff changeset	647	/**
90ce3da70b43 Initial load duke parents: diff changeset	648	* Compose a string.
90ce3da70b43 Initial load duke parents: diff changeset	649	* The string will be composed to according the the specified mode.
90ce3da70b43 Initial load duke parents: diff changeset	650	* @param str The string to compose.
90ce3da70b43 Initial load duke parents: diff changeset	651	* @param compat If true the string will be composed accoding to
90ce3da70b43 Initial load duke parents: diff changeset	652	* NFKC rules and if false will be composed according to
90ce3da70b43 Initial load duke parents: diff changeset	653	* NFC rules.
90ce3da70b43 Initial load duke parents: diff changeset	654	* @param options The only recognized option is UNICODE_3_2
90ce3da70b43 Initial load duke parents: diff changeset	655	* @return String The composed string
90ce3da70b43 Initial load duke parents: diff changeset	656	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	657	*/
90ce3da70b43 Initial load duke parents: diff changeset	658	public static String compose(String str, boolean compat, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	659
90ce3da70b43 Initial load duke parents: diff changeset	660	char[] dest, src;
90ce3da70b43 Initial load duke parents: diff changeset	661	if (options == UNICODE_3_2_0_ORIGINAL) {
90ce3da70b43 Initial load duke parents: diff changeset	662	String mappedStr = NormalizerImpl.convert(str);
90ce3da70b43 Initial load duke parents: diff changeset	663	dest = new char[mappedStr.length()*MAX_BUF_SIZE_COMPOSE];
90ce3da70b43 Initial load duke parents: diff changeset	664	src = mappedStr.toCharArray();
90ce3da70b43 Initial load duke parents: diff changeset	665	} else {
90ce3da70b43 Initial load duke parents: diff changeset	666	dest = new char[str.length()*MAX_BUF_SIZE_COMPOSE];
90ce3da70b43 Initial load duke parents: diff changeset	667	src = str.toCharArray();
90ce3da70b43 Initial load duke parents: diff changeset	668	}
90ce3da70b43 Initial load duke parents: diff changeset	669	int destSize=0;
90ce3da70b43 Initial load duke parents: diff changeset	670
90ce3da70b43 Initial load duke parents: diff changeset	671	UnicodeSet nx = NormalizerImpl.getNX(options);
90ce3da70b43 Initial load duke parents: diff changeset	672
90ce3da70b43 Initial load duke parents: diff changeset	673	/* reset options bits that should only be set here or inside compose() */
90ce3da70b43 Initial load duke parents: diff changeset	674	options&=~(NormalizerImpl.OPTIONS_SETS_MASK\|NormalizerImpl.OPTIONS_COMPAT\|NormalizerImpl.OPTIONS_COMPOSE_CONTIGUOUS);
90ce3da70b43 Initial load duke parents: diff changeset	675
90ce3da70b43 Initial load duke parents: diff changeset	676	if(compat) {
90ce3da70b43 Initial load duke parents: diff changeset	677	options\|=NormalizerImpl.OPTIONS_COMPAT;
90ce3da70b43 Initial load duke parents: diff changeset	678	}
90ce3da70b43 Initial load duke parents: diff changeset	679
90ce3da70b43 Initial load duke parents: diff changeset	680	for(;;) {
90ce3da70b43 Initial load duke parents: diff changeset	681	destSize=NormalizerImpl.compose(src,0,src.length,
90ce3da70b43 Initial load duke parents: diff changeset	682	dest,0,dest.length,options,
90ce3da70b43 Initial load duke parents: diff changeset	683	nx);
90ce3da70b43 Initial load duke parents: diff changeset	684	if(destSize<=dest.length) {
90ce3da70b43 Initial load duke parents: diff changeset	685	return new String(dest,0,destSize);
90ce3da70b43 Initial load duke parents: diff changeset	686	} else {
90ce3da70b43 Initial load duke parents: diff changeset	687	dest = new char[destSize];
90ce3da70b43 Initial load duke parents: diff changeset	688	}
90ce3da70b43 Initial load duke parents: diff changeset	689	}
90ce3da70b43 Initial load duke parents: diff changeset	690	}
90ce3da70b43 Initial load duke parents: diff changeset	691
90ce3da70b43 Initial load duke parents: diff changeset	692	private static final int MAX_BUF_SIZE_COMPOSE = 2;
90ce3da70b43 Initial load duke parents: diff changeset	693	private static final int MAX_BUF_SIZE_DECOMPOSE = 3;
90ce3da70b43 Initial load duke parents: diff changeset	694
90ce3da70b43 Initial load duke parents: diff changeset	695	/**
90ce3da70b43 Initial load duke parents: diff changeset	696	* Decompose a string.
90ce3da70b43 Initial load duke parents: diff changeset	697	* The string will be decomposed to according the the specified mode.
90ce3da70b43 Initial load duke parents: diff changeset	698	* @param str The string to decompose.
90ce3da70b43 Initial load duke parents: diff changeset	699	* @param compat If true the string will be decomposed accoding to NFKD
90ce3da70b43 Initial load duke parents: diff changeset	700	* rules and if false will be decomposed according to NFD
90ce3da70b43 Initial load duke parents: diff changeset	701	* rules.
90ce3da70b43 Initial load duke parents: diff changeset	702	* @return String The decomposed string
90ce3da70b43 Initial load duke parents: diff changeset	703	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	704	*/
90ce3da70b43 Initial load duke parents: diff changeset	705	public static String decompose(String str, boolean compat) {
90ce3da70b43 Initial load duke parents: diff changeset	706	return decompose(str,compat,UNICODE_LATEST);
90ce3da70b43 Initial load duke parents: diff changeset	707	}
90ce3da70b43 Initial load duke parents: diff changeset	708
90ce3da70b43 Initial load duke parents: diff changeset	709	/**
90ce3da70b43 Initial load duke parents: diff changeset	710	* Decompose a string.
90ce3da70b43 Initial load duke parents: diff changeset	711	* The string will be decomposed according to the specified mode.
90ce3da70b43 Initial load duke parents: diff changeset	712	* @param str The string to decompose.
90ce3da70b43 Initial load duke parents: diff changeset	713	* @param compat If true the string will be decomposed according to NFKD
90ce3da70b43 Initial load duke parents: diff changeset	714	* rules and if false will be decomposed according to NFD
90ce3da70b43 Initial load duke parents: diff changeset	715	* rules.
90ce3da70b43 Initial load duke parents: diff changeset	716	* @param options The normalization options, ORed together (0 for no options).
90ce3da70b43 Initial load duke parents: diff changeset	717	* @return String The decomposed string
90ce3da70b43 Initial load duke parents: diff changeset	718	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	719	*/
90ce3da70b43 Initial load duke parents: diff changeset	720	public static String decompose(String str, boolean compat, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	721
90ce3da70b43 Initial load duke parents: diff changeset	722	int[] trailCC = new int[1];
90ce3da70b43 Initial load duke parents: diff changeset	723	int destSize=0;
90ce3da70b43 Initial load duke parents: diff changeset	724	UnicodeSet nx = NormalizerImpl.getNX(options);
90ce3da70b43 Initial load duke parents: diff changeset	725	char[] dest;
90ce3da70b43 Initial load duke parents: diff changeset	726
90ce3da70b43 Initial load duke parents: diff changeset	727	if (options == UNICODE_3_2_0_ORIGINAL) {
90ce3da70b43 Initial load duke parents: diff changeset	728	String mappedStr = NormalizerImpl.convert(str);
90ce3da70b43 Initial load duke parents: diff changeset	729	dest = new char[mappedStr.length()*MAX_BUF_SIZE_DECOMPOSE];
90ce3da70b43 Initial load duke parents: diff changeset	730
90ce3da70b43 Initial load duke parents: diff changeset	731	for(;;) {
90ce3da70b43 Initial load duke parents: diff changeset	732	destSize=NormalizerImpl.decompose(mappedStr.toCharArray(),0,mappedStr.length(),
90ce3da70b43 Initial load duke parents: diff changeset	733	dest,0,dest.length,
90ce3da70b43 Initial load duke parents: diff changeset	734	compat,trailCC, nx);
90ce3da70b43 Initial load duke parents: diff changeset	735	if(destSize<=dest.length) {
90ce3da70b43 Initial load duke parents: diff changeset	736	return new String(dest,0,destSize);
90ce3da70b43 Initial load duke parents: diff changeset	737	} else {
90ce3da70b43 Initial load duke parents: diff changeset	738	dest = new char[destSize];
90ce3da70b43 Initial load duke parents: diff changeset	739	}
90ce3da70b43 Initial load duke parents: diff changeset	740	}
90ce3da70b43 Initial load duke parents: diff changeset	741	} else {
90ce3da70b43 Initial load duke parents: diff changeset	742	dest = new char[str.length()*MAX_BUF_SIZE_DECOMPOSE];
90ce3da70b43 Initial load duke parents: diff changeset	743
90ce3da70b43 Initial load duke parents: diff changeset	744	for(;;) {
90ce3da70b43 Initial load duke parents: diff changeset	745	destSize=NormalizerImpl.decompose(str.toCharArray(),0,str.length(),
90ce3da70b43 Initial load duke parents: diff changeset	746	dest,0,dest.length,
90ce3da70b43 Initial load duke parents: diff changeset	747	compat,trailCC, nx);
90ce3da70b43 Initial load duke parents: diff changeset	748	if(destSize<=dest.length) {
90ce3da70b43 Initial load duke parents: diff changeset	749	return new String(dest,0,destSize);
90ce3da70b43 Initial load duke parents: diff changeset	750	} else {
90ce3da70b43 Initial load duke parents: diff changeset	751	dest = new char[destSize];
90ce3da70b43 Initial load duke parents: diff changeset	752	}
90ce3da70b43 Initial load duke parents: diff changeset	753	}
90ce3da70b43 Initial load duke parents: diff changeset	754	}
90ce3da70b43 Initial load duke parents: diff changeset	755	}
90ce3da70b43 Initial load duke parents: diff changeset	756
90ce3da70b43 Initial load duke parents: diff changeset	757	/**
90ce3da70b43 Initial load duke parents: diff changeset	758	* Normalize a string.
90ce3da70b43 Initial load duke parents: diff changeset	759	* The string will be normalized according the the specified normalization
90ce3da70b43 Initial load duke parents: diff changeset	760	* mode and options.
90ce3da70b43 Initial load duke parents: diff changeset	761	* @param src The char array to compose.
90ce3da70b43 Initial load duke parents: diff changeset	762	* @param srcStart Start index of the source
90ce3da70b43 Initial load duke parents: diff changeset	763	* @param srcLimit Limit index of the source
90ce3da70b43 Initial load duke parents: diff changeset	764	* @param dest The char buffer to fill in
90ce3da70b43 Initial load duke parents: diff changeset	765	* @param destStart Start index of the destination buffer
90ce3da70b43 Initial load duke parents: diff changeset	766	* @param destLimit End index of the destination buffer
90ce3da70b43 Initial load duke parents: diff changeset	767	* @param mode The normalization mode; one of Normalizer.NONE,
90ce3da70b43 Initial load duke parents: diff changeset	768	* Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
90ce3da70b43 Initial load duke parents: diff changeset	769	* Normalizer.NFKD, Normalizer.DEFAULT
90ce3da70b43 Initial load duke parents: diff changeset	770	* @param options The normalization options, ORed together (0 for no options).
90ce3da70b43 Initial load duke parents: diff changeset	771	* @return int The total buffer size needed;if greater than length of
90ce3da70b43 Initial load duke parents: diff changeset	772	* result, the output was truncated.
90ce3da70b43 Initial load duke parents: diff changeset	773	* @exception IndexOutOfBoundsException if the target capacity is
90ce3da70b43 Initial load duke parents: diff changeset	774	* less than the required length
90ce3da70b43 Initial load duke parents: diff changeset	775	* @stable ICU 2.6
90ce3da70b43 Initial load duke parents: diff changeset	776	*/
90ce3da70b43 Initial load duke parents: diff changeset	777	public static int normalize(char[] src,int srcStart, int srcLimit,
90ce3da70b43 Initial load duke parents: diff changeset	778	char[] dest,int destStart, int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	779	Mode mode, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	780	int length = mode.normalize(src,srcStart,srcLimit,dest,destStart,destLimit, options);
90ce3da70b43 Initial load duke parents: diff changeset	781
90ce3da70b43 Initial load duke parents: diff changeset	782	if(length<=(destLimit-destStart)) {
90ce3da70b43 Initial load duke parents: diff changeset	783	return length;
90ce3da70b43 Initial load duke parents: diff changeset	784	} else {
90ce3da70b43 Initial load duke parents: diff changeset	785	throw new IndexOutOfBoundsException(Integer.toString(length));
90ce3da70b43 Initial load duke parents: diff changeset	786	}
90ce3da70b43 Initial load duke parents: diff changeset	787	}
90ce3da70b43 Initial load duke parents: diff changeset	788
90ce3da70b43 Initial load duke parents: diff changeset	789	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	790	// Iteration API
90ce3da70b43 Initial load duke parents: diff changeset	791	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	792
90ce3da70b43 Initial load duke parents: diff changeset	793	/**
90ce3da70b43 Initial load duke parents: diff changeset	794	* Return the current character in the normalized text->
90ce3da70b43 Initial load duke parents: diff changeset	795	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	796	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	797	*/
90ce3da70b43 Initial load duke parents: diff changeset	798	public int current() {
90ce3da70b43 Initial load duke parents: diff changeset	799	if(bufferPos<bufferLimit \|\| nextNormalize()) {
90ce3da70b43 Initial load duke parents: diff changeset	800	return getCodePointAt(bufferPos);
90ce3da70b43 Initial load duke parents: diff changeset	801	} else {
90ce3da70b43 Initial load duke parents: diff changeset	802	return DONE;
90ce3da70b43 Initial load duke parents: diff changeset	803	}
90ce3da70b43 Initial load duke parents: diff changeset	804	}
90ce3da70b43 Initial load duke parents: diff changeset	805
90ce3da70b43 Initial load duke parents: diff changeset	806	/**
90ce3da70b43 Initial load duke parents: diff changeset	807	* Return the next character in the normalized text and advance
90ce3da70b43 Initial load duke parents: diff changeset	808	* the iteration position by one. If the end
90ce3da70b43 Initial load duke parents: diff changeset	809	* of the text has already been reached, {@link #DONE} is returned.
90ce3da70b43 Initial load duke parents: diff changeset	810	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	811	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	812	*/
90ce3da70b43 Initial load duke parents: diff changeset	813	public int next() {
90ce3da70b43 Initial load duke parents: diff changeset	814	if(bufferPos<bufferLimit \|\| nextNormalize()) {
90ce3da70b43 Initial load duke parents: diff changeset	815	int c=getCodePointAt(bufferPos);
90ce3da70b43 Initial load duke parents: diff changeset	816	bufferPos+=(c>0xFFFF) ? 2 : 1;
90ce3da70b43 Initial load duke parents: diff changeset	817	return c;
90ce3da70b43 Initial load duke parents: diff changeset	818	} else {
90ce3da70b43 Initial load duke parents: diff changeset	819	return DONE;
90ce3da70b43 Initial load duke parents: diff changeset	820	}
90ce3da70b43 Initial load duke parents: diff changeset	821	}
90ce3da70b43 Initial load duke parents: diff changeset	822
90ce3da70b43 Initial load duke parents: diff changeset	823
90ce3da70b43 Initial load duke parents: diff changeset	824	/**
90ce3da70b43 Initial load duke parents: diff changeset	825	* Return the previous character in the normalized text and decrement
90ce3da70b43 Initial load duke parents: diff changeset	826	* the iteration position by one. If the beginning
90ce3da70b43 Initial load duke parents: diff changeset	827	* of the text has already been reached, {@link #DONE} is returned.
90ce3da70b43 Initial load duke parents: diff changeset	828	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	829	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	830	*/
90ce3da70b43 Initial load duke parents: diff changeset	831	public int previous() {
90ce3da70b43 Initial load duke parents: diff changeset	832	if(bufferPos>0 \|\| previousNormalize()) {
90ce3da70b43 Initial load duke parents: diff changeset	833	int c=getCodePointAt(bufferPos-1);
90ce3da70b43 Initial load duke parents: diff changeset	834	bufferPos-=(c>0xFFFF) ? 2 : 1;
90ce3da70b43 Initial load duke parents: diff changeset	835	return c;
90ce3da70b43 Initial load duke parents: diff changeset	836	} else {
90ce3da70b43 Initial load duke parents: diff changeset	837	return DONE;
90ce3da70b43 Initial load duke parents: diff changeset	838	}
90ce3da70b43 Initial load duke parents: diff changeset	839	}
90ce3da70b43 Initial load duke parents: diff changeset	840
90ce3da70b43 Initial load duke parents: diff changeset	841	/**
90ce3da70b43 Initial load duke parents: diff changeset	842	* Reset the index to the beginning of the text.
90ce3da70b43 Initial load duke parents: diff changeset	843	* This is equivalent to setIndexOnly(startIndex)).
90ce3da70b43 Initial load duke parents: diff changeset	844	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	845	*/
90ce3da70b43 Initial load duke parents: diff changeset	846	public void reset() {
90ce3da70b43 Initial load duke parents: diff changeset	847	text.setIndex(0);
90ce3da70b43 Initial load duke parents: diff changeset	848	currentIndex=nextIndex=0;
90ce3da70b43 Initial load duke parents: diff changeset	849	clearBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	850	}
90ce3da70b43 Initial load duke parents: diff changeset	851
90ce3da70b43 Initial load duke parents: diff changeset	852	/**
90ce3da70b43 Initial load duke parents: diff changeset	853	* Set the iteration position in the input text that is being normalized,
90ce3da70b43 Initial load duke parents: diff changeset	854	* without any immediate normalization.
90ce3da70b43 Initial load duke parents: diff changeset	855	* After setIndexOnly(), getIndex() will return the same index that is
90ce3da70b43 Initial load duke parents: diff changeset	856	* specified here.
90ce3da70b43 Initial load duke parents: diff changeset	857	*
90ce3da70b43 Initial load duke parents: diff changeset	858	* @param index the desired index in the input text.
90ce3da70b43 Initial load duke parents: diff changeset	859	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	860	*/
90ce3da70b43 Initial load duke parents: diff changeset	861	public void setIndexOnly(int index) {
90ce3da70b43 Initial load duke parents: diff changeset	862	text.setIndex(index);
90ce3da70b43 Initial load duke parents: diff changeset	863	currentIndex=nextIndex=index; // validates index
90ce3da70b43 Initial load duke parents: diff changeset	864	clearBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	865	}
90ce3da70b43 Initial load duke parents: diff changeset	866
90ce3da70b43 Initial load duke parents: diff changeset	867	/**
90ce3da70b43 Initial load duke parents: diff changeset	868	* Set the iteration position in the input text that is being normalized
90ce3da70b43 Initial load duke parents: diff changeset	869	* and return the first normalized character at that position.
90ce3da70b43 Initial load duke parents: diff changeset	870	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	871	* <b>Note:</b> This method sets the position in the <em>input</em> text,
90ce3da70b43 Initial load duke parents: diff changeset	872	* while {@link #next} and {@link #previous} iterate through characters
90ce3da70b43 Initial load duke parents: diff changeset	873	* in the normalized <em>output</em>. This means that there is not
90ce3da70b43 Initial load duke parents: diff changeset	874	* necessarily a one-to-one correspondence between characters returned
90ce3da70b43 Initial load duke parents: diff changeset	875	* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
90ce3da70b43 Initial load duke parents: diff changeset	876	* returned from <tt>setIndex</tt> and {@link #getIndex}.
90ce3da70b43 Initial load duke parents: diff changeset	877	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	878	* @param index the desired index in the input text->
90ce3da70b43 Initial load duke parents: diff changeset	879	*
90ce3da70b43 Initial load duke parents: diff changeset	880	* @return the first normalized character that is the result of iterating
90ce3da70b43 Initial load duke parents: diff changeset	881	* forward starting at the given index.
90ce3da70b43 Initial load duke parents: diff changeset	882	*
90ce3da70b43 Initial load duke parents: diff changeset	883	* @throws IllegalArgumentException if the given index is less than
90ce3da70b43 Initial load duke parents: diff changeset	884	* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
90ce3da70b43 Initial load duke parents: diff changeset	885	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	886	* @deprecated ICU 3.2
90ce3da70b43 Initial load duke parents: diff changeset	887	* @obsolete ICU 3.2
90ce3da70b43 Initial load duke parents: diff changeset	888	*/
90ce3da70b43 Initial load duke parents: diff changeset	889	public int setIndex(int index) {
90ce3da70b43 Initial load duke parents: diff changeset	890	setIndexOnly(index);
90ce3da70b43 Initial load duke parents: diff changeset	891	return current();
90ce3da70b43 Initial load duke parents: diff changeset	892	}
90ce3da70b43 Initial load duke parents: diff changeset	893
90ce3da70b43 Initial load duke parents: diff changeset	894	/**
90ce3da70b43 Initial load duke parents: diff changeset	895	* Retrieve the index of the start of the input text. This is the begin
90ce3da70b43 Initial load duke parents: diff changeset	896	* index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
90ce3da70b43 Initial load duke parents: diff changeset	897	* <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load duke parents: diff changeset	898	* @deprecated ICU 2.2. Use startIndex() instead.
90ce3da70b43 Initial load duke parents: diff changeset	899	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	900	* @see #startIndex
90ce3da70b43 Initial load duke parents: diff changeset	901	*/
90ce3da70b43 Initial load duke parents: diff changeset	902	public int getBeginIndex() {
90ce3da70b43 Initial load duke parents: diff changeset	903	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	904	}
90ce3da70b43 Initial load duke parents: diff changeset	905
90ce3da70b43 Initial load duke parents: diff changeset	906	/**
90ce3da70b43 Initial load duke parents: diff changeset	907	* Retrieve the index of the end of the input text. This is the end index
90ce3da70b43 Initial load duke parents: diff changeset	908	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
90ce3da70b43 Initial load duke parents: diff changeset	909	* over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load duke parents: diff changeset	910	* @deprecated ICU 2.2. Use endIndex() instead.
90ce3da70b43 Initial load duke parents: diff changeset	911	* @return The codepoint as an int
90ce3da70b43 Initial load duke parents: diff changeset	912	* @see #endIndex
90ce3da70b43 Initial load duke parents: diff changeset	913	*/
90ce3da70b43 Initial load duke parents: diff changeset	914	public int getEndIndex() {
90ce3da70b43 Initial load duke parents: diff changeset	915	return endIndex();
90ce3da70b43 Initial load duke parents: diff changeset	916	}
90ce3da70b43 Initial load duke parents: diff changeset	917
90ce3da70b43 Initial load duke parents: diff changeset	918	/**
90ce3da70b43 Initial load duke parents: diff changeset	919	* Retrieve the current iteration position in the input text that is
90ce3da70b43 Initial load duke parents: diff changeset	920	* being normalized. This method is useful in applications such as
90ce3da70b43 Initial load duke parents: diff changeset	921	* searching, where you need to be able to determine the position in
90ce3da70b43 Initial load duke parents: diff changeset	922	* the input text that corresponds to a given normalized output character.
90ce3da70b43 Initial load duke parents: diff changeset	923	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	924	* <b>Note:</b> This method sets the position in the <em>input</em>, while
90ce3da70b43 Initial load duke parents: diff changeset	925	* {@link #next} and {@link #previous} iterate through characters in the
90ce3da70b43 Initial load duke parents: diff changeset	926	* <em>output</em>. This means that there is not necessarily a one-to-one
90ce3da70b43 Initial load duke parents: diff changeset	927	* correspondence between characters returned by <tt>next</tt> and
90ce3da70b43 Initial load duke parents: diff changeset	928	* <tt>previous</tt> and the indices passed to and returned from
90ce3da70b43 Initial load duke parents: diff changeset	929	* <tt>setIndex</tt> and {@link #getIndex}.
90ce3da70b43 Initial load duke parents: diff changeset	930	* @return The current iteration position
90ce3da70b43 Initial load duke parents: diff changeset	931	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	932	*/
90ce3da70b43 Initial load duke parents: diff changeset	933	public int getIndex() {
90ce3da70b43 Initial load duke parents: diff changeset	934	if(bufferPos<bufferLimit) {
90ce3da70b43 Initial load duke parents: diff changeset	935	return currentIndex;
90ce3da70b43 Initial load duke parents: diff changeset	936	} else {
90ce3da70b43 Initial load duke parents: diff changeset	937	return nextIndex;
90ce3da70b43 Initial load duke parents: diff changeset	938	}
90ce3da70b43 Initial load duke parents: diff changeset	939	}
90ce3da70b43 Initial load duke parents: diff changeset	940
90ce3da70b43 Initial load duke parents: diff changeset	941	/**
90ce3da70b43 Initial load duke parents: diff changeset	942	* Retrieve the index of the end of the input text-> This is the end index
90ce3da70b43 Initial load duke parents: diff changeset	943	* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
90ce3da70b43 Initial load duke parents: diff changeset	944	* over which this <tt>Normalizer</tt> is iterating
90ce3da70b43 Initial load duke parents: diff changeset	945	* @return The current iteration position
90ce3da70b43 Initial load duke parents: diff changeset	946	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	947	*/
90ce3da70b43 Initial load duke parents: diff changeset	948	public int endIndex() {
90ce3da70b43 Initial load duke parents: diff changeset	949	return text.getLength();
90ce3da70b43 Initial load duke parents: diff changeset	950	}
90ce3da70b43 Initial load duke parents: diff changeset	951
90ce3da70b43 Initial load duke parents: diff changeset	952	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	953	// Property access methods
90ce3da70b43 Initial load duke parents: diff changeset	954	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	955	/**
90ce3da70b43 Initial load duke parents: diff changeset	956	* Set the normalization mode for this object.
90ce3da70b43 Initial load duke parents: diff changeset	957	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	958	* <b>Note:</b>If the normalization mode is changed while iterating
90ce3da70b43 Initial load duke parents: diff changeset	959	* over a string, calls to {@link #next} and {@link #previous} may
90ce3da70b43 Initial load duke parents: diff changeset	960	* return previously buffers characters in the old normalization mode
90ce3da70b43 Initial load duke parents: diff changeset	961	* until the iteration is able to re-sync at the next base character.
90ce3da70b43 Initial load duke parents: diff changeset	962	* It is safest to call {@link #setText setText()}, {@link #first},
90ce3da70b43 Initial load duke parents: diff changeset	963	* {@link #last}, etc. after calling <tt>setMode</tt>.
90ce3da70b43 Initial load duke parents: diff changeset	964	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	965	* @param newMode the new mode for this <tt>Normalizer</tt>.
90ce3da70b43 Initial load duke parents: diff changeset	966	* The supported modes are:
90ce3da70b43 Initial load duke parents: diff changeset	967	* <ul>
90ce3da70b43 Initial load duke parents: diff changeset	968	* <li>{@link #COMPOSE} - Unicode canonical decompositiion
90ce3da70b43 Initial load duke parents: diff changeset	969	* followed by canonical composition.
90ce3da70b43 Initial load duke parents: diff changeset	970	* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decompositiion
90ce3da70b43 Initial load duke parents: diff changeset	971	* follwed by canonical composition.
90ce3da70b43 Initial load duke parents: diff changeset	972	* <li>{@link #DECOMP} - Unicode canonical decomposition
90ce3da70b43 Initial load duke parents: diff changeset	973	* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
90ce3da70b43 Initial load duke parents: diff changeset	974	* <li>{@link #NO_OP} - Do nothing but return characters
90ce3da70b43 Initial load duke parents: diff changeset	975	* from the underlying input text.
90ce3da70b43 Initial load duke parents: diff changeset	976	* </ul>
90ce3da70b43 Initial load duke parents: diff changeset	977	*
90ce3da70b43 Initial load duke parents: diff changeset	978	* @see #getMode
90ce3da70b43 Initial load duke parents: diff changeset	979	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	980	*/
90ce3da70b43 Initial load duke parents: diff changeset	981	public void setMode(Mode newMode) {
90ce3da70b43 Initial load duke parents: diff changeset	982	mode = newMode;
90ce3da70b43 Initial load duke parents: diff changeset	983	}
90ce3da70b43 Initial load duke parents: diff changeset	984	/**
90ce3da70b43 Initial load duke parents: diff changeset	985	* Return the basic operation performed by this <tt>Normalizer</tt>
90ce3da70b43 Initial load duke parents: diff changeset	986	*
90ce3da70b43 Initial load duke parents: diff changeset	987	* @see #setMode
90ce3da70b43 Initial load duke parents: diff changeset	988	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	989	*/
90ce3da70b43 Initial load duke parents: diff changeset	990	public Mode getMode() {
90ce3da70b43 Initial load duke parents: diff changeset	991	return mode;
90ce3da70b43 Initial load duke parents: diff changeset	992	}
90ce3da70b43 Initial load duke parents: diff changeset	993
90ce3da70b43 Initial load duke parents: diff changeset	994	/**
90ce3da70b43 Initial load duke parents: diff changeset	995	* Set the input text over which this <tt>Normalizer</tt> will iterate.
90ce3da70b43 Initial load duke parents: diff changeset	996	* The iteration position is set to the beginning of the input text->
90ce3da70b43 Initial load duke parents: diff changeset	997	* @param newText The new string to be normalized.
90ce3da70b43 Initial load duke parents: diff changeset	998	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	999	*/
90ce3da70b43 Initial load duke parents: diff changeset	1000	public void setText(String newText) {
90ce3da70b43 Initial load duke parents: diff changeset	1001
90ce3da70b43 Initial load duke parents: diff changeset	1002	UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
90ce3da70b43 Initial load duke parents: diff changeset	1003	if (newIter == null) {
90ce3da70b43 Initial load duke parents: diff changeset	1004	throw new InternalError("Could not create a new UCharacterIterator");
90ce3da70b43 Initial load duke parents: diff changeset	1005	}
90ce3da70b43 Initial load duke parents: diff changeset	1006	text = newIter;
90ce3da70b43 Initial load duke parents: diff changeset	1007	reset();
90ce3da70b43 Initial load duke parents: diff changeset	1008	}
90ce3da70b43 Initial load duke parents: diff changeset	1009
90ce3da70b43 Initial load duke parents: diff changeset	1010	/**
90ce3da70b43 Initial load duke parents: diff changeset	1011	* Set the input text over which this <tt>Normalizer</tt> will iterate.
90ce3da70b43 Initial load duke parents: diff changeset	1012	* The iteration position is set to the beginning of the input text->
90ce3da70b43 Initial load duke parents: diff changeset	1013	* @param newText The new string to be normalized.
90ce3da70b43 Initial load duke parents: diff changeset	1014	* @stable ICU 2.8
90ce3da70b43 Initial load duke parents: diff changeset	1015	*/
90ce3da70b43 Initial load duke parents: diff changeset	1016	public void setText(CharacterIterator newText) {
90ce3da70b43 Initial load duke parents: diff changeset	1017
90ce3da70b43 Initial load duke parents: diff changeset	1018	UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
90ce3da70b43 Initial load duke parents: diff changeset	1019	if (newIter == null) {
90ce3da70b43 Initial load duke parents: diff changeset	1020	throw new InternalError("Could not create a new UCharacterIterator");
90ce3da70b43 Initial load duke parents: diff changeset	1021	}
90ce3da70b43 Initial load duke parents: diff changeset	1022	text = newIter;
90ce3da70b43 Initial load duke parents: diff changeset	1023	currentIndex=nextIndex=0;
90ce3da70b43 Initial load duke parents: diff changeset	1024	clearBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	1025	}
90ce3da70b43 Initial load duke parents: diff changeset	1026
90ce3da70b43 Initial load duke parents: diff changeset	1027	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	1028	// Private utility methods
90ce3da70b43 Initial load duke parents: diff changeset	1029	//-------------------------------------------------------------------------
90ce3da70b43 Initial load duke parents: diff changeset	1030
90ce3da70b43 Initial load duke parents: diff changeset	1031
90ce3da70b43 Initial load duke parents: diff changeset	1032	/* backward iteration --------------------------------------------------- */
90ce3da70b43 Initial load duke parents: diff changeset	1033
90ce3da70b43 Initial load duke parents: diff changeset	1034	/*
90ce3da70b43 Initial load duke parents: diff changeset	1035	* read backwards and get norm32
90ce3da70b43 Initial load duke parents: diff changeset	1036	* return 0 if the character is <minC
90ce3da70b43 Initial load duke parents: diff changeset	1037	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load duke parents: diff changeset	1038	* surrogate but read second!)
90ce3da70b43 Initial load duke parents: diff changeset	1039	*/
90ce3da70b43 Initial load duke parents: diff changeset	1040
90ce3da70b43 Initial load duke parents: diff changeset	1041	private static long getPrevNorm32(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1042	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1043	int/unsigned/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1044	char[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1045	long norm32;
90ce3da70b43 Initial load duke parents: diff changeset	1046	int ch=0;
90ce3da70b43 Initial load duke parents: diff changeset	1047	/* need src.hasPrevious() */
90ce3da70b43 Initial load duke parents: diff changeset	1048	if((ch=src.previous()) == UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1049	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1050	}
90ce3da70b43 Initial load duke parents: diff changeset	1051	chars[0]=(char)ch;
90ce3da70b43 Initial load duke parents: diff changeset	1052	chars[1]=0;
90ce3da70b43 Initial load duke parents: diff changeset	1053
90ce3da70b43 Initial load duke parents: diff changeset	1054	/* check for a surrogate before getting norm32 to see if we need to
90ce3da70b43 Initial load duke parents: diff changeset	1055	* predecrement further */
90ce3da70b43 Initial load duke parents: diff changeset	1056	if(chars[0]<minC) {
90ce3da70b43 Initial load duke parents: diff changeset	1057	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1058	} else if(!UTF16.isSurrogate(chars[0])) {
90ce3da70b43 Initial load duke parents: diff changeset	1059	return NormalizerImpl.getNorm32(chars[0]);
90ce3da70b43 Initial load duke parents: diff changeset	1060	} else if(UTF16.isLeadSurrogate(chars[0]) \|\| (src.getIndex()==0)) {
90ce3da70b43 Initial load duke parents: diff changeset	1061	/* unpaired surrogate */
90ce3da70b43 Initial load duke parents: diff changeset	1062	chars[1]=(char)src.current();
90ce3da70b43 Initial load duke parents: diff changeset	1063	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1064	} else if(UTF16.isLeadSurrogate(chars[1]=(char)src.previous())) {
90ce3da70b43 Initial load duke parents: diff changeset	1065	norm32=NormalizerImpl.getNorm32(chars[1]);
90ce3da70b43 Initial load duke parents: diff changeset	1066	if((norm32&mask)==0) {
90ce3da70b43 Initial load duke parents: diff changeset	1067	/* all surrogate pairs with this lead surrogate have irrelevant
90ce3da70b43 Initial load duke parents: diff changeset	1068	* data */
90ce3da70b43 Initial load duke parents: diff changeset	1069	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1070	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1071	/* norm32 must be a surrogate special */
90ce3da70b43 Initial load duke parents: diff changeset	1072	return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
90ce3da70b43 Initial load duke parents: diff changeset	1073	}
90ce3da70b43 Initial load duke parents: diff changeset	1074	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1075	/* unpaired second surrogate, undo the c2=src.previous() movement */
90ce3da70b43 Initial load duke parents: diff changeset	1076	src.moveIndex( 1);
90ce3da70b43 Initial load duke parents: diff changeset	1077	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1078	}
90ce3da70b43 Initial load duke parents: diff changeset	1079	}
90ce3da70b43 Initial load duke parents: diff changeset	1080
90ce3da70b43 Initial load duke parents: diff changeset	1081	private interface IsPrevBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1082	public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1083	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1084	int/unsigned/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1085	char[] chars);
90ce3da70b43 Initial load duke parents: diff changeset	1086	}
90ce3da70b43 Initial load duke parents: diff changeset	1087	private static final class IsPrevNFDSafe implements IsPrevBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1088	/*
90ce3da70b43 Initial load duke parents: diff changeset	1089	* for NF*D:
90ce3da70b43 Initial load duke parents: diff changeset	1090	* read backwards and check if the lead combining class is 0
90ce3da70b43 Initial load duke parents: diff changeset	1091	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load duke parents: diff changeset	1092	* surrogate but read second!)
90ce3da70b43 Initial load duke parents: diff changeset	1093	*/
90ce3da70b43 Initial load duke parents: diff changeset	1094	public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1095	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1096	int/unsigned/ ccOrQCMask,
90ce3da70b43 Initial load duke parents: diff changeset	1097	char[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1098
90ce3da70b43 Initial load duke parents: diff changeset	1099	return NormalizerImpl.isNFDSafe(getPrevNorm32(src, minC,
90ce3da70b43 Initial load duke parents: diff changeset	1100	ccOrQCMask, chars),
90ce3da70b43 Initial load duke parents: diff changeset	1101	ccOrQCMask,
90ce3da70b43 Initial load duke parents: diff changeset	1102	ccOrQCMask& NormalizerImpl.QC_MASK);
90ce3da70b43 Initial load duke parents: diff changeset	1103	}
90ce3da70b43 Initial load duke parents: diff changeset	1104	}
90ce3da70b43 Initial load duke parents: diff changeset	1105
90ce3da70b43 Initial load duke parents: diff changeset	1106	private static final class IsPrevTrueStarter implements IsPrevBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1107	/*
90ce3da70b43 Initial load duke parents: diff changeset	1108	* read backwards and check if the character is (or its decomposition
90ce3da70b43 Initial load duke parents: diff changeset	1109	* begins with) a "true starter" (cc==0 and NF*C_YES)
90ce3da70b43 Initial load duke parents: diff changeset	1110	* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
90ce3da70b43 Initial load duke parents: diff changeset	1111	* surrogate but read second!)
90ce3da70b43 Initial load duke parents: diff changeset	1112	*/
90ce3da70b43 Initial load duke parents: diff changeset	1113	public boolean isPrevBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1114	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1115	int/unsigned/ ccOrQCMask,
90ce3da70b43 Initial load duke parents: diff changeset	1116	char[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1117	long norm32;
90ce3da70b43 Initial load duke parents: diff changeset	1118	int/unsigned/ decompQCMask;
90ce3da70b43 Initial load duke parents: diff changeset	1119
90ce3da70b43 Initial load duke parents: diff changeset	1120	decompQCMask=(ccOrQCMask<<2)&0xf; /decomposition quick check mask/
90ce3da70b43 Initial load duke parents: diff changeset	1121	norm32=getPrevNorm32(src, minC, ccOrQCMask\|decompQCMask, chars);
90ce3da70b43 Initial load duke parents: diff changeset	1122	return NormalizerImpl.isTrueStarter(norm32,ccOrQCMask,decompQCMask);
90ce3da70b43 Initial load duke parents: diff changeset	1123	}
90ce3da70b43 Initial load duke parents: diff changeset	1124	}
90ce3da70b43 Initial load duke parents: diff changeset	1125
90ce3da70b43 Initial load duke parents: diff changeset	1126	private static int findPreviousIterationBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1127	IsPrevBoundary obj,
90ce3da70b43 Initial load duke parents: diff changeset	1128	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1129	int/mask/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1130	char[] buffer,
90ce3da70b43 Initial load duke parents: diff changeset	1131	int[] startIndex) {
90ce3da70b43 Initial load duke parents: diff changeset	1132	char[] chars=new char[2];
90ce3da70b43 Initial load duke parents: diff changeset	1133	boolean isBoundary;
90ce3da70b43 Initial load duke parents: diff changeset	1134
90ce3da70b43 Initial load duke parents: diff changeset	1135	/* fill the buffer from the end backwards */
90ce3da70b43 Initial load duke parents: diff changeset	1136	startIndex[0] = buffer.length;
90ce3da70b43 Initial load duke parents: diff changeset	1137	chars[0]=0;
90ce3da70b43 Initial load duke parents: diff changeset	1138	while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1139	isBoundary=obj.isPrevBoundary(src, minC, mask, chars);
90ce3da70b43 Initial load duke parents: diff changeset	1140
90ce3da70b43 Initial load duke parents: diff changeset	1141	/* always write this character to the front of the buffer */
90ce3da70b43 Initial load duke parents: diff changeset	1142	/* make sure there is enough space in the buffer */
90ce3da70b43 Initial load duke parents: diff changeset	1143	if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {
90ce3da70b43 Initial load duke parents: diff changeset	1144
90ce3da70b43 Initial load duke parents: diff changeset	1145	// grow the buffer
90ce3da70b43 Initial load duke parents: diff changeset	1146	char[] newBuf = new char[buffer.length*2];
90ce3da70b43 Initial load duke parents: diff changeset	1147	/* move the current buffer contents up */
90ce3da70b43 Initial load duke parents: diff changeset	1148	System.arraycopy(buffer,startIndex[0],newBuf,
90ce3da70b43 Initial load duke parents: diff changeset	1149	newBuf.length-(buffer.length-startIndex[0]),
90ce3da70b43 Initial load duke parents: diff changeset	1150	buffer.length-startIndex[0]);
90ce3da70b43 Initial load duke parents: diff changeset	1151	//adjust the startIndex
90ce3da70b43 Initial load duke parents: diff changeset	1152	startIndex[0]+=newBuf.length-buffer.length;
90ce3da70b43 Initial load duke parents: diff changeset	1153
90ce3da70b43 Initial load duke parents: diff changeset	1154	buffer=newBuf;
90ce3da70b43 Initial load duke parents: diff changeset	1155	newBuf=null;
90ce3da70b43 Initial load duke parents: diff changeset	1156
90ce3da70b43 Initial load duke parents: diff changeset	1157	}
90ce3da70b43 Initial load duke parents: diff changeset	1158
90ce3da70b43 Initial load duke parents: diff changeset	1159	buffer[--startIndex[0]]=chars[0];
90ce3da70b43 Initial load duke parents: diff changeset	1160	if(chars[1]!=0) {
90ce3da70b43 Initial load duke parents: diff changeset	1161	buffer[--startIndex[0]]=chars[1];
90ce3da70b43 Initial load duke parents: diff changeset	1162	}
90ce3da70b43 Initial load duke parents: diff changeset	1163
90ce3da70b43 Initial load duke parents: diff changeset	1164	/* stop if this just-copied character is a boundary */
90ce3da70b43 Initial load duke parents: diff changeset	1165	if(isBoundary) {
90ce3da70b43 Initial load duke parents: diff changeset	1166	break;
90ce3da70b43 Initial load duke parents: diff changeset	1167	}
90ce3da70b43 Initial load duke parents: diff changeset	1168	}
90ce3da70b43 Initial load duke parents: diff changeset	1169
90ce3da70b43 Initial load duke parents: diff changeset	1170	/* return the length of the buffer contents */
90ce3da70b43 Initial load duke parents: diff changeset	1171	return buffer.length-startIndex[0];
90ce3da70b43 Initial load duke parents: diff changeset	1172	}
90ce3da70b43 Initial load duke parents: diff changeset	1173
90ce3da70b43 Initial load duke parents: diff changeset	1174	private static int previous(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1175	char[] dest, int destStart, int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	1176	Mode mode,
90ce3da70b43 Initial load duke parents: diff changeset	1177	boolean doNormalize,
90ce3da70b43 Initial load duke parents: diff changeset	1178	boolean[] pNeededToNormalize,
90ce3da70b43 Initial load duke parents: diff changeset	1179	int options) {
90ce3da70b43 Initial load duke parents: diff changeset	1180
90ce3da70b43 Initial load duke parents: diff changeset	1181	IsPrevBoundary isPreviousBoundary;
90ce3da70b43 Initial load duke parents: diff changeset	1182	int destLength, bufferLength;
90ce3da70b43 Initial load duke parents: diff changeset	1183	int/unsigned/ mask;
90ce3da70b43 Initial load duke parents: diff changeset	1184	int c,c2;
90ce3da70b43 Initial load duke parents: diff changeset	1185
90ce3da70b43 Initial load duke parents: diff changeset	1186	char minC;
90ce3da70b43 Initial load duke parents: diff changeset	1187	int destCapacity = destLimit-destStart;
90ce3da70b43 Initial load duke parents: diff changeset	1188	destLength=0;
90ce3da70b43 Initial load duke parents: diff changeset	1189
90ce3da70b43 Initial load duke parents: diff changeset	1190	if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	1191	pNeededToNormalize[0]=false;
90ce3da70b43 Initial load duke parents: diff changeset	1192	}
90ce3da70b43 Initial load duke parents: diff changeset	1193	minC = (char)mode.getMinC();
90ce3da70b43 Initial load duke parents: diff changeset	1194	mask = mode.getMask();
90ce3da70b43 Initial load duke parents: diff changeset	1195	isPreviousBoundary = mode.getPrevBoundary();
90ce3da70b43 Initial load duke parents: diff changeset	1196
90ce3da70b43 Initial load duke parents: diff changeset	1197	if(isPreviousBoundary==null) {
90ce3da70b43 Initial load duke parents: diff changeset	1198	destLength=0;
90ce3da70b43 Initial load duke parents: diff changeset	1199	if((c=src.previous())>=0) {
90ce3da70b43 Initial load duke parents: diff changeset	1200	destLength=1;
90ce3da70b43 Initial load duke parents: diff changeset	1201	if(UTF16.isTrailSurrogate((char)c)) {
90ce3da70b43 Initial load duke parents: diff changeset	1202	c2= src.previous();
90ce3da70b43 Initial load duke parents: diff changeset	1203	if(c2!= UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1204	if(UTF16.isLeadSurrogate((char)c2)) {
90ce3da70b43 Initial load duke parents: diff changeset	1205	if(destCapacity>=2) {
90ce3da70b43 Initial load duke parents: diff changeset	1206	dest[1]=(char)c; // trail surrogate
90ce3da70b43 Initial load duke parents: diff changeset	1207	destLength=2;
90ce3da70b43 Initial load duke parents: diff changeset	1208	}
90ce3da70b43 Initial load duke parents: diff changeset	1209	// lead surrogate to be written below
90ce3da70b43 Initial load duke parents: diff changeset	1210	c=c2;
90ce3da70b43 Initial load duke parents: diff changeset	1211	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1212	src.moveIndex(1);
90ce3da70b43 Initial load duke parents: diff changeset	1213	}
90ce3da70b43 Initial load duke parents: diff changeset	1214	}
90ce3da70b43 Initial load duke parents: diff changeset	1215	}
90ce3da70b43 Initial load duke parents: diff changeset	1216
90ce3da70b43 Initial load duke parents: diff changeset	1217	if(destCapacity>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1218	dest[0]=(char)c;
90ce3da70b43 Initial load duke parents: diff changeset	1219	}
90ce3da70b43 Initial load duke parents: diff changeset	1220	}
90ce3da70b43 Initial load duke parents: diff changeset	1221	return destLength;
90ce3da70b43 Initial load duke parents: diff changeset	1222	}
90ce3da70b43 Initial load duke parents: diff changeset	1223
90ce3da70b43 Initial load duke parents: diff changeset	1224	char[] buffer = new char[100];
90ce3da70b43 Initial load duke parents: diff changeset	1225	int[] startIndex= new int[1];
90ce3da70b43 Initial load duke parents: diff changeset	1226	bufferLength=findPreviousIterationBoundary(src,
90ce3da70b43 Initial load duke parents: diff changeset	1227	isPreviousBoundary,
90ce3da70b43 Initial load duke parents: diff changeset	1228	minC, mask,buffer,
90ce3da70b43 Initial load duke parents: diff changeset	1229	startIndex);
90ce3da70b43 Initial load duke parents: diff changeset	1230	if(bufferLength>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1231	if(doNormalize) {
90ce3da70b43 Initial load duke parents: diff changeset	1232	destLength=NormalizerBase.normalize(buffer,startIndex[0],
90ce3da70b43 Initial load duke parents: diff changeset	1233	startIndex[0]+bufferLength,
90ce3da70b43 Initial load duke parents: diff changeset	1234	dest, destStart,destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	1235	mode, options);
90ce3da70b43 Initial load duke parents: diff changeset	1236
90ce3da70b43 Initial load duke parents: diff changeset	1237	if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	1238	pNeededToNormalize[0]=(boolean)(destLength!=bufferLength \|\|
90ce3da70b43 Initial load duke parents: diff changeset	1239	Utility.arrayRegionMatches(
90ce3da70b43 Initial load duke parents: diff changeset	1240	buffer,0,dest,
90ce3da70b43 Initial load duke parents: diff changeset	1241	destStart,destLimit
90ce3da70b43 Initial load duke parents: diff changeset	1242	));
90ce3da70b43 Initial load duke parents: diff changeset	1243	}
90ce3da70b43 Initial load duke parents: diff changeset	1244	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1245	/* just copy the source characters */
90ce3da70b43 Initial load duke parents: diff changeset	1246	if(destCapacity>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1247	System.arraycopy(buffer,startIndex[0],dest,0,
90ce3da70b43 Initial load duke parents: diff changeset	1248	(bufferLength<destCapacity) ?
90ce3da70b43 Initial load duke parents: diff changeset	1249	bufferLength : destCapacity
90ce3da70b43 Initial load duke parents: diff changeset	1250	);
90ce3da70b43 Initial load duke parents: diff changeset	1251	}
90ce3da70b43 Initial load duke parents: diff changeset	1252	}
90ce3da70b43 Initial load duke parents: diff changeset	1253	}
90ce3da70b43 Initial load duke parents: diff changeset	1254
90ce3da70b43 Initial load duke parents: diff changeset	1255
90ce3da70b43 Initial load duke parents: diff changeset	1256	return destLength;
90ce3da70b43 Initial load duke parents: diff changeset	1257	}
90ce3da70b43 Initial load duke parents: diff changeset	1258
90ce3da70b43 Initial load duke parents: diff changeset	1259
90ce3da70b43 Initial load duke parents: diff changeset	1260
90ce3da70b43 Initial load duke parents: diff changeset	1261	/* forward iteration ---------------------------------------------------- */
90ce3da70b43 Initial load duke parents: diff changeset	1262	/*
90ce3da70b43 Initial load duke parents: diff changeset	1263	* read forward and check if the character is a next-iteration boundary
90ce3da70b43 Initial load duke parents: diff changeset	1264	* if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load duke parents: diff changeset	1265	*/
90ce3da70b43 Initial load duke parents: diff changeset	1266	private interface IsNextBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1267	boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1268	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1269	int/unsigned/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1270	int[] chars);
90ce3da70b43 Initial load duke parents: diff changeset	1271	}
90ce3da70b43 Initial load duke parents: diff changeset	1272	/*
90ce3da70b43 Initial load duke parents: diff changeset	1273	* read forward and get norm32
90ce3da70b43 Initial load duke parents: diff changeset	1274	* return 0 if the character is <minC
90ce3da70b43 Initial load duke parents: diff changeset	1275	* if c2!=0 then (c2, c) is a surrogate pair
90ce3da70b43 Initial load duke parents: diff changeset	1276	* always reads complete characters
90ce3da70b43 Initial load duke parents: diff changeset	1277	*/
90ce3da70b43 Initial load duke parents: diff changeset	1278	private static long /unsigned/ getNextNorm32(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1279	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1280	int/unsigned/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1281	int[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1282	long norm32;
90ce3da70b43 Initial load duke parents: diff changeset	1283
90ce3da70b43 Initial load duke parents: diff changeset	1284	/* need src.hasNext() to be true */
90ce3da70b43 Initial load duke parents: diff changeset	1285	chars[0]=src.next();
90ce3da70b43 Initial load duke parents: diff changeset	1286	chars[1]=0;
90ce3da70b43 Initial load duke parents: diff changeset	1287
90ce3da70b43 Initial load duke parents: diff changeset	1288	if(chars[0]<minC) {
90ce3da70b43 Initial load duke parents: diff changeset	1289	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1290	}
90ce3da70b43 Initial load duke parents: diff changeset	1291
90ce3da70b43 Initial load duke parents: diff changeset	1292	norm32=NormalizerImpl.getNorm32((char)chars[0]);
90ce3da70b43 Initial load duke parents: diff changeset	1293	if(UTF16.isLeadSurrogate((char)chars[0])) {
90ce3da70b43 Initial load duke parents: diff changeset	1294	if(src.current()!=UCharacterIterator.DONE &&
90ce3da70b43 Initial load duke parents: diff changeset	1295	UTF16.isTrailSurrogate((char)(chars[1]=src.current()))) {
90ce3da70b43 Initial load duke parents: diff changeset	1296	src.moveIndex(1); /* skip the c2 surrogate */
90ce3da70b43 Initial load duke parents: diff changeset	1297	if((norm32&mask)==0) {
90ce3da70b43 Initial load duke parents: diff changeset	1298	/* irrelevant data */
90ce3da70b43 Initial load duke parents: diff changeset	1299	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1300	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1301	/* norm32 must be a surrogate special */
90ce3da70b43 Initial load duke parents: diff changeset	1302	return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
90ce3da70b43 Initial load duke parents: diff changeset	1303	}
90ce3da70b43 Initial load duke parents: diff changeset	1304	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1305	/* unmatched surrogate */
90ce3da70b43 Initial load duke parents: diff changeset	1306	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1307	}
90ce3da70b43 Initial load duke parents: diff changeset	1308	}
90ce3da70b43 Initial load duke parents: diff changeset	1309	return norm32;
90ce3da70b43 Initial load duke parents: diff changeset	1310	}
90ce3da70b43 Initial load duke parents: diff changeset	1311
90ce3da70b43 Initial load duke parents: diff changeset	1312
90ce3da70b43 Initial load duke parents: diff changeset	1313	/*
90ce3da70b43 Initial load duke parents: diff changeset	1314	* for NF*D:
90ce3da70b43 Initial load duke parents: diff changeset	1315	* read forward and check if the lead combining class is 0
90ce3da70b43 Initial load duke parents: diff changeset	1316	* if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load duke parents: diff changeset	1317	*/
90ce3da70b43 Initial load duke parents: diff changeset	1318	private static final class IsNextNFDSafe implements IsNextBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1319	public boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1320	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1321	int/unsigned/ ccOrQCMask,
90ce3da70b43 Initial load duke parents: diff changeset	1322	int[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1323	return NormalizerImpl.isNFDSafe(getNextNorm32(src,minC,ccOrQCMask,chars),
90ce3da70b43 Initial load duke parents: diff changeset	1324	ccOrQCMask, ccOrQCMask&NormalizerImpl.QC_MASK);
90ce3da70b43 Initial load duke parents: diff changeset	1325	}
90ce3da70b43 Initial load duke parents: diff changeset	1326	}
90ce3da70b43 Initial load duke parents: diff changeset	1327
90ce3da70b43 Initial load duke parents: diff changeset	1328	/*
90ce3da70b43 Initial load duke parents: diff changeset	1329	* for NF*C:
90ce3da70b43 Initial load duke parents: diff changeset	1330	* read forward and check if the character is (or its decomposition begins
90ce3da70b43 Initial load duke parents: diff changeset	1331	* with) a "true starter" (cc==0 and NF*C_YES)
90ce3da70b43 Initial load duke parents: diff changeset	1332	* if c2!=0 then (c, c2) is a surrogate pair
90ce3da70b43 Initial load duke parents: diff changeset	1333	*/
90ce3da70b43 Initial load duke parents: diff changeset	1334	private static final class IsNextTrueStarter implements IsNextBoundary{
90ce3da70b43 Initial load duke parents: diff changeset	1335	public boolean isNextBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1336	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1337	int/unsigned/ ccOrQCMask,
90ce3da70b43 Initial load duke parents: diff changeset	1338	int[] chars) {
90ce3da70b43 Initial load duke parents: diff changeset	1339	long norm32;
90ce3da70b43 Initial load duke parents: diff changeset	1340	int/unsigned/ decompQCMask;
90ce3da70b43 Initial load duke parents: diff changeset	1341
90ce3da70b43 Initial load duke parents: diff changeset	1342	decompQCMask=(ccOrQCMask<<2)&0xf; /decomposition quick check mask/
90ce3da70b43 Initial load duke parents: diff changeset	1343	norm32=getNextNorm32(src, minC, ccOrQCMask\|decompQCMask, chars);
90ce3da70b43 Initial load duke parents: diff changeset	1344	return NormalizerImpl.isTrueStarter(norm32, ccOrQCMask, decompQCMask);
90ce3da70b43 Initial load duke parents: diff changeset	1345	}
90ce3da70b43 Initial load duke parents: diff changeset	1346	}
90ce3da70b43 Initial load duke parents: diff changeset	1347
90ce3da70b43 Initial load duke parents: diff changeset	1348	private static int findNextIterationBoundary(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1349	IsNextBoundary obj,
90ce3da70b43 Initial load duke parents: diff changeset	1350	int/unsigned/ minC,
90ce3da70b43 Initial load duke parents: diff changeset	1351	int/unsigned/ mask,
90ce3da70b43 Initial load duke parents: diff changeset	1352	char[] buffer) {
90ce3da70b43 Initial load duke parents: diff changeset	1353	if(src.current()==UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1354	return 0;
90ce3da70b43 Initial load duke parents: diff changeset	1355	}
90ce3da70b43 Initial load duke parents: diff changeset	1356
90ce3da70b43 Initial load duke parents: diff changeset	1357	/* get one character and ignore its properties */
90ce3da70b43 Initial load duke parents: diff changeset	1358	int[] chars = new int[2];
90ce3da70b43 Initial load duke parents: diff changeset	1359	chars[0]=src.next();
90ce3da70b43 Initial load duke parents: diff changeset	1360	buffer[0]=(char)chars[0];
90ce3da70b43 Initial load duke parents: diff changeset	1361	int bufferIndex = 1;
90ce3da70b43 Initial load duke parents: diff changeset	1362
90ce3da70b43 Initial load duke parents: diff changeset	1363	if(UTF16.isLeadSurrogate((char)chars[0])&&
90ce3da70b43 Initial load duke parents: diff changeset	1364	src.current()!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1365	if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
90ce3da70b43 Initial load duke parents: diff changeset	1366	buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load duke parents: diff changeset	1367	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1368	src.moveIndex(-1); /* back out the non-trail-surrogate */
90ce3da70b43 Initial load duke parents: diff changeset	1369	}
90ce3da70b43 Initial load duke parents: diff changeset	1370	}
90ce3da70b43 Initial load duke parents: diff changeset	1371
90ce3da70b43 Initial load duke parents: diff changeset	1372	/* get all following characters until we see a boundary */
90ce3da70b43 Initial load duke parents: diff changeset	1373	/* checking hasNext() instead of c!=DONE on the off-chance that U+ffff
90ce3da70b43 Initial load duke parents: diff changeset	1374	* is part of the string */
90ce3da70b43 Initial load duke parents: diff changeset	1375	while( src.current()!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1376	if(obj.isNextBoundary(src, minC, mask, chars)) {
90ce3da70b43 Initial load duke parents: diff changeset	1377	/* back out the latest movement to stop at the boundary */
90ce3da70b43 Initial load duke parents: diff changeset	1378	src.moveIndex(chars[1]==0 ? -1 : -2);
90ce3da70b43 Initial load duke parents: diff changeset	1379	break;
90ce3da70b43 Initial load duke parents: diff changeset	1380	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1381	if(bufferIndex+(chars[1]==0 ? 1 : 2)<=buffer.length) {
90ce3da70b43 Initial load duke parents: diff changeset	1382	buffer[bufferIndex++]=(char)chars[0];
90ce3da70b43 Initial load duke parents: diff changeset	1383	if(chars[1]!=0) {
90ce3da70b43 Initial load duke parents: diff changeset	1384	buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load duke parents: diff changeset	1385	}
90ce3da70b43 Initial load duke parents: diff changeset	1386	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1387	char[] newBuf = new char[buffer.length*2];
90ce3da70b43 Initial load duke parents: diff changeset	1388	System.arraycopy(buffer,0,newBuf,0,bufferIndex);
90ce3da70b43 Initial load duke parents: diff changeset	1389	buffer = newBuf;
90ce3da70b43 Initial load duke parents: diff changeset	1390	buffer[bufferIndex++]=(char)chars[0];
90ce3da70b43 Initial load duke parents: diff changeset	1391	if(chars[1]!=0) {
90ce3da70b43 Initial load duke parents: diff changeset	1392	buffer[bufferIndex++]=(char)chars[1];
90ce3da70b43 Initial load duke parents: diff changeset	1393	}
90ce3da70b43 Initial load duke parents: diff changeset	1394	}
90ce3da70b43 Initial load duke parents: diff changeset	1395	}
90ce3da70b43 Initial load duke parents: diff changeset	1396	}
90ce3da70b43 Initial load duke parents: diff changeset	1397
90ce3da70b43 Initial load duke parents: diff changeset	1398	/* return the length of the buffer contents */
90ce3da70b43 Initial load duke parents: diff changeset	1399	return bufferIndex;
90ce3da70b43 Initial load duke parents: diff changeset	1400	}
90ce3da70b43 Initial load duke parents: diff changeset	1401
90ce3da70b43 Initial load duke parents: diff changeset	1402	private static int next(UCharacterIterator src,
90ce3da70b43 Initial load duke parents: diff changeset	1403	char[] dest, int destStart, int destLimit,
90ce3da70b43 Initial load duke parents: diff changeset	1404	NormalizerBase.Mode mode,
90ce3da70b43 Initial load duke parents: diff changeset	1405	boolean doNormalize,
90ce3da70b43 Initial load duke parents: diff changeset	1406	boolean[] pNeededToNormalize,
90ce3da70b43 Initial load duke parents: diff changeset	1407	int options) {
90ce3da70b43 Initial load duke parents: diff changeset	1408
90ce3da70b43 Initial load duke parents: diff changeset	1409	IsNextBoundary isNextBoundary;
90ce3da70b43 Initial load duke parents: diff changeset	1410	int /unsigned/ mask;
90ce3da70b43 Initial load duke parents: diff changeset	1411	int /unsigned/ bufferLength;
90ce3da70b43 Initial load duke parents: diff changeset	1412	int c,c2;
90ce3da70b43 Initial load duke parents: diff changeset	1413	char minC;
90ce3da70b43 Initial load duke parents: diff changeset	1414	int destCapacity = destLimit - destStart;
90ce3da70b43 Initial load duke parents: diff changeset	1415	int destLength = 0;
90ce3da70b43 Initial load duke parents: diff changeset	1416	if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	1417	pNeededToNormalize[0]=false;
90ce3da70b43 Initial load duke parents: diff changeset	1418	}
90ce3da70b43 Initial load duke parents: diff changeset	1419
90ce3da70b43 Initial load duke parents: diff changeset	1420	minC = (char)mode.getMinC();
90ce3da70b43 Initial load duke parents: diff changeset	1421	mask = mode.getMask();
90ce3da70b43 Initial load duke parents: diff changeset	1422	isNextBoundary = mode.getNextBoundary();
90ce3da70b43 Initial load duke parents: diff changeset	1423
90ce3da70b43 Initial load duke parents: diff changeset	1424	if(isNextBoundary==null) {
90ce3da70b43 Initial load duke parents: diff changeset	1425	destLength=0;
90ce3da70b43 Initial load duke parents: diff changeset	1426	c=src.next();
90ce3da70b43 Initial load duke parents: diff changeset	1427	if(c!=UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1428	destLength=1;
90ce3da70b43 Initial load duke parents: diff changeset	1429	if(UTF16.isLeadSurrogate((char)c)) {
90ce3da70b43 Initial load duke parents: diff changeset	1430	c2= src.next();
90ce3da70b43 Initial load duke parents: diff changeset	1431	if(c2!= UCharacterIterator.DONE) {
90ce3da70b43 Initial load duke parents: diff changeset	1432	if(UTF16.isTrailSurrogate((char)c2)) {
90ce3da70b43 Initial load duke parents: diff changeset	1433	if(destCapacity>=2) {
90ce3da70b43 Initial load duke parents: diff changeset	1434	dest[1]=(char)c2; // trail surrogate
90ce3da70b43 Initial load duke parents: diff changeset	1435	destLength=2;
90ce3da70b43 Initial load duke parents: diff changeset	1436	}
90ce3da70b43 Initial load duke parents: diff changeset	1437	// lead surrogate to be written below
90ce3da70b43 Initial load duke parents: diff changeset	1438	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1439	src.moveIndex(-1);
90ce3da70b43 Initial load duke parents: diff changeset	1440	}
90ce3da70b43 Initial load duke parents: diff changeset	1441	}
90ce3da70b43 Initial load duke parents: diff changeset	1442	}
90ce3da70b43 Initial load duke parents: diff changeset	1443
90ce3da70b43 Initial load duke parents: diff changeset	1444	if(destCapacity>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1445	dest[0]=(char)c;
90ce3da70b43 Initial load duke parents: diff changeset	1446	}
90ce3da70b43 Initial load duke parents: diff changeset	1447	}
90ce3da70b43 Initial load duke parents: diff changeset	1448	return destLength;
90ce3da70b43 Initial load duke parents: diff changeset	1449	}
90ce3da70b43 Initial load duke parents: diff changeset	1450
90ce3da70b43 Initial load duke parents: diff changeset	1451	char[] buffer=new char[100];
90ce3da70b43 Initial load duke parents: diff changeset	1452	int[] startIndex = new int[1];
90ce3da70b43 Initial load duke parents: diff changeset	1453	bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
90ce3da70b43 Initial load duke parents: diff changeset	1454	buffer);
90ce3da70b43 Initial load duke parents: diff changeset	1455	if(bufferLength>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1456	if(doNormalize) {
90ce3da70b43 Initial load duke parents: diff changeset	1457	destLength=mode.normalize(buffer,startIndex[0],bufferLength,
90ce3da70b43 Initial load duke parents: diff changeset	1458	dest,destStart,destLimit, options);
90ce3da70b43 Initial load duke parents: diff changeset	1459
90ce3da70b43 Initial load duke parents: diff changeset	1460	if(pNeededToNormalize!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	1461	pNeededToNormalize[0]=(boolean)(destLength!=bufferLength \|\|
90ce3da70b43 Initial load duke parents: diff changeset	1462	Utility.arrayRegionMatches(buffer,startIndex[0],
90ce3da70b43 Initial load duke parents: diff changeset	1463	dest,destStart,
90ce3da70b43 Initial load duke parents: diff changeset	1464	destLength));
90ce3da70b43 Initial load duke parents: diff changeset	1465	}
90ce3da70b43 Initial load duke parents: diff changeset	1466	} else {
90ce3da70b43 Initial load duke parents: diff changeset	1467	/* just copy the source characters */
90ce3da70b43 Initial load duke parents: diff changeset	1468	if(destCapacity>0) {
90ce3da70b43 Initial load duke parents: diff changeset	1469	System.arraycopy(buffer,0,dest,destStart,
90ce3da70b43 Initial load duke parents: diff changeset	1470	Math.min(bufferLength,destCapacity)
90ce3da70b43 Initial load duke parents: diff changeset	1471	);
90ce3da70b43 Initial load duke parents: diff changeset	1472	}
90ce3da70b43 Initial load duke parents: diff changeset	1473
90ce3da70b43 Initial load duke parents: diff changeset	1474
90ce3da70b43 Initial load duke parents: diff changeset	1475	}
90ce3da70b43 Initial load duke parents: diff changeset	1476	}
90ce3da70b43 Initial load duke parents: diff changeset	1477	return destLength;
90ce3da70b43 Initial load duke parents: diff changeset	1478	}
90ce3da70b43 Initial load duke parents: diff changeset	1479
90ce3da70b43 Initial load duke parents: diff changeset	1480	private void clearBuffer() {
90ce3da70b43 Initial load duke parents: diff changeset	1481	bufferLimit=bufferStart=bufferPos=0;
90ce3da70b43 Initial load duke parents: diff changeset	1482	}
90ce3da70b43 Initial load duke parents: diff changeset	1483
90ce3da70b43 Initial load duke parents: diff changeset	1484	private boolean nextNormalize() {
90ce3da70b43 Initial load duke parents: diff changeset	1485
90ce3da70b43 Initial load duke parents: diff changeset	1486	clearBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	1487	currentIndex=nextIndex;
90ce3da70b43 Initial load duke parents: diff changeset	1488	text.setIndex(nextIndex);
90ce3da70b43 Initial load duke parents: diff changeset	1489
90ce3da70b43 Initial load duke parents: diff changeset	1490	bufferLimit=next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
90ce3da70b43 Initial load duke parents: diff changeset	1491
90ce3da70b43 Initial load duke parents: diff changeset	1492	nextIndex=text.getIndex();
90ce3da70b43 Initial load duke parents: diff changeset	1493	return (bufferLimit>0);
90ce3da70b43 Initial load duke parents: diff changeset	1494	}
90ce3da70b43 Initial load duke parents: diff changeset	1495
90ce3da70b43 Initial load duke parents: diff changeset	1496	private boolean previousNormalize() {
90ce3da70b43 Initial load duke parents: diff changeset	1497
90ce3da70b43 Initial load duke parents: diff changeset	1498	clearBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	1499	nextIndex=currentIndex;
90ce3da70b43 Initial load duke parents: diff changeset	1500	text.setIndex(currentIndex);
90ce3da70b43 Initial load duke parents: diff changeset	1501	bufferLimit=previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
90ce3da70b43 Initial load duke parents: diff changeset	1502
90ce3da70b43 Initial load duke parents: diff changeset	1503	currentIndex=text.getIndex();
90ce3da70b43 Initial load duke parents: diff changeset	1504	bufferPos = bufferLimit;
90ce3da70b43 Initial load duke parents: diff changeset	1505	return bufferLimit>0;
90ce3da70b43 Initial load duke parents: diff changeset	1506	}
90ce3da70b43 Initial load duke parents: diff changeset	1507
90ce3da70b43 Initial load duke parents: diff changeset	1508	private int getCodePointAt(int index) {
90ce3da70b43 Initial load duke parents: diff changeset	1509	if( UTF16.isSurrogate(buffer[index])) {
90ce3da70b43 Initial load duke parents: diff changeset	1510	if(UTF16.isLeadSurrogate(buffer[index])) {
90ce3da70b43 Initial load duke parents: diff changeset	1511	if((index+1)<bufferLimit &&
90ce3da70b43 Initial load duke parents: diff changeset	1512	UTF16.isTrailSurrogate(buffer[index+1])) {
90ce3da70b43 Initial load duke parents: diff changeset	1513	return UCharacterProperty.getRawSupplementary(
90ce3da70b43 Initial load duke parents: diff changeset	1514	buffer[index],
90ce3da70b43 Initial load duke parents: diff changeset	1515	buffer[index+1]
90ce3da70b43 Initial load duke parents: diff changeset	1516	);
90ce3da70b43 Initial load duke parents: diff changeset	1517	}
90ce3da70b43 Initial load duke parents: diff changeset	1518	}else if(UTF16.isTrailSurrogate(buffer[index])) {
90ce3da70b43 Initial load duke parents: diff changeset	1519	if(index>0 && UTF16.isLeadSurrogate(buffer[index-1])) {
90ce3da70b43 Initial load duke parents: diff changeset	1520	return UCharacterProperty.getRawSupplementary(
90ce3da70b43 Initial load duke parents: diff changeset	1521	buffer[index-1],
90ce3da70b43 Initial load duke parents: diff changeset	1522	buffer[index]
90ce3da70b43 Initial load duke parents: diff changeset	1523	);
90ce3da70b43 Initial load duke parents: diff changeset	1524	}
90ce3da70b43 Initial load duke parents: diff changeset	1525	}
90ce3da70b43 Initial load duke parents: diff changeset	1526	}
90ce3da70b43 Initial load duke parents: diff changeset	1527	return buffer[index];
90ce3da70b43 Initial load duke parents: diff changeset	1528
90ce3da70b43 Initial load duke parents: diff changeset	1529	}
90ce3da70b43 Initial load duke parents: diff changeset	1530
90ce3da70b43 Initial load duke parents: diff changeset	1531	/**
90ce3da70b43 Initial load duke parents: diff changeset	1532	* Internal API
90ce3da70b43 Initial load duke parents: diff changeset	1533	* @internal
90ce3da70b43 Initial load duke parents: diff changeset	1534	*/
90ce3da70b43 Initial load duke parents: diff changeset	1535	public static boolean isNFSkippable(int c, Mode mode) {
90ce3da70b43 Initial load duke parents: diff changeset	1536	return mode.isNFSkippable(c);
90ce3da70b43 Initial load duke parents: diff changeset	1537	}
90ce3da70b43 Initial load duke parents: diff changeset	1538
90ce3da70b43 Initial load duke parents: diff changeset	1539	//
90ce3da70b43 Initial load duke parents: diff changeset	1540	// Options
90ce3da70b43 Initial load duke parents: diff changeset	1541	//
90ce3da70b43 Initial load duke parents: diff changeset	1542
90ce3da70b43 Initial load duke parents: diff changeset	1543	/*
90ce3da70b43 Initial load duke parents: diff changeset	1544	* Default option for Unicode 3.2.0 normalization.
90ce3da70b43 Initial load duke parents: diff changeset	1545	* Corrigendum 4 was fixed in Unicode 3.2.0 but isn't supported in
90ce3da70b43 Initial load duke parents: diff changeset	1546	* IDNA/StringPrep.
90ce3da70b43 Initial load duke parents: diff changeset	1547	* The public review issue #29 was fixed in Unicode 4.1.0. Corrigendum 5
90ce3da70b43 Initial load duke parents: diff changeset	1548	* allowed Unicode 3.2 to 4.0.1 to apply the fix for PRI #29, but it isn't
90ce3da70b43 Initial load duke parents: diff changeset	1549	* supported by IDNA/StringPrep as well as Corrigendum 4.
90ce3da70b43 Initial load duke parents: diff changeset	1550	*/
90ce3da70b43 Initial load duke parents: diff changeset	1551	public static final int UNICODE_3_2_0_ORIGINAL =
90ce3da70b43 Initial load duke parents: diff changeset	1552	UNICODE_3_2 \|
90ce3da70b43 Initial load duke parents: diff changeset	1553	NormalizerImpl.WITHOUT_CORRIGENDUM4_CORRECTIONS \|
90ce3da70b43 Initial load duke parents: diff changeset	1554	NormalizerImpl.BEFORE_PRI_29;
90ce3da70b43 Initial load duke parents: diff changeset	1555
90ce3da70b43 Initial load duke parents: diff changeset	1556	/*
90ce3da70b43 Initial load duke parents: diff changeset	1557	* Default option for the latest Unicode normalization. This option is
90ce3da70b43 Initial load duke parents: diff changeset	1558	* provided mainly for testing.
90ce3da70b43 Initial load duke parents: diff changeset	1559	* The value zero means that normalization is done with the fixes for
90ce3da70b43 Initial load duke parents: diff changeset	1560	* - Corrigendum 4 (Five CJK Canonical Mapping Errors)
90ce3da70b43 Initial load duke parents: diff changeset	1561	* - Corrigendum 5 (Normalization Idempotency)
90ce3da70b43 Initial load duke parents: diff changeset	1562	*/
90ce3da70b43 Initial load duke parents: diff changeset	1563	public static final int UNICODE_LATEST = 0x00;
90ce3da70b43 Initial load duke parents: diff changeset	1564
90ce3da70b43 Initial load duke parents: diff changeset	1565	//
90ce3da70b43 Initial load duke parents: diff changeset	1566	// public constructor and methods for java.text.Normalizer and
90ce3da70b43 Initial load duke parents: diff changeset	1567	// sun.text.Normalizer
90ce3da70b43 Initial load duke parents: diff changeset	1568	//
90ce3da70b43 Initial load duke parents: diff changeset	1569
/**
 * Creates a new <code>Normalizer</code> object that iterates over the
 * normalized form of the given string, using the
 * <code>UNICODE_LATEST</code> options.
 *
 * @param str  the string to be normalized; normalization starts at the
 *             beginning of the string
 * @param mode the normalization mode
 */
public NormalizerBase(String str, Mode mode) {
    this(str, mode, UNICODE_LATEST);
}
90ce3da70b43 Initial load duke parents: diff changeset	1582
90ce3da70b43 Initial load duke parents: diff changeset	1583	/**
90ce3da70b43 Initial load duke parents: diff changeset	1584	* Normalizes a <code>String</code> using the given normalization form.
90ce3da70b43 Initial load duke parents: diff changeset	1585	*
90ce3da70b43 Initial load duke parents: diff changeset	1586	* @param str the input string to be normalized.
90ce3da70b43 Initial load duke parents: diff changeset	1587	* @param form the normalization form
90ce3da70b43 Initial load duke parents: diff changeset	1588	*/
90ce3da70b43 Initial load duke parents: diff changeset	1589	public static String normalize(String str, Normalizer.Form form) {
90ce3da70b43 Initial load duke parents: diff changeset	1590	return normalize(str, form, UNICODE_LATEST);
90ce3da70b43 Initial load duke parents: diff changeset	1591	}
90ce3da70b43 Initial load duke parents: diff changeset	1592
90ce3da70b43 Initial load duke parents: diff changeset	1593	/**
90ce3da70b43 Initial load duke parents: diff changeset	1594	* Normalizes a <code>String</code> using the given normalization form.
90ce3da70b43 Initial load duke parents: diff changeset	1595	*
90ce3da70b43 Initial load duke parents: diff changeset	1596	* @param str the input string to be normalized.
90ce3da70b43 Initial load duke parents: diff changeset	1597	* @param form the normalization form
90ce3da70b43 Initial load duke parents: diff changeset	1598	* @param options the optional features to be enabled.
90ce3da70b43 Initial load duke parents: diff changeset	1599	*/
90ce3da70b43 Initial load duke parents: diff changeset	1600	public static String normalize(String str, Normalizer.Form form, int options) {
3101 2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1601	int len = str.length();
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1602	boolean asciiOnly = true;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1603	if (len < 80) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1604	for (int i = 0; i < len; i++) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1605	if (str.charAt(i) > 127) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1606	asciiOnly = false;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1607	break;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1608	}
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1609	}
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1610	} else {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1611	char[] a = str.toCharArray();
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1612	for (int i = 0; i < len; i++) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1613	if (a[i] > 127) {
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1614	asciiOnly = false;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1615	break;
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1616	}
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1617	}
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1618	}
2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1619
2 90ce3da70b43 Initial load duke parents: diff changeset	1620	switch (form) {
90ce3da70b43 Initial load duke parents: diff changeset	1621	case NFC :
3101 2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1622	return asciiOnly ? str : NFC.normalize(str, options);
2 90ce3da70b43 Initial load duke parents: diff changeset	1623	case NFD :
3101 2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1624	return asciiOnly ? str : NFD.normalize(str, options);
2 90ce3da70b43 Initial load duke parents: diff changeset	1625	case NFKC :
3101 2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1626	return asciiOnly ? str : NFKC.normalize(str, options);
2 90ce3da70b43 Initial load duke parents: diff changeset	1627	case NFKD :
3101 2dde80384b3d 6792400: Avoid loading of Normalizer resources for simple uses peytoia parents: 2497 diff changeset	1628	return asciiOnly ? str : NFKD.normalize(str, options);
2 90ce3da70b43 Initial load duke parents: diff changeset	1629	}
90ce3da70b43 Initial load duke parents: diff changeset	1630
90ce3da70b43 Initial load duke parents: diff changeset	1631	throw new IllegalArgumentException("Unexpected normalization form: " +
90ce3da70b43 Initial load duke parents: diff changeset	1632	form);
90ce3da70b43 Initial load duke parents: diff changeset	1633	}
90ce3da70b43 Initial load duke parents: diff changeset	1634
90ce3da70b43 Initial load duke parents: diff changeset	1635	/**
90ce3da70b43 Initial load duke parents: diff changeset	1636	* Test if a string is in a given normalization form.
90ce3da70b43 Initial load duke parents: diff changeset	1637	* This is semantically equivalent to source.equals(normalize(source, mode)).
90ce3da70b43 Initial load duke parents: diff changeset	1638	*
90ce3da70b43 Initial load duke parents: diff changeset	1639	* Unlike quickCheck(), this function returns a definitive result,
90ce3da70b43 Initial load duke parents: diff changeset	1640	* never a "maybe".
90ce3da70b43 Initial load duke parents: diff changeset	1641	* For NFD, NFKD, and FCD, both functions work exactly the same.
90ce3da70b43 Initial load duke parents: diff changeset	1642	* For NFC and NFKC where quickCheck may return "maybe", this function will
90ce3da70b43 Initial load duke parents: diff changeset	1643	* perform further tests to arrive at a true/false result.
90ce3da70b43 Initial load duke parents: diff changeset	1644	* @param str the input string to be checked to see if it is normalized
90ce3da70b43 Initial load duke parents: diff changeset	1645	* @param form the normalization form
90ce3da70b43 Initial load duke parents: diff changeset	1646	* @param options the optional features to be enabled.
90ce3da70b43 Initial load duke parents: diff changeset	1647	*/
90ce3da70b43 Initial load duke parents: diff changeset	1648	public static boolean isNormalized(String str, Normalizer.Form form) {
90ce3da70b43 Initial load duke parents: diff changeset	1649	return isNormalized(str, form, UNICODE_LATEST);
90ce3da70b43 Initial load duke parents: diff changeset	1650	}
90ce3da70b43 Initial load duke parents: diff changeset	1651
90ce3da70b43 Initial load duke parents: diff changeset	1652	/**
90ce3da70b43 Initial load duke parents: diff changeset	1653	* Test if a string is in a given normalization form.
90ce3da70b43 Initial load duke parents: diff changeset	1654	* This is semantically equivalent to source.equals(normalize(source, mode)).
90ce3da70b43 Initial load duke parents: diff changeset	1655	*
90ce3da70b43 Initial load duke parents: diff changeset	1656	* Unlike quickCheck(), this function returns a definitive result,
90ce3da70b43 Initial load duke parents: diff changeset	1657	* never a "maybe".
90ce3da70b43 Initial load duke parents: diff changeset	1658	* For NFD, NFKD, and FCD, both functions work exactly the same.
90ce3da70b43 Initial load duke parents: diff changeset	1659	* For NFC and NFKC where quickCheck may return "maybe", this function will
90ce3da70b43 Initial load duke parents: diff changeset	1660	* perform further tests to arrive at a true/false result.
90ce3da70b43 Initial load duke parents: diff changeset	1661	* @param str the input string to be checked to see if it is normalized
90ce3da70b43 Initial load duke parents: diff changeset	1662	* @param form the normalization form
90ce3da70b43 Initial load duke parents: diff changeset	1663	* @param options the optional features to be enabled.
90ce3da70b43 Initial load duke parents: diff changeset	1664	*/
90ce3da70b43 Initial load duke parents: diff changeset	1665	public static boolean isNormalized(String str, Normalizer.Form form, int options) {
90ce3da70b43 Initial load duke parents: diff changeset	1666	switch (form) {
90ce3da70b43 Initial load duke parents: diff changeset	1667	case NFC:
90ce3da70b43 Initial load duke parents: diff changeset	1668	return (NFC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load duke parents: diff changeset	1669	case NFD:
90ce3da70b43 Initial load duke parents: diff changeset	1670	return (NFD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load duke parents: diff changeset	1671	case NFKC:
90ce3da70b43 Initial load duke parents: diff changeset	1672	return (NFKC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load duke parents: diff changeset	1673	case NFKD:
90ce3da70b43 Initial load duke parents: diff changeset	1674	return (NFKD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
90ce3da70b43 Initial load duke parents: diff changeset	1675	}
90ce3da70b43 Initial load duke parents: diff changeset	1676
90ce3da70b43 Initial load duke parents: diff changeset	1677	throw new IllegalArgumentException("Unexpected normalization form: " +
90ce3da70b43 Initial load duke parents: diff changeset	1678	form);
90ce3da70b43 Initial load duke parents: diff changeset	1679	}
90ce3da70b43 Initial load duke parents: diff changeset	1680	}

author	sherman
	Tue, 30 Aug 2011 11:53:11 -0700
changeset 10419	12c063b39232
parent 5506	202f599c92aa
child 11136	f0f53bbe5bd1
permissions	-rw-r--r--