jdk-sandbox: jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java@dc5744ca15ea (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	7	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	9	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
90ce3da70b43 Initial load duke parents: diff changeset	21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	22	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	23	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25
90ce3da70b43 Initial load duke parents: diff changeset	26	package java.lang;
90ce3da70b43 Initial load duke parents: diff changeset	27
90ce3da70b43 Initial load duke parents: diff changeset	28	import java.text.BreakIterator;
90ce3da70b43 Initial load duke parents: diff changeset	29	import java.util.HashSet;
90ce3da70b43 Initial load duke parents: diff changeset	30	import java.util.Hashtable;
90ce3da70b43 Initial load duke parents: diff changeset	31	import java.util.Iterator;
90ce3da70b43 Initial load duke parents: diff changeset	32	import java.util.Locale;
90ce3da70b43 Initial load duke parents: diff changeset	33	import sun.text.Normalizer;
90ce3da70b43 Initial load duke parents: diff changeset	34
90ce3da70b43 Initial load duke parents: diff changeset	35
/**
 * This is a utility class for <code>String.toLowerCase()</code> and
 * <code>String.toUpperCase()</code> that handles special casing with
 * conditions. In other words, it handles the conditional mappings
 * that are defined in the
 * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
 * Casing Properties</a> file.
 * <p>
 * Note that the unconditional case mappings (including 1:M mappings)
 * are handled in <code>Character.toLower/UpperCase()</code>.
 */
90ce3da70b43 Initial load duke parents: diff changeset	47	final class ConditionalSpecialCasing {
90ce3da70b43 Initial load duke parents: diff changeset	48
90ce3da70b43 Initial load duke parents: diff changeset	49	// context conditions.
90ce3da70b43 Initial load duke parents: diff changeset	50	final static int FINAL_CASED = 1;
90ce3da70b43 Initial load duke parents: diff changeset	51	final static int AFTER_SOFT_DOTTED = 2;
90ce3da70b43 Initial load duke parents: diff changeset	52	final static int MORE_ABOVE = 3;
90ce3da70b43 Initial load duke parents: diff changeset	53	final static int AFTER_I = 4;
90ce3da70b43 Initial load duke parents: diff changeset	54	final static int NOT_BEFORE_DOT = 5;
90ce3da70b43 Initial load duke parents: diff changeset	55
90ce3da70b43 Initial load duke parents: diff changeset	56	// combining class definitions
90ce3da70b43 Initial load duke parents: diff changeset	57	final static int COMBINING_CLASS_ABOVE = 230;
90ce3da70b43 Initial load duke parents: diff changeset	58
90ce3da70b43 Initial load duke parents: diff changeset	59	// Special case mapping entries
90ce3da70b43 Initial load duke parents: diff changeset	60	static Entry[] entry = {
90ce3da70b43 Initial load duke parents: diff changeset	61	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	62	//# Conditional mappings
90ce3da70b43 Initial load duke parents: diff changeset	63	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	64	new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
90ce3da70b43 Initial load duke parents: diff changeset	65
90ce3da70b43 Initial load duke parents: diff changeset	66	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	67	//# Locale-sensitive mappings
90ce3da70b43 Initial load duke parents: diff changeset	68	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	69	//# Lithuanian
90ce3da70b43 Initial load duke parents: diff changeset	70	new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt", AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	71	new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	72	new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
90ce3da70b43 Initial load duke parents: diff changeset	73	new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
90ce3da70b43 Initial load duke parents: diff changeset	74	new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
90ce3da70b43 Initial load duke parents: diff changeset	75	new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
90ce3da70b43 Initial load duke parents: diff changeset	76	new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
90ce3da70b43 Initial load duke parents: diff changeset	77
90ce3da70b43 Initial load duke parents: diff changeset	78	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	79	//# Turkish and Azeri
90ce3da70b43 Initial load duke parents: diff changeset	80	// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	81	// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	82	new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	83	new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	84	new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	85	new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	86	new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	87	new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0) // # LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	88	};
90ce3da70b43 Initial load duke parents: diff changeset	89
90ce3da70b43 Initial load duke parents: diff changeset	90	// A hash table that contains the above entries
90ce3da70b43 Initial load duke parents: diff changeset	91	static Hashtable entryTable = new Hashtable();
90ce3da70b43 Initial load duke parents: diff changeset	92	static {
90ce3da70b43 Initial load duke parents: diff changeset	93	// create hashtable from the entry
90ce3da70b43 Initial load duke parents: diff changeset	94	for (int i = 0; i < entry.length; i ++) {
90ce3da70b43 Initial load duke parents: diff changeset	95	Entry cur = entry[i];
90ce3da70b43 Initial load duke parents: diff changeset	96	Integer cp = new Integer(cur.getCodePoint());
90ce3da70b43 Initial load duke parents: diff changeset	97	HashSet set = (HashSet)entryTable.get(cp);
90ce3da70b43 Initial load duke parents: diff changeset	98	if (set == null) {
90ce3da70b43 Initial load duke parents: diff changeset	99	set = new HashSet();
90ce3da70b43 Initial load duke parents: diff changeset	100	}
90ce3da70b43 Initial load duke parents: diff changeset	101	set.add(cur);
90ce3da70b43 Initial load duke parents: diff changeset	102	entryTable.put(cp, set);
90ce3da70b43 Initial load duke parents: diff changeset	103	}
90ce3da70b43 Initial load duke parents: diff changeset	104	}
90ce3da70b43 Initial load duke parents: diff changeset	105
90ce3da70b43 Initial load duke parents: diff changeset	106	static int toLowerCaseEx(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	107	char[] result = lookUpTable(src, index, locale, true);
90ce3da70b43 Initial load duke parents: diff changeset	108
90ce3da70b43 Initial load duke parents: diff changeset	109	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	110	if (result.length == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	111	return result[0];
90ce3da70b43 Initial load duke parents: diff changeset	112	} else {
90ce3da70b43 Initial load duke parents: diff changeset	113	return Character.ERROR;
90ce3da70b43 Initial load duke parents: diff changeset	114	}
90ce3da70b43 Initial load duke parents: diff changeset	115	} else {
90ce3da70b43 Initial load duke parents: diff changeset	116	// default to Character class' one
90ce3da70b43 Initial load duke parents: diff changeset	117	return Character.toLowerCase(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	118	}
90ce3da70b43 Initial load duke parents: diff changeset	119	}
90ce3da70b43 Initial load duke parents: diff changeset	120
90ce3da70b43 Initial load duke parents: diff changeset	121	static int toUpperCaseEx(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	122	char[] result = lookUpTable(src, index, locale, false);
90ce3da70b43 Initial load duke parents: diff changeset	123
90ce3da70b43 Initial load duke parents: diff changeset	124	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	125	if (result.length == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	126	return result[0];
90ce3da70b43 Initial load duke parents: diff changeset	127	} else {
90ce3da70b43 Initial load duke parents: diff changeset	128	return Character.ERROR;
90ce3da70b43 Initial load duke parents: diff changeset	129	}
90ce3da70b43 Initial load duke parents: diff changeset	130	} else {
90ce3da70b43 Initial load duke parents: diff changeset	131	// default to Character class' one
90ce3da70b43 Initial load duke parents: diff changeset	132	return Character.toUpperCaseEx(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	133	}
90ce3da70b43 Initial load duke parents: diff changeset	134	}
90ce3da70b43 Initial load duke parents: diff changeset	135
90ce3da70b43 Initial load duke parents: diff changeset	136	static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	137	return lookUpTable(src, index, locale, true);
90ce3da70b43 Initial load duke parents: diff changeset	138	}
90ce3da70b43 Initial load duke parents: diff changeset	139
90ce3da70b43 Initial load duke parents: diff changeset	140	static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	141	char[] result = lookUpTable(src, index, locale, false);
90ce3da70b43 Initial load duke parents: diff changeset	142	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	143	return result;
90ce3da70b43 Initial load duke parents: diff changeset	144	} else {
90ce3da70b43 Initial load duke parents: diff changeset	145	return Character.toUpperCaseCharArray(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	146	}
90ce3da70b43 Initial load duke parents: diff changeset	147	}
90ce3da70b43 Initial load duke parents: diff changeset	148
90ce3da70b43 Initial load duke parents: diff changeset	149	private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
90ce3da70b43 Initial load duke parents: diff changeset	150	HashSet set = (HashSet)entryTable.get(new Integer(src.codePointAt(index)));
90ce3da70b43 Initial load duke parents: diff changeset	151
90ce3da70b43 Initial load duke parents: diff changeset	152	if (set != null) {
90ce3da70b43 Initial load duke parents: diff changeset	153	Iterator iter = set.iterator();
90ce3da70b43 Initial load duke parents: diff changeset	154	String currentLang = locale.getLanguage();
90ce3da70b43 Initial load duke parents: diff changeset	155	while (iter.hasNext()) {
90ce3da70b43 Initial load duke parents: diff changeset	156	Entry entry = (Entry)iter.next();
90ce3da70b43 Initial load duke parents: diff changeset	157	String conditionLang= entry.getLanguage();
90ce3da70b43 Initial load duke parents: diff changeset	158	if (((conditionLang == null) \|\| (conditionLang.equals(currentLang))) &&
90ce3da70b43 Initial load duke parents: diff changeset	159	isConditionMet(src, index, locale, entry.getCondition())) {
90ce3da70b43 Initial load duke parents: diff changeset	160	return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase());
90ce3da70b43 Initial load duke parents: diff changeset	161	}
90ce3da70b43 Initial load duke parents: diff changeset	162	}
90ce3da70b43 Initial load duke parents: diff changeset	163	}
90ce3da70b43 Initial load duke parents: diff changeset	164
90ce3da70b43 Initial load duke parents: diff changeset	165	return null;
90ce3da70b43 Initial load duke parents: diff changeset	166	}
90ce3da70b43 Initial load duke parents: diff changeset	167
90ce3da70b43 Initial load duke parents: diff changeset	168	private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
90ce3da70b43 Initial load duke parents: diff changeset	169	switch (condition) {
90ce3da70b43 Initial load duke parents: diff changeset	170	case FINAL_CASED:
90ce3da70b43 Initial load duke parents: diff changeset	171	return isFinalCased(src, index, locale);
90ce3da70b43 Initial load duke parents: diff changeset	172
90ce3da70b43 Initial load duke parents: diff changeset	173	case AFTER_SOFT_DOTTED:
90ce3da70b43 Initial load duke parents: diff changeset	174	return isAfterSoftDotted(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	175
90ce3da70b43 Initial load duke parents: diff changeset	176	case MORE_ABOVE:
90ce3da70b43 Initial load duke parents: diff changeset	177	return isMoreAbove(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	178
90ce3da70b43 Initial load duke parents: diff changeset	179	case AFTER_I:
90ce3da70b43 Initial load duke parents: diff changeset	180	return isAfterI(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	181
90ce3da70b43 Initial load duke parents: diff changeset	182	case NOT_BEFORE_DOT:
90ce3da70b43 Initial load duke parents: diff changeset	183	return !isBeforeDot(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	184
90ce3da70b43 Initial load duke parents: diff changeset	185	default:
90ce3da70b43 Initial load duke parents: diff changeset	186	return true;
90ce3da70b43 Initial load duke parents: diff changeset	187	}
90ce3da70b43 Initial load duke parents: diff changeset	188	}
90ce3da70b43 Initial load duke parents: diff changeset	189
90ce3da70b43 Initial load duke parents: diff changeset	190	/**
90ce3da70b43 Initial load duke parents: diff changeset	191	* Implements the "Final_Cased" condition
90ce3da70b43 Initial load duke parents: diff changeset	192	*
90ce3da70b43 Initial load duke parents: diff changeset	193	* Specification: Within the closest word boundaries containing C, there is a cased
90ce3da70b43 Initial load duke parents: diff changeset	194	* letter before C, and there is no cased letter after C.
90ce3da70b43 Initial load duke parents: diff changeset	195	*
90ce3da70b43 Initial load duke parents: diff changeset	196	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	197	* Before C: [{cased==true}][{wordBoundary!=true}]*
90ce3da70b43 Initial load duke parents: diff changeset	198	* After C: !([{wordBoundary!=true}]*[{cased}])
90ce3da70b43 Initial load duke parents: diff changeset	199	*/
90ce3da70b43 Initial load duke parents: diff changeset	200	private static boolean isFinalCased(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	201	BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
90ce3da70b43 Initial load duke parents: diff changeset	202	wordBoundary.setText(src);
90ce3da70b43 Initial load duke parents: diff changeset	203	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	204
90ce3da70b43 Initial load duke parents: diff changeset	205	// Look for a preceding 'cased' letter
90ce3da70b43 Initial load duke parents: diff changeset	206	for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
90ce3da70b43 Initial load duke parents: diff changeset	207	i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	208
90ce3da70b43 Initial load duke parents: diff changeset	209	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	210	if (isCased(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	211
90ce3da70b43 Initial load duke parents: diff changeset	212	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	213	// Check that there is no 'cased' letter after the index
90ce3da70b43 Initial load duke parents: diff changeset	214	for (i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	215	(i < len) && !wordBoundary.isBoundary(i);
90ce3da70b43 Initial load duke parents: diff changeset	216	i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	217
90ce3da70b43 Initial load duke parents: diff changeset	218	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	219	if (isCased(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	220	return false;
90ce3da70b43 Initial load duke parents: diff changeset	221	}
90ce3da70b43 Initial load duke parents: diff changeset	222	}
90ce3da70b43 Initial load duke parents: diff changeset	223
90ce3da70b43 Initial load duke parents: diff changeset	224	return true;
90ce3da70b43 Initial load duke parents: diff changeset	225	}
90ce3da70b43 Initial load duke parents: diff changeset	226	}
90ce3da70b43 Initial load duke parents: diff changeset	227
90ce3da70b43 Initial load duke parents: diff changeset	228	return false;
90ce3da70b43 Initial load duke parents: diff changeset	229	}
90ce3da70b43 Initial load duke parents: diff changeset	230
90ce3da70b43 Initial load duke parents: diff changeset	231	/**
90ce3da70b43 Initial load duke parents: diff changeset	232	* Implements the "After_I" condition
90ce3da70b43 Initial load duke parents: diff changeset	233	*
90ce3da70b43 Initial load duke parents: diff changeset	234	* Specification: The last preceding base character was an uppercase I,
90ce3da70b43 Initial load duke parents: diff changeset	235	* and there is no intervening combining character class 230 (ABOVE).
90ce3da70b43 Initial load duke parents: diff changeset	236	*
90ce3da70b43 Initial load duke parents: diff changeset	237	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	238	* Before C: [I]([{cc!=230}&{cc!=0}])*
90ce3da70b43 Initial load duke parents: diff changeset	239	*/
90ce3da70b43 Initial load duke parents: diff changeset	240	private static boolean isAfterI(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	241	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	242	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	243
90ce3da70b43 Initial load duke parents: diff changeset	244	// Look for the last preceding base character
90ce3da70b43 Initial load duke parents: diff changeset	245	for (int i = index; i > 0; i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	246
90ce3da70b43 Initial load duke parents: diff changeset	247	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	248
90ce3da70b43 Initial load duke parents: diff changeset	249	if (ch == 'I') {
90ce3da70b43 Initial load duke parents: diff changeset	250	return true;
90ce3da70b43 Initial load duke parents: diff changeset	251	} else {
90ce3da70b43 Initial load duke parents: diff changeset	252	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	253	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	254	return false;
90ce3da70b43 Initial load duke parents: diff changeset	255	}
90ce3da70b43 Initial load duke parents: diff changeset	256	}
90ce3da70b43 Initial load duke parents: diff changeset	257	}
90ce3da70b43 Initial load duke parents: diff changeset	258
90ce3da70b43 Initial load duke parents: diff changeset	259	return false;
90ce3da70b43 Initial load duke parents: diff changeset	260	}
90ce3da70b43 Initial load duke parents: diff changeset	261
90ce3da70b43 Initial load duke parents: diff changeset	262	/**
90ce3da70b43 Initial load duke parents: diff changeset	263	* Implements the "After_Soft_Dotted" condition
90ce3da70b43 Initial load duke parents: diff changeset	264	*
90ce3da70b43 Initial load duke parents: diff changeset	265	* Specification: The last preceding character with combining class
90ce3da70b43 Initial load duke parents: diff changeset	266	* of zero before C was Soft_Dotted, and there is no intervening
90ce3da70b43 Initial load duke parents: diff changeset	267	* combining character class 230 (ABOVE).
90ce3da70b43 Initial load duke parents: diff changeset	268	*
90ce3da70b43 Initial load duke parents: diff changeset	269	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	270	* Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
90ce3da70b43 Initial load duke parents: diff changeset	271	*/
90ce3da70b43 Initial load duke parents: diff changeset	272	private static boolean isAfterSoftDotted(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	273	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	274	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	275
90ce3da70b43 Initial load duke parents: diff changeset	276	// Look for the last preceding character
90ce3da70b43 Initial load duke parents: diff changeset	277	for (int i = index; i > 0; i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	278
90ce3da70b43 Initial load duke parents: diff changeset	279	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	280
90ce3da70b43 Initial load duke parents: diff changeset	281	if (isSoftDotted(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	282	return true;
90ce3da70b43 Initial load duke parents: diff changeset	283	} else {
90ce3da70b43 Initial load duke parents: diff changeset	284	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	285	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	286	return false;
90ce3da70b43 Initial load duke parents: diff changeset	287	}
90ce3da70b43 Initial load duke parents: diff changeset	288	}
90ce3da70b43 Initial load duke parents: diff changeset	289	}
90ce3da70b43 Initial load duke parents: diff changeset	290
90ce3da70b43 Initial load duke parents: diff changeset	291	return false;
90ce3da70b43 Initial load duke parents: diff changeset	292	}
90ce3da70b43 Initial load duke parents: diff changeset	293
90ce3da70b43 Initial load duke parents: diff changeset	294	/**
90ce3da70b43 Initial load duke parents: diff changeset	295	* Implements the "More_Above" condition
90ce3da70b43 Initial load duke parents: diff changeset	296	*
90ce3da70b43 Initial load duke parents: diff changeset	297	* Specification: C is followed by one or more characters of combining
90ce3da70b43 Initial load duke parents: diff changeset	298	* class 230 (ABOVE) in the combining character sequence.
90ce3da70b43 Initial load duke parents: diff changeset	299	*
90ce3da70b43 Initial load duke parents: diff changeset	300	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	301	* After C: [{cc!=0}]*[{cc==230}]
90ce3da70b43 Initial load duke parents: diff changeset	302	*/
90ce3da70b43 Initial load duke parents: diff changeset	303	private static boolean isMoreAbove(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	304	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	305	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	306	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	307
90ce3da70b43 Initial load duke parents: diff changeset	308	// Look for a following ABOVE combining class character
90ce3da70b43 Initial load duke parents: diff changeset	309	for (int i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	310	i < len; i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	311
90ce3da70b43 Initial load duke parents: diff changeset	312	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	313	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	314
90ce3da70b43 Initial load duke parents: diff changeset	315	if (cc == COMBINING_CLASS_ABOVE) {
90ce3da70b43 Initial load duke parents: diff changeset	316	return true;
90ce3da70b43 Initial load duke parents: diff changeset	317	} else if (cc == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	318	return false;
90ce3da70b43 Initial load duke parents: diff changeset	319	}
90ce3da70b43 Initial load duke parents: diff changeset	320	}
90ce3da70b43 Initial load duke parents: diff changeset	321
90ce3da70b43 Initial load duke parents: diff changeset	322	return false;
90ce3da70b43 Initial load duke parents: diff changeset	323	}
90ce3da70b43 Initial load duke parents: diff changeset	324
90ce3da70b43 Initial load duke parents: diff changeset	325	/**
90ce3da70b43 Initial load duke parents: diff changeset	326	* Implements the "Before_Dot" condition
90ce3da70b43 Initial load duke parents: diff changeset	327	*
90ce3da70b43 Initial load duke parents: diff changeset	328	* Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
90ce3da70b43 Initial load duke parents: diff changeset	329	* Any sequence of characters with a combining class that is
90ce3da70b43 Initial load duke parents: diff changeset	330	* neither 0 nor 230 may intervene between the current character
90ce3da70b43 Initial load duke parents: diff changeset	331	* and the combining dot above.
90ce3da70b43 Initial load duke parents: diff changeset	332	*
90ce3da70b43 Initial load duke parents: diff changeset	333	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	334	* After C: ([{cc!=230}&{cc!=0}])*[\u0307]
90ce3da70b43 Initial load duke parents: diff changeset	335	*/
90ce3da70b43 Initial load duke parents: diff changeset	336	private static boolean isBeforeDot(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	337	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	338	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	339	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	340
90ce3da70b43 Initial load duke parents: diff changeset	341	// Look for a following COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	342	for (int i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	343	i < len; i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	344
90ce3da70b43 Initial load duke parents: diff changeset	345	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	346
90ce3da70b43 Initial load duke parents: diff changeset	347	if (ch == '\u0307') {
90ce3da70b43 Initial load duke parents: diff changeset	348	return true;
90ce3da70b43 Initial load duke parents: diff changeset	349	} else {
90ce3da70b43 Initial load duke parents: diff changeset	350	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	351	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	352	return false;
90ce3da70b43 Initial load duke parents: diff changeset	353	}
90ce3da70b43 Initial load duke parents: diff changeset	354	}
90ce3da70b43 Initial load duke parents: diff changeset	355	}
90ce3da70b43 Initial load duke parents: diff changeset	356
90ce3da70b43 Initial load duke parents: diff changeset	357	return false;
90ce3da70b43 Initial load duke parents: diff changeset	358	}
90ce3da70b43 Initial load duke parents: diff changeset	359
90ce3da70b43 Initial load duke parents: diff changeset	360	/**
90ce3da70b43 Initial load duke parents: diff changeset	361	* Examines whether a character is 'cased'.
90ce3da70b43 Initial load duke parents: diff changeset	362	*
90ce3da70b43 Initial load duke parents: diff changeset	363	* A character C is defined to be 'cased' if and only if at least one of
90ce3da70b43 Initial load duke parents: diff changeset	364	* following are true for C: uppercase==true, or lowercase==true, or
90ce3da70b43 Initial load duke parents: diff changeset	365	* general_category==titlecase_letter.
90ce3da70b43 Initial load duke parents: diff changeset	366	*
90ce3da70b43 Initial load duke parents: diff changeset	367	* The uppercase and lowercase property values are specified in the data
90ce3da70b43 Initial load duke parents: diff changeset	368	* file DerivedCoreProperties.txt in the Unicode Character Database.
90ce3da70b43 Initial load duke parents: diff changeset	369	*/
90ce3da70b43 Initial load duke parents: diff changeset	370	private static boolean isCased(int ch) {
90ce3da70b43 Initial load duke parents: diff changeset	371	int type = Character.getType(ch);
90ce3da70b43 Initial load duke parents: diff changeset	372	if (type == Character.LOWERCASE_LETTER \|\|
90ce3da70b43 Initial load duke parents: diff changeset	373	type == Character.UPPERCASE_LETTER \|\|
90ce3da70b43 Initial load duke parents: diff changeset	374	type == Character.TITLECASE_LETTER) {
90ce3da70b43 Initial load duke parents: diff changeset	375	return true;
90ce3da70b43 Initial load duke parents: diff changeset	376	} else {
90ce3da70b43 Initial load duke parents: diff changeset	377	// Check for Other_Lowercase and Other_Uppercase
90ce3da70b43 Initial load duke parents: diff changeset	378	//
90ce3da70b43 Initial load duke parents: diff changeset	379	if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
90ce3da70b43 Initial load duke parents: diff changeset	380	// MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
90ce3da70b43 Initial load duke parents: diff changeset	381	return true;
90ce3da70b43 Initial load duke parents: diff changeset	382	} else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
90ce3da70b43 Initial load duke parents: diff changeset	383	// MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
90ce3da70b43 Initial load duke parents: diff changeset	384	return true;
90ce3da70b43 Initial load duke parents: diff changeset	385	} else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
90ce3da70b43 Initial load duke parents: diff changeset	386	// MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
90ce3da70b43 Initial load duke parents: diff changeset	387	return true;
90ce3da70b43 Initial load duke parents: diff changeset	388	} else if (ch == 0x0345) {
90ce3da70b43 Initial load duke parents: diff changeset	389	// COMBINING GREEK YPOGEGRAMMENI
90ce3da70b43 Initial load duke parents: diff changeset	390	return true;
90ce3da70b43 Initial load duke parents: diff changeset	391	} else if (ch == 0x037A) {
90ce3da70b43 Initial load duke parents: diff changeset	392	// GREEK YPOGEGRAMMENI
90ce3da70b43 Initial load duke parents: diff changeset	393	return true;
90ce3da70b43 Initial load duke parents: diff changeset	394	} else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
90ce3da70b43 Initial load duke parents: diff changeset	395	// MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
90ce3da70b43 Initial load duke parents: diff changeset	396	return true;
90ce3da70b43 Initial load duke parents: diff changeset	397	} else if ((ch >= 0x2160) && (ch <= 0x217F)) {
90ce3da70b43 Initial load duke parents: diff changeset	398	// ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
90ce3da70b43 Initial load duke parents: diff changeset	399	// SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
90ce3da70b43 Initial load duke parents: diff changeset	400	return true;
90ce3da70b43 Initial load duke parents: diff changeset	401	} else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
90ce3da70b43 Initial load duke parents: diff changeset	402	// CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
90ce3da70b43 Initial load duke parents: diff changeset	403	// CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
90ce3da70b43 Initial load duke parents: diff changeset	404	return true;
90ce3da70b43 Initial load duke parents: diff changeset	405	} else {
90ce3da70b43 Initial load duke parents: diff changeset	406	return false;
90ce3da70b43 Initial load duke parents: diff changeset	407	}
90ce3da70b43 Initial load duke parents: diff changeset	408	}
90ce3da70b43 Initial load duke parents: diff changeset	409	}
90ce3da70b43 Initial load duke parents: diff changeset	410
90ce3da70b43 Initial load duke parents: diff changeset	411	private static boolean isSoftDotted(int ch) {
90ce3da70b43 Initial load duke parents: diff changeset	412	switch (ch) {
90ce3da70b43 Initial load duke parents: diff changeset	413	case 0x0069: // Soft_Dotted # L& LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	414	case 0x006A: // Soft_Dotted # L& LATIN SMALL LETTER J
90ce3da70b43 Initial load duke parents: diff changeset	415	case 0x012F: // Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK
90ce3da70b43 Initial load duke parents: diff changeset	416	case 0x0268: // Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE
90ce3da70b43 Initial load duke parents: diff changeset	417	case 0x0456: // Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
90ce3da70b43 Initial load duke parents: diff changeset	418	case 0x0458: // Soft_Dotted # L& CYRILLIC SMALL LETTER JE
90ce3da70b43 Initial load duke parents: diff changeset	419	case 0x1D62: // Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	420	case 0x1E2D: // Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
90ce3da70b43 Initial load duke parents: diff changeset	421	case 0x1ECB: // Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
90ce3da70b43 Initial load duke parents: diff changeset	422	case 0x2071: // Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	423	return true;
90ce3da70b43 Initial load duke parents: diff changeset	424	default:
90ce3da70b43 Initial load duke parents: diff changeset	425	return false;
90ce3da70b43 Initial load duke parents: diff changeset	426	}
90ce3da70b43 Initial load duke parents: diff changeset	427	}
90ce3da70b43 Initial load duke parents: diff changeset	428
90ce3da70b43 Initial load duke parents: diff changeset	429	/**
90ce3da70b43 Initial load duke parents: diff changeset	430	* An internal class that represents an entry in the Special Casing Properties.
90ce3da70b43 Initial load duke parents: diff changeset	431	*/
90ce3da70b43 Initial load duke parents: diff changeset	432	static class Entry {
90ce3da70b43 Initial load duke parents: diff changeset	433	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	434	char [] lower;
90ce3da70b43 Initial load duke parents: diff changeset	435	char [] upper;
90ce3da70b43 Initial load duke parents: diff changeset	436	String lang;
90ce3da70b43 Initial load duke parents: diff changeset	437	int condition;
90ce3da70b43 Initial load duke parents: diff changeset	438
90ce3da70b43 Initial load duke parents: diff changeset	439	Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
90ce3da70b43 Initial load duke parents: diff changeset	440	this.ch = ch;
90ce3da70b43 Initial load duke parents: diff changeset	441	this.lower = lower;
90ce3da70b43 Initial load duke parents: diff changeset	442	this.upper = upper;
90ce3da70b43 Initial load duke parents: diff changeset	443	this.lang = lang;
90ce3da70b43 Initial load duke parents: diff changeset	444	this.condition = condition;
90ce3da70b43 Initial load duke parents: diff changeset	445	}
90ce3da70b43 Initial load duke parents: diff changeset	446
90ce3da70b43 Initial load duke parents: diff changeset	447	int getCodePoint() {
90ce3da70b43 Initial load duke parents: diff changeset	448	return ch;
90ce3da70b43 Initial load duke parents: diff changeset	449	}
90ce3da70b43 Initial load duke parents: diff changeset	450
90ce3da70b43 Initial load duke parents: diff changeset	451	char[] getLowerCase() {
90ce3da70b43 Initial load duke parents: diff changeset	452	return lower;
90ce3da70b43 Initial load duke parents: diff changeset	453	}
90ce3da70b43 Initial load duke parents: diff changeset	454
90ce3da70b43 Initial load duke parents: diff changeset	455	char[] getUpperCase() {
90ce3da70b43 Initial load duke parents: diff changeset	456	return upper;
90ce3da70b43 Initial load duke parents: diff changeset	457	}
90ce3da70b43 Initial load duke parents: diff changeset	458
90ce3da70b43 Initial load duke parents: diff changeset	459	String getLanguage() {
90ce3da70b43 Initial load duke parents: diff changeset	460	return lang;
90ce3da70b43 Initial load duke parents: diff changeset	461	}
90ce3da70b43 Initial load duke parents: diff changeset	462
90ce3da70b43 Initial load duke parents: diff changeset	463	int getCondition() {
90ce3da70b43 Initial load duke parents: diff changeset	464	return condition;
90ce3da70b43 Initial load duke parents: diff changeset	465	}
90ce3da70b43 Initial load duke parents: diff changeset	466	}
90ce3da70b43 Initial load duke parents: diff changeset	467	}

author	martin
	Mon, 10 Mar 2008 14:32:51 -0700
changeset 48	dc5744ca15ea
parent 2	90ce3da70b43
child 2497	903fd9d785ef
permissions	-rw-r--r--