jdk-sandbox: src/java.base/share/classes/java/lang/ConditionalSpecialCasing.java@43e41800d579 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
23010 6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013 lana parents: 22581 diff changeset	2	* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
2 90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2497 diff changeset	7	* published by the Free Software Foundation. Oracle designates this
2 90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2497 diff changeset	9	* by Oracle in the LICENSE file that accompanied this code.
2 90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2497 diff changeset	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2497 diff changeset	22	* or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2497 diff changeset	23	* questions.
2 90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25
90ce3da70b43 Initial load duke parents: diff changeset	26	package java.lang;
90ce3da70b43 Initial load duke parents: diff changeset	27
90ce3da70b43 Initial load duke parents: diff changeset	28	import java.text.BreakIterator;
90ce3da70b43 Initial load duke parents: diff changeset	29	import java.util.HashSet;
90ce3da70b43 Initial load duke parents: diff changeset	30	import java.util.Hashtable;
90ce3da70b43 Initial load duke parents: diff changeset	31	import java.util.Iterator;
90ce3da70b43 Initial load duke parents: diff changeset	32	import java.util.Locale;
90ce3da70b43 Initial load duke parents: diff changeset	33	import sun.text.Normalizer;
90ce3da70b43 Initial load duke parents: diff changeset	34
90ce3da70b43 Initial load duke parents: diff changeset	35
90ce3da70b43 Initial load duke parents: diff changeset	36	/**
90ce3da70b43 Initial load duke parents: diff changeset	37	* This is a utility class for <code>String.toLowerCase()</code> and
90ce3da70b43 Initial load duke parents: diff changeset	38	* <code>String.toUpperCase()</code>, that handles special casing with
90ce3da70b43 Initial load duke parents: diff changeset	39	* conditions. In other words, it handles the mappings with conditions
90ce3da70b43 Initial load duke parents: diff changeset	40	* that are defined in
90ce3da70b43 Initial load duke parents: diff changeset	41	* <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
90ce3da70b43 Initial load duke parents: diff changeset	42	* Casing Properties</a> file.
90ce3da70b43 Initial load duke parents: diff changeset	43	* <p>
90ce3da70b43 Initial load duke parents: diff changeset	44	* Note that the unconditional case mappings (including 1:M mappings)
90ce3da70b43 Initial load duke parents: diff changeset	45	* are handled in <code>Character.toLower/UpperCase()</code>.
90ce3da70b43 Initial load duke parents: diff changeset	46	*/
90ce3da70b43 Initial load duke parents: diff changeset	47	final class ConditionalSpecialCasing {
90ce3da70b43 Initial load duke parents: diff changeset	48
90ce3da70b43 Initial load duke parents: diff changeset	49	// context conditions.
32649 2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	50	static final int FINAL_CASED = 1;
2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	51	static final int AFTER_SOFT_DOTTED = 2;
2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	52	static final int MORE_ABOVE = 3;
2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	53	static final int AFTER_I = 4;
2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	54	static final int NOT_BEFORE_DOT = 5;
2 90ce3da70b43 Initial load duke parents: diff changeset	55
90ce3da70b43 Initial load duke parents: diff changeset	56	// combining class definitions
32649 2ee9017c7597 8136583: Core libraries should use blessed modifier order martin parents: 25859 diff changeset	57	static final int COMBINING_CLASS_ABOVE = 230;
2 90ce3da70b43 Initial load duke parents: diff changeset	58
90ce3da70b43 Initial load duke parents: diff changeset	59	// Special case mapping entries
90ce3da70b43 Initial load duke parents: diff changeset	60	static Entry[] entry = {
90ce3da70b43 Initial load duke parents: diff changeset	61	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	62	//# Conditional mappings
90ce3da70b43 Initial load duke parents: diff changeset	63	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	64	new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	65	new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
2 90ce3da70b43 Initial load duke parents: diff changeset	66
90ce3da70b43 Initial load duke parents: diff changeset	67	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	68	//# Locale-sensitive mappings
90ce3da70b43 Initial load duke parents: diff changeset	69	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	70	//# Lithuanian
90ce3da70b43 Initial load duke parents: diff changeset	71	new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt", AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	72	new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	73	new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
90ce3da70b43 Initial load duke parents: diff changeset	74	new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
90ce3da70b43 Initial load duke parents: diff changeset	75	new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
90ce3da70b43 Initial load duke parents: diff changeset	76	new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
90ce3da70b43 Initial load duke parents: diff changeset	77	new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
90ce3da70b43 Initial load duke parents: diff changeset	78
90ce3da70b43 Initial load duke parents: diff changeset	79	//# ================================================================================
90ce3da70b43 Initial load duke parents: diff changeset	80	//# Turkish and Azeri
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	81	new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	82	new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
2 90ce3da70b43 Initial load duke parents: diff changeset	83	new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	84	new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	85	new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	86	new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	87	new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
21308 638d0533f230 8020037: String.toLowerCase incorrectly increases length, if string contains \u0130 char peytoia parents: 14342 diff changeset	88	new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0) // # LATIN SMALL LETTER I
2 90ce3da70b43 Initial load duke parents: diff changeset	89	};
90ce3da70b43 Initial load duke parents: diff changeset	90
90ce3da70b43 Initial load duke parents: diff changeset	91	// A hash table that contains the above entries
11275 7cb0861d512f 7117612: Miscellaneous warnings in java.lang omajid parents: 5506 diff changeset	92	static Hashtable<Integer, HashSet<Entry>> entryTable = new Hashtable<>();
2 90ce3da70b43 Initial load duke parents: diff changeset	93	static {
90ce3da70b43 Initial load duke parents: diff changeset	94	// create hashtable from the entry
22581 e868cde95050 8032779: Update code in java.lang to use newer language features psandoz parents: 21308 diff changeset	95	for (Entry cur : entry) {
e868cde95050 8032779: Update code in java.lang to use newer language features psandoz parents: 21308 diff changeset	96	Integer cp = cur.getCodePoint();
11275 7cb0861d512f 7117612: Miscellaneous warnings in java.lang omajid parents: 5506 diff changeset	97	HashSet<Entry> set = entryTable.get(cp);
2 90ce3da70b43 Initial load duke parents: diff changeset	98	if (set == null) {
22581 e868cde95050 8032779: Update code in java.lang to use newer language features psandoz parents: 21308 diff changeset	99	set = new HashSet<>();
e868cde95050 8032779: Update code in java.lang to use newer language features psandoz parents: 21308 diff changeset	100	entryTable.put(cp, set);
2 90ce3da70b43 Initial load duke parents: diff changeset	101	}
90ce3da70b43 Initial load duke parents: diff changeset	102	set.add(cur);
90ce3da70b43 Initial load duke parents: diff changeset	103	}
90ce3da70b43 Initial load duke parents: diff changeset	104	}
90ce3da70b43 Initial load duke parents: diff changeset	105
90ce3da70b43 Initial load duke parents: diff changeset	106	static int toLowerCaseEx(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	107	char[] result = lookUpTable(src, index, locale, true);
90ce3da70b43 Initial load duke parents: diff changeset	108
90ce3da70b43 Initial load duke parents: diff changeset	109	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	110	if (result.length == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	111	return result[0];
90ce3da70b43 Initial load duke parents: diff changeset	112	} else {
90ce3da70b43 Initial load duke parents: diff changeset	113	return Character.ERROR;
90ce3da70b43 Initial load duke parents: diff changeset	114	}
90ce3da70b43 Initial load duke parents: diff changeset	115	} else {
90ce3da70b43 Initial load duke parents: diff changeset	116	// default to Character class' one
90ce3da70b43 Initial load duke parents: diff changeset	117	return Character.toLowerCase(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	118	}
90ce3da70b43 Initial load duke parents: diff changeset	119	}
90ce3da70b43 Initial load duke parents: diff changeset	120
90ce3da70b43 Initial load duke parents: diff changeset	121	static int toUpperCaseEx(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	122	char[] result = lookUpTable(src, index, locale, false);
90ce3da70b43 Initial load duke parents: diff changeset	123
90ce3da70b43 Initial load duke parents: diff changeset	124	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	125	if (result.length == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	126	return result[0];
90ce3da70b43 Initial load duke parents: diff changeset	127	} else {
90ce3da70b43 Initial load duke parents: diff changeset	128	return Character.ERROR;
90ce3da70b43 Initial load duke parents: diff changeset	129	}
90ce3da70b43 Initial load duke parents: diff changeset	130	} else {
90ce3da70b43 Initial load duke parents: diff changeset	131	// default to Character class' one
90ce3da70b43 Initial load duke parents: diff changeset	132	return Character.toUpperCaseEx(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	133	}
90ce3da70b43 Initial load duke parents: diff changeset	134	}
90ce3da70b43 Initial load duke parents: diff changeset	135
90ce3da70b43 Initial load duke parents: diff changeset	136	static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	137	return lookUpTable(src, index, locale, true);
90ce3da70b43 Initial load duke parents: diff changeset	138	}
90ce3da70b43 Initial load duke parents: diff changeset	139
90ce3da70b43 Initial load duke parents: diff changeset	140	static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	141	char[] result = lookUpTable(src, index, locale, false);
90ce3da70b43 Initial load duke parents: diff changeset	142	if (result != null) {
90ce3da70b43 Initial load duke parents: diff changeset	143	return result;
90ce3da70b43 Initial load duke parents: diff changeset	144	} else {
90ce3da70b43 Initial load duke parents: diff changeset	145	return Character.toUpperCaseCharArray(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	146	}
90ce3da70b43 Initial load duke parents: diff changeset	147	}
90ce3da70b43 Initial load duke parents: diff changeset	148
90ce3da70b43 Initial load duke parents: diff changeset	149	private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
25522 10d789df41bb 8049892: Replace uses of 'new Integer()' with appropriate alternative across core classes prr parents: 24374 diff changeset	150	HashSet<Entry> set = entryTable.get(src.codePointAt(index));
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	151	char[] ret = null;
2 90ce3da70b43 Initial load duke parents: diff changeset	152
90ce3da70b43 Initial load duke parents: diff changeset	153	if (set != null) {
11275 7cb0861d512f 7117612: Miscellaneous warnings in java.lang omajid parents: 5506 diff changeset	154	Iterator<Entry> iter = set.iterator();
2 90ce3da70b43 Initial load duke parents: diff changeset	155	String currentLang = locale.getLanguage();
90ce3da70b43 Initial load duke parents: diff changeset	156	while (iter.hasNext()) {
11275 7cb0861d512f 7117612: Miscellaneous warnings in java.lang omajid parents: 5506 diff changeset	157	Entry entry = iter.next();
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	158	String conditionLang = entry.getLanguage();
2 90ce3da70b43 Initial load duke parents: diff changeset	159	if (((conditionLang == null) \|\| (conditionLang.equals(currentLang))) &&
90ce3da70b43 Initial load duke parents: diff changeset	160	isConditionMet(src, index, locale, entry.getCondition())) {
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	161	ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	162	if (conditionLang != null) {
a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	163	break;
a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	164	}
2 90ce3da70b43 Initial load duke parents: diff changeset	165	}
90ce3da70b43 Initial load duke parents: diff changeset	166	}
90ce3da70b43 Initial load duke parents: diff changeset	167	}
90ce3da70b43 Initial load duke parents: diff changeset	168
24374 a38282cba2fc 8041791: String.toLowerCase regression - violates Unicode standard naoto parents: 23010 diff changeset	169	return ret;
2 90ce3da70b43 Initial load duke parents: diff changeset	170	}
90ce3da70b43 Initial load duke parents: diff changeset	171
90ce3da70b43 Initial load duke parents: diff changeset	172	private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
90ce3da70b43 Initial load duke parents: diff changeset	173	switch (condition) {
90ce3da70b43 Initial load duke parents: diff changeset	174	case FINAL_CASED:
90ce3da70b43 Initial load duke parents: diff changeset	175	return isFinalCased(src, index, locale);
90ce3da70b43 Initial load duke parents: diff changeset	176
90ce3da70b43 Initial load duke parents: diff changeset	177	case AFTER_SOFT_DOTTED:
90ce3da70b43 Initial load duke parents: diff changeset	178	return isAfterSoftDotted(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	179
90ce3da70b43 Initial load duke parents: diff changeset	180	case MORE_ABOVE:
90ce3da70b43 Initial load duke parents: diff changeset	181	return isMoreAbove(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	182
90ce3da70b43 Initial load duke parents: diff changeset	183	case AFTER_I:
90ce3da70b43 Initial load duke parents: diff changeset	184	return isAfterI(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	185
90ce3da70b43 Initial load duke parents: diff changeset	186	case NOT_BEFORE_DOT:
90ce3da70b43 Initial load duke parents: diff changeset	187	return !isBeforeDot(src, index);
90ce3da70b43 Initial load duke parents: diff changeset	188
90ce3da70b43 Initial load duke parents: diff changeset	189	default:
90ce3da70b43 Initial load duke parents: diff changeset	190	return true;
90ce3da70b43 Initial load duke parents: diff changeset	191	}
90ce3da70b43 Initial load duke parents: diff changeset	192	}
90ce3da70b43 Initial load duke parents: diff changeset	193
90ce3da70b43 Initial load duke parents: diff changeset	194	/**
90ce3da70b43 Initial load duke parents: diff changeset	195	* Implements the "Final_Cased" condition
90ce3da70b43 Initial load duke parents: diff changeset	196	*
90ce3da70b43 Initial load duke parents: diff changeset	197	* Specification: Within the closest word boundaries containing C, there is a cased
90ce3da70b43 Initial load duke parents: diff changeset	198	* letter before C, and there is no cased letter after C.
90ce3da70b43 Initial load duke parents: diff changeset	199	*
90ce3da70b43 Initial load duke parents: diff changeset	200	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	201	* Before C: [{cased==true}][{wordBoundary!=true}]*
90ce3da70b43 Initial load duke parents: diff changeset	202	* After C: !([{wordBoundary!=true}]*[{cased}])
90ce3da70b43 Initial load duke parents: diff changeset	203	*/
90ce3da70b43 Initial load duke parents: diff changeset	204	private static boolean isFinalCased(String src, int index, Locale locale) {
90ce3da70b43 Initial load duke parents: diff changeset	205	BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
90ce3da70b43 Initial load duke parents: diff changeset	206	wordBoundary.setText(src);
90ce3da70b43 Initial load duke parents: diff changeset	207	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	208
90ce3da70b43 Initial load duke parents: diff changeset	209	// Look for a preceding 'cased' letter
90ce3da70b43 Initial load duke parents: diff changeset	210	for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
90ce3da70b43 Initial load duke parents: diff changeset	211	i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	212
90ce3da70b43 Initial load duke parents: diff changeset	213	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	214	if (isCased(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	215
90ce3da70b43 Initial load duke parents: diff changeset	216	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	217	// Check that there is no 'cased' letter after the index
90ce3da70b43 Initial load duke parents: diff changeset	218	for (i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	219	(i < len) && !wordBoundary.isBoundary(i);
90ce3da70b43 Initial load duke parents: diff changeset	220	i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	221
90ce3da70b43 Initial load duke parents: diff changeset	222	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	223	if (isCased(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	224	return false;
90ce3da70b43 Initial load duke parents: diff changeset	225	}
90ce3da70b43 Initial load duke parents: diff changeset	226	}
90ce3da70b43 Initial load duke parents: diff changeset	227
90ce3da70b43 Initial load duke parents: diff changeset	228	return true;
90ce3da70b43 Initial load duke parents: diff changeset	229	}
90ce3da70b43 Initial load duke parents: diff changeset	230	}
90ce3da70b43 Initial load duke parents: diff changeset	231
90ce3da70b43 Initial load duke parents: diff changeset	232	return false;
90ce3da70b43 Initial load duke parents: diff changeset	233	}
90ce3da70b43 Initial load duke parents: diff changeset	234
90ce3da70b43 Initial load duke parents: diff changeset	235	/**
90ce3da70b43 Initial load duke parents: diff changeset	236	* Implements the "After_I" condition
90ce3da70b43 Initial load duke parents: diff changeset	237	*
90ce3da70b43 Initial load duke parents: diff changeset	238	* Specification: The last preceding base character was an uppercase I,
90ce3da70b43 Initial load duke parents: diff changeset	239	* and there is no intervening combining character class 230 (ABOVE).
90ce3da70b43 Initial load duke parents: diff changeset	240	*
90ce3da70b43 Initial load duke parents: diff changeset	241	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	242	* Before C: [I]([{cc!=230}&{cc!=0}])*
90ce3da70b43 Initial load duke parents: diff changeset	243	*/
90ce3da70b43 Initial load duke parents: diff changeset	244	private static boolean isAfterI(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	245	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	246	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	247
90ce3da70b43 Initial load duke parents: diff changeset	248	// Look for the last preceding base character
90ce3da70b43 Initial load duke parents: diff changeset	249	for (int i = index; i > 0; i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	250
90ce3da70b43 Initial load duke parents: diff changeset	251	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	252
90ce3da70b43 Initial load duke parents: diff changeset	253	if (ch == 'I') {
90ce3da70b43 Initial load duke parents: diff changeset	254	return true;
90ce3da70b43 Initial load duke parents: diff changeset	255	} else {
90ce3da70b43 Initial load duke parents: diff changeset	256	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	257	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	258	return false;
90ce3da70b43 Initial load duke parents: diff changeset	259	}
90ce3da70b43 Initial load duke parents: diff changeset	260	}
90ce3da70b43 Initial load duke parents: diff changeset	261	}
90ce3da70b43 Initial load duke parents: diff changeset	262
90ce3da70b43 Initial load duke parents: diff changeset	263	return false;
90ce3da70b43 Initial load duke parents: diff changeset	264	}
90ce3da70b43 Initial load duke parents: diff changeset	265
90ce3da70b43 Initial load duke parents: diff changeset	266	/**
90ce3da70b43 Initial load duke parents: diff changeset	267	* Implements the "After_Soft_Dotted" condition
90ce3da70b43 Initial load duke parents: diff changeset	268	*
90ce3da70b43 Initial load duke parents: diff changeset	269	* Specification: The last preceding character with combining class
90ce3da70b43 Initial load duke parents: diff changeset	270	* of zero before C was Soft_Dotted, and there is no intervening
90ce3da70b43 Initial load duke parents: diff changeset	271	* combining character class 230 (ABOVE).
90ce3da70b43 Initial load duke parents: diff changeset	272	*
90ce3da70b43 Initial load duke parents: diff changeset	273	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	274	* Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
90ce3da70b43 Initial load duke parents: diff changeset	275	*/
90ce3da70b43 Initial load duke parents: diff changeset	276	private static boolean isAfterSoftDotted(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	277	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	278	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	279
90ce3da70b43 Initial load duke parents: diff changeset	280	// Look for the last preceding character
90ce3da70b43 Initial load duke parents: diff changeset	281	for (int i = index; i > 0; i -= Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	282
90ce3da70b43 Initial load duke parents: diff changeset	283	ch = src.codePointBefore(i);
90ce3da70b43 Initial load duke parents: diff changeset	284
90ce3da70b43 Initial load duke parents: diff changeset	285	if (isSoftDotted(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	286	return true;
90ce3da70b43 Initial load duke parents: diff changeset	287	} else {
90ce3da70b43 Initial load duke parents: diff changeset	288	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	289	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	290	return false;
90ce3da70b43 Initial load duke parents: diff changeset	291	}
90ce3da70b43 Initial load duke parents: diff changeset	292	}
90ce3da70b43 Initial load duke parents: diff changeset	293	}
90ce3da70b43 Initial load duke parents: diff changeset	294
90ce3da70b43 Initial load duke parents: diff changeset	295	return false;
90ce3da70b43 Initial load duke parents: diff changeset	296	}
90ce3da70b43 Initial load duke parents: diff changeset	297
90ce3da70b43 Initial load duke parents: diff changeset	298	/**
90ce3da70b43 Initial load duke parents: diff changeset	299	* Implements the "More_Above" condition
90ce3da70b43 Initial load duke parents: diff changeset	300	*
90ce3da70b43 Initial load duke parents: diff changeset	301	* Specification: C is followed by one or more characters of combining
90ce3da70b43 Initial load duke parents: diff changeset	302	* class 230 (ABOVE) in the combining character sequence.
90ce3da70b43 Initial load duke parents: diff changeset	303	*
90ce3da70b43 Initial load duke parents: diff changeset	304	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	305	* After C: [{cc!=0}]*[{cc==230}]
90ce3da70b43 Initial load duke parents: diff changeset	306	*/
90ce3da70b43 Initial load duke parents: diff changeset	307	private static boolean isMoreAbove(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	308	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	309	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	310	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	311
90ce3da70b43 Initial load duke parents: diff changeset	312	// Look for a following ABOVE combining class character
90ce3da70b43 Initial load duke parents: diff changeset	313	for (int i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	314	i < len; i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	315
90ce3da70b43 Initial load duke parents: diff changeset	316	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	317	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	318
90ce3da70b43 Initial load duke parents: diff changeset	319	if (cc == COMBINING_CLASS_ABOVE) {
90ce3da70b43 Initial load duke parents: diff changeset	320	return true;
90ce3da70b43 Initial load duke parents: diff changeset	321	} else if (cc == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	322	return false;
90ce3da70b43 Initial load duke parents: diff changeset	323	}
90ce3da70b43 Initial load duke parents: diff changeset	324	}
90ce3da70b43 Initial load duke parents: diff changeset	325
90ce3da70b43 Initial load duke parents: diff changeset	326	return false;
90ce3da70b43 Initial load duke parents: diff changeset	327	}
90ce3da70b43 Initial load duke parents: diff changeset	328
90ce3da70b43 Initial load duke parents: diff changeset	329	/**
90ce3da70b43 Initial load duke parents: diff changeset	330	* Implements the "Before_Dot" condition
90ce3da70b43 Initial load duke parents: diff changeset	331	*
90ce3da70b43 Initial load duke parents: diff changeset	332	* Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
90ce3da70b43 Initial load duke parents: diff changeset	333	* Any sequence of characters with a combining class that is
90ce3da70b43 Initial load duke parents: diff changeset	334	* neither 0 nor 230 may intervene between the current character
90ce3da70b43 Initial load duke parents: diff changeset	335	* and the combining dot above.
90ce3da70b43 Initial load duke parents: diff changeset	336	*
90ce3da70b43 Initial load duke parents: diff changeset	337	* Regular Expression:
90ce3da70b43 Initial load duke parents: diff changeset	338	* After C: ([{cc!=230}&{cc!=0}])*[\u0307]
90ce3da70b43 Initial load duke parents: diff changeset	339	*/
90ce3da70b43 Initial load duke parents: diff changeset	340	private static boolean isBeforeDot(String src, int index) {
90ce3da70b43 Initial load duke parents: diff changeset	341	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	342	int cc;
90ce3da70b43 Initial load duke parents: diff changeset	343	int len = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	344
90ce3da70b43 Initial load duke parents: diff changeset	345	// Look for a following COMBINING DOT ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	346	for (int i = index + Character.charCount(src.codePointAt(index));
90ce3da70b43 Initial load duke parents: diff changeset	347	i < len; i += Character.charCount(ch)) {
90ce3da70b43 Initial load duke parents: diff changeset	348
90ce3da70b43 Initial load duke parents: diff changeset	349	ch = src.codePointAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	350
90ce3da70b43 Initial load duke parents: diff changeset	351	if (ch == '\u0307') {
90ce3da70b43 Initial load duke parents: diff changeset	352	return true;
90ce3da70b43 Initial load duke parents: diff changeset	353	} else {
90ce3da70b43 Initial load duke parents: diff changeset	354	cc = Normalizer.getCombiningClass(ch);
90ce3da70b43 Initial load duke parents: diff changeset	355	if ((cc == 0) \|\| (cc == COMBINING_CLASS_ABOVE)) {
90ce3da70b43 Initial load duke parents: diff changeset	356	return false;
90ce3da70b43 Initial load duke parents: diff changeset	357	}
90ce3da70b43 Initial load duke parents: diff changeset	358	}
90ce3da70b43 Initial load duke parents: diff changeset	359	}
90ce3da70b43 Initial load duke parents: diff changeset	360
90ce3da70b43 Initial load duke parents: diff changeset	361	return false;
90ce3da70b43 Initial load duke parents: diff changeset	362	}
90ce3da70b43 Initial load duke parents: diff changeset	363
90ce3da70b43 Initial load duke parents: diff changeset	364	/**
90ce3da70b43 Initial load duke parents: diff changeset	365	* Examines whether a character is 'cased'.
90ce3da70b43 Initial load duke parents: diff changeset	366	*
90ce3da70b43 Initial load duke parents: diff changeset	367	* A character C is defined to be 'cased' if and only if at least one of
90ce3da70b43 Initial load duke parents: diff changeset	368	* following are true for C: uppercase==true, or lowercase==true, or
90ce3da70b43 Initial load duke parents: diff changeset	369	* general_category==titlecase_letter.
90ce3da70b43 Initial load duke parents: diff changeset	370	*
90ce3da70b43 Initial load duke parents: diff changeset	371	* The uppercase and lowercase property values are specified in the data
90ce3da70b43 Initial load duke parents: diff changeset	372	* file DerivedCoreProperties.txt in the Unicode Character Database.
90ce3da70b43 Initial load duke parents: diff changeset	373	*/
90ce3da70b43 Initial load duke parents: diff changeset	374	private static boolean isCased(int ch) {
90ce3da70b43 Initial load duke parents: diff changeset	375	int type = Character.getType(ch);
90ce3da70b43 Initial load duke parents: diff changeset	376	if (type == Character.LOWERCASE_LETTER \|\|
90ce3da70b43 Initial load duke parents: diff changeset	377	type == Character.UPPERCASE_LETTER \|\|
90ce3da70b43 Initial load duke parents: diff changeset	378	type == Character.TITLECASE_LETTER) {
90ce3da70b43 Initial load duke parents: diff changeset	379	return true;
90ce3da70b43 Initial load duke parents: diff changeset	380	} else {
90ce3da70b43 Initial load duke parents: diff changeset	381	// Check for Other_Lowercase and Other_Uppercase
90ce3da70b43 Initial load duke parents: diff changeset	382	//
90ce3da70b43 Initial load duke parents: diff changeset	383	if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
90ce3da70b43 Initial load duke parents: diff changeset	384	// MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
90ce3da70b43 Initial load duke parents: diff changeset	385	return true;
90ce3da70b43 Initial load duke parents: diff changeset	386	} else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
90ce3da70b43 Initial load duke parents: diff changeset	387	// MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
90ce3da70b43 Initial load duke parents: diff changeset	388	return true;
90ce3da70b43 Initial load duke parents: diff changeset	389	} else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
90ce3da70b43 Initial load duke parents: diff changeset	390	// MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
90ce3da70b43 Initial load duke parents: diff changeset	391	return true;
90ce3da70b43 Initial load duke parents: diff changeset	392	} else if (ch == 0x0345) {
90ce3da70b43 Initial load duke parents: diff changeset	393	// COMBINING GREEK YPOGEGRAMMENI
90ce3da70b43 Initial load duke parents: diff changeset	394	return true;
90ce3da70b43 Initial load duke parents: diff changeset	395	} else if (ch == 0x037A) {
90ce3da70b43 Initial load duke parents: diff changeset	396	// GREEK YPOGEGRAMMENI
90ce3da70b43 Initial load duke parents: diff changeset	397	return true;
90ce3da70b43 Initial load duke parents: diff changeset	398	} else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
90ce3da70b43 Initial load duke parents: diff changeset	399	// MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
90ce3da70b43 Initial load duke parents: diff changeset	400	return true;
90ce3da70b43 Initial load duke parents: diff changeset	401	} else if ((ch >= 0x2160) && (ch <= 0x217F)) {
90ce3da70b43 Initial load duke parents: diff changeset	402	// ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
90ce3da70b43 Initial load duke parents: diff changeset	403	// SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
90ce3da70b43 Initial load duke parents: diff changeset	404	return true;
90ce3da70b43 Initial load duke parents: diff changeset	405	} else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
90ce3da70b43 Initial load duke parents: diff changeset	406	// CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
90ce3da70b43 Initial load duke parents: diff changeset	407	// CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
90ce3da70b43 Initial load duke parents: diff changeset	408	return true;
90ce3da70b43 Initial load duke parents: diff changeset	409	} else {
90ce3da70b43 Initial load duke parents: diff changeset	410	return false;
90ce3da70b43 Initial load duke parents: diff changeset	411	}
90ce3da70b43 Initial load duke parents: diff changeset	412	}
90ce3da70b43 Initial load duke parents: diff changeset	413	}
90ce3da70b43 Initial load duke parents: diff changeset	414
90ce3da70b43 Initial load duke parents: diff changeset	415	private static boolean isSoftDotted(int ch) {
90ce3da70b43 Initial load duke parents: diff changeset	416	switch (ch) {
90ce3da70b43 Initial load duke parents: diff changeset	417	case 0x0069: // Soft_Dotted # L& LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	418	case 0x006A: // Soft_Dotted # L& LATIN SMALL LETTER J
90ce3da70b43 Initial load duke parents: diff changeset	419	case 0x012F: // Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK
90ce3da70b43 Initial load duke parents: diff changeset	420	case 0x0268: // Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE
90ce3da70b43 Initial load duke parents: diff changeset	421	case 0x0456: // Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
90ce3da70b43 Initial load duke parents: diff changeset	422	case 0x0458: // Soft_Dotted # L& CYRILLIC SMALL LETTER JE
90ce3da70b43 Initial load duke parents: diff changeset	423	case 0x1D62: // Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	424	case 0x1E2D: // Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
90ce3da70b43 Initial load duke parents: diff changeset	425	case 0x1ECB: // Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
90ce3da70b43 Initial load duke parents: diff changeset	426	case 0x2071: // Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
90ce3da70b43 Initial load duke parents: diff changeset	427	return true;
90ce3da70b43 Initial load duke parents: diff changeset	428	default:
90ce3da70b43 Initial load duke parents: diff changeset	429	return false;
90ce3da70b43 Initial load duke parents: diff changeset	430	}
90ce3da70b43 Initial load duke parents: diff changeset	431	}
90ce3da70b43 Initial load duke parents: diff changeset	432
90ce3da70b43 Initial load duke parents: diff changeset	433	/**
90ce3da70b43 Initial load duke parents: diff changeset	434	* An internal class that represents an entry in the Special Casing Properties.
90ce3da70b43 Initial load duke parents: diff changeset	435	*/
90ce3da70b43 Initial load duke parents: diff changeset	436	static class Entry {
90ce3da70b43 Initial load duke parents: diff changeset	437	int ch;
90ce3da70b43 Initial load duke parents: diff changeset	438	char [] lower;
90ce3da70b43 Initial load duke parents: diff changeset	439	char [] upper;
90ce3da70b43 Initial load duke parents: diff changeset	440	String lang;
90ce3da70b43 Initial load duke parents: diff changeset	441	int condition;
90ce3da70b43 Initial load duke parents: diff changeset	442
90ce3da70b43 Initial load duke parents: diff changeset	443	Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
90ce3da70b43 Initial load duke parents: diff changeset	444	this.ch = ch;
90ce3da70b43 Initial load duke parents: diff changeset	445	this.lower = lower;
90ce3da70b43 Initial load duke parents: diff changeset	446	this.upper = upper;
90ce3da70b43 Initial load duke parents: diff changeset	447	this.lang = lang;
90ce3da70b43 Initial load duke parents: diff changeset	448	this.condition = condition;
90ce3da70b43 Initial load duke parents: diff changeset	449	}
90ce3da70b43 Initial load duke parents: diff changeset	450
90ce3da70b43 Initial load duke parents: diff changeset	451	int getCodePoint() {
90ce3da70b43 Initial load duke parents: diff changeset	452	return ch;
90ce3da70b43 Initial load duke parents: diff changeset	453	}
90ce3da70b43 Initial load duke parents: diff changeset	454
90ce3da70b43 Initial load duke parents: diff changeset	455	char[] getLowerCase() {
90ce3da70b43 Initial load duke parents: diff changeset	456	return lower;
90ce3da70b43 Initial load duke parents: diff changeset	457	}
90ce3da70b43 Initial load duke parents: diff changeset	458
90ce3da70b43 Initial load duke parents: diff changeset	459	char[] getUpperCase() {
90ce3da70b43 Initial load duke parents: diff changeset	460	return upper;
90ce3da70b43 Initial load duke parents: diff changeset	461	}
90ce3da70b43 Initial load duke parents: diff changeset	462
90ce3da70b43 Initial load duke parents: diff changeset	463	String getLanguage() {
90ce3da70b43 Initial load duke parents: diff changeset	464	return lang;
90ce3da70b43 Initial load duke parents: diff changeset	465	}
90ce3da70b43 Initial load duke parents: diff changeset	466
90ce3da70b43 Initial load duke parents: diff changeset	467	int getCondition() {
90ce3da70b43 Initial load duke parents: diff changeset	468	return condition;
90ce3da70b43 Initial load duke parents: diff changeset	469	}
90ce3da70b43 Initial load duke parents: diff changeset	470	}
90ce3da70b43 Initial load duke parents: diff changeset	471	}

author	darcy
	Wed, 15 Aug 2018 10:44:56 -0700
changeset 51413	43e41800d579
parent 47216	71c04702a3d5
child 58288	48e480e56aad
permissions	-rw-r--r--