jdk-sandbox: jdk/make/src/classes/build/tools/generatebreakiteratordata/CharSet.java@fcdebb803c62 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
23010 6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013 lana parents: 21805 diff changeset	2	* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
2 90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	7	* published by the Free Software Foundation. Oracle designates this
2 90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	9	* by Oracle in the LICENSE file that accompanied this code.
2 90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	22	* or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	23	* questions.
2 90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25
90ce3da70b43 Initial load duke parents: diff changeset	26	/*
90ce3da70b43 Initial load duke parents: diff changeset	27	* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
90ce3da70b43 Initial load duke parents: diff changeset	28	* (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
90ce3da70b43 Initial load duke parents: diff changeset	29	*
90ce3da70b43 Initial load duke parents: diff changeset	30	* The original version of this source code and documentation
90ce3da70b43 Initial load duke parents: diff changeset	31	* is copyrighted and owned by Taligent, Inc., a wholly-owned
90ce3da70b43 Initial load duke parents: diff changeset	32	* subsidiary of IBM. These materials are provided under terms
90ce3da70b43 Initial load duke parents: diff changeset	33	* of a License Agreement between Taligent and Sun. This technology
90ce3da70b43 Initial load duke parents: diff changeset	34	* is protected by multiple US and International patents.
90ce3da70b43 Initial load duke parents: diff changeset	35	*
90ce3da70b43 Initial load duke parents: diff changeset	36	* This notice and attribution to Taligent may not be removed.
90ce3da70b43 Initial load duke parents: diff changeset	37	* Taligent is a registered trademark of Taligent, Inc.
90ce3da70b43 Initial load duke parents: diff changeset	38	*/
90ce3da70b43 Initial load duke parents: diff changeset	39
90ce3da70b43 Initial load duke parents: diff changeset	40	package build.tools.generatebreakiteratordata;
90ce3da70b43 Initial load duke parents: diff changeset	41
17950 b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	42	import java.util.Arrays;
2 90ce3da70b43 Initial load duke parents: diff changeset	43	import java.util.Hashtable;
90ce3da70b43 Initial load duke parents: diff changeset	44
90ce3da70b43 Initial load duke parents: diff changeset	45	/**
90ce3da70b43 Initial load duke parents: diff changeset	46	* An object representing a set of characters. (This is a "set" in the
90ce3da70b43 Initial load duke parents: diff changeset	47	* mathematical sense: an unduplicated list of characters on which set
90ce3da70b43 Initial load duke parents: diff changeset	48	* operations such as union and intersection can be performed.) The
90ce3da70b43 Initial load duke parents: diff changeset	49	* set information is stored in compressed, optimized form: The object
90ce3da70b43 Initial load duke parents: diff changeset	50	* contains an integer array with an even number of characters. Each
90ce3da70b43 Initial load duke parents: diff changeset	51	* pair of characters represents a range of characters contained in the set
90ce3da70b43 Initial load duke parents: diff changeset	52	* (a pair of the same character represents a single character). The
90ce3da70b43 Initial load duke parents: diff changeset	53	* characters are sorted in increasing order.
90ce3da70b43 Initial load duke parents: diff changeset	54	*/
90ce3da70b43 Initial load duke parents: diff changeset	55	class CharSet {
90ce3da70b43 Initial load duke parents: diff changeset	56	/**
90ce3da70b43 Initial load duke parents: diff changeset	57	* The structure containing the set information. The characters
90ce3da70b43 Initial load duke parents: diff changeset	58	* in this array are organized into pairs, each pair representing
90ce3da70b43 Initial load duke parents: diff changeset	59	* a range of characters contained in the set
90ce3da70b43 Initial load duke parents: diff changeset	60	*/
90ce3da70b43 Initial load duke parents: diff changeset	61	private int[] chars;
90ce3da70b43 Initial load duke parents: diff changeset	62
90ce3da70b43 Initial load duke parents: diff changeset	63	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	64	// parseString() and associated routines
90ce3da70b43 Initial load duke parents: diff changeset	65	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	66	/**
90ce3da70b43 Initial load duke parents: diff changeset	67	* A cache which is used to speed up parseString() whenever it is
90ce3da70b43 Initial load duke parents: diff changeset	68	* used to parse a description that has been parsed before
90ce3da70b43 Initial load duke parents: diff changeset	69	*/
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	70	private static Hashtable<String, CharSet> expressionCache = null;
2 90ce3da70b43 Initial load duke parents: diff changeset	71
90ce3da70b43 Initial load duke parents: diff changeset	72	/**
90ce3da70b43 Initial load duke parents: diff changeset	73	* Builds a CharSet based on a textual description. For the syntax of
90ce3da70b43 Initial load duke parents: diff changeset	74	* the description, see the documentation of RuleBasedBreakIterator.
90ce3da70b43 Initial load duke parents: diff changeset	75	* @see java.text.RuleBasedBreakIterator
90ce3da70b43 Initial load duke parents: diff changeset	76	*/
90ce3da70b43 Initial load duke parents: diff changeset	77	public static CharSet parseString(String s) {
90ce3da70b43 Initial load duke parents: diff changeset	78	CharSet result = null;
90ce3da70b43 Initial load duke parents: diff changeset	79
90ce3da70b43 Initial load duke parents: diff changeset	80	// if "s" is in the expression cache, pull the result out
90ce3da70b43 Initial load duke parents: diff changeset	81	// of the expresison cache
90ce3da70b43 Initial load duke parents: diff changeset	82	if (expressionCache != null) {
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	83	result = expressionCache.get(s);
2 90ce3da70b43 Initial load duke parents: diff changeset	84	}
90ce3da70b43 Initial load duke parents: diff changeset	85
90ce3da70b43 Initial load duke parents: diff changeset	86	// otherwise, use doParseString() to actually parse the string,
90ce3da70b43 Initial load duke parents: diff changeset	87	// and then add a corresponding entry to the expression cache
90ce3da70b43 Initial load duke parents: diff changeset	88	if (result == null) {
90ce3da70b43 Initial load duke parents: diff changeset	89	result = doParseString(s);
90ce3da70b43 Initial load duke parents: diff changeset	90	if (expressionCache == null) {
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	91	expressionCache = new Hashtable<>();
2 90ce3da70b43 Initial load duke parents: diff changeset	92	}
90ce3da70b43 Initial load duke parents: diff changeset	93	expressionCache.put(s, result);
90ce3da70b43 Initial load duke parents: diff changeset	94	}
90ce3da70b43 Initial load duke parents: diff changeset	95	result = (CharSet)(result.clone());
90ce3da70b43 Initial load duke parents: diff changeset	96	return result;
90ce3da70b43 Initial load duke parents: diff changeset	97	}
90ce3da70b43 Initial load duke parents: diff changeset	98
90ce3da70b43 Initial load duke parents: diff changeset	99	/**
90ce3da70b43 Initial load duke parents: diff changeset	100	* This function is used by parseString() to actually parse the string
90ce3da70b43 Initial load duke parents: diff changeset	101	*/
90ce3da70b43 Initial load duke parents: diff changeset	102	private static CharSet doParseString(String s) {
90ce3da70b43 Initial load duke parents: diff changeset	103	CharSet result = new CharSet();
90ce3da70b43 Initial load duke parents: diff changeset	104	int p = 0;
90ce3da70b43 Initial load duke parents: diff changeset	105
90ce3da70b43 Initial load duke parents: diff changeset	106	boolean haveDash = false;
90ce3da70b43 Initial load duke parents: diff changeset	107	boolean haveTilde = false;
90ce3da70b43 Initial load duke parents: diff changeset	108	boolean wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	109	int w = 0x0000;
90ce3da70b43 Initial load duke parents: diff changeset	110
90ce3da70b43 Initial load duke parents: diff changeset	111	// for each character in the description...
90ce3da70b43 Initial load duke parents: diff changeset	112	while (p < s.length()) {
90ce3da70b43 Initial load duke parents: diff changeset	113	int c = s.codePointAt(p);
90ce3da70b43 Initial load duke parents: diff changeset	114
90ce3da70b43 Initial load duke parents: diff changeset	115	// if it's an opening bracket...
90ce3da70b43 Initial load duke parents: diff changeset	116	if (c == '[') {
90ce3da70b43 Initial load duke parents: diff changeset	117	// flush the single-character cache
90ce3da70b43 Initial load duke parents: diff changeset	118	if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	119	result.internalUnion(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	120	}
90ce3da70b43 Initial load duke parents: diff changeset	121
90ce3da70b43 Initial load duke parents: diff changeset	122	// locate the matching closing bracket
90ce3da70b43 Initial load duke parents: diff changeset	123	int bracketLevel = 1;
90ce3da70b43 Initial load duke parents: diff changeset	124	int q = p + 1;
90ce3da70b43 Initial load duke parents: diff changeset	125	while (bracketLevel != 0) {
90ce3da70b43 Initial load duke parents: diff changeset	126	// if no matching bracket by end of string then...
90ce3da70b43 Initial load duke parents: diff changeset	127	if (q >= s.length()) {
90ce3da70b43 Initial load duke parents: diff changeset	128	throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
90ce3da70b43 Initial load duke parents: diff changeset	129	}
90ce3da70b43 Initial load duke parents: diff changeset	130	int ch = s.codePointAt(q);
90ce3da70b43 Initial load duke parents: diff changeset	131	switch (ch) {
90ce3da70b43 Initial load duke parents: diff changeset	132	case '\\': // need to step over next character
90ce3da70b43 Initial load duke parents: diff changeset	133	ch = s.codePointAt(++q);
90ce3da70b43 Initial load duke parents: diff changeset	134	break;
90ce3da70b43 Initial load duke parents: diff changeset	135	case '[':
90ce3da70b43 Initial load duke parents: diff changeset	136	++bracketLevel;
90ce3da70b43 Initial load duke parents: diff changeset	137	break;
90ce3da70b43 Initial load duke parents: diff changeset	138	case ']':
90ce3da70b43 Initial load duke parents: diff changeset	139	--bracketLevel;
90ce3da70b43 Initial load duke parents: diff changeset	140	break;
90ce3da70b43 Initial load duke parents: diff changeset	141	}
90ce3da70b43 Initial load duke parents: diff changeset	142	q += Character.charCount(ch);
90ce3da70b43 Initial load duke parents: diff changeset	143	}
90ce3da70b43 Initial load duke parents: diff changeset	144	--q;
90ce3da70b43 Initial load duke parents: diff changeset	145
90ce3da70b43 Initial load duke parents: diff changeset	146	// call parseString() recursively to parse the text inside
90ce3da70b43 Initial load duke parents: diff changeset	147	// the brackets, then either add or subtract the result from
90ce3da70b43 Initial load duke parents: diff changeset	148	// our running result depending on whether or not the []
90ce3da70b43 Initial load duke parents: diff changeset	149	// expresison was preceded by a ^
90ce3da70b43 Initial load duke parents: diff changeset	150	if (!haveTilde) {
90ce3da70b43 Initial load duke parents: diff changeset	151	result.internalUnion(CharSet.parseString(s.substring(p + 1, q)));
90ce3da70b43 Initial load duke parents: diff changeset	152	}
90ce3da70b43 Initial load duke parents: diff changeset	153	else {
90ce3da70b43 Initial load duke parents: diff changeset	154	result.internalDifference(CharSet.parseString(s.substring(p + 1, q)));
90ce3da70b43 Initial load duke parents: diff changeset	155	}
90ce3da70b43 Initial load duke parents: diff changeset	156	haveTilde = false;
90ce3da70b43 Initial load duke parents: diff changeset	157	haveDash = false;
90ce3da70b43 Initial load duke parents: diff changeset	158	wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	159	p = q + 1;
90ce3da70b43 Initial load duke parents: diff changeset	160	}
90ce3da70b43 Initial load duke parents: diff changeset	161
90ce3da70b43 Initial load duke parents: diff changeset	162	// if the character is a colon...
90ce3da70b43 Initial load duke parents: diff changeset	163	else if (c == ':') {
90ce3da70b43 Initial load duke parents: diff changeset	164	// flush the single-character cache
90ce3da70b43 Initial load duke parents: diff changeset	165	if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	166	result.internalUnion(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	167	}
90ce3da70b43 Initial load duke parents: diff changeset	168
90ce3da70b43 Initial load duke parents: diff changeset	169	// locate the matching colon (and throw an error if there
90ce3da70b43 Initial load duke parents: diff changeset	170	// isn't one)
90ce3da70b43 Initial load duke parents: diff changeset	171	int q = s.indexOf(':', p + 1);
90ce3da70b43 Initial load duke parents: diff changeset	172	if (q == -1) {
90ce3da70b43 Initial load duke parents: diff changeset	173	throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
90ce3da70b43 Initial load duke parents: diff changeset	174	}
90ce3da70b43 Initial load duke parents: diff changeset	175
90ce3da70b43 Initial load duke parents: diff changeset	176	// use charSetForCategory() to parse the text in the colons,
90ce3da70b43 Initial load duke parents: diff changeset	177	// and either add or substract the result from our running
90ce3da70b43 Initial load duke parents: diff changeset	178	// result depending on whether the :: expression was
90ce3da70b43 Initial load duke parents: diff changeset	179	// preceded by a ^
90ce3da70b43 Initial load duke parents: diff changeset	180	if (!haveTilde) {
90ce3da70b43 Initial load duke parents: diff changeset	181	result.internalUnion(charSetForCategory(s.substring(p + 1, q)));
90ce3da70b43 Initial load duke parents: diff changeset	182	}
90ce3da70b43 Initial load duke parents: diff changeset	183	else {
90ce3da70b43 Initial load duke parents: diff changeset	184	result.internalDifference(charSetForCategory(s.substring(p + 1, q)));
90ce3da70b43 Initial load duke parents: diff changeset	185	}
90ce3da70b43 Initial load duke parents: diff changeset	186
90ce3da70b43 Initial load duke parents: diff changeset	187	// reset everything and advance to the next character
90ce3da70b43 Initial load duke parents: diff changeset	188	haveTilde = false;
90ce3da70b43 Initial load duke parents: diff changeset	189	haveDash = false;
90ce3da70b43 Initial load duke parents: diff changeset	190	wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	191	p = q + 1;
90ce3da70b43 Initial load duke parents: diff changeset	192	}
90ce3da70b43 Initial load duke parents: diff changeset	193
90ce3da70b43 Initial load duke parents: diff changeset	194	// if the character is a dash, set an appropriate flag
90ce3da70b43 Initial load duke parents: diff changeset	195	else if (c == '-') {
90ce3da70b43 Initial load duke parents: diff changeset	196	if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	197	haveDash = true;
90ce3da70b43 Initial load duke parents: diff changeset	198	}
90ce3da70b43 Initial load duke parents: diff changeset	199	++p;
90ce3da70b43 Initial load duke parents: diff changeset	200	}
90ce3da70b43 Initial load duke parents: diff changeset	201
90ce3da70b43 Initial load duke parents: diff changeset	202	// if the character is a caret, flush the single-character
90ce3da70b43 Initial load duke parents: diff changeset	203	// cache and set an appropriate flag. If the set is empty
90ce3da70b43 Initial load duke parents: diff changeset	204	// (i.e., if the expression begins with ^), invert the set
90ce3da70b43 Initial load duke parents: diff changeset	205	// (i.e., set it to include everything). The idea here is
90ce3da70b43 Initial load duke parents: diff changeset	206	// that a set that includes nothing but ^ expressions
90ce3da70b43 Initial load duke parents: diff changeset	207	// means "everything but these things".
90ce3da70b43 Initial load duke parents: diff changeset	208	else if (c == '^') {
90ce3da70b43 Initial load duke parents: diff changeset	209	if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	210	result.internalUnion(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	211	wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	212	}
90ce3da70b43 Initial load duke parents: diff changeset	213	haveTilde = true;
90ce3da70b43 Initial load duke parents: diff changeset	214	++p;
90ce3da70b43 Initial load duke parents: diff changeset	215	if (result.empty()) {
90ce3da70b43 Initial load duke parents: diff changeset	216	result.internalComplement();
90ce3da70b43 Initial load duke parents: diff changeset	217	}
90ce3da70b43 Initial load duke parents: diff changeset	218	}
90ce3da70b43 Initial load duke parents: diff changeset	219
90ce3da70b43 Initial load duke parents: diff changeset	220	// throw an exception on an illegal character
90ce3da70b43 Initial load duke parents: diff changeset	221	else if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
90ce3da70b43 Initial load duke parents: diff changeset	222	&& !Character.isDigit((char)c) && c != '\\') {
90ce3da70b43 Initial load duke parents: diff changeset	223	throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
90ce3da70b43 Initial load duke parents: diff changeset	224	}
90ce3da70b43 Initial load duke parents: diff changeset	225
90ce3da70b43 Initial load duke parents: diff changeset	226	// otherwise, we end up here...
90ce3da70b43 Initial load duke parents: diff changeset	227	else {
90ce3da70b43 Initial load duke parents: diff changeset	228	// on a backslash, advance to the next character
90ce3da70b43 Initial load duke parents: diff changeset	229	if (c == '\\') {
90ce3da70b43 Initial load duke parents: diff changeset	230	++p;
90ce3da70b43 Initial load duke parents: diff changeset	231	}
90ce3da70b43 Initial load duke parents: diff changeset	232
90ce3da70b43 Initial load duke parents: diff changeset	233	// if the preceding character was a dash, this character
90ce3da70b43 Initial load duke parents: diff changeset	234	// defines the end of a range. Add or subtract that range
90ce3da70b43 Initial load duke parents: diff changeset	235	// from the running result depending on whether or not it
90ce3da70b43 Initial load duke parents: diff changeset	236	// was preceded by a ^
90ce3da70b43 Initial load duke parents: diff changeset	237	if (haveDash) {
90ce3da70b43 Initial load duke parents: diff changeset	238	if (s.codePointAt(p) < w) {
90ce3da70b43 Initial load duke parents: diff changeset	239	throw new IllegalArgumentException("U+" +
90ce3da70b43 Initial load duke parents: diff changeset	240	Integer.toHexString(s.codePointAt(p))
90ce3da70b43 Initial load duke parents: diff changeset	241	+ " is less than U+" + Integer.toHexString(w) + ". Dash expressions "
90ce3da70b43 Initial load duke parents: diff changeset	242	+ "can't have their endpoints in reverse order.");
90ce3da70b43 Initial load duke parents: diff changeset	243	}
90ce3da70b43 Initial load duke parents: diff changeset	244
90ce3da70b43 Initial load duke parents: diff changeset	245	int ch = s.codePointAt(p);
90ce3da70b43 Initial load duke parents: diff changeset	246	if (!haveTilde) {
90ce3da70b43 Initial load duke parents: diff changeset	247	result.internalUnion(new CharSet(w, ch));
90ce3da70b43 Initial load duke parents: diff changeset	248	}
90ce3da70b43 Initial load duke parents: diff changeset	249	else {
90ce3da70b43 Initial load duke parents: diff changeset	250	result.internalDifference(new CharSet(w, ch));
90ce3da70b43 Initial load duke parents: diff changeset	251	}
90ce3da70b43 Initial load duke parents: diff changeset	252	p += Character.charCount(ch);
90ce3da70b43 Initial load duke parents: diff changeset	253	haveDash = false;
90ce3da70b43 Initial load duke parents: diff changeset	254	haveTilde = false;
90ce3da70b43 Initial load duke parents: diff changeset	255	wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	256	}
90ce3da70b43 Initial load duke parents: diff changeset	257
90ce3da70b43 Initial load duke parents: diff changeset	258	// if the preceding character was a caret, remove this character
90ce3da70b43 Initial load duke parents: diff changeset	259	// from the running result
90ce3da70b43 Initial load duke parents: diff changeset	260	else if (haveTilde) {
90ce3da70b43 Initial load duke parents: diff changeset	261	w = s.codePointAt(p);
90ce3da70b43 Initial load duke parents: diff changeset	262	result.internalDifference(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	263	p += Character.charCount(w);
90ce3da70b43 Initial load duke parents: diff changeset	264	haveTilde = false;
90ce3da70b43 Initial load duke parents: diff changeset	265	wIsReal = false;
90ce3da70b43 Initial load duke parents: diff changeset	266	}
90ce3da70b43 Initial load duke parents: diff changeset	267
90ce3da70b43 Initial load duke parents: diff changeset	268	// otherwise, flush the single-character cache and then
90ce3da70b43 Initial load duke parents: diff changeset	269	// put this character into the cache
90ce3da70b43 Initial load duke parents: diff changeset	270	else if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	271	result.internalUnion(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	272	w = s.codePointAt(p);
90ce3da70b43 Initial load duke parents: diff changeset	273	p += Character.charCount(w);
90ce3da70b43 Initial load duke parents: diff changeset	274	wIsReal = true;
90ce3da70b43 Initial load duke parents: diff changeset	275	} else {
90ce3da70b43 Initial load duke parents: diff changeset	276	w = s.codePointAt(p);
90ce3da70b43 Initial load duke parents: diff changeset	277	p += Character.charCount(w);
90ce3da70b43 Initial load duke parents: diff changeset	278	wIsReal = true;
90ce3da70b43 Initial load duke parents: diff changeset	279	}
90ce3da70b43 Initial load duke parents: diff changeset	280	}
90ce3da70b43 Initial load duke parents: diff changeset	281	}
90ce3da70b43 Initial load duke parents: diff changeset	282
90ce3da70b43 Initial load duke parents: diff changeset	283	// finally, flush the single-character cache one last time
90ce3da70b43 Initial load duke parents: diff changeset	284	if (wIsReal) {
90ce3da70b43 Initial load duke parents: diff changeset	285	result.internalUnion(new CharSet(w));
90ce3da70b43 Initial load duke parents: diff changeset	286	}
90ce3da70b43 Initial load duke parents: diff changeset	287
90ce3da70b43 Initial load duke parents: diff changeset	288	return result;
90ce3da70b43 Initial load duke parents: diff changeset	289	}
90ce3da70b43 Initial load duke parents: diff changeset	290
90ce3da70b43 Initial load duke parents: diff changeset	291	/**
90ce3da70b43 Initial load duke parents: diff changeset	292	* Creates a CharSet containing all the characters in a particular
90ce3da70b43 Initial load duke parents: diff changeset	293	* Unicode category. The text is either a two-character code from
90ce3da70b43 Initial load duke parents: diff changeset	294	* the Unicode database or a single character that begins one or more
90ce3da70b43 Initial load duke parents: diff changeset	295	* two-character codes.
90ce3da70b43 Initial load duke parents: diff changeset	296	*/
90ce3da70b43 Initial load duke parents: diff changeset	297	private static CharSet charSetForCategory(String category) {
90ce3da70b43 Initial load duke parents: diff changeset	298	// throw an exception if we have anything other than one or two
90ce3da70b43 Initial load duke parents: diff changeset	299	// characters inside the colons
90ce3da70b43 Initial load duke parents: diff changeset	300	if (category.length() == 0 \|\| category.length() >= 3) {
90ce3da70b43 Initial load duke parents: diff changeset	301	throw new IllegalArgumentException("Invalid character category: " + category);
90ce3da70b43 Initial load duke parents: diff changeset	302	}
90ce3da70b43 Initial load duke parents: diff changeset	303
90ce3da70b43 Initial load duke parents: diff changeset	304	// if we have two characters, search the category map for that code
90ce3da70b43 Initial load duke parents: diff changeset	305	// and either construct and return a CharSet from the data in the
90ce3da70b43 Initial load duke parents: diff changeset	306	// category map or throw an exception
90ce3da70b43 Initial load duke parents: diff changeset	307	if (category.length() == 2) {
90ce3da70b43 Initial load duke parents: diff changeset	308	for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	309	if (CharacterCategory.categoryNames[i].equals(category)) {
90ce3da70b43 Initial load duke parents: diff changeset	310	return new CharSet(CharacterCategory.getCategoryMap(i));
90ce3da70b43 Initial load duke parents: diff changeset	311	}
90ce3da70b43 Initial load duke parents: diff changeset	312	}
90ce3da70b43 Initial load duke parents: diff changeset	313	throw new IllegalArgumentException("Invalid character category: " + category);
90ce3da70b43 Initial load duke parents: diff changeset	314	}
90ce3da70b43 Initial load duke parents: diff changeset	315
90ce3da70b43 Initial load duke parents: diff changeset	316	// if we have one character, search the category map for codes beginning
90ce3da70b43 Initial load duke parents: diff changeset	317	// with that letter, and union together all of the matching sets that
90ce3da70b43 Initial load duke parents: diff changeset	318	// we find (or throw an exception if there are no matches)
90ce3da70b43 Initial load duke parents: diff changeset	319	else if (category.length() == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	320	CharSet result = new CharSet();
90ce3da70b43 Initial load duke parents: diff changeset	321	for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
90ce3da70b43 Initial load duke parents: diff changeset	322	if (CharacterCategory.categoryNames[i].startsWith(category)) {
90ce3da70b43 Initial load duke parents: diff changeset	323	result = result.union(new CharSet(CharacterCategory.getCategoryMap(i)));
90ce3da70b43 Initial load duke parents: diff changeset	324	}
90ce3da70b43 Initial load duke parents: diff changeset	325	}
90ce3da70b43 Initial load duke parents: diff changeset	326	if (result.empty()) {
90ce3da70b43 Initial load duke parents: diff changeset	327	throw new IllegalArgumentException("Invalid character category: " + category);
90ce3da70b43 Initial load duke parents: diff changeset	328	}
90ce3da70b43 Initial load duke parents: diff changeset	329	else {
90ce3da70b43 Initial load duke parents: diff changeset	330	return result;
90ce3da70b43 Initial load duke parents: diff changeset	331	}
90ce3da70b43 Initial load duke parents: diff changeset	332	}
90ce3da70b43 Initial load duke parents: diff changeset	333	return new CharSet(); // should never get here, but to make the compiler happy...
90ce3da70b43 Initial load duke parents: diff changeset	334	}
90ce3da70b43 Initial load duke parents: diff changeset	335
90ce3da70b43 Initial load duke parents: diff changeset	336	/**
90ce3da70b43 Initial load duke parents: diff changeset	337	* Returns CharSet's expression cache itself (not a copy; it may be null)
90ce3da70b43 Initial load duke parents: diff changeset	338	* and resets the cache to null, so that parseString() rebuilds it lazily.
90ce3da70b43 Initial load duke parents: diff changeset	339	*/
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	340	public static Hashtable<String, CharSet> releaseExpressionCache() {
75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	341	Hashtable<String, CharSet> result = expressionCache;
2 90ce3da70b43 Initial load duke parents: diff changeset	342	expressionCache = null;
90ce3da70b43 Initial load duke parents: diff changeset	343	return result;
90ce3da70b43 Initial load duke parents: diff changeset	344	}
90ce3da70b43 Initial load duke parents: diff changeset	345
90ce3da70b43 Initial load duke parents: diff changeset	346	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	347	// CharSet manipulation
90ce3da70b43 Initial load duke parents: diff changeset	348	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	349	/**
90ce3da70b43 Initial load duke parents: diff changeset	350	* Creates an empty CharSet.
90ce3da70b43 Initial load duke parents: diff changeset	351	*/
public CharSet() {
    // An empty set is represented by a zero-length array of range pairs.
    this.chars = new int[] {};
}
90ce3da70b43 Initial load duke parents: diff changeset	355
/**
 * Constructs a CharSet holding exactly one character, stored as a single
 * range whose low and high bounds are both that character.
 * @param c The character to put into the CharSet
 */
public CharSet(int c) {
    this.chars = new int[] { c, c };
}
90ce3da70b43 Initial load duke parents: diff changeset	365
90ce3da70b43 Initial load duke parents: diff changeset	366	/**
90ce3da70b43 Initial load duke parents: diff changeset	367	* Creates a CharSet containing a range of characters.
90ce3da70b43 Initial load duke parents: diff changeset	368	* @param lo The lowest-numbered character to include in the range
90ce3da70b43 Initial load duke parents: diff changeset	369	* @param hi The highest-numbered character to include in the range
90ce3da70b43 Initial load duke parents: diff changeset	370	*/
90ce3da70b43 Initial load duke parents: diff changeset	371	public CharSet(int lo, int hi) {
90ce3da70b43 Initial load duke parents: diff changeset	372	chars = new int[2];
90ce3da70b43 Initial load duke parents: diff changeset	373	if (lo <= hi) {
90ce3da70b43 Initial load duke parents: diff changeset	374	chars[0] = lo;
90ce3da70b43 Initial load duke parents: diff changeset	375	chars[1] = hi;
90ce3da70b43 Initial load duke parents: diff changeset	376	}
90ce3da70b43 Initial load duke parents: diff changeset	377	else {
90ce3da70b43 Initial load duke parents: diff changeset	378	chars[0] = hi;
90ce3da70b43 Initial load duke parents: diff changeset	379	chars[1] = lo;
90ce3da70b43 Initial load duke parents: diff changeset	380	}
90ce3da70b43 Initial load duke parents: diff changeset	381	}
90ce3da70b43 Initial load duke parents: diff changeset	382
/**
 * Wraps an existing range array in a new CharSet.  The array is adopted
 * as-is: it is neither copied nor validated, so the caller is responsible
 * for handing over well-formed storage.
 */
private CharSet(int[] chars) {
    this.chars = chars;
}
90ce3da70b43 Initial load duke parents: diff changeset	391
90ce3da70b43 Initial load duke parents: diff changeset	392	/**
90ce3da70b43 Initial load duke parents: diff changeset	393	* Returns a CharSet representing the union of two CharSets.
90ce3da70b43 Initial load duke parents: diff changeset	394	*/
90ce3da70b43 Initial load duke parents: diff changeset	395	public CharSet union(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	396	return new CharSet(doUnion(that.chars));
90ce3da70b43 Initial load duke parents: diff changeset	397	}
90ce3da70b43 Initial load duke parents: diff changeset	398
90ce3da70b43 Initial load duke parents: diff changeset	399	/**
90ce3da70b43 Initial load duke parents: diff changeset	400	* Adds the characters in "that" to this CharSet
90ce3da70b43 Initial load duke parents: diff changeset	401	*/
90ce3da70b43 Initial load duke parents: diff changeset	402	private void internalUnion(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	403	chars = doUnion(that.chars);
90ce3da70b43 Initial load duke parents: diff changeset	404	}
90ce3da70b43 Initial load duke parents: diff changeset	405
90ce3da70b43 Initial load duke parents: diff changeset	406	/**
90ce3da70b43 Initial load duke parents: diff changeset	407	* The actual implementation of the union functions
90ce3da70b43 Initial load duke parents: diff changeset	408	*/
90ce3da70b43 Initial load duke parents: diff changeset	409	private int[] doUnion(int[] c2) {
90ce3da70b43 Initial load duke parents: diff changeset	410	int[] result = new int[chars.length+c2.length];
90ce3da70b43 Initial load duke parents: diff changeset	411
90ce3da70b43 Initial load duke parents: diff changeset	412	int i = 0;
90ce3da70b43 Initial load duke parents: diff changeset	413	int j = 0;
90ce3da70b43 Initial load duke parents: diff changeset	414	int index = 0;
90ce3da70b43 Initial load duke parents: diff changeset	415
90ce3da70b43 Initial load duke parents: diff changeset	416	// consider all the characters in both strings
90ce3da70b43 Initial load duke parents: diff changeset	417	while (i < chars.length && j < c2.length) {
90ce3da70b43 Initial load duke parents: diff changeset	418	int ub;
90ce3da70b43 Initial load duke parents: diff changeset	419
90ce3da70b43 Initial load duke parents: diff changeset	420	// the first character in the result is the lower of the
90ce3da70b43 Initial load duke parents: diff changeset	421	// starting characters of the two strings, and "ub" gets
90ce3da70b43 Initial load duke parents: diff changeset	422	// set to the upper bound of that range
90ce3da70b43 Initial load duke parents: diff changeset	423	if (chars[i] < c2[j]) {
90ce3da70b43 Initial load duke parents: diff changeset	424	result[index++] = chars[i];
90ce3da70b43 Initial load duke parents: diff changeset	425	ub = chars[++i];
90ce3da70b43 Initial load duke parents: diff changeset	426	}
90ce3da70b43 Initial load duke parents: diff changeset	427	else {
90ce3da70b43 Initial load duke parents: diff changeset	428	result[index++] = c2[j];
90ce3da70b43 Initial load duke parents: diff changeset	429	ub = c2[++j];
90ce3da70b43 Initial load duke parents: diff changeset	430	}
90ce3da70b43 Initial load duke parents: diff changeset	431
90ce3da70b43 Initial load duke parents: diff changeset	432	// for as long as one of our two pointers is pointing to a range's
90ce3da70b43 Initial load duke parents: diff changeset	433	// end point, or i is pointing to a character that is less than
90ce3da70b43 Initial load duke parents: diff changeset	434	// "ub" plus one (the "plus one" stitches touching ranges together)...
90ce3da70b43 Initial load duke parents: diff changeset	435	while (i % 2 == 1 \|\|
90ce3da70b43 Initial load duke parents: diff changeset	436	j % 2 == 1 \|\|
90ce3da70b43 Initial load duke parents: diff changeset	437	(i < chars.length && chars[i] <= ub + 1)) {
90ce3da70b43 Initial load duke parents: diff changeset	438
90ce3da70b43 Initial load duke parents: diff changeset	439	// advance i to the first character that is greater than
90ce3da70b43 Initial load duke parents: diff changeset	440	// "ub" plus one
90ce3da70b43 Initial load duke parents: diff changeset	441	while (i < chars.length && chars[i] <= ub + 1) {
90ce3da70b43 Initial load duke parents: diff changeset	442	++i;
90ce3da70b43 Initial load duke parents: diff changeset	443	}
90ce3da70b43 Initial load duke parents: diff changeset	444
90ce3da70b43 Initial load duke parents: diff changeset	445	// if i points to the endpoint of a range, update "ub"
90ce3da70b43 Initial load duke parents: diff changeset	446	// to that character, or if i points to the start of
90ce3da70b43 Initial load duke parents: diff changeset	447	// a range and the endpoint of the preceding range is
90ce3da70b43 Initial load duke parents: diff changeset	448	// greater than "ub", update "up" to _that_ character
90ce3da70b43 Initial load duke parents: diff changeset	449	if (i % 2 == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	450	ub = chars[i];
90ce3da70b43 Initial load duke parents: diff changeset	451	}
90ce3da70b43 Initial load duke parents: diff changeset	452	else if (i > 0 && chars[i - 1] > ub) {
90ce3da70b43 Initial load duke parents: diff changeset	453	ub = chars[i - 1];
90ce3da70b43 Initial load duke parents: diff changeset	454	}
90ce3da70b43 Initial load duke parents: diff changeset	455
90ce3da70b43 Initial load duke parents: diff changeset	456	// now advance j to the first character that is greater
90ce3da70b43 Initial load duke parents: diff changeset	457	// that "ub" plus one
90ce3da70b43 Initial load duke parents: diff changeset	458	while (j < c2.length && c2[j] <= ub + 1) {
90ce3da70b43 Initial load duke parents: diff changeset	459	++j;
90ce3da70b43 Initial load duke parents: diff changeset	460	}
90ce3da70b43 Initial load duke parents: diff changeset	461
90ce3da70b43 Initial load duke parents: diff changeset	462	// if j points to the endpoint of a range, update "ub"
90ce3da70b43 Initial load duke parents: diff changeset	463	// to that character, or if j points to the start of
90ce3da70b43 Initial load duke parents: diff changeset	464	// a range and the endpoint of the preceding range is
90ce3da70b43 Initial load duke parents: diff changeset	465	// greater than "ub", update "up" to _that_ character
90ce3da70b43 Initial load duke parents: diff changeset	466	if (j % 2 == 1) {
90ce3da70b43 Initial load duke parents: diff changeset	467	ub = c2[j];
90ce3da70b43 Initial load duke parents: diff changeset	468	}
90ce3da70b43 Initial load duke parents: diff changeset	469	else if (j > 0 && c2[j - 1] > ub) {
90ce3da70b43 Initial load duke parents: diff changeset	470	ub = c2[j - 1];
90ce3da70b43 Initial load duke parents: diff changeset	471	}
90ce3da70b43 Initial load duke parents: diff changeset	472	}
90ce3da70b43 Initial load duke parents: diff changeset	473	// when we finally fall out of this loop, we will have stitched
90ce3da70b43 Initial load duke parents: diff changeset	474	// together a series of ranges that overlap or touch, i and j
90ce3da70b43 Initial load duke parents: diff changeset	475	// will both point to starting points of ranges, and "ub" will
90ce3da70b43 Initial load duke parents: diff changeset	476	// be the endpoint of the range we're working on. Write "ub"
90ce3da70b43 Initial load duke parents: diff changeset	477	// to the result
90ce3da70b43 Initial load duke parents: diff changeset	478	result[index++] = ub;
90ce3da70b43 Initial load duke parents: diff changeset	479
90ce3da70b43 Initial load duke parents: diff changeset	480	// loop back around to create the next range in the result
90ce3da70b43 Initial load duke parents: diff changeset	481	}
90ce3da70b43 Initial load duke parents: diff changeset	482
90ce3da70b43 Initial load duke parents: diff changeset	483	// we fall out to here when we've exhausted all the characters in
90ce3da70b43 Initial load duke parents: diff changeset	484	// one of the operands. We can append all of the remaining characters
90ce3da70b43 Initial load duke parents: diff changeset	485	// in the other operand without doing any extra work.
90ce3da70b43 Initial load duke parents: diff changeset	486	if (i < chars.length) {
90ce3da70b43 Initial load duke parents: diff changeset	487	for (int k = i; k < chars.length; k++) {
90ce3da70b43 Initial load duke parents: diff changeset	488	result[index++] = chars[k];
90ce3da70b43 Initial load duke parents: diff changeset	489	}
90ce3da70b43 Initial load duke parents: diff changeset	490	}
90ce3da70b43 Initial load duke parents: diff changeset	491	if (j < c2.length) {
90ce3da70b43 Initial load duke parents: diff changeset	492	for (int k = j; k < c2.length; k++) {
90ce3da70b43 Initial load duke parents: diff changeset	493	result[index++] = c2[k];
90ce3da70b43 Initial load duke parents: diff changeset	494	}
90ce3da70b43 Initial load duke parents: diff changeset	495	}
90ce3da70b43 Initial load duke parents: diff changeset	496
90ce3da70b43 Initial load duke parents: diff changeset	497	if (result.length > index) {
90ce3da70b43 Initial load duke parents: diff changeset	498	int[] tmpbuf = new int[index];
90ce3da70b43 Initial load duke parents: diff changeset	499	System.arraycopy(result, 0, tmpbuf, 0, index);
90ce3da70b43 Initial load duke parents: diff changeset	500	return tmpbuf;
90ce3da70b43 Initial load duke parents: diff changeset	501	}
90ce3da70b43 Initial load duke parents: diff changeset	502
90ce3da70b43 Initial load duke parents: diff changeset	503	return result;
90ce3da70b43 Initial load duke parents: diff changeset	504	}
90ce3da70b43 Initial load duke parents: diff changeset	505
90ce3da70b43 Initial load duke parents: diff changeset	506	/**
90ce3da70b43 Initial load duke parents: diff changeset	507	* Returns the intersection of two CharSets.
90ce3da70b43 Initial load duke parents: diff changeset	508	*/
90ce3da70b43 Initial load duke parents: diff changeset	509	public CharSet intersection(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	510	return new CharSet(doIntersection(that.chars));
90ce3da70b43 Initial load duke parents: diff changeset	511	}
90ce3da70b43 Initial load duke parents: diff changeset	512
90ce3da70b43 Initial load duke parents: diff changeset	513	/**
90ce3da70b43 Initial load duke parents: diff changeset	514	* Removes from this CharSet any characters that aren't also in "that"
90ce3da70b43 Initial load duke parents: diff changeset	515	*/
90ce3da70b43 Initial load duke parents: diff changeset	516	private void internalIntersection(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	517	chars = doIntersection(that.chars);
90ce3da70b43 Initial load duke parents: diff changeset	518	}
90ce3da70b43 Initial load duke parents: diff changeset	519
90ce3da70b43 Initial load duke parents: diff changeset	520	/**
90ce3da70b43 Initial load duke parents: diff changeset	521	* The internal implementation of the two intersection functions
90ce3da70b43 Initial load duke parents: diff changeset	522	*/
90ce3da70b43 Initial load duke parents: diff changeset	523	private int[] doIntersection(int[] c2) {
90ce3da70b43 Initial load duke parents: diff changeset	524	int[] result = new int[chars.length+c2.length];
90ce3da70b43 Initial load duke parents: diff changeset	525
90ce3da70b43 Initial load duke parents: diff changeset	526	int i = 0;
90ce3da70b43 Initial load duke parents: diff changeset	527	int j = 0;
90ce3da70b43 Initial load duke parents: diff changeset	528	int oldI;
90ce3da70b43 Initial load duke parents: diff changeset	529	int oldJ;
90ce3da70b43 Initial load duke parents: diff changeset	530	int index = 0;
90ce3da70b43 Initial load duke parents: diff changeset	531
90ce3da70b43 Initial load duke parents: diff changeset	532	// iterate until we've exhausted one of the operands
90ce3da70b43 Initial load duke parents: diff changeset	533	while (i < chars.length && j < c2.length) {
90ce3da70b43 Initial load duke parents: diff changeset	534
90ce3da70b43 Initial load duke parents: diff changeset	535	// advance j until it points to a character that is larger than
90ce3da70b43 Initial load duke parents: diff changeset	536	// the one i points to. If this is the beginning of a one-
90ce3da70b43 Initial load duke parents: diff changeset	537	// character range, advance j to point to the end
90ce3da70b43 Initial load duke parents: diff changeset	538	if (i < chars.length && i % 2 == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	539	while (j < c2.length && c2[j] < chars[i]) {
90ce3da70b43 Initial load duke parents: diff changeset	540	++j;
90ce3da70b43 Initial load duke parents: diff changeset	541	}
90ce3da70b43 Initial load duke parents: diff changeset	542	if (j < c2.length && j % 2 == 0 && c2[j] == chars[i]) {
90ce3da70b43 Initial load duke parents: diff changeset	543	++j;
90ce3da70b43 Initial load duke parents: diff changeset	544	}
90ce3da70b43 Initial load duke parents: diff changeset	545	}
90ce3da70b43 Initial load duke parents: diff changeset	546
90ce3da70b43 Initial load duke parents: diff changeset	547	// if j points to the endpoint of a range, save the current
90ce3da70b43 Initial load duke parents: diff changeset	548	// value of i, then advance i until it reaches a character
90ce3da70b43 Initial load duke parents: diff changeset	549	// which is larger than the character pointed at
90ce3da70b43 Initial load duke parents: diff changeset	550	// by j. All of the characters we've advanced over (except
90ce3da70b43 Initial load duke parents: diff changeset	551	// the one currently pointed to by i) are added to the result
90ce3da70b43 Initial load duke parents: diff changeset	552	oldI = i;
90ce3da70b43 Initial load duke parents: diff changeset	553	while (j % 2 == 1 && i < chars.length && chars[i] <= c2[j]) {
90ce3da70b43 Initial load duke parents: diff changeset	554	++i;
90ce3da70b43 Initial load duke parents: diff changeset	555	}
90ce3da70b43 Initial load duke parents: diff changeset	556	for (int k = oldI; k < i; k++) {
90ce3da70b43 Initial load duke parents: diff changeset	557	result[index++] = chars[k];
90ce3da70b43 Initial load duke parents: diff changeset	558	}
90ce3da70b43 Initial load duke parents: diff changeset	559
90ce3da70b43 Initial load duke parents: diff changeset	560	// if i points to the endpoint of a range, save the current
90ce3da70b43 Initial load duke parents: diff changeset	561	// value of j, then advance j until it reaches a character
90ce3da70b43 Initial load duke parents: diff changeset	562	// which is larger than the character pointed at
90ce3da70b43 Initial load duke parents: diff changeset	563	// by i. All of the characters we've advanced over (except
90ce3da70b43 Initial load duke parents: diff changeset	564	// the one currently pointed to by i) are added to the result
90ce3da70b43 Initial load duke parents: diff changeset	565	oldJ = j;
90ce3da70b43 Initial load duke parents: diff changeset	566	while (i % 2 == 1 && j < c2.length && c2[j] <= chars[i]) {
90ce3da70b43 Initial load duke parents: diff changeset	567	++j;
90ce3da70b43 Initial load duke parents: diff changeset	568	}
90ce3da70b43 Initial load duke parents: diff changeset	569	for (int k = oldJ; k < j; k++) {
90ce3da70b43 Initial load duke parents: diff changeset	570	result[index++] = c2[k];
90ce3da70b43 Initial load duke parents: diff changeset	571	}
90ce3da70b43 Initial load duke parents: diff changeset	572
90ce3da70b43 Initial load duke parents: diff changeset	573	// advance i until it points to a character larger than j
90ce3da70b43 Initial load duke parents: diff changeset	574	// If it points at the beginning of a one-character range,
90ce3da70b43 Initial load duke parents: diff changeset	575	// advance it to the end of that range
90ce3da70b43 Initial load duke parents: diff changeset	576	if (j < c2.length && j % 2 == 0) {
90ce3da70b43 Initial load duke parents: diff changeset	577	while (i < chars.length && chars[i] < c2[j]) {
90ce3da70b43 Initial load duke parents: diff changeset	578	++i;
90ce3da70b43 Initial load duke parents: diff changeset	579	}
90ce3da70b43 Initial load duke parents: diff changeset	580	if (i < chars.length && i % 2 == 0 && c2[j] == chars[i]) {
90ce3da70b43 Initial load duke parents: diff changeset	581	++i;
90ce3da70b43 Initial load duke parents: diff changeset	582	}
90ce3da70b43 Initial load duke parents: diff changeset	583	}
90ce3da70b43 Initial load duke parents: diff changeset	584	}
90ce3da70b43 Initial load duke parents: diff changeset	585
90ce3da70b43 Initial load duke parents: diff changeset	586	if (result.length > index) {
90ce3da70b43 Initial load duke parents: diff changeset	587	int[] tmpbuf = new int[index];
90ce3da70b43 Initial load duke parents: diff changeset	588	System.arraycopy(result, 0, tmpbuf, 0, index);
90ce3da70b43 Initial load duke parents: diff changeset	589	return tmpbuf;
90ce3da70b43 Initial load duke parents: diff changeset	590	}
90ce3da70b43 Initial load duke parents: diff changeset	591
90ce3da70b43 Initial load duke parents: diff changeset	592	return result;
90ce3da70b43 Initial load duke parents: diff changeset	593	}
90ce3da70b43 Initial load duke parents: diff changeset	594
90ce3da70b43 Initial load duke parents: diff changeset	595	/**
90ce3da70b43 Initial load duke parents: diff changeset	596	* Returns a CharSet containing all the characters in "this" that
90ce3da70b43 Initial load duke parents: diff changeset	597	* aren't also in "that"
90ce3da70b43 Initial load duke parents: diff changeset	598	*/
90ce3da70b43 Initial load duke parents: diff changeset	599	public CharSet difference(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	600	return new CharSet(doIntersection(that.doComplement()));
90ce3da70b43 Initial load duke parents: diff changeset	601	}
90ce3da70b43 Initial load duke parents: diff changeset	602
90ce3da70b43 Initial load duke parents: diff changeset	603	/**
90ce3da70b43 Initial load duke parents: diff changeset	604	* Removes from "this" all the characters that are also in "that"
90ce3da70b43 Initial load duke parents: diff changeset	605	*/
90ce3da70b43 Initial load duke parents: diff changeset	606	private void internalDifference(CharSet that) {
90ce3da70b43 Initial load duke parents: diff changeset	607	chars = doIntersection(that.doComplement());
90ce3da70b43 Initial load duke parents: diff changeset	608	}
90ce3da70b43 Initial load duke parents: diff changeset	609
90ce3da70b43 Initial load duke parents: diff changeset	610	/**
90ce3da70b43 Initial load duke parents: diff changeset	611	* Returns a CharSet containing all the characters which are not
90ce3da70b43 Initial load duke parents: diff changeset	612	* in "this"
90ce3da70b43 Initial load duke parents: diff changeset	613	*/
90ce3da70b43 Initial load duke parents: diff changeset	614	public CharSet complement() {
90ce3da70b43 Initial load duke parents: diff changeset	615	return new CharSet(doComplement());
90ce3da70b43 Initial load duke parents: diff changeset	616	}
90ce3da70b43 Initial load duke parents: diff changeset	617
90ce3da70b43 Initial load duke parents: diff changeset	618	/**
90ce3da70b43 Initial load duke parents: diff changeset	619	* Complements "this". All the characters it contains are removed,
90ce3da70b43 Initial load duke parents: diff changeset	620	* and all the characters it doesn't contain are added.
90ce3da70b43 Initial load duke parents: diff changeset	621	*/
90ce3da70b43 Initial load duke parents: diff changeset	622	private void internalComplement() {
90ce3da70b43 Initial load duke parents: diff changeset	623	chars = doComplement();
90ce3da70b43 Initial load duke parents: diff changeset	624	}
90ce3da70b43 Initial load duke parents: diff changeset	625
90ce3da70b43 Initial load duke parents: diff changeset	626	/**
90ce3da70b43 Initial load duke parents: diff changeset	627	* The internal implementation function for the complement routines
90ce3da70b43 Initial load duke parents: diff changeset	628	*/
90ce3da70b43 Initial load duke parents: diff changeset	629	private int[] doComplement() {
90ce3da70b43 Initial load duke parents: diff changeset	630	// the complement of an empty CharSet is one containing everything
90ce3da70b43 Initial load duke parents: diff changeset	631	if (empty()) {
90ce3da70b43 Initial load duke parents: diff changeset	632	int[] result = new int[2];
90ce3da70b43 Initial load duke parents: diff changeset	633	result[0] = 0x0000;
90ce3da70b43 Initial load duke parents: diff changeset	634	result[1] = 0x10FFFF;
90ce3da70b43 Initial load duke parents: diff changeset	635	return result;
90ce3da70b43 Initial load duke parents: diff changeset	636	}
90ce3da70b43 Initial load duke parents: diff changeset	637
90ce3da70b43 Initial load duke parents: diff changeset	638	int[] result = new int[chars.length+2];
90ce3da70b43 Initial load duke parents: diff changeset	639
90ce3da70b43 Initial load duke parents: diff changeset	640	int i = 0;
90ce3da70b43 Initial load duke parents: diff changeset	641	int index = 0;
90ce3da70b43 Initial load duke parents: diff changeset	642
90ce3da70b43 Initial load duke parents: diff changeset	643	// the result begins with \u0000 unless the original CharSet does
90ce3da70b43 Initial load duke parents: diff changeset	644	if (chars[0] != 0x0000) {
90ce3da70b43 Initial load duke parents: diff changeset	645	result[index++] = 0x0000;
90ce3da70b43 Initial load duke parents: diff changeset	646	}
90ce3da70b43 Initial load duke parents: diff changeset	647
90ce3da70b43 Initial load duke parents: diff changeset	648	// walk through the characters in this CharSet. Append a pair of
90ce3da70b43 Initial load duke parents: diff changeset	649	// characters the first of which is one less than the first
90ce3da70b43 Initial load duke parents: diff changeset	650	// character we see and the second of which is one plus the second
90ce3da70b43 Initial load duke parents: diff changeset	651	// character we see (don't write the first character if it's \u0000,
90ce3da70b43 Initial load duke parents: diff changeset	652	// and don't write the second character if it's \uffff.
90ce3da70b43 Initial load duke parents: diff changeset	653	while (i < chars.length) {
90ce3da70b43 Initial load duke parents: diff changeset	654	if (chars[i] != 0x0000) {
90ce3da70b43 Initial load duke parents: diff changeset	655	result[index++] = chars[i] - 1;
90ce3da70b43 Initial load duke parents: diff changeset	656	}
90ce3da70b43 Initial load duke parents: diff changeset	657	if (chars[i + 1] != 0x10FFFF) {
90ce3da70b43 Initial load duke parents: diff changeset	658	result[index++] = chars[i + 1] + 1;
90ce3da70b43 Initial load duke parents: diff changeset	659	}
90ce3da70b43 Initial load duke parents: diff changeset	660	i += 2;
90ce3da70b43 Initial load duke parents: diff changeset	661	}
90ce3da70b43 Initial load duke parents: diff changeset	662
90ce3da70b43 Initial load duke parents: diff changeset	663	// add 0x10ffff to the end of the result, unless it was in
90ce3da70b43 Initial load duke parents: diff changeset	664	// the original set
90ce3da70b43 Initial load duke parents: diff changeset	665	if (chars[i-1] != 0x10FFFF) {
90ce3da70b43 Initial load duke parents: diff changeset	666	result[index++] = 0x10FFFF;
90ce3da70b43 Initial load duke parents: diff changeset	667	}
90ce3da70b43 Initial load duke parents: diff changeset	668
90ce3da70b43 Initial load duke parents: diff changeset	669	if (result.length > index) {
90ce3da70b43 Initial load duke parents: diff changeset	670	int[] tmpbuf = new int[index];
90ce3da70b43 Initial load duke parents: diff changeset	671	System.arraycopy(result, 0, tmpbuf, 0, index);
90ce3da70b43 Initial load duke parents: diff changeset	672	return tmpbuf;
90ce3da70b43 Initial load duke parents: diff changeset	673	}
90ce3da70b43 Initial load duke parents: diff changeset	674
90ce3da70b43 Initial load duke parents: diff changeset	675	return result;
90ce3da70b43 Initial load duke parents: diff changeset	676	}
90ce3da70b43 Initial load duke parents: diff changeset	677
90ce3da70b43 Initial load duke parents: diff changeset	678	/**
90ce3da70b43 Initial load duke parents: diff changeset	679	* Returns true if this CharSet contains the specified character
90ce3da70b43 Initial load duke parents: diff changeset	680	* @param c The character we're testing for set membership
90ce3da70b43 Initial load duke parents: diff changeset	681	*/
90ce3da70b43 Initial load duke parents: diff changeset	682	public boolean contains(int c) {
90ce3da70b43 Initial load duke parents: diff changeset	683	// search for the first range endpoint that is greater than or
90ce3da70b43 Initial load duke parents: diff changeset	684	// equal to c
90ce3da70b43 Initial load duke parents: diff changeset	685	int i = 1;
90ce3da70b43 Initial load duke parents: diff changeset	686	while (i < chars.length && chars[i] < c) {
90ce3da70b43 Initial load duke parents: diff changeset	687	i += 2;
90ce3da70b43 Initial load duke parents: diff changeset	688	}
90ce3da70b43 Initial load duke parents: diff changeset	689
90ce3da70b43 Initial load duke parents: diff changeset	690	// if we've walked off the end, we don't contain c
90ce3da70b43 Initial load duke parents: diff changeset	691	if (i == chars.length) {
90ce3da70b43 Initial load duke parents: diff changeset	692	return false;
90ce3da70b43 Initial load duke parents: diff changeset	693	}
90ce3da70b43 Initial load duke parents: diff changeset	694
90ce3da70b43 Initial load duke parents: diff changeset	695	// otherwise, we contain c if the beginning of the range is less
90ce3da70b43 Initial load duke parents: diff changeset	696	// than or equal to c
90ce3da70b43 Initial load duke parents: diff changeset	697	return chars[i - 1] <= c;
90ce3da70b43 Initial load duke parents: diff changeset	698	}
90ce3da70b43 Initial load duke parents: diff changeset	699
90ce3da70b43 Initial load duke parents: diff changeset	700	/**
90ce3da70b43 Initial load duke parents: diff changeset	701	* Returns true if "that" is another instance of CharSet containing
90ce3da70b43 Initial load duke parents: diff changeset	702	* the exact same characters as this one
90ce3da70b43 Initial load duke parents: diff changeset	703	*/
90ce3da70b43 Initial load duke parents: diff changeset	704	public boolean equals(Object that) {
17950 b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	705	return (that instanceof CharSet) && Arrays.equals(chars, ((CharSet)that).chars);
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	706	}
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	707
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	708	/**
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	709	* Returns the hash code for this set of characters
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	710	*/
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	711	public int hashCode() {
b2d5b298ec6e 8015880: GenerateBreakIteratorData build warning alanb parents: 10110 diff changeset	712	return Arrays.hashCode(chars);
2 90ce3da70b43 Initial load duke parents: diff changeset	713	}
90ce3da70b43 Initial load duke parents: diff changeset	714
90ce3da70b43 Initial load duke parents: diff changeset	715	/**
90ce3da70b43 Initial load duke parents: diff changeset	716	* Creates a new CharSet that is equal to this one
90ce3da70b43 Initial load duke parents: diff changeset	717	*/
90ce3da70b43 Initial load duke parents: diff changeset	718	public Object clone() {
90ce3da70b43 Initial load duke parents: diff changeset	719	return new CharSet(chars);
90ce3da70b43 Initial load duke parents: diff changeset	720	}
90ce3da70b43 Initial load duke parents: diff changeset	721
90ce3da70b43 Initial load duke parents: diff changeset	722	/**
90ce3da70b43 Initial load duke parents: diff changeset	723	* Returns true if this CharSet contains no characters
90ce3da70b43 Initial load duke parents: diff changeset	724	*/
90ce3da70b43 Initial load duke parents: diff changeset	725	public boolean empty() {
90ce3da70b43 Initial load duke parents: diff changeset	726	return chars.length == 0;
90ce3da70b43 Initial load duke parents: diff changeset	727	}
90ce3da70b43 Initial load duke parents: diff changeset	728
90ce3da70b43 Initial load duke parents: diff changeset	729	/**
90ce3da70b43 Initial load duke parents: diff changeset	730	* Returns a textual representation of this CharSet. If the result
90ce3da70b43 Initial load duke parents: diff changeset	731	* of calling this function is passed to CharSet.parseString(), it
90ce3da70b43 Initial load duke parents: diff changeset	732	* will produce another CharSet that is equal to this one.
90ce3da70b43 Initial load duke parents: diff changeset	733	*/
90ce3da70b43 Initial load duke parents: diff changeset	734	public String toString() {
90ce3da70b43 Initial load duke parents: diff changeset	735	StringBuffer result = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	736
90ce3da70b43 Initial load duke parents: diff changeset	737	// the result begins with an opening bracket
90ce3da70b43 Initial load duke parents: diff changeset	738	result.append('[');
90ce3da70b43 Initial load duke parents: diff changeset	739
90ce3da70b43 Initial load duke parents: diff changeset	740	// iterate through the ranges in the CharSet
90ce3da70b43 Initial load duke parents: diff changeset	741	for (int i = 0; i < chars.length; i += 2) {
90ce3da70b43 Initial load duke parents: diff changeset	742	// for a range with the same beginning and ending point,
90ce3da70b43 Initial load duke parents: diff changeset	743	// output that character
90ce3da70b43 Initial load duke parents: diff changeset	744	if (chars[i] == chars[i + 1]) {
90ce3da70b43 Initial load duke parents: diff changeset	745	result.append("0x");
90ce3da70b43 Initial load duke parents: diff changeset	746	result.append(Integer.toHexString(chars[i]));
90ce3da70b43 Initial load duke parents: diff changeset	747	}
90ce3da70b43 Initial load duke parents: diff changeset	748
90ce3da70b43 Initial load duke parents: diff changeset	749	// otherwise, output the start and end points of the range
90ce3da70b43 Initial load duke parents: diff changeset	750	// separated by a dash
90ce3da70b43 Initial load duke parents: diff changeset	751	else {
90ce3da70b43 Initial load duke parents: diff changeset	752	result.append("0x");
90ce3da70b43 Initial load duke parents: diff changeset	753	result.append(Integer.toHexString(chars[i]));
90ce3da70b43 Initial load duke parents: diff changeset	754	result.append("-0x");
90ce3da70b43 Initial load duke parents: diff changeset	755	result.append(Integer.toHexString(chars[i + 1]));
90ce3da70b43 Initial load duke parents: diff changeset	756	}
90ce3da70b43 Initial load duke parents: diff changeset	757	}
90ce3da70b43 Initial load duke parents: diff changeset	758
90ce3da70b43 Initial load duke parents: diff changeset	759	// the result ends with a closing bracket
90ce3da70b43 Initial load duke parents: diff changeset	760	result.append(']');
90ce3da70b43 Initial load duke parents: diff changeset	761	return result.toString();
90ce3da70b43 Initial load duke parents: diff changeset	762	}
90ce3da70b43 Initial load duke parents: diff changeset	763
90ce3da70b43 Initial load duke parents: diff changeset	764	/**
90ce3da70b43 Initial load duke parents: diff changeset	765	* Returns an integer array representing the contents of this CharSet
90ce3da70b43 Initial load duke parents: diff changeset	766	* in the same form in which they're stored internally: as pairs
90ce3da70b43 Initial load duke parents: diff changeset	767	* of characters representing the start and end points of ranges
90ce3da70b43 Initial load duke parents: diff changeset	768	*/
90ce3da70b43 Initial load duke parents: diff changeset	769	public int[] getRanges() {
90ce3da70b43 Initial load duke parents: diff changeset	770	return chars;
90ce3da70b43 Initial load duke parents: diff changeset	771	}
90ce3da70b43 Initial load duke parents: diff changeset	772
90ce3da70b43 Initial load duke parents: diff changeset	773	/**
90ce3da70b43 Initial load duke parents: diff changeset	774	* Returns an Enumeration that will return the ranges of characters
90ce3da70b43 Initial load duke parents: diff changeset	775	* contained in this CharSet one at a time
90ce3da70b43 Initial load duke parents: diff changeset	776	*/
90ce3da70b43 Initial load duke parents: diff changeset	777	public Enumeration getChars() {
90ce3da70b43 Initial load duke parents: diff changeset	778	return new Enumeration(this);
90ce3da70b43 Initial load duke parents: diff changeset	779	}
90ce3da70b43 Initial load duke parents: diff changeset	780
90ce3da70b43 Initial load duke parents: diff changeset	781	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	782	// CharSet.Enumeration
90ce3da70b43 Initial load duke parents: diff changeset	783	//==========================================================================
90ce3da70b43 Initial load duke parents: diff changeset	784
90ce3da70b43 Initial load duke parents: diff changeset	785	/**
90ce3da70b43 Initial load duke parents: diff changeset	786	* An Enumeration that can be used to extract the character ranges
90ce3da70b43 Initial load duke parents: diff changeset	787	* from a CharSet one at a time
90ce3da70b43 Initial load duke parents: diff changeset	788	*/
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	789	public class Enumeration implements java.util.Enumeration<int[]> {
2 90ce3da70b43 Initial load duke parents: diff changeset	790	/**
90ce3da70b43 Initial load duke parents: diff changeset	791	* Initializes a CharSet.Enumeration
90ce3da70b43 Initial load duke parents: diff changeset	792	*/
90ce3da70b43 Initial load duke parents: diff changeset	793	Enumeration(CharSet cs) {
90ce3da70b43 Initial load duke parents: diff changeset	794	this.chars = cs.chars;
90ce3da70b43 Initial load duke parents: diff changeset	795	p = 0;
90ce3da70b43 Initial load duke parents: diff changeset	796	}
90ce3da70b43 Initial load duke parents: diff changeset	797
90ce3da70b43 Initial load duke parents: diff changeset	798	/**
90ce3da70b43 Initial load duke parents: diff changeset	799	* Returns true if the enumeration hasn't yet returned
90ce3da70b43 Initial load duke parents: diff changeset	800	* all the ranges in the CharSet
90ce3da70b43 Initial load duke parents: diff changeset	801	*/
90ce3da70b43 Initial load duke parents: diff changeset	802	public boolean hasMoreElements() {
90ce3da70b43 Initial load duke parents: diff changeset	803	return p < chars.length;
90ce3da70b43 Initial load duke parents: diff changeset	804	}
90ce3da70b43 Initial load duke parents: diff changeset	805
90ce3da70b43 Initial load duke parents: diff changeset	806	/**
90ce3da70b43 Initial load duke parents: diff changeset	807	* Returns the next range in the CarSet
90ce3da70b43 Initial load duke parents: diff changeset	808	*/
10110 75674d930b1f 7058708: Eliminate JDK build tools build warnings jjg parents: 5506 diff changeset	809	public int[] nextElement() {
2 90ce3da70b43 Initial load duke parents: diff changeset	810	int[] result = new int[2];
90ce3da70b43 Initial load duke parents: diff changeset	811	result[0] = chars[p++];
90ce3da70b43 Initial load duke parents: diff changeset	812	result[1] = chars[p++];
90ce3da70b43 Initial load duke parents: diff changeset	813	return result;
90ce3da70b43 Initial load duke parents: diff changeset	814	}
90ce3da70b43 Initial load duke parents: diff changeset	815
90ce3da70b43 Initial load duke parents: diff changeset	816	int p;
90ce3da70b43 Initial load duke parents: diff changeset	817	int[] chars;
90ce3da70b43 Initial load duke parents: diff changeset	818	}
90ce3da70b43 Initial load duke parents: diff changeset	819	}

author	rfield
	Mon, 13 Feb 2017 08:50:26 -0800
changeset 43856	fcdebb803c62
parent 23010	6dadb192ad81
permissions	-rw-r--r--