2
|
1 |
/*
|
5506
|
2 |
* Copyright (c) 1999, 2005, Oracle and/or its affiliates. All rights reserved.
|
2
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
5506
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
2
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
5506
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
2
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
5506
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
2
|
24 |
*/
|
|
25 |
|
|
26 |
/*
|
|
27 |
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
|
|
28 |
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
|
|
29 |
*
|
|
30 |
* The original version of this source code and documentation is copyrighted
|
|
31 |
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
|
|
32 |
* materials are provided under terms of a License Agreement between Taligent
|
|
33 |
* and Sun. This technology is protected by multiple US and International
|
|
34 |
* patents. This notice and attribution to Taligent may not be removed.
|
|
35 |
* Taligent is a registered trademark of Taligent, Inc.
|
|
36 |
*
|
|
37 |
*/
|
|
38 |
|
|
39 |
package java.text;
|
|
40 |
|
|
41 |
import java.util.Vector;
|
|
42 |
import sun.text.UCompactIntArray;
|
|
43 |
import sun.text.IntHashtable;
|
|
44 |
import sun.text.ComposedCharIter;
|
|
45 |
import sun.text.CollatorUtilities;
|
|
46 |
import sun.text.normalizer.NormalizerImpl;
|
|
47 |
|
|
48 |
/**
|
|
49 |
* This class contains all the code to parse a RuleBasedCollator pattern
|
|
50 |
* and build a RBCollationTables object from it. A particular instance
|
|
51 |
* of tis class exists only during the actual build process-- once an
|
|
52 |
* RBCollationTables object has been built, the RBTableBuilder object
|
|
53 |
* goes away. This object carries all of the state which is only needed
|
|
54 |
* during the build process, plus a "shadow" copy of all of the state
|
|
55 |
* that will go into the tables object itself. This object communicates
|
|
56 |
* with RBCollationTables through a separate class, RBCollationTables.BuildAPI,
|
|
57 |
* this is an inner class of RBCollationTables and provides a separate
|
|
58 |
* private API for communication with RBTableBuilder.
|
|
59 |
* This class isn't just an inner class of RBCollationTables itself because
|
|
60 |
* of its large size. For source-code readability, it seemed better for the
|
|
61 |
* builder to have its own source file.
|
|
62 |
*/
|
|
63 |
final class RBTableBuilder {
|
|
64 |
|
|
65 |
public RBTableBuilder(RBCollationTables.BuildAPI tables) {
|
|
66 |
this.tables = tables;
|
|
67 |
}
|
|
68 |
|
|
69 |
/**
|
|
70 |
* Create a table-based collation object with the given rules.
|
|
71 |
* This is the main function that actually builds the tables and
|
|
72 |
* stores them back in the RBCollationTables object. It is called
|
|
73 |
* ONLY by the RBCollationTables constructor.
|
|
74 |
* @see java.util.RuleBasedCollator#RuleBasedCollator
|
|
75 |
* @exception ParseException If the rules format is incorrect.
|
|
76 |
*/
|
|
77 |
|
|
78 |
public void build(String pattern, int decmp) throws ParseException
|
|
79 |
{
|
|
80 |
boolean isSource = true;
|
|
81 |
int i = 0;
|
|
82 |
String expChars;
|
|
83 |
String groupChars;
|
|
84 |
if (pattern.length() == 0)
|
|
85 |
throw new ParseException("Build rules empty.", 0);
|
|
86 |
|
|
87 |
// This array maps Unicode characters to their collation ordering
|
|
88 |
mapping = new UCompactIntArray((int)RBCollationTables.UNMAPPED);
|
|
89 |
// Normalize the build rules. Find occurances of all decomposed characters
|
|
90 |
// and normalize the rules before feeding into the builder. By "normalize",
|
|
91 |
// we mean that all precomposed Unicode characters must be converted into
|
|
92 |
// a base character and one or more combining characters (such as accents).
|
|
93 |
// When there are multiple combining characters attached to a base character,
|
|
94 |
// the combining characters must be in their canonical order
|
|
95 |
//
|
|
96 |
// sherman/Note:
|
|
97 |
//(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decompose
|
|
98 |
//hangual syllables to jamos, so we can actually just call decompose with
|
|
99 |
//normalizer's IGNORE_HANGUL option turned on
|
|
100 |
//
|
|
101 |
//(2)just call the "special version" in NormalizerImpl directly
|
|
102 |
//pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
|
|
103 |
//
|
|
104 |
//Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
|
|
105 |
//pattern = Normalizer.normalize(pattern, mode, 0, true);
|
|
106 |
|
|
107 |
pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);
|
|
108 |
|
|
109 |
// Build the merged collation entries
|
|
110 |
// Since rules can be specified in any order in the string
|
|
111 |
// (e.g. "c , C < d , D < e , E .... C < CH")
|
|
112 |
// this splits all of the rules in the string out into separate
|
|
113 |
// objects and then sorts them. In the above example, it merges the
|
|
114 |
// "C < CH" rule in just before the "C < D" rule.
|
|
115 |
//
|
|
116 |
|
|
117 |
mPattern = new MergeCollation(pattern);
|
|
118 |
|
|
119 |
int order = 0;
|
|
120 |
|
|
121 |
// Now walk though each entry and add it to my own tables
|
|
122 |
for (i = 0; i < mPattern.getCount(); ++i)
|
|
123 |
{
|
|
124 |
PatternEntry entry = mPattern.getItemAt(i);
|
|
125 |
if (entry != null) {
|
|
126 |
groupChars = entry.getChars();
|
|
127 |
if (groupChars.length() > 1) {
|
|
128 |
switch(groupChars.charAt(groupChars.length()-1)) {
|
|
129 |
case '@':
|
|
130 |
frenchSec = true;
|
|
131 |
groupChars = groupChars.substring(0, groupChars.length()-1);
|
|
132 |
break;
|
|
133 |
case '!':
|
|
134 |
seAsianSwapping = true;
|
|
135 |
groupChars = groupChars.substring(0, groupChars.length()-1);
|
|
136 |
break;
|
|
137 |
}
|
|
138 |
}
|
|
139 |
|
|
140 |
order = increment(entry.getStrength(), order);
|
|
141 |
expChars = entry.getExtension();
|
|
142 |
|
|
143 |
if (expChars.length() != 0) {
|
|
144 |
addExpandOrder(groupChars, expChars, order);
|
|
145 |
} else if (groupChars.length() > 1) {
|
|
146 |
char ch = groupChars.charAt(0);
|
|
147 |
if (Character.isHighSurrogate(ch) && groupChars.length() == 2) {
|
|
148 |
addOrder(Character.toCodePoint(ch, groupChars.charAt(1)), order);
|
|
149 |
} else {
|
|
150 |
addContractOrder(groupChars, order);
|
|
151 |
}
|
|
152 |
} else {
|
|
153 |
char ch = groupChars.charAt(0);
|
|
154 |
addOrder(ch, order);
|
|
155 |
}
|
|
156 |
}
|
|
157 |
}
|
|
158 |
addComposedChars();
|
|
159 |
|
|
160 |
commit();
|
|
161 |
mapping.compact();
|
|
162 |
/*
|
|
163 |
System.out.println("mappingSize=" + mapping.getKSize());
|
|
164 |
for (int j = 0; j < 0xffff; j++) {
|
|
165 |
int value = mapping.elementAt(j);
|
|
166 |
if (value != RBCollationTables.UNMAPPED)
|
|
167 |
System.out.println("index=" + Integer.toString(j, 16)
|
|
168 |
+ ", value=" + Integer.toString(value, 16));
|
|
169 |
}
|
|
170 |
*/
|
|
171 |
tables.fillInTables(frenchSec, seAsianSwapping, mapping, contractTable, expandTable,
|
|
172 |
contractFlags, maxSecOrder, maxTerOrder);
|
|
173 |
}
|
|
174 |
|
|
175 |
/** Add expanding entries for pre-composed unicode characters so that this
|
|
176 |
* collator can be used reasonably well with decomposition turned off.
|
|
177 |
*/
|
|
178 |
private void addComposedChars() throws ParseException {
|
|
179 |
// Iterate through all of the pre-composed characters in Unicode
|
|
180 |
ComposedCharIter iter = new ComposedCharIter();
|
|
181 |
int c;
|
|
182 |
while ((c = iter.next()) != ComposedCharIter.DONE) {
|
|
183 |
if (getCharOrder(c) == RBCollationTables.UNMAPPED) {
|
|
184 |
//
|
|
185 |
// We don't already have an ordering for this pre-composed character.
|
|
186 |
//
|
|
187 |
// First, see if the decomposed string is already in our
|
|
188 |
// tables as a single contracting-string ordering.
|
|
189 |
// If so, just map the precomposed character to that order.
|
|
190 |
//
|
|
191 |
// TODO: What we should really be doing here is trying to find the
|
|
192 |
// longest initial substring of the decomposition that is present
|
|
193 |
// in the tables as a contracting character sequence, and find its
|
|
194 |
// ordering. Then do this recursively with the remaining chars
|
|
195 |
// so that we build a list of orderings, and add that list to
|
|
196 |
// the expansion table.
|
|
197 |
// That would be more correct but also significantly slower, so
|
|
198 |
// I'm not totally sure it's worth doing.
|
|
199 |
//
|
|
200 |
String s = iter.decomposition();
|
|
201 |
|
|
202 |
//sherman/Note: if this is 1 character decomposed string, the
|
|
203 |
//only thing need to do is to check if this decomposed character
|
|
204 |
//has an entry in our order table, this order is not necessary
|
|
205 |
//to be a contraction order, if it does have one, add an entry
|
|
206 |
//for the precomposed character by using the same order, the
|
|
207 |
//previous impl unnecessarily adds a single character expansion
|
|
208 |
//entry.
|
|
209 |
if (s.length() == 1) {
|
|
210 |
int order = getCharOrder(s.charAt(0));
|
|
211 |
if (order != RBCollationTables.UNMAPPED) {
|
|
212 |
addOrder(c, order);
|
|
213 |
}
|
|
214 |
continue;
|
|
215 |
} else if (s.length() == 2) {
|
|
216 |
char ch0 = s.charAt(0);
|
|
217 |
if (Character.isHighSurrogate(ch0)) {
|
|
218 |
int order = getCharOrder(s.codePointAt(0));
|
|
219 |
if (order != RBCollationTables.UNMAPPED) {
|
|
220 |
addOrder(c, order);
|
|
221 |
}
|
|
222 |
continue;
|
|
223 |
}
|
|
224 |
}
|
|
225 |
int contractOrder = getContractOrder(s);
|
|
226 |
if (contractOrder != RBCollationTables.UNMAPPED) {
|
|
227 |
addOrder(c, contractOrder);
|
|
228 |
} else {
|
|
229 |
//
|
|
230 |
// We don't have a contracting ordering for the entire string
|
|
231 |
// that results from the decomposition, but if we have orders
|
|
232 |
// for each individual character, we can add an expanding
|
|
233 |
// table entry for the pre-composed character
|
|
234 |
//
|
|
235 |
boolean allThere = true;
|
|
236 |
for (int i = 0; i < s.length(); i++) {
|
|
237 |
if (getCharOrder(s.charAt(i)) == RBCollationTables.UNMAPPED) {
|
|
238 |
allThere = false;
|
|
239 |
break;
|
|
240 |
}
|
|
241 |
}
|
|
242 |
if (allThere) {
|
|
243 |
addExpandOrder(c, s, RBCollationTables.UNMAPPED);
|
|
244 |
}
|
|
245 |
}
|
|
246 |
}
|
|
247 |
}
|
|
248 |
}
|
|
249 |
|
|
250 |
/**
|
|
251 |
* Look up for unmapped values in the expanded character table.
|
|
252 |
*
|
|
253 |
* When the expanding character tables are built by addExpandOrder,
|
|
254 |
* it doesn't know what the final ordering of each character
|
|
255 |
* in the expansion will be. Instead, it just puts the raw character
|
|
256 |
* code into the table, adding CHARINDEX as a flag. Now that we've
|
|
257 |
* finished building the mapping table, we can go back and look up
|
|
258 |
* that character to see what its real collation order is and
|
|
259 |
* stick that into the expansion table. That lets us avoid doing
|
|
260 |
* a two-stage lookup later.
|
|
261 |
*/
|
|
262 |
private final void commit()
|
|
263 |
{
|
|
264 |
if (expandTable != null) {
|
|
265 |
for (int i = 0; i < expandTable.size(); i++) {
|
|
266 |
int[] valueList = (int [])expandTable.elementAt(i);
|
|
267 |
for (int j = 0; j < valueList.length; j++) {
|
|
268 |
int order = valueList[j];
|
|
269 |
if (order < RBCollationTables.EXPANDCHARINDEX && order > CHARINDEX) {
|
|
270 |
// found a expanding character that isn't filled in yet
|
|
271 |
int ch = order - CHARINDEX;
|
|
272 |
|
|
273 |
// Get the real values for the non-filled entry
|
|
274 |
int realValue = getCharOrder(ch);
|
|
275 |
|
|
276 |
if (realValue == RBCollationTables.UNMAPPED) {
|
|
277 |
// The real value is still unmapped, maybe it's ignorable
|
|
278 |
valueList[j] = IGNORABLEMASK & ch;
|
|
279 |
} else {
|
|
280 |
// just fill in the value
|
|
281 |
valueList[j] = realValue;
|
|
282 |
}
|
|
283 |
}
|
|
284 |
}
|
|
285 |
}
|
|
286 |
}
|
|
287 |
}
|
|
288 |
/**
|
|
289 |
* Increment of the last order based on the comparison level.
|
|
290 |
*/
|
|
291 |
private final int increment(int aStrength, int lastValue)
|
|
292 |
{
|
|
293 |
switch(aStrength)
|
|
294 |
{
|
|
295 |
case Collator.PRIMARY:
|
|
296 |
// increment priamry order and mask off secondary and tertiary difference
|
|
297 |
lastValue += PRIMARYORDERINCREMENT;
|
|
298 |
lastValue &= RBCollationTables.PRIMARYORDERMASK;
|
|
299 |
isOverIgnore = true;
|
|
300 |
break;
|
|
301 |
case Collator.SECONDARY:
|
|
302 |
// increment secondary order and mask off tertiary difference
|
|
303 |
lastValue += SECONDARYORDERINCREMENT;
|
|
304 |
lastValue &= RBCollationTables.SECONDARYDIFFERENCEONLY;
|
|
305 |
// record max # of ignorable chars with secondary difference
|
|
306 |
if (!isOverIgnore)
|
|
307 |
maxSecOrder++;
|
|
308 |
break;
|
|
309 |
case Collator.TERTIARY:
|
|
310 |
// increment tertiary order
|
|
311 |
lastValue += TERTIARYORDERINCREMENT;
|
|
312 |
// record max # of ignorable chars with tertiary difference
|
|
313 |
if (!isOverIgnore)
|
|
314 |
maxTerOrder++;
|
|
315 |
break;
|
|
316 |
}
|
|
317 |
return lastValue;
|
|
318 |
}
|
|
319 |
|
|
320 |
/**
|
|
321 |
* Adds a character and its designated order into the collation table.
|
|
322 |
*/
|
|
323 |
private final void addOrder(int ch, int anOrder)
|
|
324 |
{
|
|
325 |
// See if the char already has an order in the mapping table
|
|
326 |
int order = mapping.elementAt(ch);
|
|
327 |
|
|
328 |
if (order >= RBCollationTables.CONTRACTCHARINDEX) {
|
|
329 |
// There's already an entry for this character that points to a contracting
|
|
330 |
// character table. Instead of adding the character directly to the mapping
|
|
331 |
// table, we must add it to the contract table instead.
|
|
332 |
int length = 1;
|
|
333 |
if (Character.isSupplementaryCodePoint(ch)) {
|
|
334 |
length = Character.toChars(ch, keyBuf, 0);
|
|
335 |
} else {
|
|
336 |
keyBuf[0] = (char)ch;
|
|
337 |
}
|
|
338 |
addContractOrder(new String(keyBuf, 0, length), anOrder);
|
|
339 |
} else {
|
|
340 |
// add the entry to the mapping table,
|
|
341 |
// the same later entry replaces the previous one
|
|
342 |
mapping.setElementAt(ch, anOrder);
|
|
343 |
}
|
|
344 |
}
|
|
345 |
|
|
346 |
private final void addContractOrder(String groupChars, int anOrder) {
|
|
347 |
addContractOrder(groupChars, anOrder, true);
|
|
348 |
}
|
|
349 |
|
|
350 |
/**
|
|
351 |
* Adds the contracting string into the collation table.
|
|
352 |
*/
|
|
353 |
private final void addContractOrder(String groupChars, int anOrder,
|
|
354 |
boolean fwd)
|
|
355 |
{
|
|
356 |
if (contractTable == null) {
|
|
357 |
contractTable = new Vector(INITIALTABLESIZE);
|
|
358 |
}
|
|
359 |
|
|
360 |
//initial character
|
|
361 |
int ch = groupChars.codePointAt(0);
|
|
362 |
/*
|
|
363 |
char ch0 = groupChars.charAt(0);
|
|
364 |
int ch = Character.isHighSurrogate(ch0)?
|
|
365 |
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
|
|
366 |
*/
|
|
367 |
// See if the initial character of the string already has a contract table.
|
|
368 |
int entry = mapping.elementAt(ch);
|
|
369 |
Vector entryTable = getContractValuesImpl(entry - RBCollationTables.CONTRACTCHARINDEX);
|
|
370 |
|
|
371 |
if (entryTable == null) {
|
|
372 |
// We need to create a new table of contract entries for this base char
|
|
373 |
int tableIndex = RBCollationTables.CONTRACTCHARINDEX + contractTable.size();
|
|
374 |
entryTable = new Vector(INITIALTABLESIZE);
|
|
375 |
contractTable.addElement(entryTable);
|
|
376 |
|
|
377 |
// Add the initial character's current ordering first. then
|
|
378 |
// update its mapping to point to this contract table
|
|
379 |
entryTable.addElement(new EntryPair(groupChars.substring(0,Character.charCount(ch)), entry));
|
|
380 |
mapping.setElementAt(ch, tableIndex);
|
|
381 |
}
|
|
382 |
|
|
383 |
// Now add (or replace) this string in the table
|
|
384 |
int index = RBCollationTables.getEntry(entryTable, groupChars, fwd);
|
|
385 |
if (index != RBCollationTables.UNMAPPED) {
|
|
386 |
EntryPair pair = (EntryPair) entryTable.elementAt(index);
|
|
387 |
pair.value = anOrder;
|
|
388 |
} else {
|
|
389 |
EntryPair pair = (EntryPair)entryTable.lastElement();
|
|
390 |
|
|
391 |
// NOTE: This little bit of logic is here to speed CollationElementIterator
|
|
392 |
// .nextContractChar(). This code ensures that the longest sequence in
|
|
393 |
// this list is always the _last_ one in the list. This keeps
|
|
394 |
// nextContractChar() from having to search the entire list for the longest
|
|
395 |
// sequence.
|
|
396 |
if (groupChars.length() > pair.entryName.length()) {
|
|
397 |
entryTable.addElement(new EntryPair(groupChars, anOrder, fwd));
|
|
398 |
} else {
|
|
399 |
entryTable.insertElementAt(new EntryPair(groupChars, anOrder,
|
|
400 |
fwd), entryTable.size() - 1);
|
|
401 |
}
|
|
402 |
}
|
|
403 |
|
|
404 |
// If this was a forward mapping for a contracting string, also add a
|
|
405 |
// reverse mapping for it, so that CollationElementIterator.previous
|
|
406 |
// can work right
|
|
407 |
if (fwd && groupChars.length() > 1) {
|
|
408 |
addContractFlags(groupChars);
|
|
409 |
addContractOrder(new StringBuffer(groupChars).reverse().toString(),
|
|
410 |
anOrder, false);
|
|
411 |
}
|
|
412 |
}
|
|
413 |
|
|
414 |
/**
|
|
415 |
* If the given string has been specified as a contracting string
|
|
416 |
* in this collation table, return its ordering.
|
|
417 |
* Otherwise return UNMAPPED.
|
|
418 |
*/
|
|
419 |
private int getContractOrder(String groupChars)
|
|
420 |
{
|
|
421 |
int result = RBCollationTables.UNMAPPED;
|
|
422 |
if (contractTable != null) {
|
|
423 |
int ch = groupChars.codePointAt(0);
|
|
424 |
/*
|
|
425 |
char ch0 = groupChars.charAt(0);
|
|
426 |
int ch = Character.isHighSurrogate(ch0)?
|
|
427 |
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
|
|
428 |
*/
|
|
429 |
Vector entryTable = getContractValues(ch);
|
|
430 |
if (entryTable != null) {
|
|
431 |
int index = RBCollationTables.getEntry(entryTable, groupChars, true);
|
|
432 |
if (index != RBCollationTables.UNMAPPED) {
|
|
433 |
EntryPair pair = (EntryPair) entryTable.elementAt(index);
|
|
434 |
result = pair.value;
|
|
435 |
}
|
|
436 |
}
|
|
437 |
}
|
|
438 |
return result;
|
|
439 |
}
|
|
440 |
|
|
441 |
private final int getCharOrder(int ch) {
|
|
442 |
int order = mapping.elementAt(ch);
|
|
443 |
|
|
444 |
if (order >= RBCollationTables.CONTRACTCHARINDEX) {
|
|
445 |
Vector groupList = getContractValuesImpl(order - RBCollationTables.CONTRACTCHARINDEX);
|
|
446 |
EntryPair pair = (EntryPair)groupList.firstElement();
|
|
447 |
order = pair.value;
|
|
448 |
}
|
|
449 |
return order;
|
|
450 |
}
|
|
451 |
|
|
452 |
/**
|
|
453 |
* Get the entry of hash table of the contracting string in the collation
|
|
454 |
* table.
|
|
455 |
* @param ch the starting character of the contracting string
|
|
456 |
*/
|
|
457 |
private Vector getContractValues(int ch)
|
|
458 |
{
|
|
459 |
int index = mapping.elementAt(ch);
|
|
460 |
return getContractValuesImpl(index - RBCollationTables.CONTRACTCHARINDEX);
|
|
461 |
}
|
|
462 |
|
|
463 |
private Vector getContractValuesImpl(int index)
|
|
464 |
{
|
|
465 |
if (index >= 0)
|
|
466 |
{
|
|
467 |
return (Vector)contractTable.elementAt(index);
|
|
468 |
}
|
|
469 |
else // not found
|
|
470 |
{
|
|
471 |
return null;
|
|
472 |
}
|
|
473 |
}
|
|
474 |
|
|
475 |
/**
|
|
476 |
* Adds the expanding string into the collation table.
|
|
477 |
*/
|
|
478 |
private final void addExpandOrder(String contractChars,
|
|
479 |
String expandChars,
|
|
480 |
int anOrder) throws ParseException
|
|
481 |
{
|
|
482 |
// Create an expansion table entry
|
|
483 |
int tableIndex = addExpansion(anOrder, expandChars);
|
|
484 |
|
|
485 |
// And add its index into the main mapping table
|
|
486 |
if (contractChars.length() > 1) {
|
|
487 |
char ch = contractChars.charAt(0);
|
|
488 |
if (Character.isHighSurrogate(ch) && contractChars.length() == 2) {
|
|
489 |
char ch2 = contractChars.charAt(1);
|
|
490 |
if (Character.isLowSurrogate(ch2)) {
|
|
491 |
//only add into table when it is a legal surrogate
|
|
492 |
addOrder(Character.toCodePoint(ch, ch2), tableIndex);
|
|
493 |
}
|
|
494 |
} else {
|
|
495 |
addContractOrder(contractChars, tableIndex);
|
|
496 |
}
|
|
497 |
} else {
|
|
498 |
addOrder(contractChars.charAt(0), tableIndex);
|
|
499 |
}
|
|
500 |
}
|
|
501 |
|
|
502 |
private final void addExpandOrder(int ch, String expandChars, int anOrder)
|
|
503 |
throws ParseException
|
|
504 |
{
|
|
505 |
int tableIndex = addExpansion(anOrder, expandChars);
|
|
506 |
addOrder(ch, tableIndex);
|
|
507 |
}
|
|
508 |
|
|
509 |
/**
|
|
510 |
* Create a new entry in the expansion table that contains the orderings
|
|
511 |
* for the given characers. If anOrder is valid, it is added to the
|
|
512 |
* beginning of the expanded list of orders.
|
|
513 |
*/
|
|
514 |
private int addExpansion(int anOrder, String expandChars) {
|
|
515 |
if (expandTable == null) {
|
|
516 |
expandTable = new Vector(INITIALTABLESIZE);
|
|
517 |
}
|
|
518 |
|
|
519 |
// If anOrder is valid, we want to add it at the beginning of the list
|
|
520 |
int offset = (anOrder == RBCollationTables.UNMAPPED) ? 0 : 1;
|
|
521 |
|
|
522 |
int[] valueList = new int[expandChars.length() + offset];
|
|
523 |
if (offset == 1) {
|
|
524 |
valueList[0] = anOrder;
|
|
525 |
}
|
|
526 |
|
|
527 |
int j = offset;
|
|
528 |
for (int i = 0; i < expandChars.length(); i++) {
|
|
529 |
char ch0 = expandChars.charAt(i);
|
|
530 |
char ch1;
|
|
531 |
int ch;
|
|
532 |
if (Character.isHighSurrogate(ch0)) {
|
|
533 |
if (++i == expandChars.length() ||
|
|
534 |
!Character.isLowSurrogate(ch1=expandChars.charAt(i))) {
|
|
535 |
//ether we are missing the low surrogate or the next char
|
|
536 |
//is not a legal low surrogate, so stop loop
|
|
537 |
break;
|
|
538 |
}
|
|
539 |
ch = Character.toCodePoint(ch0, ch1);
|
|
540 |
|
|
541 |
} else {
|
|
542 |
ch = ch0;
|
|
543 |
}
|
|
544 |
|
|
545 |
int mapValue = getCharOrder(ch);
|
|
546 |
|
|
547 |
if (mapValue != RBCollationTables.UNMAPPED) {
|
|
548 |
valueList[j++] = mapValue;
|
|
549 |
} else {
|
|
550 |
// can't find it in the table, will be filled in by commit().
|
|
551 |
valueList[j++] = CHARINDEX + ch;
|
|
552 |
}
|
|
553 |
}
|
|
554 |
if (j < valueList.length) {
|
|
555 |
//we had at least one supplementary character, the size of valueList
|
|
556 |
//is bigger than it really needs...
|
|
557 |
int[] tmpBuf = new int[j];
|
|
558 |
while (--j >= 0) {
|
|
559 |
tmpBuf[j] = valueList[j];
|
|
560 |
}
|
|
561 |
valueList = tmpBuf;
|
|
562 |
}
|
|
563 |
// Add the expanding char list into the expansion table.
|
|
564 |
int tableIndex = RBCollationTables.EXPANDCHARINDEX + expandTable.size();
|
|
565 |
expandTable.addElement(valueList);
|
|
566 |
|
|
567 |
return tableIndex;
|
|
568 |
}
|
|
569 |
|
|
570 |
private void addContractFlags(String chars) {
|
|
571 |
char c0;
|
|
572 |
int c;
|
|
573 |
int len = chars.length();
|
|
574 |
for (int i = 0; i < len; i++) {
|
|
575 |
c0 = chars.charAt(i);
|
|
576 |
c = Character.isHighSurrogate(c0)
|
|
577 |
?Character.toCodePoint(c0, chars.charAt(++i))
|
|
578 |
:c0;
|
|
579 |
contractFlags.put(c, 1);
|
|
580 |
}
|
|
581 |
}
|
|
582 |
|
|
583 |
// ==============================================================
|
|
584 |
// constants
|
|
585 |
// ==============================================================
|
|
586 |
final static int CHARINDEX = 0x70000000; // need look up in .commit()
|
|
587 |
|
|
588 |
private final static int IGNORABLEMASK = 0x0000ffff;
|
|
589 |
private final static int PRIMARYORDERINCREMENT = 0x00010000;
|
|
590 |
private final static int SECONDARYORDERINCREMENT = 0x00000100;
|
|
591 |
private final static int TERTIARYORDERINCREMENT = 0x00000001;
|
|
592 |
private final static int INITIALTABLESIZE = 20;
|
|
593 |
private final static int MAXKEYSIZE = 5;
|
|
594 |
|
|
595 |
// ==============================================================
|
|
596 |
// instance variables
|
|
597 |
// ==============================================================
|
|
598 |
|
|
599 |
// variables used by the build process
|
|
600 |
private RBCollationTables.BuildAPI tables = null;
|
|
601 |
private MergeCollation mPattern = null;
|
|
602 |
private boolean isOverIgnore = false;
|
|
603 |
private char[] keyBuf = new char[MAXKEYSIZE];
|
|
604 |
private IntHashtable contractFlags = new IntHashtable(100);
|
|
605 |
|
|
606 |
// "shadow" copies of the instance variables in RBCollationTables
|
|
607 |
// (the values in these variables are copied back into RBCollationTables
|
|
608 |
// at the end of the build process)
|
|
609 |
private boolean frenchSec = false;
|
|
610 |
private boolean seAsianSwapping = false;
|
|
611 |
|
|
612 |
private UCompactIntArray mapping = null;
|
|
613 |
private Vector contractTable = null;
|
|
614 |
private Vector expandTable = null;
|
|
615 |
|
|
616 |
private short maxSecOrder = 0;
|
|
617 |
private short maxTerOrder = 0;
|
|
618 |
}
|