2
|
1 |
/*
|
5506
|
2 |
* Copyright (c) 1999, 2003, Oracle and/or its affiliates. All rights reserved.
|
2
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
5506
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
2
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
5506
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
2
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
5506
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
2
|
24 |
*/
|
|
25 |
|
|
26 |
/*
|
|
27 |
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
|
|
28 |
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
|
|
29 |
*
|
|
30 |
* The original version of this source code and documentation is copyrighted
|
|
31 |
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
|
|
32 |
* materials are provided under terms of a License Agreement between Taligent
|
|
33 |
* and Sun. This technology is protected by multiple US and International
|
|
34 |
* patents. This notice and attribution to Taligent may not be removed.
|
|
35 |
* Taligent is a registered trademark of Taligent, Inc.
|
|
36 |
*
|
|
37 |
*/
|
|
38 |
|
|
39 |
package java.text;
|
|
40 |
|
|
41 |
import java.util.Vector;
|
|
42 |
import sun.text.UCompactIntArray;
|
|
43 |
import sun.text.IntHashtable;
|
|
44 |
|
|
45 |
/**
|
|
46 |
* This class contains the static state of a RuleBasedCollator: The various
|
|
47 |
* tables that are used by the collation routines. Several RuleBasedCollators
|
|
48 |
* can share a single RBCollationTables object, easing memory requirements and
|
|
49 |
* improving performance.
|
|
50 |
*/
|
|
51 |
final class RBCollationTables {
|
|
52 |
//===========================================================================================
|
|
53 |
// The following diagram shows the data structure of the RBCollationTables object.
|
|
54 |
// Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
|
|
55 |
// "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
|
|
56 |
// The rule says: sort the 'ch' ligatures and 'c' with only a tertiary difference, and
// sort 'o-umlaut' as if it were always expanded with 'e'.
|
|
58 |
//
|
|
59 |
// mapping table contracting list expanding list
|
|
60 |
// (contains all unicode char
|
|
61 |
// entries) ___ ____________ _________________________
|
|
62 |
// ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
|
|
63 |
// |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
|
|
64 |
// |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
|
|
65 |
// |____:___| | |_:_| |------------| | |-------------------------|
|
|
66 |
// |____:___| | |'cH'|v('cH')| | | : |
|
|
67 |
// |__'a'___|-> v('a') | |------------| | |-------------------------|
|
|
68 |
// |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
|
|
69 |
// |____:___| | |------------| | |-------------------------|
|
|
70 |
// |____:___| | |'CH'|v('CH')| | | : |
|
|
71 |
// |___'c'__|---------------- ------------ | |-------------------------|
|
|
72 |
// |____:___| | | : |
|
|
73 |
// |o-umlaut|---------------------------------------- |_________________________|
|
|
74 |
// |____:___|
|
|
75 |
//
|
|
76 |
// Noted by Helena Shih on 6/23/97
|
|
77 |
//============================================================================================
|
|
78 |
|
|
79 |
public RBCollationTables(String rules, int decmp) throws ParseException {
|
|
80 |
this.rules = rules;
|
|
81 |
|
|
82 |
RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
|
|
83 |
builder.build(rules, decmp); // this object is filled in through
|
|
84 |
// the BuildAPI object
|
|
85 |
}
|
|
86 |
|
|
87 |
final class BuildAPI {
|
|
88 |
/**
|
|
89 |
* Private constructor. Prevents anyone else besides RBTableBuilder
|
|
90 |
* from gaining direct access to the internals of this class.
|
|
91 |
*/
|
|
92 |
private BuildAPI() {
|
|
93 |
}
|
|
94 |
|
|
95 |
/**
|
|
96 |
* This function is used by RBTableBuilder to fill in all the members of this
|
|
97 |
* object. (Effectively, the builder class functions as a "friend" of this
|
|
98 |
* class, but to avoid changing too much of the logic, it carries around "shadow"
|
|
99 |
* copies of all these variables until the end of the build process and then
|
|
100 |
* copies them en masse into the actual tables object once all the construction
|
|
101 |
* logic is complete. This function does that "copying en masse".
|
|
102 |
* @param f2ary The value for frenchSec (the French-secondary flag)
|
|
103 |
* @param swap The value for SE Asian swapping rule
|
|
104 |
* @param map The collator's character-mapping table (the value for mapping)
|
|
105 |
* @param cTbl The collator's contracting-character table (the value for contractTable)
|
|
106 |
* @param eTbl The collator's expanding-character table (the value for expandTable)
|
|
107 |
* @param cFlgs The hash table of characters that participate in contracting-
|
|
108 |
* character sequences (the value for contractFlags)
|
|
109 |
* @param mso The value for maxSecOrder
|
|
110 |
* @param mto The value for maxTerOrder
|
|
111 |
*/
|
|
112 |
void fillInTables(boolean f2ary,
|
|
113 |
boolean swap,
|
|
114 |
UCompactIntArray map,
|
|
115 |
Vector cTbl,
|
|
116 |
Vector eTbl,
|
|
117 |
IntHashtable cFlgs,
|
|
118 |
short mso,
|
|
119 |
short mto) {
|
|
120 |
frenchSec = f2ary;
|
|
121 |
seAsianSwapping = swap;
|
|
122 |
mapping = map;
|
|
123 |
contractTable = cTbl;
|
|
124 |
expandTable = eTbl;
|
|
125 |
contractFlags = cFlgs;
|
|
126 |
maxSecOrder = mso;
|
|
127 |
maxTerOrder = mto;
|
|
128 |
}
|
|
129 |
}
|
|
130 |
|
|
131 |
/**
|
|
132 |
* Gets the table-based rules for the collation object.
|
|
133 |
* @return returns the collation rules that the table collation object
|
|
134 |
* was created from.
|
|
135 |
*/
|
|
136 |
public String getRules()
|
|
137 |
{
|
|
138 |
return rules;
|
|
139 |
}
|
|
140 |
|
|
141 |
public boolean isFrenchSec() {
|
|
142 |
return frenchSec;
|
|
143 |
}
|
|
144 |
|
|
145 |
public boolean isSEAsianSwapping() {
|
|
146 |
return seAsianSwapping;
|
|
147 |
}
|
|
148 |
|
|
149 |
// ==============================================================
|
|
150 |
// internal (for use by CollationElementIterator)
|
|
151 |
// ==============================================================
|
|
152 |
|
|
153 |
/**
|
|
154 |
* Get the entry of hash table of the contracting string in the collation
|
|
155 |
* table.
|
|
156 |
* @param ch the starting character of the contracting string
|
|
157 |
*/
|
|
158 |
Vector getContractValues(int ch)
|
|
159 |
{
|
|
160 |
int index = mapping.elementAt(ch);
|
|
161 |
return getContractValuesImpl(index - CONTRACTCHARINDEX);
|
|
162 |
}
|
|
163 |
|
|
164 |
//get contract values from contractTable by index
|
|
165 |
private Vector getContractValuesImpl(int index)
|
|
166 |
{
|
|
167 |
if (index >= 0)
|
|
168 |
{
|
|
169 |
return (Vector)contractTable.elementAt(index);
|
|
170 |
}
|
|
171 |
else // not found
|
|
172 |
{
|
|
173 |
return null;
|
|
174 |
}
|
|
175 |
}
|
|
176 |
|
|
177 |
/**
|
|
178 |
* Returns true if this character appears anywhere in a contracting
|
|
179 |
* character sequence. (Used by CollationElementIterator.setOffset().)
|
|
180 |
*/
|
|
181 |
boolean usedInContractSeq(int c) {
|
|
182 |
return contractFlags.get(c) == 1;
|
|
183 |
}
|
|
184 |
|
|
185 |
/**
|
|
186 |
* Return the maximum length of any expansion sequences that end
|
|
187 |
* with the specified comparison order.
|
|
188 |
*
|
|
189 |
* @param order a collation order returned by previous or next.
|
|
190 |
* @return the maximum length of any expansion seuences ending
|
|
191 |
* with the specified order.
|
|
192 |
*
|
|
193 |
* @see CollationElementIterator#getMaxExpansion
|
|
194 |
*/
|
|
195 |
int getMaxExpansion(int order)
|
|
196 |
{
|
|
197 |
int result = 1;
|
|
198 |
|
|
199 |
if (expandTable != null) {
|
|
200 |
// Right now this does a linear search through the entire
|
|
201 |
// expandsion table. If a collator had a large number of expansions,
|
|
202 |
// this could cause a performance problem, but in practise that
|
|
203 |
// rarely happens
|
|
204 |
for (int i = 0; i < expandTable.size(); i++) {
|
|
205 |
int[] valueList = (int [])expandTable.elementAt(i);
|
|
206 |
int length = valueList.length;
|
|
207 |
|
|
208 |
if (length > result && valueList[length-1] == order) {
|
|
209 |
result = length;
|
|
210 |
}
|
|
211 |
}
|
|
212 |
}
|
|
213 |
|
|
214 |
return result;
|
|
215 |
}
|
|
216 |
|
|
217 |
/**
|
|
218 |
* Get the entry of hash table of the expanding string in the collation
|
|
219 |
* table.
|
|
220 |
* @param idx the index of the expanding string value list
|
|
221 |
*/
|
|
222 |
final int[] getExpandValueList(int order) {
|
|
223 |
return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
|
|
224 |
}
|
|
225 |
|
|
226 |
/**
|
|
227 |
* Get the comarison order of a character from the collation table.
|
|
228 |
* @return the comparison order of a character.
|
|
229 |
*/
|
|
230 |
int getUnicodeOrder(int ch)
|
|
231 |
{
|
|
232 |
return mapping.elementAt(ch);
|
|
233 |
}
|
|
234 |
|
|
235 |
short getMaxSecOrder() {
|
|
236 |
return maxSecOrder;
|
|
237 |
}
|
|
238 |
|
|
239 |
short getMaxTerOrder() {
|
|
240 |
return maxTerOrder;
|
|
241 |
}
|
|
242 |
|
|
243 |
/**
|
|
244 |
* Reverse a string.
|
|
245 |
*/
|
|
246 |
//shemran/Note: this is used for secondary order value reverse, no
|
|
247 |
// need to consider supplementary pair.
|
|
248 |
static void reverse (StringBuffer result, int from, int to)
|
|
249 |
{
|
|
250 |
int i = from;
|
|
251 |
char swap;
|
|
252 |
|
|
253 |
int j = to - 1;
|
|
254 |
while (i < j) {
|
|
255 |
swap = result.charAt(i);
|
|
256 |
result.setCharAt(i, result.charAt(j));
|
|
257 |
result.setCharAt(j, swap);
|
|
258 |
i++;
|
|
259 |
j--;
|
|
260 |
}
|
|
261 |
}
|
|
262 |
|
|
263 |
final static int getEntry(Vector list, String name, boolean fwd) {
|
|
264 |
for (int i = 0; i < list.size(); i++) {
|
|
265 |
EntryPair pair = (EntryPair)list.elementAt(i);
|
|
266 |
if (pair.fwd == fwd && pair.entryName.equals(name)) {
|
|
267 |
return i;
|
|
268 |
}
|
|
269 |
}
|
|
270 |
return UNMAPPED;
|
|
271 |
}
|
|
272 |
|
|
273 |
// ==============================================================
|
|
274 |
// constants
|
|
275 |
// ==============================================================
|
|
276 |
//sherman/Todo: is the value big enough?????
|
|
277 |
final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
|
|
278 |
final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
|
|
279 |
final static int UNMAPPED = 0xFFFFFFFF;
|
|
280 |
|
|
281 |
final static int PRIMARYORDERMASK = 0xffff0000;
|
|
282 |
final static int SECONDARYORDERMASK = 0x0000ff00;
|
|
283 |
final static int TERTIARYORDERMASK = 0x000000ff;
|
|
284 |
final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
|
|
285 |
final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
|
|
286 |
final static int PRIMARYORDERSHIFT = 16;
|
|
287 |
final static int SECONDARYORDERSHIFT = 8;
|
|
288 |
|
|
289 |
// ==============================================================
|
|
290 |
// instance variables
|
|
291 |
// ==============================================================
|
|
292 |
private String rules = null;
|
|
293 |
private boolean frenchSec = false;
|
|
294 |
private boolean seAsianSwapping = false;
|
|
295 |
|
|
296 |
private UCompactIntArray mapping = null;
|
|
297 |
private Vector contractTable = null;
|
|
298 |
private Vector expandTable = null;
|
|
299 |
private IntHashtable contractFlags = null;
|
|
300 |
|
|
301 |
private short maxSecOrder = 0;
|
|
302 |
private short maxTerOrder = 0;
|
|
303 |
}
|