author | naoto |
Tue, 15 Oct 2019 09:25:59 -0700 | |
changeset 58603 | 2312d1a04c49 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
14342
8435a30053c1
7197491: update copyright year to match last edit in jdk8 jdk repository
alanb
parents:
14014
diff
changeset
|
2 |
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
/* |
|
27 |
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved |
|
28 |
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved |
|
29 |
* |
|
30 |
* The original version of this source code and documentation is copyrighted |
|
31 |
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These |
|
32 |
* materials are provided under terms of a License Agreement between Taligent |
|
33 |
* and Sun. This technology is protected by multiple US and International |
|
34 |
* patents. This notice and attribution to Taligent may not be removed. |
|
35 |
* Taligent is a registered trademark of Taligent, Inc. |
|
36 |
* |
|
37 |
*/ |
|
38 |
||
39 |
package java.text; |
|
40 |
||
41 |
import java.util.Vector; |
|
42 |
import sun.text.UCompactIntArray; |
|
43 |
import sun.text.IntHashtable; |
|
44 |
||
45 |
/** |
|
46 |
* This class contains the static state of a RuleBasedCollator: The various |
|
47 |
* tables that are used by the collation routines. Several RuleBasedCollators |
|
48 |
* can share a single RBCollationTables object, easing memory requirements and |
|
49 |
* improving performance. |
|
50 |
*/ |
|
51 |
final class RBCollationTables { |
|
52 |
//=========================================================================================== |
|
53 |
// The following diagram shows the data structure of the RBCollationTables object. |
|
54 |
// Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6. |
|
55 |
// "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...". |
|
56 |
// What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and |
|
57 |
// sorts 'o-umlaut' as if it's always expanded with 'e'. |
|
58 |
// |
|
59 |
// mapping table contracting list expanding list |
|
60 |
// (contains all unicode char |
|
61 |
// entries) ___ ____________ _________________________ |
|
62 |
// ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')| |
|
63 |
// |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------| |
|
64 |
// |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : | |
|
65 |
// |____:___| | |_:_| |------------| | |-------------------------| |
|
66 |
// |____:___| | |'cH'|v('cH')| | | : | |
|
67 |
// |__'a'___|-> v('a') | |------------| | |-------------------------| |
|
68 |
// |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : | |
|
69 |
// |____:___| | |------------| | |-------------------------| |
|
70 |
// |____:___| | |'CH'|v('CH')| | | : | |
|
71 |
// |___'c'__|---------------- ------------ | |-------------------------| |
|
72 |
// |____:___| | | : | |
|
73 |
// |o-umlaut|---------------------------------------- |_________________________| |
|
74 |
// |____:___| |
|
75 |
// |
|
76 |
// Noted by Helena Shih on 6/23/97 |
|
77 |
//============================================================================================ |
|
78 |
||
79 |
public RBCollationTables(String rules, int decmp) throws ParseException { |
|
80 |
this.rules = rules; |
|
81 |
||
82 |
RBTableBuilder builder = new RBTableBuilder(new BuildAPI()); |
|
83 |
builder.build(rules, decmp); // this object is filled in through |
|
84 |
// the BuildAPI object |
|
85 |
} |
|
86 |
||
87 |
final class BuildAPI { |
|
88 |
/** |
|
89 |
* Private constructor. Prevents anyone else besides RBTableBuilder |
|
90 |
* from gaining direct access to the internals of this class. |
|
91 |
*/ |
|
92 |
private BuildAPI() { |
|
93 |
} |
|
94 |
||
95 |
/** |
|
96 |
* This function is used by RBTableBuilder to fill in all the members of this |
|
97 |
* object. (Effectively, the builder class functions as a "friend" of this |
|
98 |
* class, but to avoid changing too much of the logic, it carries around "shadow" |
|
99 |
* copies of all these variables until the end of the build process and then |
|
100 |
* copies them en masse into the actual tables object once all the construction |
|
101 |
* logic is complete. This function does that "copying en masse". |
|
102 |
* @param f2ary The value for frenchSec (the French-secondary flag) |
|
103 |
* @param swap The value for SE Asian swapping rule |
|
104 |
* @param map The collator's character-mapping table (the value for mapping) |
|
105 |
* @param cTbl The collator's contracting-character table (the value for contractTable) |
|
106 |
* @param eTbl The collator's expanding-character table (the value for expandTable) |
|
107 |
* @param cFlgs The hash table of characters that participate in contracting- |
|
108 |
* character sequences (the value for contractFlags) |
|
109 |
* @param mso The value for maxSecOrder |
|
110 |
* @param mto The value for maxTerOrder |
|
111 |
*/ |
|
112 |
void fillInTables(boolean f2ary, |
|
113 |
boolean swap, |
|
114 |
UCompactIntArray map, |
|
12848 | 115 |
Vector<Vector<EntryPair>> cTbl, |
116 |
Vector<int[]> eTbl, |
|
2 | 117 |
IntHashtable cFlgs, |
118 |
short mso, |
|
119 |
short mto) { |
|
120 |
frenchSec = f2ary; |
|
121 |
seAsianSwapping = swap; |
|
122 |
mapping = map; |
|
123 |
contractTable = cTbl; |
|
124 |
expandTable = eTbl; |
|
125 |
contractFlags = cFlgs; |
|
126 |
maxSecOrder = mso; |
|
127 |
maxTerOrder = mto; |
|
128 |
} |
|
129 |
} |
|
130 |
||
131 |
/** |
|
132 |
* Gets the table-based rules for the collation object. |
|
133 |
* @return returns the collation rules that the table collation object |
|
134 |
* was created from. |
|
135 |
*/ |
|
136 |
public String getRules() |
|
137 |
{ |
|
138 |
return rules; |
|
139 |
} |
|
140 |
||
141 |
public boolean isFrenchSec() { |
|
142 |
return frenchSec; |
|
143 |
} |
|
144 |
||
145 |
public boolean isSEAsianSwapping() { |
|
146 |
return seAsianSwapping; |
|
147 |
} |
|
148 |
||
149 |
// ============================================================== |
|
150 |
// internal (for use by CollationElementIterator) |
|
151 |
// ============================================================== |
|
152 |
||
153 |
/** |
|
154 |
* Get the entry of hash table of the contracting string in the collation |
|
155 |
* table. |
|
156 |
* @param ch the starting character of the contracting string |
|
157 |
*/ |
|
12848 | 158 |
Vector<EntryPair> getContractValues(int ch) |
2 | 159 |
{ |
160 |
int index = mapping.elementAt(ch); |
|
161 |
return getContractValuesImpl(index - CONTRACTCHARINDEX); |
|
162 |
} |
|
163 |
||
164 |
//get contract values from contractTable by index |
|
12848 | 165 |
private Vector<EntryPair> getContractValuesImpl(int index) |
2 | 166 |
{ |
167 |
if (index >= 0) |
|
168 |
{ |
|
12848 | 169 |
return contractTable.elementAt(index); |
2 | 170 |
} |
171 |
else // not found |
|
172 |
{ |
|
173 |
return null; |
|
174 |
} |
|
175 |
} |
|
176 |
||
177 |
/** |
|
178 |
* Returns true if this character appears anywhere in a contracting |
|
179 |
* character sequence. (Used by CollationElementIterator.setOffset().) |
|
180 |
*/ |
|
181 |
boolean usedInContractSeq(int c) { |
|
182 |
return contractFlags.get(c) == 1; |
|
183 |
} |
|
184 |
||
185 |
/** |
|
186 |
* Return the maximum length of any expansion sequences that end |
|
187 |
* with the specified comparison order. |
|
188 |
* |
|
189 |
* @param order a collation order returned by previous or next. |
|
190 |
* @return the maximum length of any expansion seuences ending |
|
191 |
* with the specified order. |
|
192 |
* |
|
193 |
* @see CollationElementIterator#getMaxExpansion |
|
194 |
*/ |
|
14014 | 195 |
int getMaxExpansion(int order) { |
2 | 196 |
int result = 1; |
197 |
||
198 |
if (expandTable != null) { |
|
199 |
// Right now this does a linear search through the entire |
|
14014 | 200 |
// expansion table. If a collator had a large number of expansions, |
2 | 201 |
// this could cause a performance problem, but in practise that |
202 |
// rarely happens |
|
203 |
for (int i = 0; i < expandTable.size(); i++) { |
|
12848 | 204 |
int[] valueList = expandTable.elementAt(i); |
2 | 205 |
int length = valueList.length; |
206 |
||
207 |
if (length > result && valueList[length-1] == order) { |
|
208 |
result = length; |
|
209 |
} |
|
210 |
} |
|
211 |
} |
|
212 |
||
213 |
return result; |
|
214 |
} |
|
215 |
||
216 |
/** |
|
14014 | 217 |
* Get the entry of hash table of the expanding string in the collation |
218 |
* table. |
|
219 |
* @param idx the index of the expanding string value list |
|
2 | 220 |
*/ |
14014 | 221 |
final int[] getExpandValueList(int idx) { |
222 |
return expandTable.elementAt(idx - EXPANDCHARINDEX); |
|
2 | 223 |
} |
224 |
||
225 |
/** |
|
14014 | 226 |
* Get the comarison order of a character from the collation table. |
227 |
* @return the comparison order of a character. |
|
2 | 228 |
*/ |
14014 | 229 |
int getUnicodeOrder(int ch) { |
2 | 230 |
return mapping.elementAt(ch); |
231 |
} |
|
232 |
||
233 |
short getMaxSecOrder() { |
|
234 |
return maxSecOrder; |
|
235 |
} |
|
236 |
||
237 |
short getMaxTerOrder() { |
|
238 |
return maxTerOrder; |
|
239 |
} |
|
240 |
||
241 |
/** |
|
242 |
* Reverse a string. |
|
243 |
*/ |
|
244 |
//shemran/Note: this is used for secondary order value reverse, no |
|
245 |
// need to consider supplementary pair. |
|
246 |
static void reverse (StringBuffer result, int from, int to) |
|
247 |
{ |
|
248 |
int i = from; |
|
249 |
char swap; |
|
250 |
||
251 |
int j = to - 1; |
|
252 |
while (i < j) { |
|
253 |
swap = result.charAt(i); |
|
254 |
result.setCharAt(i, result.charAt(j)); |
|
255 |
result.setCharAt(j, swap); |
|
256 |
i++; |
|
257 |
j--; |
|
258 |
} |
|
259 |
} |
|
260 |
||
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
261 |
static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) { |
2 | 262 |
for (int i = 0; i < list.size(); i++) { |
12848 | 263 |
EntryPair pair = list.elementAt(i); |
2 | 264 |
if (pair.fwd == fwd && pair.entryName.equals(name)) { |
265 |
return i; |
|
266 |
} |
|
267 |
} |
|
268 |
return UNMAPPED; |
|
269 |
} |
|
270 |
||
271 |
// ============================================================== |
|
272 |
// constants |
|
273 |
// ============================================================== |
|
274 |
//sherman/Todo: is the value big enough????? |
|
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
275 |
static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
276 |
static final int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
277 |
static final int UNMAPPED = 0xFFFFFFFF; |
2 | 278 |
|
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
279 |
static final int PRIMARYORDERMASK = 0xffff0000; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
280 |
static final int SECONDARYORDERMASK = 0x0000ff00; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
281 |
static final int TERTIARYORDERMASK = 0x000000ff; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
282 |
static final int PRIMARYDIFFERENCEONLY = 0xffff0000; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
283 |
static final int SECONDARYDIFFERENCEONLY = 0xffffff00; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
284 |
static final int PRIMARYORDERSHIFT = 16; |
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
25859
diff
changeset
|
285 |
static final int SECONDARYORDERSHIFT = 8; |
2 | 286 |
|
287 |
// ============================================================== |
|
288 |
// instance variables |
|
289 |
// ============================================================== |
|
290 |
private String rules = null; |
|
291 |
private boolean frenchSec = false; |
|
292 |
private boolean seAsianSwapping = false; |
|
293 |
||
294 |
private UCompactIntArray mapping = null; |
|
12848 | 295 |
private Vector<Vector<EntryPair>> contractTable = null; |
296 |
private Vector<int[]> expandTable = null; |
|
2 | 297 |
private IntHashtable contractFlags = null; |
298 |
||
299 |
private short maxSecOrder = 0; |
|
300 |
private short maxTerOrder = 0; |
|
301 |
} |