author | naoto |
Tue, 15 Oct 2019 09:25:59 -0700 | |
changeset 58603 | 2312d1a04c49 |
parent 58242 | 94bb65cb37d3 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
58242
94bb65cb37d3
8230648: Replace @exception tag with @throws in java.base
jboes
parents:
53018
diff
changeset
|
2 |
* Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
/* |
|
27 |
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved |
|
28 |
* (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved |
|
29 |
* |
|
30 |
* The original version of this source code and documentation is copyrighted |
|
31 |
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These |
|
32 |
* materials are provided under terms of a License Agreement between Taligent |
|
33 |
* and Sun. This technology is protected by multiple US and International |
|
34 |
* patents. This notice and attribution to Taligent may not be removed. |
|
35 |
* Taligent is a registered trademark of Taligent, Inc. |
|
36 |
* |
|
37 |
*/ |
|
38 |
||
39 |
package java.text; |
|
40 |
||
41 |
import java.util.ArrayList; |
|
42 |
||
43 |
/**
 * Utility class for normalizing and merging patterns for collation.
 * Patterns are strings of the form <entry>*, where <entry> has the
 * form:
 * <pattern> := <entry>*
 * <entry> := <separator><chars>{"/"<extension>}
 * <separator> := "=", ",", ";", "<", "&"
 * <chars> and <extension> are both arbitrary strings.
 * Unquoted whitespace is ignored.
 * 'xxx' can be used to quote characters.
 * One difference from Collator is that & is used to reset to a current
 * point. Or, in other words, it introduces a new sequence which is to
 * be added to the old.
 * That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR
 * "a < b < d & b < c"
 * XXX: make '' be a single quote.
 * @see PatternEntry
 * @author Mark Davis, Helena Shih
 */
|
62 |
||
63 |
final class MergeCollation { |
|
64 |
||
65 |
/**
 * Builds a merge table from the given collation rule pattern.
 *
 * @param pattern the rule string to parse and merge; {@code null} leaves
 *        the table empty
 * @throws ParseException If the input pattern is incorrect.
 */
public MergeCollation(String pattern) throws ParseException {
    // Begin with every "single character seen" bit cleared.
    java.util.Arrays.fill(statusArray, (byte) 0);
    setPattern(pattern);
}
|
75 |
||
76 |
/** |
|
77 |
* recovers current pattern |
|
78 |
*/ |
|
79 |
public String getPattern() { |
|
80 |
return getPattern(true); |
|
81 |
} |
|
82 |
||
83 |
/** |
|
84 |
* recovers current pattern. |
|
85 |
* @param withWhiteSpace puts spacing around the entries, and \n |
|
86 |
* before & and < |
|
87 |
*/ |
|
88 |
public String getPattern(boolean withWhiteSpace) { |
|
89 |
StringBuffer result = new StringBuffer(); |
|
90 |
PatternEntry tmp = null; |
|
12848 | 91 |
ArrayList<PatternEntry> extList = null; |
2 | 92 |
int i; |
93 |
for (i = 0; i < patterns.size(); ++i) { |
|
12848 | 94 |
PatternEntry entry = patterns.get(i); |
53018
8bf9268df0e2
8215281: Use String.isEmpty() when applicable in java.base
redestad
parents:
47216
diff
changeset
|
95 |
if (!entry.extension.isEmpty()) { |
2 | 96 |
if (extList == null) |
12848 | 97 |
extList = new ArrayList<>(); |
2 | 98 |
extList.add(entry); |
99 |
} else { |
|
100 |
if (extList != null) { |
|
101 |
PatternEntry last = findLastWithNoExtension(i-1); |
|
102 |
for (int j = extList.size() - 1; j >= 0 ; j--) { |
|
12848 | 103 |
tmp = extList.get(j); |
2 | 104 |
tmp.addToBuffer(result, false, withWhiteSpace, last); |
105 |
} |
|
106 |
extList = null; |
|
107 |
} |
|
108 |
entry.addToBuffer(result, false, withWhiteSpace, null); |
|
109 |
} |
|
110 |
} |
|
111 |
if (extList != null) { |
|
112 |
PatternEntry last = findLastWithNoExtension(i-1); |
|
113 |
for (int j = extList.size() - 1; j >= 0 ; j--) { |
|
12848 | 114 |
tmp = extList.get(j); |
2 | 115 |
tmp.addToBuffer(result, false, withWhiteSpace, last); |
116 |
} |
|
117 |
extList = null; |
|
118 |
} |
|
119 |
return result.toString(); |
|
120 |
} |
|
121 |
||
122 |
private final PatternEntry findLastWithNoExtension(int i) { |
|
123 |
for (--i;i >= 0; --i) { |
|
12848 | 124 |
PatternEntry entry = patterns.get(i); |
53018
8bf9268df0e2
8215281: Use String.isEmpty() when applicable in java.base
redestad
parents:
47216
diff
changeset
|
125 |
if (entry.extension.isEmpty()) { |
2 | 126 |
return entry; |
127 |
} |
|
128 |
} |
|
129 |
return null; |
|
130 |
} |
|
131 |
||
132 |
/** |
|
133 |
* emits the pattern for collation builder. |
|
134 |
* @return emits the string in the format understable to the collation |
|
135 |
* builder. |
|
136 |
*/ |
|
137 |
public String emitPattern() { |
|
138 |
return emitPattern(true); |
|
139 |
} |
|
140 |
||
141 |
/** |
|
142 |
* emits the pattern for collation builder. |
|
143 |
* @param withWhiteSpace puts spacing around the entries, and \n |
|
144 |
* before & and < |
|
145 |
* @return emits the string in the format understable to the collation |
|
146 |
* builder. |
|
147 |
*/ |
|
148 |
public String emitPattern(boolean withWhiteSpace) { |
|
149 |
StringBuffer result = new StringBuffer(); |
|
150 |
for (int i = 0; i < patterns.size(); ++i) |
|
151 |
{ |
|
12848 | 152 |
PatternEntry entry = patterns.get(i); |
2 | 153 |
if (entry != null) { |
154 |
entry.addToBuffer(result, true, withWhiteSpace, null); |
|
155 |
} |
|
156 |
} |
|
157 |
return result.toString(); |
|
158 |
} |
|
159 |
||
160 |
/** |
|
161 |
* sets the pattern. |
|
162 |
*/ |
|
163 |
public void setPattern(String pattern) throws ParseException |
|
164 |
{ |
|
165 |
patterns.clear(); |
|
166 |
addPattern(pattern); |
|
167 |
} |
|
168 |
||
169 |
/** |
|
170 |
* adds a pattern to the current one. |
|
171 |
* @param pattern the new pattern to be added |
|
172 |
*/ |
|
173 |
public void addPattern(String pattern) throws ParseException |
|
174 |
{ |
|
175 |
if (pattern == null) |
|
176 |
return; |
|
177 |
||
178 |
PatternEntry.Parser parser = new PatternEntry.Parser(pattern); |
|
179 |
||
180 |
PatternEntry entry = parser.next(); |
|
181 |
while (entry != null) { |
|
182 |
fixEntry(entry); |
|
183 |
entry = parser.next(); |
|
184 |
} |
|
185 |
} |
|
186 |
||
187 |
/** |
|
188 |
* gets count of separate entries |
|
189 |
* @return the size of pattern entries |
|
190 |
*/ |
|
191 |
public int getCount() { |
|
192 |
return patterns.size(); |
|
193 |
} |
|
194 |
||
195 |
/** |
|
196 |
* gets count of separate entries |
|
197 |
* @param index the offset of the desired pattern entry |
|
198 |
* @return the requested pattern entry |
|
199 |
*/ |
|
200 |
public PatternEntry getItemAt(int index) { |
|
12848 | 201 |
return patterns.get(index); |
2 | 202 |
} |
203 |
||
204 |
//============================================================ |
|
205 |
// privates |
|
206 |
//============================================================ |
|
12848 | 207 |
ArrayList<PatternEntry> patterns = new ArrayList<>(); // a list of PatternEntries |
2 | 208 |
|
209 |
private transient PatternEntry saveEntry = null; |
|
210 |
private transient PatternEntry lastEntry = null; |
|
211 |
||
212 |
// This is really used as a local variable inside fixEntry, but we cache |
|
213 |
// it here to avoid newing it up every time the method is called. |
|
214 |
private transient StringBuffer excess = new StringBuffer(); |
|
215 |
||
216 |
// |
|
217 |
// When building a MergeCollation, we need to do lots of searches to see |
|
218 |
// whether a given entry is already in the table. Since we're using an |
|
219 |
// array, this would make the algorithm O(N*N). To speed things up, we |
|
220 |
// use this bit array to remember whether the array contains any entries |
|
221 |
// starting with each Unicode character. If not, we can avoid the search. |
|
222 |
// Using BitSet would make this easier, but it's significantly slower. |
|
223 |
// |
|
224 |
private transient byte[] statusArray = new byte[8192]; |
|
225 |
private final byte BITARRAYMASK = (byte)0x1; |
|
226 |
private final int BYTEPOWER = 3; |
|
227 |
private final int BYTEMASK = (1 << BYTEPOWER) - 1; |
|
228 |
||
229 |
/* |
|
230 |
If the strength is RESET, then just change the lastEntry to |
|
231 |
be the current. (If the current is not in patterns, signal an error). |
|
232 |
If not, then remove the current entry, and add it after lastEntry |
|
233 |
(which is usually at the end). |
|
234 |
*/ |
|
235 |
private final void fixEntry(PatternEntry newEntry) throws ParseException |
|
236 |
{ |
|
237 |
// check to see whether the new entry has the same characters as the previous |
|
238 |
// entry did (this can happen when a pattern declaring a difference between two |
|
239 |
// strings that are canonically equivalent is normalized). If so, and the strength |
|
240 |
// is anything other than IDENTICAL or RESET, throw an exception (you can't |
|
241 |
// declare a string to be unequal to itself). --rtg 5/24/99 |
|
242 |
if (lastEntry != null && newEntry.chars.equals(lastEntry.chars) |
|
243 |
&& newEntry.extension.equals(lastEntry.extension)) { |
|
244 |
if (newEntry.strength != Collator.IDENTICAL |
|
245 |
&& newEntry.strength != PatternEntry.RESET) { |
|
246 |
throw new ParseException("The entries " + lastEntry + " and " |
|
247 |
+ newEntry + " are adjacent in the rules, but have conflicting " |
|
248 |
+ "strengths: A character can't be unequal to itself.", -1); |
|
249 |
} else { |
|
250 |
// otherwise, just skip this entry and behave as though you never saw it |
|
251 |
return; |
|
252 |
} |
|
253 |
} |
|
254 |
||
255 |
boolean changeLastEntry = true; |
|
256 |
if (newEntry.strength != PatternEntry.RESET) { |
|
257 |
int oldIndex = -1; |
|
258 |
||
259 |
if ((newEntry.chars.length() == 1)) { |
|
260 |
||
261 |
char c = newEntry.chars.charAt(0); |
|
262 |
int statusIndex = c >> BYTEPOWER; |
|
263 |
byte bitClump = statusArray[statusIndex]; |
|
264 |
byte setBit = (byte)(BITARRAYMASK << (c & BYTEMASK)); |
|
265 |
||
266 |
if (bitClump != 0 && (bitClump & setBit) != 0) { |
|
267 |
oldIndex = patterns.lastIndexOf(newEntry); |
|
268 |
} else { |
|
269 |
// We're going to add an element that starts with this |
|
270 |
// character, so go ahead and set its bit. |
|
271 |
statusArray[statusIndex] = (byte)(bitClump | setBit); |
|
272 |
} |
|
273 |
} else { |
|
274 |
oldIndex = patterns.lastIndexOf(newEntry); |
|
275 |
} |
|
276 |
if (oldIndex != -1) { |
|
277 |
patterns.remove(oldIndex); |
|
278 |
} |
|
279 |
||
280 |
excess.setLength(0); |
|
281 |
int lastIndex = findLastEntry(lastEntry, excess); |
|
282 |
||
283 |
if (excess.length() != 0) { |
|
284 |
newEntry.extension = excess + newEntry.extension; |
|
285 |
if (lastIndex != patterns.size()) { |
|
286 |
lastEntry = saveEntry; |
|
287 |
changeLastEntry = false; |
|
288 |
} |
|
289 |
} |
|
290 |
if (lastIndex == patterns.size()) { |
|
291 |
patterns.add(newEntry); |
|
292 |
saveEntry = newEntry; |
|
293 |
} else { |
|
294 |
patterns.add(lastIndex, newEntry); |
|
295 |
} |
|
296 |
} |
|
297 |
if (changeLastEntry) { |
|
298 |
lastEntry = newEntry; |
|
299 |
} |
|
300 |
} |
|
301 |
||
302 |
private final int findLastEntry(PatternEntry entry, |
|
303 |
StringBuffer excessChars) throws ParseException |
|
304 |
{ |
|
305 |
if (entry == null) |
|
306 |
return 0; |
|
307 |
||
308 |
if (entry.strength != PatternEntry.RESET) { |
|
309 |
// Search backwards for string that contains this one; |
|
310 |
// most likely entry is last one |
|
311 |
||
312 |
int oldIndex = -1; |
|
313 |
if ((entry.chars.length() == 1)) { |
|
314 |
int index = entry.chars.charAt(0) >> BYTEPOWER; |
|
315 |
if ((statusArray[index] & |
|
316 |
(BITARRAYMASK << (entry.chars.charAt(0) & BYTEMASK))) != 0) { |
|
317 |
oldIndex = patterns.lastIndexOf(entry); |
|
318 |
} |
|
319 |
} else { |
|
320 |
oldIndex = patterns.lastIndexOf(entry); |
|
321 |
} |
|
322 |
if ((oldIndex == -1)) |
|
323 |
throw new ParseException("couldn't find last entry: " |
|
324 |
+ entry, oldIndex); |
|
325 |
return oldIndex + 1; |
|
326 |
} else { |
|
327 |
int i; |
|
328 |
for (i = patterns.size() - 1; i >= 0; --i) { |
|
12848 | 329 |
PatternEntry e = patterns.get(i); |
2 | 330 |
if (e.chars.regionMatches(0,entry.chars,0, |
331 |
e.chars.length())) { |
|
31471
ae27c6f1d8bf
8077242: (str) Optimize AbstractStringBuilder.append(CharSequence, int, int) for String argument
igerasim
parents:
25859
diff
changeset
|
332 |
excessChars.append(entry.chars, e.chars.length(), |
ae27c6f1d8bf
8077242: (str) Optimize AbstractStringBuilder.append(CharSequence, int, int) for String argument
igerasim
parents:
25859
diff
changeset
|
333 |
entry.chars.length()); |
2 | 334 |
break; |
335 |
} |
|
336 |
} |
|
337 |
if (i == -1) |
|
338 |
throw new ParseException("couldn't find: " + entry, i); |
|
339 |
return i + 1; |
|
340 |
} |
|
341 |
} |
|
342 |
} |