author | pliden |
Thu, 26 Sep 2019 13:56:58 +0200 | |
changeset 58355 | de246fd65587 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
2 | 1 |
/*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 |
||
26 |
package build.tools.generatecharacter; |
|
27 |
||
28 |
import java.io.*; |
|
29 |
import java.util.*; |
|
30 |
import java.lang.*; |
|
31 |
||
32 |
/** |
|
33 |
* SpecialCaseMap has the responsibility of storing the |
|
34 |
* 1:M, locale-sensitive, and context sensitive case mappings |
|
35 |
* that occur when uppercasing Unicode 4.0 characters. This class can |
|
36 |
* read and parse the SpecialCasing.txt file that contains those mappings. |
|
37 |
* <p> |
|
38 |
* A single SpecialCaseMap contains the mapping for one character. |
|
39 |
* <p> |
|
40 |
* @author John O'Conner |
|
41 |
*/ |
|
10110 | 42 |
public class SpecialCaseMap implements Comparable<SpecialCaseMap> { |
2 | 43 |
|
44 |
SpecialCaseMap() { |
|
45 |
chSource = 0xFFFF; |
|
46 |
} |
|
47 |
||
48 |
||
49 |
/** |
|
50 |
* Read and parse a Unicode special case map file. |
|
51 |
* |
|
52 |
* @param file a file specifying the Unicode special case mappings |
|
53 |
* @return an array of SpecialCaseMap objects, one for each line of the |
|
54 |
* special case map data file that could be successfully parsed |
|
55 |
*/ |
|
56 |
||
57 |
public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException { |
|
10110 | 58 |
ArrayList<SpecialCaseMap> caseMaps = new ArrayList<>(150); |
2 | 59 |
int count = 0; |
60 |
BufferedReader f = new BufferedReader(new FileReader(file)); |
|
61 |
String line = null; |
|
62 |
loop: |
|
63 |
while(true) { |
|
64 |
try { |
|
65 |
line = f.readLine(); |
|
66 |
} |
|
67 |
catch (IOException e) { break loop; } |
|
68 |
if (line == null) break loop; |
|
69 |
SpecialCaseMap item = parse(line.trim()); |
|
70 |
if (item != null) { |
|
71 |
if(item.getCharSource() >> 16 < plane) continue; |
|
10110 | 72 |
if(item.getCharSource() >> 16 > plane) break; |
2 | 73 |
caseMaps.add(item); |
74 |
++count; |
|
75 |
} |
|
76 |
||
77 |
} |
|
78 |
caseMaps.trimToSize(); |
|
79 |
SpecialCaseMap[] result = new SpecialCaseMap[caseMaps.size()]; |
|
80 |
caseMaps.toArray(result); |
|
81 |
Arrays.sort(result); |
|
82 |
return result; |
|
83 |
||
84 |
} |
|
85 |
||
10110 | 86 |
/** |
2 | 87 |
* Given one line of a Unicode special casing data file as a String, parse the line |
88 |
* and return a SpecialCaseMap object that contains the case mapping. |
|
89 |
* |
|
90 |
* @param s a line of the Unicode special case map data file to be parsed |
|
91 |
* @return a SpecialCaseMap object, or null if the parsing process failed for some reason |
|
92 |
*/ |
|
93 |
public static SpecialCaseMap parse(String s) { |
|
94 |
SpecialCaseMap spec = null; |
|
95 |
String[] tokens = new String[REQUIRED_FIELDS]; |
|
96 |
if ( s != null && s.length() != 0 && s.charAt(0) != '#') { |
|
97 |
try { |
|
98 |
int x = 0, tokenStart = 0, tokenEnd = 0; |
|
99 |
for (x=0; x<REQUIRED_FIELDS-1; x++) { |
|
100 |
tokenEnd = s.indexOf(';', tokenStart); |
|
101 |
tokens[x] = s.substring(tokenStart, tokenEnd); |
|
102 |
tokenStart = tokenEnd+1; |
|
103 |
} |
|
104 |
tokens[x] = s.substring(tokenStart); |
|
105 |
||
106 |
if(tokens[FIELD_CONDITIONS].indexOf(';') == -1) { |
|
107 |
spec = new SpecialCaseMap(); |
|
108 |
spec.setCharSource(parseChar(tokens[FIELD_SOURCE])); |
|
109 |
spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE])); |
|
110 |
spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE])); |
|
111 |
spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE])); |
|
112 |
spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS])); |
|
113 |
spec.setContext(parseContext(tokens[FIELD_CONDITIONS])); |
|
114 |
} |
|
115 |
} |
|
116 |
catch(Exception e) { |
|
117 |
spec = null; |
|
118 |
System.out.println("Error parsing spec line."); |
|
119 |
} |
|
120 |
} |
|
121 |
return spec; |
|
122 |
} |
|
123 |
||
124 |
static int parseChar(String token) throws NumberFormatException { |
|
125 |
return Integer.parseInt(token, 16); |
|
126 |
} |
|
127 |
||
128 |
static char[] parseCaseMap(String token ) throws NumberFormatException { |
|
129 |
int pos = 0; |
|
130 |
StringBuffer buff = new StringBuffer(); |
|
131 |
int start = 0, end = 0; |
|
132 |
while(pos < token.length() ){ |
|
133 |
while(Character.isSpaceChar(token.charAt(pos++))); |
|
134 |
--pos; |
|
135 |
start = pos; |
|
136 |
while(pos < token.length() && !Character.isSpaceChar(token.charAt(pos))) pos++; |
|
137 |
end = pos; |
|
138 |
int ch = parseChar(token.substring(start,end)); |
|
139 |
if (ch > 0xFFFF) { |
|
140 |
buff.append(getHighSurrogate(ch)); |
|
141 |
buff.append(getLowSurrogate(ch)); |
|
142 |
} else { |
|
143 |
buff.append((char)ch); |
|
144 |
} |
|
145 |
} |
|
146 |
char[] map = new char[buff.length()]; |
|
147 |
buff.getChars(0, buff.length(), map, 0); |
|
148 |
return map; |
|
149 |
} |
|
150 |
||
151 |
static Locale parseLocale(String token) { |
|
152 |
return null; |
|
153 |
} |
|
154 |
||
155 |
static String[] parseContext(String token) { |
|
156 |
return null; |
|
157 |
} |
|
158 |
||
159 |
static int find(int ch, SpecialCaseMap[] map) { |
|
160 |
if ((map == null) || (map.length == 0)) { |
|
161 |
return -1; |
|
162 |
} |
|
163 |
int top, bottom, current; |
|
164 |
bottom = 0; |
|
165 |
top = map.length; |
|
166 |
current = top/2; |
|
167 |
// invariant: top > current >= bottom && ch >= map.chSource |
|
168 |
while (top - bottom > 1) { |
|
169 |
if (ch >= map[current].getCharSource()) { |
|
170 |
bottom = current; |
|
171 |
} else { |
|
172 |
top = current; |
|
173 |
} |
|
174 |
current = (top + bottom) / 2; |
|
175 |
} |
|
176 |
if (ch == map[current].getCharSource()) return current; |
|
177 |
else return -1; |
|
178 |
} |
|
179 |
||
10110 | 180 |
/* |
2 | 181 |
* Extracts and returns the high surrogate value from a UTF-32 code point. |
182 |
* If argument is a BMP character, then it is converted to a char and returned; |
|
183 |
* otherwise the high surrogate value is extracted. |
|
184 |
* @param codePoint a UTF-32 codePoint with value greater than 0xFFFF. |
|
185 |
* @return the high surrogate value that helps create <code>codePoint</code>; else |
|
186 |
* the char representation of <code>codePoint</code> if it is a BMP character. |
|
10110 | 187 |
* @since 1.5 |
2 | 188 |
*/ |
10110 | 189 |
static char getHighSurrogate(int codePoint) { |
190 |
char high = (char)codePoint; |
|
191 |
if (codePoint > 0xFFFF) { |
|
192 |
high = (char)((codePoint - 0x10000)/0x0400 + 0xD800); |
|
2 | 193 |
} |
10110 | 194 |
return high; |
195 |
} |
|
2 | 196 |
|
197 |
||
10110 | 198 |
/* |
2 | 199 |
* Extracts and returns the low surrogate value from a UTF-32 code point. |
200 |
* If argument is a BMP character, then it is converted to a char and returned; |
|
201 |
* otherwise the high surrogate value is extracted. |
|
202 |
* @param codePoint a UTF-32 codePoint with value greater than 0xFFFF. |
|
203 |
* @return the low surrogate value that helps create <code>codePoint</code>; else |
|
204 |
* the char representation of <code>codePoint</code> if it is a BMP character. |
|
205 |
* @since 1.5 |
|
206 |
*/ |
|
10110 | 207 |
static char getLowSurrogate(int codePoint) { |
208 |
char low = (char)codePoint; |
|
209 |
if(codePoint > 0xFFFF) { |
|
210 |
low = (char)((codePoint - 0x10000)%0x0400 + 0xDC00); |
|
2 | 211 |
} |
10110 | 212 |
return low; |
213 |
} |
|
2 | 214 |
|
10110 | 215 |
static String hex6(int n) { |
216 |
String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase(); |
|
217 |
return "000000".substring(Math.min(6, str.length())) + str; |
|
218 |
} |
|
2 | 219 |
|
10110 | 220 |
static String hex6(char[] map){ |
221 |
StringBuffer buff = new StringBuffer(); |
|
222 |
int x=0; |
|
223 |
buff.append(hex6(map[x++])); |
|
224 |
while(x<map.length) { |
|
225 |
buff.append(" " + hex6(map[x++])); |
|
2 | 226 |
} |
10110 | 227 |
return buff.toString(); |
228 |
} |
|
2 | 229 |
|
230 |
void setCharSource(int ch) { |
|
231 |
chSource = ch; |
|
232 |
} |
|
233 |
||
234 |
void setLowerCaseMap(char[] map) { |
|
235 |
lowerCaseMap = map; |
|
236 |
} |
|
237 |
||
238 |
void setUpperCaseMap(char[] map) { |
|
239 |
upperCaseMap = map; |
|
240 |
} |
|
241 |
||
242 |
void setTitleCaseMap(char[] map) { |
|
243 |
titleCaseMap = map; |
|
244 |
} |
|
245 |
||
246 |
void setLocale(Locale locale) { |
|
247 |
this.locale = locale; |
|
248 |
} |
|
249 |
||
250 |
void setContext(String[] context) { |
|
251 |
this.context = context; |
|
252 |
} |
|
253 |
||
254 |
public int getCharSource() { |
|
255 |
return chSource; |
|
256 |
} |
|
257 |
||
258 |
public char[] getLowerCaseMap() { |
|
259 |
return lowerCaseMap; |
|
260 |
} |
|
261 |
||
262 |
public char[] getUpperCaseMap() { |
|
263 |
return upperCaseMap; |
|
264 |
} |
|
265 |
||
266 |
public char[] getTitleCaseMap() { |
|
267 |
return titleCaseMap; |
|
268 |
} |
|
269 |
||
270 |
public Locale getLocale() { |
|
271 |
return locale; |
|
272 |
} |
|
273 |
||
274 |
public String[] getContext() { |
|
275 |
return context; |
|
276 |
} |
|
277 |
||
278 |
||
279 |
int chSource; |
|
280 |
Locale locale; |
|
281 |
char[] lowerCaseMap; |
|
282 |
char[] upperCaseMap; |
|
283 |
char[] titleCaseMap; |
|
284 |
String[] context; |
|
285 |
||
286 |
/** |
|
287 |
* Fields that can be found in the SpecialCasing.txt file. |
|
288 |
*/ |
|
289 |
static int REQUIRED_FIELDS = 5; |
|
290 |
static int FIELD_SOURCE = 0; |
|
291 |
static int FIELD_LOWERCASE = 1; |
|
292 |
static int FIELD_TITLECASE = 2; |
|
293 |
static int FIELD_UPPERCASE = 3; |
|
294 |
static int FIELD_CONDITIONS = 4; |
|
295 |
||
296 |
/** |
|
297 |
* Context values |
|
298 |
*/ |
|
299 |
static String CONTEXT_FINAL = "FINAL"; |
|
300 |
static String CONTEXT_NONFINAL = "NON_FINAL"; |
|
301 |
static String CONTEXT_MODERN = "MODERN"; |
|
302 |
static String CONTEXT_NONMODERN = "NON_MODERN"; |
|
303 |
||
10110 | 304 |
public int compareTo(SpecialCaseMap otherObject) { |
305 |
if (chSource < otherObject.chSource) { |
|
2 | 306 |
return -1; |
307 |
} |
|
10110 | 308 |
else if (chSource > otherObject.chSource) { |
2 | 309 |
return 1; |
310 |
} |
|
311 |
else return 0; |
|
312 |
} |
|
313 |
||
314 |
public boolean equals(Object o1) { |
|
10110 | 315 |
if (this == o1) { |
316 |
return true; |
|
317 |
} |
|
318 |
if (o1 == null || !(o1 instanceof SpecialCaseMap)) { |
|
319 |
return false; |
|
320 |
} |
|
321 |
SpecialCaseMap other = (SpecialCaseMap)o1; |
|
322 |
boolean bEqual = false; |
|
323 |
if (0 == compareTo(other)) { |
|
324 |
bEqual = true; |
|
325 |
} |
|
2 | 326 |
return bEqual; |
327 |
} |
|
328 |
||
10110 | 329 |
public String toString() { |
330 |
StringBuffer buff = new StringBuffer(); |
|
331 |
buff.append(hex6(getCharSource())); |
|
332 |
buff.append("|" + hex6(lowerCaseMap)); |
|
333 |
buff.append("|" + hex6(upperCaseMap)); |
|
334 |
buff.append("|" + hex6(titleCaseMap)); |
|
335 |
buff.append("|" + context); |
|
336 |
return buff.toString(); |
|
337 |
} |
|
338 |
||
339 |
public int hashCode() { |
|
340 |
return chSource; |
|
341 |
} |
|
2 | 342 |
|
10110 | 343 |
public static void main(String[] args) { |
344 |
SpecialCaseMap[] spec = null; |
|
345 |
if (args.length == 2 ) { |
|
346 |
try { |
|
347 |
File file = new File(args[0]); |
|
348 |
int plane = Integer.parseInt(args[1]); |
|
349 |
spec = SpecialCaseMap.readSpecFile(file, plane); |
|
350 |
System.out.println("SpecialCaseMap[" + spec.length + "]:"); |
|
351 |
for (int x=0; x<spec.length; x++) { |
|
352 |
System.out.println(spec[x].toString()); |
|
353 |
} |
|
354 |
} |
|
355 |
catch(Exception e) { |
|
356 |
e.printStackTrace(); |
|
357 |
} |
|
2 | 358 |
} |
359 |
||
10110 | 360 |
} |
2 | 361 |
|
362 |
} |