author | sherman |
Thu, 28 Apr 2011 20:18:57 -0700 | |
changeset 9535 | d930011fd275 |
parent 7247 | 20bd166a1ad6 |
child 12300 | c795ca195227 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
7247 | 2 |
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package java.lang; |
|
27 |
||
28 |
/** |
|
29 |
* The CharacterData00 class encapsulates the large tables once found in |
|
30 |
* java.lang.Character |
|
31 |
*/ |
|
32 |
||
33 |
class CharacterData00 extends CharacterData { |
|
34 |
/* The character properties are currently encoded into 32 bits in the following manner: |
|
35 |
1 bit mirrored property |
|
36 |
4 bits directionality property |
|
37 |
9 bits signed offset used for converting case |
|
38 |
1 bit if 1, adding the signed offset converts the character to lowercase |
|
39 |
1 bit if 1, subtracting the signed offset converts the character to uppercase |
|
40 |
1 bit if 1, this character has a titlecase equivalent (possibly itself) |
|
41 |
3 bits 0 may not be part of an identifier |
|
42 |
1 ignorable control; may continue a Unicode identifier or Java identifier |
|
43 |
2 may continue a Java identifier but not a Unicode identifier (unused) |
|
44 |
3 may continue a Unicode identifier or Java identifier |
|
45 |
4 is a Java whitespace character |
|
46 |
5 may start or continue a Java identifier; |
|
47 |
may continue but not start a Unicode identifier (underscores) |
|
48 |
6 may start or continue a Java identifier but not a Unicode identifier ($) |
|
49 |
7 may start or continue a Unicode identifier or Java identifier |
|
50 |
Thus: |
|
51 |
5, 6, 7 may start a Java identifier |
|
52 |
1, 2, 3, 5, 6, 7 may continue a Java identifier |
|
53 |
7 may start a Unicode identifier |
|
54 |
1, 3, 5, 7 may continue a Unicode identifier |
|
55 |
1 is ignorable within an identifier |
|
56 |
4 is Java whitespace |
|
57 |
2 bits 0 this character has no numeric property |
|
58 |
1 adding the digit offset to the character code and then |
|
59 |
masking with 0x1F will produce the desired numeric value |
|
60 |
2 this character has a "strange" numeric value |
|
61 |
3 a Java supradecimal digit: adding the digit offset to the |
|
62 |
character code, then masking with 0x1F, then adding 10 |
|
63 |
will produce the desired numeric value |
|
64 |
5 bits digit offset |
|
65 |
5 bits character type |
|
66 |
||
67 |
The encoding of character properties is subject to change at any time. |
|
68 |
*/ |
|
69 |
||
70 |
int getProperties(int ch) { |
|
71 |
char offset = (char)ch; |
|
72 |
int props = $$Lookup(offset); |
|
73 |
return props; |
|
74 |
} |
|
75 |
||
9535
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
76 |
int getPropertiesEx(int ch) { |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
77 |
char offset = (char)ch; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
78 |
int props = $$LookupEx(offset); |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
79 |
return props; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
80 |
} |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
81 |
|
2 | 82 |
int getType(int ch) { |
83 |
int props = getProperties(ch); |
|
84 |
return (props & $$maskType); |
|
85 |
} |
|
86 |
||
9535
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
87 |
boolean isOtherLowercase(int ch) { |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
88 |
int props = getPropertiesEx(ch); |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
89 |
return (props & $$maskOtherLowercase) != 0; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
90 |
} |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
91 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
92 |
boolean isOtherUppercase(int ch) { |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
93 |
int props = getPropertiesEx(ch); |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
94 |
return (props & $$maskOtherUppercase) != 0; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
95 |
} |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
96 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
97 |
boolean isOtherAlphabetic(int ch) { |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
98 |
int props = getPropertiesEx(ch); |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
99 |
return (props & $$maskOtherAlphabetic) != 0; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
100 |
} |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
101 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
102 |
boolean isIdeographic(int ch) { |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
103 |
int props = getPropertiesEx(ch); |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
104 |
return (props & $$maskIdeographic) != 0; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
105 |
} |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
7247
diff
changeset
|
106 |
|
2 | 107 |
boolean isJavaIdentifierStart(int ch) { |
108 |
int props = getProperties(ch); |
|
109 |
return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); |
|
110 |
} |
|
111 |
||
112 |
boolean isJavaIdentifierPart(int ch) { |
|
113 |
int props = getProperties(ch); |
|
114 |
return ((props & $$nonzeroJavaPart) != 0); |
|
115 |
} |
|
116 |
||
117 |
boolean isUnicodeIdentifierStart(int ch) { |
|
118 |
int props = getProperties(ch); |
|
119 |
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart); |
|
120 |
} |
|
121 |
||
122 |
boolean isUnicodeIdentifierPart(int ch) { |
|
123 |
int props = getProperties(ch); |
|
124 |
return ((props & $$maskUnicodePart) != 0); |
|
125 |
} |
|
126 |
||
127 |
boolean isIdentifierIgnorable(int ch) { |
|
128 |
int props = getProperties(ch); |
|
129 |
return ((props & $$maskIdentifierInfo) == $$valueIgnorable); |
|
130 |
} |
|
131 |
||
132 |
int toLowerCase(int ch) { |
|
133 |
int mapChar = ch; |
|
134 |
int val = getProperties(ch); |
|
135 |
||
136 |
if ((val & $$maskLowerCase) != 0) { |
|
137 |
if ((val & $$maskCaseOffset) == $$maskCaseOffset) { |
|
138 |
switch(ch) { |
|
139 |
// map the offset overflow chars |
|
7247 | 140 |
case 0x0130 : mapChar = 0x0069; break; |
2 | 141 |
case 0x2126 : mapChar = 0x03C9; break; |
142 |
case 0x212A : mapChar = 0x006B; break; |
|
143 |
case 0x212B : mapChar = 0x00E5; break; |
|
144 |
// map the titlecase chars with both a 1:M uppercase map |
|
145 |
// and a lowercase map |
|
146 |
case 0x1F88 : mapChar = 0x1F80; break; |
|
147 |
case 0x1F89 : mapChar = 0x1F81; break; |
|
148 |
case 0x1F8A : mapChar = 0x1F82; break; |
|
149 |
case 0x1F8B : mapChar = 0x1F83; break; |
|
150 |
case 0x1F8C : mapChar = 0x1F84; break; |
|
151 |
case 0x1F8D : mapChar = 0x1F85; break; |
|
152 |
case 0x1F8E : mapChar = 0x1F86; break; |
|
153 |
case 0x1F8F : mapChar = 0x1F87; break; |
|
154 |
case 0x1F98 : mapChar = 0x1F90; break; |
|
155 |
case 0x1F99 : mapChar = 0x1F91; break; |
|
156 |
case 0x1F9A : mapChar = 0x1F92; break; |
|
157 |
case 0x1F9B : mapChar = 0x1F93; break; |
|
158 |
case 0x1F9C : mapChar = 0x1F94; break; |
|
159 |
case 0x1F9D : mapChar = 0x1F95; break; |
|
160 |
case 0x1F9E : mapChar = 0x1F96; break; |
|
161 |
case 0x1F9F : mapChar = 0x1F97; break; |
|
162 |
case 0x1FA8 : mapChar = 0x1FA0; break; |
|
163 |
case 0x1FA9 : mapChar = 0x1FA1; break; |
|
164 |
case 0x1FAA : mapChar = 0x1FA2; break; |
|
165 |
case 0x1FAB : mapChar = 0x1FA3; break; |
|
166 |
case 0x1FAC : mapChar = 0x1FA4; break; |
|
167 |
case 0x1FAD : mapChar = 0x1FA5; break; |
|
168 |
case 0x1FAE : mapChar = 0x1FA6; break; |
|
169 |
case 0x1FAF : mapChar = 0x1FA7; break; |
|
170 |
case 0x1FBC : mapChar = 0x1FB3; break; |
|
171 |
case 0x1FCC : mapChar = 0x1FC3; break; |
|
172 |
case 0x1FFC : mapChar = 0x1FF3; break; |
|
2497 | 173 |
|
174 |
case 0x023A : mapChar = 0x2C65; break; |
|
175 |
case 0x023E : mapChar = 0x2C66; break; |
|
176 |
case 0x10A0 : mapChar = 0x2D00; break; |
|
177 |
case 0x10A1 : mapChar = 0x2D01; break; |
|
178 |
case 0x10A2 : mapChar = 0x2D02; break; |
|
179 |
case 0x10A3 : mapChar = 0x2D03; break; |
|
180 |
case 0x10A4 : mapChar = 0x2D04; break; |
|
181 |
case 0x10A5 : mapChar = 0x2D05; break; |
|
182 |
case 0x10A6 : mapChar = 0x2D06; break; |
|
183 |
case 0x10A7 : mapChar = 0x2D07; break; |
|
184 |
case 0x10A8 : mapChar = 0x2D08; break; |
|
185 |
case 0x10A9 : mapChar = 0x2D09; break; |
|
186 |
case 0x10AA : mapChar = 0x2D0A; break; |
|
187 |
case 0x10AB : mapChar = 0x2D0B; break; |
|
188 |
case 0x10AC : mapChar = 0x2D0C; break; |
|
189 |
case 0x10AD : mapChar = 0x2D0D; break; |
|
190 |
case 0x10AE : mapChar = 0x2D0E; break; |
|
191 |
case 0x10AF : mapChar = 0x2D0F; break; |
|
192 |
case 0x10B0 : mapChar = 0x2D10; break; |
|
193 |
case 0x10B1 : mapChar = 0x2D11; break; |
|
194 |
case 0x10B2 : mapChar = 0x2D12; break; |
|
195 |
case 0x10B3 : mapChar = 0x2D13; break; |
|
196 |
case 0x10B4 : mapChar = 0x2D14; break; |
|
197 |
case 0x10B5 : mapChar = 0x2D15; break; |
|
198 |
case 0x10B6 : mapChar = 0x2D16; break; |
|
199 |
case 0x10B7 : mapChar = 0x2D17; break; |
|
200 |
case 0x10B8 : mapChar = 0x2D18; break; |
|
201 |
case 0x10B9 : mapChar = 0x2D19; break; |
|
202 |
case 0x10BA : mapChar = 0x2D1A; break; |
|
203 |
case 0x10BB : mapChar = 0x2D1B; break; |
|
204 |
case 0x10BC : mapChar = 0x2D1C; break; |
|
205 |
case 0x10BD : mapChar = 0x2D1D; break; |
|
206 |
case 0x10BE : mapChar = 0x2D1E; break; |
|
207 |
case 0x10BF : mapChar = 0x2D1F; break; |
|
208 |
case 0x10C0 : mapChar = 0x2D20; break; |
|
209 |
case 0x10C1 : mapChar = 0x2D21; break; |
|
210 |
case 0x10C2 : mapChar = 0x2D22; break; |
|
211 |
case 0x10C3 : mapChar = 0x2D23; break; |
|
212 |
case 0x10C4 : mapChar = 0x2D24; break; |
|
213 |
case 0x10C5 : mapChar = 0x2D25; break; |
|
214 |
case 0x1E9E : mapChar = 0x00DF; break; |
|
215 |
case 0x2C62 : mapChar = 0x026B; break; |
|
216 |
case 0x2C63 : mapChar = 0x1D7D; break; |
|
217 |
case 0x2C64 : mapChar = 0x027D; break; |
|
218 |
case 0x2C6D : mapChar = 0x0251; break; |
|
219 |
case 0x2C6E : mapChar = 0x0271; break; |
|
220 |
case 0x2C6F : mapChar = 0x0250; break; |
|
7247 | 221 |
case 0x2C70 : mapChar = 0x0252; break; |
222 |
case 0x2C7E : mapChar = 0x023F; break; |
|
223 |
case 0x2C7F : mapChar = 0x0240; break; |
|
2497 | 224 |
case 0xA77D : mapChar = 0x1D79; break; |
7247 | 225 |
case 0xA78D : mapChar = 0x0265; break; |
2 | 226 |
// default mapChar is already set, so no |
227 |
// need to redo it here. |
|
228 |
// default : mapChar = ch; |
|
229 |
} |
|
230 |
} |
|
231 |
else { |
|
232 |
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); |
|
233 |
mapChar = ch + offset; |
|
234 |
} |
|
235 |
} |
|
236 |
return mapChar; |
|
237 |
} |
|
238 |
||
239 |
int toUpperCase(int ch) { |
|
240 |
int mapChar = ch; |
|
241 |
int val = getProperties(ch); |
|
242 |
||
243 |
if ((val & $$maskUpperCase) != 0) { |
|
244 |
if ((val & $$maskCaseOffset) == $$maskCaseOffset) { |
|
245 |
switch(ch) { |
|
246 |
// map chars with overflow offsets |
|
247 |
case 0x00B5 : mapChar = 0x039C; break; |
|
248 |
case 0x017F : mapChar = 0x0053; break; |
|
249 |
case 0x1FBE : mapChar = 0x0399; break; |
|
250 |
// map char that have both a 1:1 and 1:M map |
|
251 |
case 0x1F80 : mapChar = 0x1F88; break; |
|
252 |
case 0x1F81 : mapChar = 0x1F89; break; |
|
253 |
case 0x1F82 : mapChar = 0x1F8A; break; |
|
254 |
case 0x1F83 : mapChar = 0x1F8B; break; |
|
255 |
case 0x1F84 : mapChar = 0x1F8C; break; |
|
256 |
case 0x1F85 : mapChar = 0x1F8D; break; |
|
257 |
case 0x1F86 : mapChar = 0x1F8E; break; |
|
258 |
case 0x1F87 : mapChar = 0x1F8F; break; |
|
259 |
case 0x1F90 : mapChar = 0x1F98; break; |
|
260 |
case 0x1F91 : mapChar = 0x1F99; break; |
|
261 |
case 0x1F92 : mapChar = 0x1F9A; break; |
|
262 |
case 0x1F93 : mapChar = 0x1F9B; break; |
|
263 |
case 0x1F94 : mapChar = 0x1F9C; break; |
|
264 |
case 0x1F95 : mapChar = 0x1F9D; break; |
|
265 |
case 0x1F96 : mapChar = 0x1F9E; break; |
|
266 |
case 0x1F97 : mapChar = 0x1F9F; break; |
|
267 |
case 0x1FA0 : mapChar = 0x1FA8; break; |
|
268 |
case 0x1FA1 : mapChar = 0x1FA9; break; |
|
269 |
case 0x1FA2 : mapChar = 0x1FAA; break; |
|
270 |
case 0x1FA3 : mapChar = 0x1FAB; break; |
|
271 |
case 0x1FA4 : mapChar = 0x1FAC; break; |
|
272 |
case 0x1FA5 : mapChar = 0x1FAD; break; |
|
273 |
case 0x1FA6 : mapChar = 0x1FAE; break; |
|
274 |
case 0x1FA7 : mapChar = 0x1FAF; break; |
|
275 |
case 0x1FB3 : mapChar = 0x1FBC; break; |
|
276 |
case 0x1FC3 : mapChar = 0x1FCC; break; |
|
277 |
case 0x1FF3 : mapChar = 0x1FFC; break; |
|
2497 | 278 |
|
7247 | 279 |
case 0x023F : mapChar = 0x2C7E; break; |
280 |
case 0x0240 : mapChar = 0x2C7F; break; |
|
2497 | 281 |
case 0x0250 : mapChar = 0x2C6F; break; |
282 |
case 0x0251 : mapChar = 0x2C6D; break; |
|
7247 | 283 |
case 0x0252 : mapChar = 0x2C70; break; |
284 |
case 0x0265 : mapChar = 0xA78D; break; |
|
2497 | 285 |
case 0x026B : mapChar = 0x2C62; break; |
286 |
case 0x0271 : mapChar = 0x2C6E; break; |
|
287 |
case 0x027D : mapChar = 0x2C64; break; |
|
288 |
case 0x1D79 : mapChar = 0xA77D; break; |
|
289 |
case 0x1D7D : mapChar = 0x2C63; break; |
|
290 |
case 0x2C65 : mapChar = 0x023A; break; |
|
291 |
case 0x2C66 : mapChar = 0x023E; break; |
|
292 |
case 0x2D00 : mapChar = 0x10A0; break; |
|
293 |
case 0x2D01 : mapChar = 0x10A1; break; |
|
294 |
case 0x2D02 : mapChar = 0x10A2; break; |
|
295 |
case 0x2D03 : mapChar = 0x10A3; break; |
|
296 |
case 0x2D04 : mapChar = 0x10A4; break; |
|
297 |
case 0x2D05 : mapChar = 0x10A5; break; |
|
298 |
case 0x2D06 : mapChar = 0x10A6; break; |
|
299 |
case 0x2D07 : mapChar = 0x10A7; break; |
|
300 |
case 0x2D08 : mapChar = 0x10A8; break; |
|
301 |
case 0x2D09 : mapChar = 0x10A9; break; |
|
302 |
case 0x2D0A : mapChar = 0x10AA; break; |
|
303 |
case 0x2D0B : mapChar = 0x10AB; break; |
|
304 |
case 0x2D0C : mapChar = 0x10AC; break; |
|
305 |
case 0x2D0D : mapChar = 0x10AD; break; |
|
306 |
case 0x2D0E : mapChar = 0x10AE; break; |
|
307 |
case 0x2D0F : mapChar = 0x10AF; break; |
|
308 |
case 0x2D10 : mapChar = 0x10B0; break; |
|
309 |
case 0x2D11 : mapChar = 0x10B1; break; |
|
310 |
case 0x2D12 : mapChar = 0x10B2; break; |
|
311 |
case 0x2D13 : mapChar = 0x10B3; break; |
|
312 |
case 0x2D14 : mapChar = 0x10B4; break; |
|
313 |
case 0x2D15 : mapChar = 0x10B5; break; |
|
314 |
case 0x2D16 : mapChar = 0x10B6; break; |
|
315 |
case 0x2D17 : mapChar = 0x10B7; break; |
|
316 |
case 0x2D18 : mapChar = 0x10B8; break; |
|
317 |
case 0x2D19 : mapChar = 0x10B9; break; |
|
318 |
case 0x2D1A : mapChar = 0x10BA; break; |
|
319 |
case 0x2D1B : mapChar = 0x10BB; break; |
|
320 |
case 0x2D1C : mapChar = 0x10BC; break; |
|
321 |
case 0x2D1D : mapChar = 0x10BD; break; |
|
322 |
case 0x2D1E : mapChar = 0x10BE; break; |
|
323 |
case 0x2D1F : mapChar = 0x10BF; break; |
|
324 |
case 0x2D20 : mapChar = 0x10C0; break; |
|
325 |
case 0x2D21 : mapChar = 0x10C1; break; |
|
326 |
case 0x2D22 : mapChar = 0x10C2; break; |
|
327 |
case 0x2D23 : mapChar = 0x10C3; break; |
|
328 |
case 0x2D24 : mapChar = 0x10C4; break; |
|
329 |
case 0x2D25 : mapChar = 0x10C5; break; |
|
2 | 330 |
// ch must have a 1:M case mapping, but we |
331 |
// can't handle it here. Return ch. |
|
332 |
// since mapChar is already set, no need |
|
333 |
// to redo it here. |
|
334 |
//default : mapChar = ch; |
|
335 |
} |
|
336 |
} |
|
337 |
else { |
|
338 |
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); |
|
339 |
mapChar = ch - offset; |
|
340 |
} |
|
341 |
} |
|
342 |
return mapChar; |
|
343 |
} |
|
344 |
||
345 |
int toTitleCase(int ch) { |
|
346 |
int mapChar = ch; |
|
347 |
int val = getProperties(ch); |
|
348 |
||
349 |
if ((val & $$maskTitleCase) != 0) { |
|
350 |
// There is a titlecase equivalent. Perform further checks: |
|
351 |
if ((val & $$maskUpperCase) == 0) { |
|
352 |
// The character does not have an uppercase equivalent, so it must |
|
353 |
// already be uppercase; so add 1 to get the titlecase form. |
|
354 |
mapChar = ch + 1; |
|
355 |
} |
|
356 |
else if ((val & $$maskLowerCase) == 0) { |
|
357 |
// The character does not have a lowercase equivalent, so it must |
|
358 |
// already be lowercase; so subtract 1 to get the titlecase form. |
|
359 |
mapChar = ch - 1; |
|
360 |
} |
|
361 |
// else { |
|
362 |
// The character has both an uppercase equivalent and a lowercase |
|
363 |
// equivalent, so it must itself be a titlecase form; return it. |
|
364 |
// return ch; |
|
365 |
//} |
|
366 |
} |
|
367 |
else if ((val & $$maskUpperCase) != 0) { |
|
368 |
// This character has no titlecase equivalent but it does have an |
|
369 |
// uppercase equivalent, so use that (subtract the signed case offset). |
|
370 |
mapChar = toUpperCase(ch); |
|
371 |
} |
|
372 |
return mapChar; |
|
373 |
} |
|
374 |
||
375 |
int digit(int ch, int radix) { |
|
376 |
int value = -1; |
|
377 |
if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) { |
|
378 |
int val = getProperties(ch); |
|
379 |
int kind = val & $$maskType; |
|
380 |
if (kind == Character.DECIMAL_DIGIT_NUMBER) { |
|
381 |
value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; |
|
382 |
} |
|
383 |
else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) { |
|
384 |
// Java supradecimal digit |
|
385 |
value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; |
|
386 |
} |
|
387 |
} |
|
388 |
return (value < radix) ? value : -1; |
|
389 |
} |
|
390 |
||
391 |
int getNumericValue(int ch) { |
|
392 |
int val = getProperties(ch); |
|
393 |
int retval = -1; |
|
394 |
||
395 |
switch (val & $$maskNumericType) { |
|
396 |
default: // cannot occur |
|
397 |
case ($$valueNotNumeric): // not numeric |
|
398 |
retval = -1; |
|
399 |
break; |
|
400 |
case ($$valueDigit): // simple numeric |
|
401 |
retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit; |
|
402 |
break; |
|
403 |
case ($$valueStrangeNumeric) : // "strange" numeric |
|
404 |
switch (ch) { |
|
405 |
case 0x0BF1: retval = 100; break; // TAMIL NUMBER ONE HUNDRED |
|
406 |
case 0x0BF2: retval = 1000; break; // TAMIL NUMBER ONE THOUSAND |
|
407 |
case 0x1375: retval = 40; break; // ETHIOPIC NUMBER FORTY |
|
408 |
case 0x1376: retval = 50; break; // ETHIOPIC NUMBER FIFTY |
|
409 |
case 0x1377: retval = 60; break; // ETHIOPIC NUMBER SIXTY |
|
410 |
case 0x1378: retval = 70; break; // ETHIOPIC NUMBER SEVENTY |
|
411 |
case 0x1379: retval = 80; break; // ETHIOPIC NUMBER EIGHTY |
|
412 |
case 0x137A: retval = 90; break; // ETHIOPIC NUMBER NINETY |
|
413 |
case 0x137B: retval = 100; break; // ETHIOPIC NUMBER HUNDRED |
|
414 |
case 0x137C: retval = 10000; break; // ETHIOPIC NUMBER TEN THOUSAND |
|
415 |
case 0x215F: retval = 1; break; // FRACTION NUMERATOR ONE |
|
416 |
case 0x216C: retval = 50; break; // ROMAN NUMERAL FIFTY |
|
417 |
case 0x216D: retval = 100; break; // ROMAN NUMERAL ONE HUNDRED |
|
418 |
case 0x216E: retval = 500; break; // ROMAN NUMERAL FIVE HUNDRED |
|
419 |
case 0x216F: retval = 1000; break; // ROMAN NUMERAL ONE THOUSAND |
|
420 |
case 0x217C: retval = 50; break; // SMALL ROMAN NUMERAL FIFTY |
|
421 |
case 0x217D: retval = 100; break; // SMALL ROMAN NUMERAL ONE HUNDRED |
|
422 |
case 0x217E: retval = 500; break; // SMALL ROMAN NUMERAL FIVE HUNDRED |
|
423 |
case 0x217F: retval = 1000; break; // SMALL ROMAN NUMERAL ONE THOUSAND |
|
424 |
case 0x2180: retval = 1000; break; // ROMAN NUMERAL ONE THOUSAND C D |
|
425 |
case 0x2181: retval = 5000; break; // ROMAN NUMERAL FIVE THOUSAND |
|
426 |
case 0x2182: retval = 10000; break; // ROMAN NUMERAL TEN THOUSAND |
|
427 |
||
428 |
case 0x325C: retval = 32; break; |
|
429 |
||
430 |
case 0x325D: retval = 33; break; // CIRCLED NUMBER THIRTY THREE |
|
431 |
case 0x325E: retval = 34; break; // CIRCLED NUMBER THIRTY FOUR |
|
432 |
case 0x325F: retval = 35; break; // CIRCLED NUMBER THIRTY FIVE |
|
433 |
case 0x32B1: retval = 36; break; // CIRCLED NUMBER THIRTY SIX |
|
434 |
case 0x32B2: retval = 37; break; // CIRCLED NUMBER THIRTY SEVEN |
|
435 |
case 0x32B3: retval = 38; break; // CIRCLED NUMBER THIRTY EIGHT |
|
436 |
case 0x32B4: retval = 39; break; // CIRCLED NUMBER THIRTY NINE |
|
437 |
case 0x32B5: retval = 40; break; // CIRCLED NUMBER FORTY |
|
438 |
case 0x32B6: retval = 41; break; // CIRCLED NUMBER FORTY ONE |
|
439 |
case 0x32B7: retval = 42; break; // CIRCLED NUMBER FORTY TWO |
|
440 |
case 0x32B8: retval = 43; break; // CIRCLED NUMBER FORTY THREE |
|
441 |
case 0x32B9: retval = 44; break; // CIRCLED NUMBER FORTY FOUR |
|
442 |
case 0x32BA: retval = 45; break; // CIRCLED NUMBER FORTY FIVE |
|
443 |
case 0x32BB: retval = 46; break; // CIRCLED NUMBER FORTY SIX |
|
444 |
case 0x32BC: retval = 47; break; // CIRCLED NUMBER FORTY SEVEN |
|
445 |
case 0x32BD: retval = 48; break; // CIRCLED NUMBER FORTY EIGHT |
|
446 |
case 0x32BE: retval = 49; break; // CIRCLED NUMBER FORTY NINE |
|
447 |
case 0x32BF: retval = 50; break; // CIRCLED NUMBER FIFTY |
|
448 |
||
2497 | 449 |
case 0x0D71: retval = 100; break; // MALAYALAM NUMBER ONE HUNDRED |
450 |
case 0x0D72: retval = 1000; break; // MALAYALAM NUMBER ONE THOUSAND |
|
451 |
case 0x2186: retval = 50; break; // ROMAN NUMERAL FIFTY EARLY FORM |
|
452 |
case 0x2187: retval = 50000; break; // ROMAN NUMERAL FIFTY THOUSAND |
|
453 |
case 0x2188: retval = 100000; break; // ROMAN NUMERAL ONE HUNDRED THOUSAND |
|
454 |
||
2 | 455 |
default: retval = -2; break; |
456 |
} |
|
457 |
break; |
|
458 |
case ($$valueJavaSupradecimal): // Java supradecimal |
|
459 |
retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10; |
|
460 |
break; |
|
461 |
} |
|
462 |
return retval; |
|
463 |
} |
|
464 |
||
465 |
boolean isWhitespace(int ch) { |
|
466 |
int props = getProperties(ch); |
|
467 |
return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace); |
|
468 |
} |
|
469 |
||
470 |
byte getDirectionality(int ch) { |
|
471 |
int val = getProperties(ch); |
|
472 |
byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi); |
|
473 |
if (directionality == 0xF ) { |
|
474 |
switch(ch) { |
|
475 |
case 0x202A : |
|
476 |
// This is the only char with LRE |
|
477 |
directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING; |
|
478 |
break; |
|
479 |
case 0x202B : |
|
480 |
// This is the only char with RLE |
|
481 |
directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING; |
|
482 |
break; |
|
483 |
case 0x202C : |
|
484 |
// This is the only char with PDF |
|
485 |
directionality = Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT; |
|
486 |
break; |
|
487 |
case 0x202D : |
|
488 |
// This is the only char with LRO |
|
489 |
directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE; |
|
490 |
break; |
|
491 |
case 0x202E : |
|
492 |
// This is the only char with RLO |
|
493 |
directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE; |
|
494 |
break; |
|
495 |
default : |
|
496 |
directionality = Character.DIRECTIONALITY_UNDEFINED; |
|
497 |
break; |
|
498 |
} |
|
499 |
} |
|
500 |
return directionality; |
|
501 |
} |
|
502 |
||
503 |
boolean isMirrored(int ch) { |
|
504 |
int props = getProperties(ch); |
|
505 |
return ((props & $$maskMirrored) != 0); |
|
506 |
} |
|
507 |
||
508 |
int toUpperCaseEx(int ch) { |
|
509 |
int mapChar = ch; |
|
510 |
int val = getProperties(ch); |
|
511 |
||
512 |
if ((val & $$maskUpperCase) != 0) { |
|
513 |
if ((val & $$maskCaseOffset) != $$maskCaseOffset) { |
|
514 |
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); |
|
515 |
mapChar = ch - offset; |
|
516 |
} |
|
517 |
else { |
|
518 |
switch(ch) { |
|
519 |
// map overflow characters |
|
520 |
case 0x00B5 : mapChar = 0x039C; break; |
|
521 |
case 0x017F : mapChar = 0x0053; break; |
|
522 |
case 0x1FBE : mapChar = 0x0399; break; |
|
2497 | 523 |
|
7247 | 524 |
case 0x023F : mapChar = 0x2C7E; break; |
525 |
case 0x0240 : mapChar = 0x2C7F; break; |
|
2497 | 526 |
case 0x0250 : mapChar = 0x2C6F; break; |
527 |
case 0x0251 : mapChar = 0x2C6D; break; |
|
7247 | 528 |
case 0x0252 : mapChar = 0x2C70; break; |
529 |
case 0x0265 : mapChar = 0xA78D; break; |
|
2497 | 530 |
case 0x026B : mapChar = 0x2C62; break; |
531 |
case 0x0271 : mapChar = 0x2C6E; break; |
|
532 |
case 0x027D : mapChar = 0x2C64; break; |
|
533 |
case 0x1D79 : mapChar = 0xA77D; break; |
|
534 |
case 0x1D7D : mapChar = 0x2C63; break; |
|
535 |
case 0x2C65 : mapChar = 0x023A; break; |
|
536 |
case 0x2C66 : mapChar = 0x023E; break; |
|
537 |
case 0x2D00 : mapChar = 0x10A0; break; |
|
538 |
case 0x2D01 : mapChar = 0x10A1; break; |
|
539 |
case 0x2D02 : mapChar = 0x10A2; break; |
|
540 |
case 0x2D03 : mapChar = 0x10A3; break; |
|
541 |
case 0x2D04 : mapChar = 0x10A4; break; |
|
542 |
case 0x2D05 : mapChar = 0x10A5; break; |
|
543 |
case 0x2D06 : mapChar = 0x10A6; break; |
|
544 |
case 0x2D07 : mapChar = 0x10A7; break; |
|
545 |
case 0x2D08 : mapChar = 0x10A8; break; |
|
546 |
case 0x2D09 : mapChar = 0x10A9; break; |
|
547 |
case 0x2D0A : mapChar = 0x10AA; break; |
|
548 |
case 0x2D0B : mapChar = 0x10AB; break; |
|
549 |
case 0x2D0C : mapChar = 0x10AC; break; |
|
550 |
case 0x2D0D : mapChar = 0x10AD; break; |
|
551 |
case 0x2D0E : mapChar = 0x10AE; break; |
|
552 |
case 0x2D0F : mapChar = 0x10AF; break; |
|
553 |
case 0x2D10 : mapChar = 0x10B0; break; |
|
554 |
case 0x2D11 : mapChar = 0x10B1; break; |
|
555 |
case 0x2D12 : mapChar = 0x10B2; break; |
|
556 |
case 0x2D13 : mapChar = 0x10B3; break; |
|
557 |
case 0x2D14 : mapChar = 0x10B4; break; |
|
558 |
case 0x2D15 : mapChar = 0x10B5; break; |
|
559 |
case 0x2D16 : mapChar = 0x10B6; break; |
|
560 |
case 0x2D17 : mapChar = 0x10B7; break; |
|
561 |
case 0x2D18 : mapChar = 0x10B8; break; |
|
562 |
case 0x2D19 : mapChar = 0x10B9; break; |
|
563 |
case 0x2D1A : mapChar = 0x10BA; break; |
|
564 |
case 0x2D1B : mapChar = 0x10BB; break; |
|
565 |
case 0x2D1C : mapChar = 0x10BC; break; |
|
566 |
case 0x2D1D : mapChar = 0x10BD; break; |
|
567 |
case 0x2D1E : mapChar = 0x10BE; break; |
|
568 |
case 0x2D1F : mapChar = 0x10BF; break; |
|
569 |
case 0x2D20 : mapChar = 0x10C0; break; |
|
570 |
case 0x2D21 : mapChar = 0x10C1; break; |
|
571 |
case 0x2D22 : mapChar = 0x10C2; break; |
|
572 |
case 0x2D23 : mapChar = 0x10C3; break; |
|
573 |
case 0x2D24 : mapChar = 0x10C4; break; |
|
574 |
case 0x2D25 : mapChar = 0x10C5; break; |
|
2 | 575 |
default : mapChar = Character.ERROR; break; |
576 |
} |
|
577 |
} |
|
578 |
} |
|
579 |
return mapChar; |
|
580 |
} |
|
581 |
||
582 |
char[] toUpperCaseCharArray(int ch) { |
|
583 |
char[] upperMap = {(char)ch}; |
|
584 |
int location = findInCharMap(ch); |
|
585 |
if (location != -1) { |
|
586 |
upperMap = charMap[location][1]; |
|
587 |
} |
|
588 |
return upperMap; |
|
589 |
} |
|
590 |
||
591 |
||
592 |
/** |
|
593 |
* Finds the character in the uppercase mapping table. |
|
594 |
* |
|
595 |
* @param ch the <code>char</code> to search |
|
596 |
* @return the index location ch in the table or -1 if not found |
|
597 |
* @since 1.4 |
|
598 |
*/ |
|
599 |
int findInCharMap(int ch) { |
|
600 |
if (charMap == null || charMap.length == 0) { |
|
601 |
return -1; |
|
602 |
} |
|
603 |
int top, bottom, current; |
|
604 |
bottom = 0; |
|
605 |
top = charMap.length; |
|
606 |
current = top/2; |
|
607 |
// invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0] |
|
608 |
while (top - bottom > 1) { |
|
609 |
if (ch >= charMap[current][0][0]) { |
|
610 |
bottom = current; |
|
611 |
} else { |
|
612 |
top = current; |
|
613 |
} |
|
614 |
current = (top + bottom) / 2; |
|
615 |
} |
|
616 |
if (ch == charMap[current][0][0]) return current; |
|
617 |
else return -1; |
|
618 |
} |
|
619 |
||
620 |
/** Shared instance of this class, created when the class is initialized. */
static final CharacterData00 instance = new CharacterData00();

/** Private, so that code outside this class cannot create instances. */
private CharacterData00() {
}
|
622 |
||
623 |
$$Tables |
|
624 |
||
625 |
// Class-initialization block. Its statements are not written out here: the
// single line between the braces is a template placeholder.
// NOTE(review): presumably the build-time generator replaces it with the
// table set-up code - confirm against the generator.
static { |

626 |
$$Initializers |

627 |
} |
|
628 |
} |