/*
 * Copyright 1999-2000 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package java.util.regex;

/**
 * Utility class that implements the standard C ctype functionality
 * for the 7-bit ASCII range (code points 0x00 through 0x7F).
 *
 * <p>Classification is driven by a 128-entry lookup table. Each entry
 * combines one or more of the flag constants below (bit 8 and up) with,
 * for digits and letters, a numeric digit value stored in the low six
 * bits: 0-9 for '0'-'9' and 10-35 for 'A'-'Z' and 'a'-'z'.
 *
 * @author Hong Zhang
 */
final class ASCII {

    /** Flag: upper-case letter 'A'-'Z'. */
    static final int UPPER   = 0x00000100;

    /** Flag: lower-case letter 'a'-'z'. */
    static final int LOWER   = 0x00000200;

    /** Flag: decimal digit '0'-'9'. */
    static final int DIGIT   = 0x00000400;

    /** Flag: white space (HT, LF, VT, FF, CR and the space character). */
    static final int SPACE   = 0x00000800;

    /** Flag: punctuation character. */
    static final int PUNCT   = 0x00001000;

    /** Flag: control character (0x00-0x1F and 0x7F). */
    static final int CNTRL   = 0x00002000;

    /** Flag: blank character (HT and the space character). */
    static final int BLANK   = 0x00004000;

    /** Flag: hexadecimal digit '0'-'9', 'A'-'F', 'a'-'f'. */
    static final int HEX     = 0x00008000;

    /** Flag: the underscore character. */
    static final int UNDER   = 0x00010000;

    /** Mask covering the flag bits UPPER through HEX (bits 8-15). */
    static final int ASCII   = 0x0000FF00;

    /** Composite: any letter. */
    static final int ALPHA   = (UPPER|LOWER);

    /** Composite: any letter or decimal digit. */
    static final int ALNUM   = (UPPER|LOWER|DIGIT);

    /** Composite: any letter, decimal digit or punctuation character. */
    static final int GRAPH   = (PUNCT|UPPER|LOWER|DIGIT);

    /** Composite: any letter, decimal digit or the underscore. */
    static final int WORD    = (UPPER|LOWER|UNDER|DIGIT);

    /** Composite: hexadecimal digit (alias of HEX). */
    static final int XDIGIT  = (HEX);

    /**
     * Per-character attribute table indexed by ASCII code (0x00-0x7F).
     * The low six bits of a digit or letter entry hold its digit value.
     */
    private static final int[] ctype = new int[] {
        CNTRL,                  /* 00 (NUL) */
        CNTRL,                  /* 01 (SOH) */
        CNTRL,                  /* 02 (STX) */
        CNTRL,                  /* 03 (ETX) */
        CNTRL,                  /* 04 (EOT) */
        CNTRL,                  /* 05 (ENQ) */
        CNTRL,                  /* 06 (ACK) */
        CNTRL,                  /* 07 (BEL) */
        CNTRL,                  /* 08 (BS)  */
        SPACE+CNTRL+BLANK,      /* 09 (HT)  */
        SPACE+CNTRL,            /* 0A (LF)  */
        SPACE+CNTRL,            /* 0B (VT)  */
        SPACE+CNTRL,            /* 0C (FF)  */
        SPACE+CNTRL,            /* 0D (CR)  */
        CNTRL,                  /* 0E (SI)  */
        CNTRL,                  /* 0F (SO)  */
        CNTRL,                  /* 10 (DLE) */
        CNTRL,                  /* 11 (DC1) */
        CNTRL,                  /* 12 (DC2) */
        CNTRL,                  /* 13 (DC3) */
        CNTRL,                  /* 14 (DC4) */
        CNTRL,                  /* 15 (NAK) */
        CNTRL,                  /* 16 (SYN) */
        CNTRL,                  /* 17 (ETB) */
        CNTRL,                  /* 18 (CAN) */
        CNTRL,                  /* 19 (EM)  */
        CNTRL,                  /* 1A (SUB) */
        CNTRL,                  /* 1B (ESC) */
        CNTRL,                  /* 1C (FS)  */
        CNTRL,                  /* 1D (GS)  */
        CNTRL,                  /* 1E (RS)  */
        CNTRL,                  /* 1F (US)  */
        SPACE+BLANK,            /* 20 SPACE */
        PUNCT,                  /* 21 !     */
        PUNCT,                  /* 22 "     */
        PUNCT,                  /* 23 #     */
        PUNCT,                  /* 24 $     */
        PUNCT,                  /* 25 %     */
        PUNCT,                  /* 26 &     */
        PUNCT,                  /* 27 '     */
        PUNCT,                  /* 28 (     */
        PUNCT,                  /* 29 )     */
        PUNCT,                  /* 2A *     */
        PUNCT,                  /* 2B +     */
        PUNCT,                  /* 2C ,     */
        PUNCT,                  /* 2D -     */
        PUNCT,                  /* 2E .     */
        PUNCT,                  /* 2F /     */
        DIGIT+HEX+0,            /* 30 0     */
        DIGIT+HEX+1,            /* 31 1     */
        DIGIT+HEX+2,            /* 32 2     */
        DIGIT+HEX+3,            /* 33 3     */
        DIGIT+HEX+4,            /* 34 4     */
        DIGIT+HEX+5,            /* 35 5     */
        DIGIT+HEX+6,            /* 36 6     */
        DIGIT+HEX+7,            /* 37 7     */
        DIGIT+HEX+8,            /* 38 8     */
        DIGIT+HEX+9,            /* 39 9     */
        PUNCT,                  /* 3A :     */
        PUNCT,                  /* 3B ;     */
        PUNCT,                  /* 3C <     */
        PUNCT,                  /* 3D =     */
        PUNCT,                  /* 3E >     */
        PUNCT,                  /* 3F ?     */
        PUNCT,                  /* 40 @     */
        UPPER+HEX+10,           /* 41 A     */
        UPPER+HEX+11,           /* 42 B     */
        UPPER+HEX+12,           /* 43 C     */
        UPPER+HEX+13,           /* 44 D     */
        UPPER+HEX+14,           /* 45 E     */
        UPPER+HEX+15,           /* 46 F     */
        UPPER+16,               /* 47 G     */
        UPPER+17,               /* 48 H     */
        UPPER+18,               /* 49 I     */
        UPPER+19,               /* 4A J     */
        UPPER+20,               /* 4B K     */
        UPPER+21,               /* 4C L     */
        UPPER+22,               /* 4D M     */
        UPPER+23,               /* 4E N     */
        UPPER+24,               /* 4F O     */
        UPPER+25,               /* 50 P     */
        UPPER+26,               /* 51 Q     */
        UPPER+27,               /* 52 R     */
        UPPER+28,               /* 53 S     */
        UPPER+29,               /* 54 T     */
        UPPER+30,               /* 55 U     */
        UPPER+31,               /* 56 V     */
        UPPER+32,               /* 57 W     */
        UPPER+33,               /* 58 X     */
        UPPER+34,               /* 59 Y     */
        UPPER+35,               /* 5A Z     */
        PUNCT,                  /* 5B [     */
        PUNCT,                  /* 5C \     */
        PUNCT,                  /* 5D ]     */
        PUNCT,                  /* 5E ^     */
        PUNCT|UNDER,            /* 5F _     */
        PUNCT,                  /* 60 `     */
        LOWER+HEX+10,           /* 61 a     */
        LOWER+HEX+11,           /* 62 b     */
        LOWER+HEX+12,           /* 63 c     */
        LOWER+HEX+13,           /* 64 d     */
        LOWER+HEX+14,           /* 65 e     */
        LOWER+HEX+15,           /* 66 f     */
        LOWER+16,               /* 67 g     */
        LOWER+17,               /* 68 h     */
        LOWER+18,               /* 69 i     */
        LOWER+19,               /* 6A j     */
        LOWER+20,               /* 6B k     */
        LOWER+21,               /* 6C l     */
        LOWER+22,               /* 6D m     */
        LOWER+23,               /* 6E n     */
        LOWER+24,               /* 6F o     */
        LOWER+25,               /* 70 p     */
        LOWER+26,               /* 71 q     */
        LOWER+27,               /* 72 r     */
        LOWER+28,               /* 73 s     */
        LOWER+29,               /* 74 t     */
        LOWER+30,               /* 75 u     */
        LOWER+31,               /* 76 v     */
        LOWER+32,               /* 77 w     */
        LOWER+33,               /* 78 x     */
        LOWER+34,               /* 79 y     */
        LOWER+35,               /* 7A z     */
        PUNCT,                  /* 7B {     */
        PUNCT,                  /* 7C |     */
        PUNCT,                  /* 7D }     */
        PUNCT,                  /* 7E ~     */
        CNTRL,                  /* 7F (DEL) */
    };

    /** Not instantiable: every member of this class is static. */
    private ASCII() {
    }

    /**
     * Returns the raw table entry (flags plus digit value) for a character.
     *
     * @param ch the character code to classify
     * @return the table entry when {@code ch} is in 0x00-0x7F, otherwise 0
     */
    static int getType(int ch) {
        // Any bit above bit 6 (including the sign bit) means "not ASCII".
        return ((ch & 0xFFFFFF80) == 0 ? ctype[ch] : 0);
    }

    /**
     * Tests whether a character carries at least one of the given flags.
     *
     * @param ch   the character code to classify
     * @param type one flag constant or an OR-combination of several
     * @return {@code true} if any bit of {@code type} is set for {@code ch}
     */
    static boolean isType(int ch, int type) {
        return (getType(ch) & type) != 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} lies in 0x00-0x7F
     */
    static boolean isAscii(int ch) {
        return ((ch & 0xFFFFFF80) == 0);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII letter
     */
    static boolean isAlpha(int ch) {
        return isType(ch, ALPHA);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is in '0'-'9'
     */
    static boolean isDigit(int ch) {
        // Range test without a branch: the OR of the two differences has
        // a clear sign bit only when neither difference is negative.
        return ((ch-'0')|('9'-ch)) >= 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII letter or decimal digit
     */
    static boolean isAlnum(int ch) {
        return isType(ch, ALNUM);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII letter, decimal digit
     *         or punctuation character
     */
    static boolean isGraph(int ch) {
        return isType(ch, GRAPH);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} lies in 0x20-0x7E
     */
    static boolean isPrint(int ch) {
        // Same sign-bit range test as isDigit, over 0x20..0x7E.
        return ((ch-0x20)|(0x7E-ch)) >= 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII punctuation character
     */
    static boolean isPunct(int ch) {
        return isType(ch, PUNCT);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is HT, LF, VT, FF, CR or space
     */
    static boolean isSpace(int ch) {
        return isType(ch, SPACE);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is in '0'-'9', 'A'-'F' or 'a'-'f'
     */
    static boolean isHexDigit(int ch) {
        return isType(ch, HEX);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is in '0'-'7'
     */
    static boolean isOctDigit(int ch) {
        return ((ch-'0')|('7'-ch)) >= 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII control character
     */
    static boolean isCntrl(int ch) {
        return isType(ch, CNTRL);
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is in 'a'-'z'
     */
    static boolean isLower(int ch) {
        return ((ch-'a')|('z'-ch)) >= 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is in 'A'-'Z'
     */
    static boolean isUpper(int ch) {
        return ((ch-'A')|('Z'-ch)) >= 0;
    }

    /**
     * @param ch the character code to test
     * @return {@code true} if {@code ch} is an ASCII letter, decimal digit
     *         or the underscore
     */
    static boolean isWord(int ch) {
        return isType(ch, WORD);
    }

    /**
     * Returns the digit value stored in the table for a character:
     * 0-9 for '0'-'9', 10-35 for letters (either case), 0 for anything else.
     *
     * <p>The argument is reduced to its low seven bits rather than range
     * checked, so a non-ASCII code yields the value of the ASCII character
     * it folds onto. Callers that need validation should test the
     * character first (for example with {@link #isHexDigit(int)}).
     *
     * @param ch the character code to convert
     * @return the low six bits of the table entry for {@code ch & 0x7F}
     */
    static int toDigit(int ch) {
        return (ctype[ch & 0x7F] & 0x3F);
    }

    /**
     * @param ch the character code to convert
     * @return the lower-case form of an ASCII upper-case letter;
     *         any other value is returned unchanged
     */
    static int toLower(int ch) {
        return isUpper(ch) ? (ch + 0x20) : ch;
    }

    /**
     * @param ch the character code to convert
     * @return the upper-case form of an ASCII lower-case letter;
     *         any other value is returned unchanged
     */
    static int toUpper(int ch) {
        return isLower(ch) ? (ch - 0x20) : ch;
    }

}