1 /* |
|
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package com.sun.codemodel.internal.util; |
|
27 |
|
28 import java.nio.CharBuffer; |
|
29 import java.nio.charset.CoderResult; |
|
30 |
|
31 |
|
/**
 * Utility class for dealing with UTF-16 surrogates.
 *
 * <p>Provides static predicates and conversions between UCS-4 code points and
 * UTF-16 surrogate pairs, plus two small stateful helpers: {@link Parser}
 * (UTF-16 to UCS-4) and {@link Generator} (UCS-4 to UTF-16). The helpers keep
 * the outcome of the most recent operation in instance fields and perform no
 * synchronization.
 *
 * @author Mark Reinhold
 * @version 1.11, 03/01/23
 */
final class Surrogate {

    /** Not instantiable: this class only holds static members. */
    private Surrogate() { }

    // UTF-16 surrogate-character ranges
    //
    public static final char MIN_HIGH = '\uD800';
    public static final char MAX_HIGH = '\uDBFF';
    public static final char MIN_LOW = '\uDC00';
    public static final char MAX_LOW = '\uDFFF';
    public static final char MIN = MIN_HIGH;
    public static final char MAX = MAX_LOW;

    // Range of UCS-4 values that need surrogates in UTF-16
    //
    public static final int UCS4_MIN = 0x10000;
    public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

    /**
     * Tells whether or not the given UTF-16 value is a high surrogate.
     *
     * @param  c  The UTF-16 value to test
     * @return  {@code true} if {@code c} lies in
     *          [{@link #MIN_HIGH}, {@link #MAX_HIGH}]
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given UTF-16 value is a low surrogate.
     *
     * @param  c  The UTF-16 value to test
     * @return  {@code true} if {@code c} lies in
     *          [{@link #MIN_LOW}, {@link #MAX_LOW}]
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given UTF-16 value is a surrogate character,
     * either high or low.
     *
     * @param  c  The UTF-16 value to test
     * @return  {@code true} if {@code c} lies in [{@link #MIN}, {@link #MAX}]
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16.
     *
     * @param  uc  The UCS-4 character
     * @return  {@code true} if {@code uc} lies in
     *          [{@link #UCS4_MIN}, {@link #UCS4_MAX}]
     */
    public static boolean neededFor(int uc) {
        return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
    }

    /**
     * Returns the high UTF-16 surrogate for the given UCS-4 character.
     *
     * <p>The argument is not validated; the result is only meaningful when
     * {@link #neededFor(int)} holds for {@code uc}.
     *
     * @param  uc  The UCS-4 character
     * @return  The high (leading) surrogate
     */
    public static char high(int uc) {
        return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff));
    }

    /**
     * Returns the low UTF-16 surrogate for the given UCS-4 character.
     *
     * <p>The argument is not validated; the result is only meaningful when
     * {@link #neededFor(int)} holds for {@code uc}.
     *
     * @param  uc  The UCS-4 character
     * @return  The low (trailing) surrogate
     */
    public static char low(int uc) {
        return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff));
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     *
     * <p>The arguments are not validated; only their low ten bits are used.
     *
     * @param  c  The high surrogate
     * @param  d  The low surrogate
     * @return  The combined UCS-4 character
     */
    public static int toUCS4(char c, char d) {
        return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
    }

    /**
     * Surrogate parsing support.  Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.
         *
         * @return  The error result, or {@code null} if the previous parse
         *          succeeded
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            return CoderResult.unmappableForLength(increment());
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * <p>When {@code c} is a high surrogate and the buffer has input
         * remaining, the next character is read with a relative get, so the
         * buffer position advances even if that character turns out not to
         * be a low surrogate.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (isHigh(c)) {
                if (!in.hasRemaining()) {
                    // The second half of the pair has not arrived yet.
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = in.get();
                if (isLow(d)) {
                    character = toUCS4(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (isLow(c)) {
                // Low surrogate without a preceding high surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

        /**
         * Parses a UCS-4 character from the given source array, handling
         * surrogates.  Nothing is written to the array and no index is
         * advanced; use increment() to learn how many characters were used.
         *
         * @param  c    The first character
         * @param  ia   The input array, from which one more character
         *              will be consumed if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            if (isHigh(c)) {
                if (il - ip < 2) {
                    // The second half of the pair has not arrived yet.
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = ia[ip + 1];
                if (isLow(d)) {
                    character = toUCS4(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (isLow(c)) {
                // Low surrogate without a preceding high surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

    }

    /**
     * Surrogate generation support.  Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.
         *
         * @return  The error result, or {@code null} if the previous
         *          generation succeeded
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside the range [0, {@link Surrogate#UCS4_MAX}],
         * including negative values, are reported as unmappable; values in
         * the surrogate range are reported as malformed.  Nothing is written
         * to the destination when -1 is returned.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            // Reject anything that is not a code point at all.  Without the
            // lower bound a negative value would be truncated to a char.
            if (uc < 0 || uc > UCS4_MAX) {
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
            if (uc < UCS4_MIN) {
                // Basic Multilingual Plane: a single char, unless the value
                // is itself a surrogate.
                if (is(uc)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dst.remaining() < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                dst.put((char)uc);
                error = null;
                return 1;
            }
            // Supplementary character: needs room for both halves.
            if (dst.remaining() < 2) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            dst.put(high(uc));
            dst.put(low(uc));
            error = null;
            return 2;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside the range [0, {@link Surrogate#UCS4_MAX}],
         * including negative values, are reported as unmappable; values in
         * the surrogate range are reported as malformed.  Nothing is written
         * to the destination when -1 is returned.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            // Reject anything that is not a code point at all.  Without the
            // lower bound a negative value would be truncated to a char.
            if (uc < 0 || uc > UCS4_MAX) {
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
            if (uc < UCS4_MIN) {
                // Basic Multilingual Plane: a single char, unless the value
                // is itself a surrogate.
                if (is(uc)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dl - dp < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                da[dp] = (char)uc;
                error = null;
                return 1;
            }
            // Supplementary character: needs room for both halves.
            if (dl - dp < 2) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            da[dp] = high(uc);
            da[dp + 1] = low(uc);
            error = null;
            return 2;
        }

    }

}
|