1 /* |
1 /* |
2 * Copyright (c) 2000, 2001, Oracle and/or its affiliates. All rights reserved. |
2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * |
4 * |
5 * This code is free software; you can redistribute it and/or modify it |
5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as |
6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. Oracle designates this |
7 * published by the Free Software Foundation. Oracle designates this |
32 |
32 |
33 /** |
33 /** |
34 * Utility class for dealing with surrogates. |
34 * Utility class for dealing with surrogates. |
35 * |
35 * |
36 * @author Mark Reinhold |
36 * @author Mark Reinhold |
|
37 * @author Martin Buchholz |
|
38 * @author Ulf Zibis |
37 */ |
39 */ |
38 |
|
39 public class Surrogate { |
40 public class Surrogate { |
40 |
41 |
41 private Surrogate() { } |
42 private Surrogate() { } |
42 |
43 |
43 // TODO: Deprecate/remove the following redundant definitions |
44 // TODO: Deprecate/remove the following redundant definitions |
73 public static boolean is(int c) { |
74 public static boolean is(int c) { |
74 return (MIN <= c) && (c <= MAX); |
75 return (MIN <= c) && (c <= MAX); |
75 } |
76 } |
76 |
77 |
77 /** |
78 /** |
78 * Tells whether or not the given UCS-4 character is in the Basic |
|
79 * Multilingual Plane, and can be represented using a single char. |
|
80 */ |
|
81 public static boolean isBMPCodePoint(int uc) { |
|
82 return uc >> 16 == 0; |
|
83 } |
|
84 |
|
85 /** |
|
86 * Tells whether or not the given UCS-4 character must be represented as a |
79 * Tells whether or not the given UCS-4 character must be represented as a |
87 * surrogate pair in UTF-16. |
80 * surrogate pair in UTF-16. |
|
81 * Use of {@link Character#isSupplementaryCodePoint} is generally preferred. |
88 */ |
82 */ |
89 public static boolean neededFor(int uc) { |
83 public static boolean neededFor(int uc) { |
90 return Character.isSupplementaryCodePoint(uc); |
84 return Character.isSupplementaryCodePoint(uc); |
91 } |
85 } |
92 |
86 |
108 return (char)((uc & 0x3ff) + Character.MIN_LOW_SURROGATE); |
102 return (char)((uc & 0x3ff) + Character.MIN_LOW_SURROGATE); |
109 } |
103 } |
110 |
104 |
111 /** |
105 /** |
112 * Converts the given surrogate pair into a 32-bit UCS-4 character. |
106 * Converts the given surrogate pair into a 32-bit UCS-4 character. |
|
107 * Use of {@link Character#toCodePoint} is generally preferred. |
113 */ |
108 */ |
114 public static int toUCS4(char c, char d) { |
109 public static int toUCS4(char c, char d) { |
115 assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d); |
110 assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d); |
116 return Character.toCodePoint(c, d); |
111 return Character.toCodePoint(c, d); |
117 } |
112 } |
288 * @return Either a positive count of the number of UTF-16 characters
283 * @return Either a positive count of the number of UTF-16 characters
289 * written to the destination buffer, or -1, in which case |
284 * written to the destination buffer, or -1, in which case |
290 * error() will return a descriptive result object |
285 * error() will return a descriptive result object |
291 */ |
286 */ |
292 public int generate(int uc, int len, CharBuffer dst) { |
287 public int generate(int uc, int len, CharBuffer dst) { |
293 if (Surrogate.isBMPCodePoint(uc)) { |
288 if (Character.isBmpCodePoint(uc)) { |
294 if (Surrogate.is(uc)) { |
289 char c = (char) uc; |
|
290 if (Character.isSurrogate(c)) { |
295 error = CoderResult.malformedForLength(len); |
291 error = CoderResult.malformedForLength(len); |
296 return -1; |
292 return -1; |
297 } |
293 } |
298 if (dst.remaining() < 1) { |
294 if (dst.remaining() < 1) { |
299 error = CoderResult.OVERFLOW; |
295 error = CoderResult.OVERFLOW; |
300 return -1; |
296 return -1; |
301 } |
297 } |
302 dst.put((char)uc); |
298 dst.put(c); |
303 error = null; |
299 error = null; |
304 return 1; |
300 return 1; |
305 } else if (Character.isSupplementaryCodePoint(uc)) { |
301 } else if (Character.isValidCodePoint(uc)) { |
306 if (dst.remaining() < 2) { |
302 if (dst.remaining() < 2) { |
307 error = CoderResult.OVERFLOW; |
303 error = CoderResult.OVERFLOW; |
308 return -1; |
304 return -1; |
309 } |
305 } |
310 dst.put(Surrogate.high(uc)); |
306 dst.put(Surrogate.high(uc)); |
332 * @return Either a positive count of the number of UTF-16 characters
328 * @return Either a positive count of the number of UTF-16 characters
333 * written to the destination buffer, or -1, in which case |
329 * written to the destination buffer, or -1, in which case |
334 * error() will return a descriptive result object |
330 * error() will return a descriptive result object |
335 */ |
331 */ |
336 public int generate(int uc, int len, char[] da, int dp, int dl) { |
332 public int generate(int uc, int len, char[] da, int dp, int dl) { |
337 if (Surrogate.isBMPCodePoint(uc)) { |
333 if (Character.isBmpCodePoint(uc)) { |
338 if (Surrogate.is(uc)) { |
334 char c = (char) uc; |
|
335 if (Character.isSurrogate(c)) { |
339 error = CoderResult.malformedForLength(len); |
336 error = CoderResult.malformedForLength(len); |
340 return -1; |
337 return -1; |
341 } |
338 } |
342 if (dl - dp < 1) { |
339 if (dl - dp < 1) { |
343 error = CoderResult.OVERFLOW; |
340 error = CoderResult.OVERFLOW; |
344 return -1; |
341 return -1; |
345 } |
342 } |
346 da[dp] = (char)uc; |
343 da[dp] = c; |
347 error = null; |
344 error = null; |
348 return 1; |
345 return 1; |
349 } else if (Character.isSupplementaryCodePoint(uc)) { |
346 } else if (Character.isValidCodePoint(uc)) { |
350 if (dl - dp < 2) { |
347 if (dl - dp < 2) { |
351 error = CoderResult.OVERFLOW; |
348 error = CoderResult.OVERFLOW; |
352 return -1; |
349 return -1; |
353 } |
350 } |
354 da[dp] = Surrogate.high(uc); |
351 da[dp] = Surrogate.high(uc); |