author | sherman |
Tue, 30 Aug 2011 11:53:11 -0700 | |
changeset 10419 | 12c063b39232 |
parent 5506 | 202f599c92aa |
child 14342 | 8435a30053c1 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
5506 | 2 |
* Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package sun.reflect; |
|
27 |
||
28 |
/** It is necessary to use a "bootstrap" UTF-8 encoder for encoding |
|
29 |
constant pool entries because the character set converters rely on |
|
30 |
Class.newInstance(). */ |
|
31 |
||
32 |
class UTF8 {
    /**
     * Encodes a string into bytes, one UTF-16 code unit at a time.
     *
     * <p>Each {@code char} is encoded on its own:
     * <ul>
     *   <li>U+0001..U+007F becomes one byte;</li>
     *   <li>U+0000 and U+0080..U+07FF become two bytes (so NUL is written
     *       as {@code C0 80}, never as a raw zero byte);</li>
     *   <li>everything else, including each half of a surrogate pair,
     *       becomes three bytes.</li>
     * </ul>
     * Surrogate pairs are therefore <em>not</em> merged into a single
     * four-byte sequence, so the output differs from standard UTF-8 for
     * supplementary characters and for U+0000.
     *
     * <p>NOTE(review): this looks like the "modified UTF-8" layout used for
     * class-file constant pool strings -- confirm against the consumers of
     * this method before changing the surrogate or NUL handling.
     *
     * @param str the string to encode; must not be {@code null}
     * @return a newly allocated array holding the encoded bytes
     * @throws InternalError if the precomputed length and the bytes actually
     *         written disagree (would indicate a bug in this class)
     */
    static byte[] encode(String str) {
        final int charCount = str.length();
        final byte[] out = new byte[utf8Length(str)];
        int pos = 0;
        try {
            for (int i = 0; i < charCount; i++) {
                final char ch = str.charAt(i);
                switch (encodedSize(ch)) {
                    case 1:
                        out[pos++] = (byte) ch;
                        break;
                    case 2:
                        // 110xxxxx 10xxxxxx
                        out[pos++] = (byte) (0xC0 | (ch >> 6));
                        out[pos++] = (byte) (0x80 | (ch & 0x3F));
                        break;
                    default:
                        // 1110xxxx 10xxxxxx 10xxxxxx
                        out[pos++] = (byte) (0xE0 | (ch >> 12));
                        out[pos++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
                        out[pos++] = (byte) (0x80 | (ch & 0x3F));
                        break;
                }
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            // Only reachable if utf8Length and the loop above disagree.
            throw new InternalError(
                "Bug in sun.reflect bootstrap UTF-8 encoder", e);
        }
        return out;
    }

    /**
     * Computes how many bytes {@link #encode(String)} will produce for
     * the given string by summing the per-character encoded sizes.
     */
    private static int utf8Length(String str) {
        final int charCount = str.length();
        int total = 0;
        for (int i = 0; i < charCount; i++) {
            total += encodedSize(str.charAt(i));
        }
        return total;
    }

    /**
     * Returns the number of bytes (1, 2 or 3) used to encode a single
     * UTF-16 code unit. U+0000 deliberately falls into the two-byte case.
     */
    private static int encodedSize(char ch) {
        if (ch != 0 && ch < 0x0080) {
            return 1;
        }
        if (ch < 0x0800) {
            return 2;
        }
        return 3;
    }
}