22 */ |
22 */ |
23 package com.sun.org.apache.xml.internal.security.c14n.implementations; |
23 package com.sun.org.apache.xml.internal.security.c14n.implementations; |
24 |
24 |
25 import java.io.IOException; |
25 import java.io.IOException; |
26 import java.io.OutputStream; |
26 import java.io.OutputStream; |
|
27 import java.security.AccessController; |
|
28 import java.security.PrivilegedAction; |
27 import java.util.Map; |
29 import java.util.Map; |
28 |
30 |
29 public class UtfHelpper { |
31 public final class UtfHelpper { |
30 |
32 |
31 static final void writeByte( |
33 /** |
|
34 * Revert to the old behavior (version 2 or before), i.e. surrogate pair characters become |
|
35 * '??' in output. Set system property com.sun.org.apache.xml.internal.security.c14n.oldUtf8=true if you want |
|
36 * to verify signatures generated by version 2 or before that contain 32-bit chars in the |
|
37 * XML document. |
|
38 */ |
|
39 private static final boolean OLD_UTF8 = |
|
40 AccessController.doPrivileged((PrivilegedAction<Boolean>) |
|
41 () -> Boolean.getBoolean("com.sun.org.apache.xml.internal.security.c14n.oldUtf8")); |
|
42 |
|
43 private UtfHelpper() { |
|
44 // complete |
|
45 } |
|
46 |
|
47 public static void writeByte( |
32 final String str, |
48 final String str, |
33 final OutputStream out, |
49 final OutputStream out, |
34 Map<String, byte[]> cache |
50 Map<String, byte[]> cache |
35 ) throws IOException { |
51 ) throws IOException { |
36 byte[] result = cache.get(str); |
52 byte[] result = cache.get(str); |
40 } |
56 } |
41 |
57 |
42 out.write(result); |
58 out.write(result); |
43 } |
59 } |
44 |
60 |
45 static final void writeCharToUtf8(final char c, final OutputStream out) throws IOException { |
61 public static void writeCodePointToUtf8(final int c, final OutputStream out) throws IOException { |
|
62 if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) { |
|
63 // valid code point: c >= 0x0000 && c <= 0x10FFFF |
|
64 out.write(0x3f); |
|
65 return; |
|
66 } |
|
67 if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
68 // version 2 or before output 2 question mark characters for 32 bit chars |
|
69 out.write(0x3f); |
|
70 out.write(0x3f); |
|
71 return; |
|
72 } |
|
73 |
|
74 if (c < 0x80) { |
|
75 // 0x00000000 - 0x0000007F |
|
76 // 0xxxxxxx |
|
77 out.write(c); |
|
78 return; |
|
79 } |
|
80 byte extraByte = 0; |
|
81 if (c < 0x800) { |
|
82 // 0x00000080 - 0x000007FF |
|
83 // 110xxxxx 10xxxxxx |
|
84 extraByte = 1; |
|
85 } else if (c < 0x10000) { |
|
86 // 0x00000800 - 0x0000FFFF |
|
87 // 1110xxxx 10xxxxxx 10xxxxxx |
|
88 extraByte = 2; |
|
89 } else if (c < 0x200000) { |
|
90 // 0x00010000 - 0x001FFFFF |
|
91 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
92 extraByte = 3; |
|
93 } else if (c < 0x4000000) { |
|
94 // 0x00200000 - 0x03FFFFFF |
|
95 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
96 // already outside valid Character range, just for completeness |
|
97 extraByte = 4; |
|
98 } else if (c <= 0x7FFFFFFF) { |
|
99 // 0x04000000 - 0x7FFFFFFF |
|
100 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
101 // already outside valid Character range, just for completeness |
|
102 extraByte = 5; |
|
103 } else { |
|
104 // 0x80000000 - 0xFFFFFFFF |
|
105 // case not possible as java has no unsigned int |
|
106 out.write(0x3f); |
|
107 return; |
|
108 } |
|
109 |
|
110 byte write; |
|
111 int shift = 6 * extraByte; |
|
112 write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift)); |
|
113 out.write(write); |
|
114 for (int i = extraByte - 1; i >= 0; i--) { |
|
115 shift -= 6; |
|
116 write = (byte)(0x80 | ((c >>> shift) & 0x3F)); |
|
117 out.write(write); |
|
118 } |
|
119 } |
|
120 |
|
121 @Deprecated |
|
122 public static void writeCharToUtf8(final char c, final OutputStream out) throws IOException { |
46 if (c < 0x80) { |
123 if (c < 0x80) { |
47 out.write(c); |
124 out.write(c); |
48 return; |
125 return; |
49 } |
126 } |
50 if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { |
127 if (c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) { |
51 //No Surrogates in sun java |
128 //No Surrogates in sun java |
52 out.write(0x3f); |
129 out.write(0x3f); |
53 return; |
130 return; |
54 } |
131 } |
55 int bias; |
132 int bias; |
57 char ch; |
134 char ch; |
58 if (c > 0x07FF) { |
135 if (c > 0x07FF) { |
59 ch = (char)(c>>>12); |
136 ch = (char)(c>>>12); |
60 write = 0xE0; |
137 write = 0xE0; |
61 if (ch > 0) { |
138 if (ch > 0) { |
62 write |= (ch & 0x0F); |
139 write |= ch & 0x0F; |
63 } |
140 } |
64 out.write(write); |
141 out.write(write); |
65 write = 0x80; |
142 write = 0x80; |
66 bias = 0x3F; |
143 bias = 0x3F; |
67 } else { |
144 } else { |
68 write = 0xC0; |
145 write = 0xC0; |
69 bias = 0x1F; |
146 bias = 0x1F; |
70 } |
147 } |
71 ch = (char)(c>>>6); |
148 ch = (char)(c>>>6); |
72 if (ch > 0) { |
149 if (ch > 0) { |
73 write |= (ch & bias); |
150 write |= ch & bias; |
74 } |
151 } |
75 out.write(write); |
152 out.write(write); |
76 out.write(0x80 | ((c) & 0x3F)); |
153 out.write(0x80 | ((c) & 0x3F)); |
77 |
154 |
78 } |
155 } |
79 |
156 |
80 static final void writeStringToUtf8( |
157 public static void writeStringToUtf8( |
81 final String str, |
158 final String str, final OutputStream out |
82 final OutputStream out |
159 ) throws IOException { |
83 ) throws IOException{ |
|
84 final int length = str.length(); |
160 final int length = str.length(); |
85 int i = 0; |
161 int i = 0; |
86 char c; |
162 int c; |
87 while (i < length) { |
163 while (i < length) { |
88 c = str.charAt(i++); |
164 c = str.codePointAt(i); |
|
165 i += Character.charCount(c); |
|
166 if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) { |
|
167 // valid code point: c >= 0x0000 && c <= 0x10FFFF |
|
168 out.write(0x3f); |
|
169 continue; |
|
170 } |
|
171 if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
172 // version 2 or before output 2 question mark characters for 32 bit chars |
|
173 out.write(0x3f); |
|
174 out.write(0x3f); |
|
175 continue; |
|
176 } |
89 if (c < 0x80) { |
177 if (c < 0x80) { |
90 out.write(c); |
178 out.write(c); |
91 continue; |
179 continue; |
92 } |
180 } |
93 if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { |
181 byte extraByte = 0; |
94 //No Surrogates in sun java |
182 if (c < 0x800) { |
|
183 // 0x00000080 - 0x000007FF |
|
184 // 110xxxxx 10xxxxxx |
|
185 extraByte = 1; |
|
186 } else if (c < 0x10000) { |
|
187 // 0x00000800 - 0x0000FFFF |
|
188 // 1110xxxx 10xxxxxx 10xxxxxx |
|
189 extraByte = 2; |
|
190 } else if (c < 0x200000) { |
|
191 // 0x00010000 - 0x001FFFFF |
|
192 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
193 extraByte = 3; |
|
194 } else if (c < 0x4000000) { |
|
195 // 0x00200000 - 0x03FFFFFF |
|
196 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
197 // already outside valid Character range, just for completeness |
|
198 extraByte = 4; |
|
199 } else if (c <= 0x7FFFFFFF) { |
|
200 // 0x04000000 - 0x7FFFFFFF |
|
201 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
202 // already outside valid Character range, just for completeness |
|
203 extraByte = 5; |
|
204 } else { |
|
205 // 0x80000000 - 0xFFFFFFFF |
|
206 // case not possible as java has no unsigned int |
95 out.write(0x3f); |
207 out.write(0x3f); |
96 continue; |
208 continue; |
97 } |
209 } |
98 char ch; |
210 byte write; |
99 int bias; |
211 int shift = 6 * extraByte; |
100 int write; |
212 write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift)); |
101 if (c > 0x07FF) { |
213 out.write(write); |
102 ch = (char)(c>>>12); |
214 for (int j = extraByte - 1; j >= 0; j--) { |
103 write = 0xE0; |
215 shift -= 6; |
104 if (ch > 0) { |
216 write = (byte)(0x80 | ((c >>> shift) & 0x3F)); |
105 write |= (ch & 0x0F); |
|
106 } |
|
107 out.write(write); |
217 out.write(write); |
108 write = 0x80; |
218 } |
109 bias = 0x3F; |
219 |
110 } else { |
220 } |
111 write = 0xC0; |
221 |
112 bias = 0x1F; |
222 } |
113 } |
223 |
114 ch = (char)(c>>>6); |
224 public static byte[] getStringInUtf8(final String str) { |
115 if (ch > 0) { |
|
116 write |= (ch & bias); |
|
117 } |
|
118 out.write(write); |
|
119 out.write(0x80 | ((c) & 0x3F)); |
|
120 |
|
121 } |
|
122 |
|
123 } |
|
124 |
|
125 public static final byte[] getStringInUtf8(final String str) { |
|
126 final int length = str.length(); |
225 final int length = str.length(); |
127 boolean expanded = false; |
226 boolean expanded = false; |
128 byte[] result = new byte[length]; |
227 byte[] result = new byte[length]; |
129 int i = 0; |
228 int i = 0; |
130 int out = 0; |
229 int out = 0; |
131 char c; |
230 int c; |
132 while (i < length) { |
231 while (i < length) { |
133 c = str.charAt(i++); |
232 c = str.codePointAt(i); |
|
233 i += Character.charCount(c); |
|
234 if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) { |
|
235 // valid code point: c >= 0x0000 && c <= 0x10FFFF |
|
236 result[out++] = (byte)0x3f; |
|
237 continue; |
|
238 } |
|
239 if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
240 // version 2 or before output 2 question mark characters for 32 bit chars |
|
241 result[out++] = (byte)0x3f; |
|
242 result[out++] = (byte)0x3f; |
|
243 continue; |
|
244 } |
134 if (c < 0x80) { |
245 if (c < 0x80) { |
135 result[out++] = (byte)c; |
246 result[out++] = (byte)c; |
136 continue; |
247 continue; |
137 } |
248 } |
138 if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) { |
|
139 //No Surrogates in sun java |
|
140 result[out++] = 0x3f; |
|
141 continue; |
|
142 } |
|
143 if (!expanded) { |
249 if (!expanded) { |
144 byte newResult[] = new byte[3*length]; |
250 byte newResult[] = new byte[6*length]; |
145 System.arraycopy(result, 0, newResult, 0, out); |
251 System.arraycopy(result, 0, newResult, 0, out); |
146 result = newResult; |
252 result = newResult; |
147 expanded = true; |
253 expanded = true; |
148 } |
254 } |
149 char ch; |
255 byte extraByte = 0; |
150 int bias; |
256 if (c < 0x800) { |
|
257 // 0x00000080 - 0x000007FF |
|
258 // 110xxxxx 10xxxxxx |
|
259 extraByte = 1; |
|
260 } else if (c < 0x10000) { |
|
261 // 0x00000800 - 0x0000FFFF |
|
262 // 1110xxxx 10xxxxxx 10xxxxxx |
|
263 extraByte = 2; |
|
264 } else if (c < 0x200000) { |
|
265 // 0x00010000 - 0x001FFFFF |
|
266 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
267 extraByte = 3; |
|
268 } else if (c < 0x4000000) { |
|
269 // 0x00200000 - 0x03FFFFFF |
|
270 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
271 // already outside valid Character range, just for completeness |
|
272 extraByte = 4; |
|
273 } else if (c <= 0x7FFFFFFF) { |
|
274 // 0x04000000 - 0x7FFFFFFF |
|
275 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
|
276 // already outside valid Character range, just for completeness |
|
277 extraByte = 5; |
|
278 } else { |
|
279 // 0x80000000 - 0xFFFFFFFF |
|
280 // case not possible as java has no unsigned int |
|
281 result[out++] = 0x3f; |
|
282 continue; |
|
283 } |
151 byte write; |
284 byte write; |
152 if (c > 0x07FF) { |
285 int shift = 6 * extraByte; |
153 ch = (char)(c>>>12); |
286 write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift)); |
154 write = (byte)0xE0; |
287 result[out++] = write; |
155 if (ch > 0) { |
288 for (int j = extraByte - 1; j >= 0; j--) { |
156 write |= (ch & 0x0F); |
289 shift -= 6; |
157 } |
290 write = (byte)(0x80 | ((c >>> shift) & 0x3F)); |
158 result[out++] = write; |
291 result[out++] = write; |
159 write = (byte)0x80; |
292 } |
160 bias = 0x3F; |
|
161 } else { |
|
162 write = (byte)0xC0; |
|
163 bias = 0x1F; |
|
164 } |
|
165 ch = (char)(c>>>6); |
|
166 if (ch > 0) { |
|
167 write |= (ch & bias); |
|
168 } |
|
169 result[out++] = write; |
|
170 result[out++] = (byte)(0x80 | ((c) & 0x3F)); |
|
171 } |
293 } |
172 if (expanded) { |
294 if (expanded) { |
173 byte newResult[] = new byte[out]; |
295 byte newResult[] = new byte[out]; |
174 System.arraycopy(result, 0, newResult, 0, out); |
296 System.arraycopy(result, 0, newResult, 0, out); |
175 result = newResult; |
297 result = newResult; |
176 } |
298 } |
177 return result; |
299 return result; |
178 } |
300 } |
179 |
|
180 } |
301 } |