|
/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
25 |
|
26 package sun.nio.cs; |
|
27 |
|
28 import java.nio.Buffer; |
|
29 import java.nio.ByteBuffer; |
|
30 import java.nio.CharBuffer; |
|
31 import java.nio.charset.Charset; |
|
32 import java.nio.charset.CharsetDecoder; |
|
33 import java.nio.charset.CharsetEncoder; |
|
34 import java.nio.charset.CoderResult; |
|
35 import java.nio.charset.CodingErrorAction; |
|
36 |
|
/* Legal CESU-8 Byte Sequences
 *
 * #    Code Points      Bits   Bit/Byte pattern
 * 1                     7      0xxxxxxx
 *      U+0000..U+007F          00..7F
 *
 * 2                     11     110xxxxx    10xxxxxx
 *      U+0080..U+07FF          C2..DF      80..BF
 *
 * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
 *      U+0800..U+0FFF          E0          A0..BF      80..BF
 *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
 *
 */
|
51 |
|
52 class CESU_8 extends Unicode |
|
53 { |
|
54 public CESU_8() { |
|
55 super("CESU-8", StandardCharsets.aliases_CESU_8()); |
|
56 } |
|
57 |
|
58 public String historicalName() { |
|
59 return "CESU8"; |
|
60 } |
|
61 |
|
62 public CharsetDecoder newDecoder() { |
|
63 return new Decoder(this); |
|
64 } |
|
65 |
|
66 public CharsetEncoder newEncoder() { |
|
67 return new Encoder(this); |
|
68 } |
|
69 |
|
70 private static final void updatePositions(Buffer src, int sp, |
|
71 Buffer dst, int dp) { |
|
72 src.position(sp - src.arrayOffset()); |
|
73 dst.position(dp - dst.arrayOffset()); |
|
74 } |
|
75 |
|
76 private static class Decoder extends CharsetDecoder |
|
77 implements ArrayDecoder { |
|
78 private Decoder(Charset cs) { |
|
79 super(cs, 1.0f, 1.0f); |
|
80 } |
|
81 |
|
82 private static boolean isNotContinuation(int b) { |
|
83 return (b & 0xc0) != 0x80; |
|
84 } |
|
85 |
|
86 // [E0] [A0..BF] [80..BF] |
|
87 // [E1..EF] [80..BF] [80..BF] |
|
88 private static boolean isMalformed3(int b1, int b2, int b3) { |
|
89 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
|
90 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; |
|
91 } |
|
92 |
|
93 // only used when there is only one byte left in src buffer |
|
94 private static boolean isMalformed3_2(int b1, int b2) { |
|
95 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
|
96 (b2 & 0xc0) != 0x80; |
|
97 } |
|
98 |
|
99 |
|
100 // [F0] [90..BF] [80..BF] [80..BF] |
|
101 // [F1..F3] [80..BF] [80..BF] [80..BF] |
|
102 // [F4] [80..8F] [80..BF] [80..BF] |
|
103 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] |
|
104 // will be checked by Character.isSupplementaryCodePoint(uc) |
|
105 private static boolean isMalformed4(int b2, int b3, int b4) { |
|
106 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || |
|
107 (b4 & 0xc0) != 0x80; |
|
108 } |
|
109 |
|
110 // only used when there is less than 4 bytes left in src buffer |
|
111 private static boolean isMalformed4_2(int b1, int b2) { |
|
112 return (b1 == 0xf0 && b2 == 0x90) || |
|
113 (b2 & 0xc0) != 0x80; |
|
114 } |
|
115 |
|
116 private static boolean isMalformed4_3(int b3) { |
|
117 return (b3 & 0xc0) != 0x80; |
|
118 } |
|
119 |
|
120 private static CoderResult malformedN(ByteBuffer src, int nb) { |
|
121 switch (nb) { |
|
122 case 1: |
|
123 case 2: // always 1 |
|
124 return CoderResult.malformedForLength(1); |
|
125 case 3: |
|
126 int b1 = src.get(); |
|
127 int b2 = src.get(); // no need to lookup b3 |
|
128 return CoderResult.malformedForLength( |
|
129 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || |
|
130 isNotContinuation(b2)) ? 1 : 2); |
|
131 case 4: // we don't care the speed here |
|
132 b1 = src.get() & 0xff; |
|
133 b2 = src.get() & 0xff; |
|
134 if (b1 > 0xf4 || |
|
135 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || |
|
136 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || |
|
137 isNotContinuation(b2)) |
|
138 return CoderResult.malformedForLength(1); |
|
139 if (isNotContinuation(src.get())) |
|
140 return CoderResult.malformedForLength(2); |
|
141 return CoderResult.malformedForLength(3); |
|
142 default: |
|
143 assert false; |
|
144 return null; |
|
145 } |
|
146 } |
|
147 |
|
148 private static CoderResult malformed(ByteBuffer src, int sp, |
|
149 CharBuffer dst, int dp, |
|
150 int nb) |
|
151 { |
|
152 src.position(sp - src.arrayOffset()); |
|
153 CoderResult cr = malformedN(src, nb); |
|
154 updatePositions(src, sp, dst, dp); |
|
155 return cr; |
|
156 } |
|
157 |
|
158 |
|
159 private static CoderResult malformed(ByteBuffer src, |
|
160 int mark, int nb) |
|
161 { |
|
162 src.position(mark); |
|
163 CoderResult cr = malformedN(src, nb); |
|
164 src.position(mark); |
|
165 return cr; |
|
166 } |
|
167 |
|
168 private static CoderResult malformedForLength(ByteBuffer src, |
|
169 int sp, |
|
170 CharBuffer dst, |
|
171 int dp, |
|
172 int malformedNB) |
|
173 { |
|
174 updatePositions(src, sp, dst, dp); |
|
175 return CoderResult.malformedForLength(malformedNB); |
|
176 } |
|
177 |
|
178 private static CoderResult malformedForLength(ByteBuffer src, |
|
179 int mark, |
|
180 int malformedNB) |
|
181 { |
|
182 src.position(mark); |
|
183 return CoderResult.malformedForLength(malformedNB); |
|
184 } |
|
185 |
|
186 |
|
187 private static CoderResult xflow(Buffer src, int sp, int sl, |
|
188 Buffer dst, int dp, int nb) { |
|
189 updatePositions(src, sp, dst, dp); |
|
190 return (nb == 0 || sl - sp < nb) |
|
191 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; |
|
192 } |
|
193 |
|
194 private static CoderResult xflow(Buffer src, int mark, int nb) { |
|
195 src.position(mark); |
|
196 return (nb == 0 || src.remaining() < nb) |
|
197 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; |
|
198 } |
|
199 |
|
200 private CoderResult decodeArrayLoop(ByteBuffer src, |
|
201 CharBuffer dst) |
|
202 { |
|
203 // This method is optimized for ASCII input. |
|
204 byte[] sa = src.array(); |
|
205 int sp = src.arrayOffset() + src.position(); |
|
206 int sl = src.arrayOffset() + src.limit(); |
|
207 |
|
208 char[] da = dst.array(); |
|
209 int dp = dst.arrayOffset() + dst.position(); |
|
210 int dl = dst.arrayOffset() + dst.limit(); |
|
211 int dlASCII = dp + Math.min(sl - sp, dl - dp); |
|
212 |
|
213 // ASCII only loop |
|
214 while (dp < dlASCII && sa[sp] >= 0) |
|
215 da[dp++] = (char) sa[sp++]; |
|
216 while (sp < sl) { |
|
217 int b1 = sa[sp]; |
|
218 if (b1 >= 0) { |
|
219 // 1 byte, 7 bits: 0xxxxxxx |
|
220 if (dp >= dl) |
|
221 return xflow(src, sp, sl, dst, dp, 1); |
|
222 da[dp++] = (char) b1; |
|
223 sp++; |
|
224 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { |
|
225 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx |
|
226 if (sl - sp < 2 || dp >= dl) |
|
227 return xflow(src, sp, sl, dst, dp, 2); |
|
228 int b2 = sa[sp + 1]; |
|
229 if (isNotContinuation(b2)) |
|
230 return malformedForLength(src, sp, dst, dp, 1); |
|
231 da[dp++] = (char) (((b1 << 6) ^ b2) |
|
232 ^ |
|
233 (((byte) 0xC0 << 6) ^ |
|
234 ((byte) 0x80 << 0))); |
|
235 sp += 2; |
|
236 } else if ((b1 >> 4) == -2) { |
|
237 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx |
|
238 int srcRemaining = sl - sp; |
|
239 if (srcRemaining < 3 || dp >= dl) { |
|
240 if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) |
|
241 return malformedForLength(src, sp, dst, dp, 1); |
|
242 return xflow(src, sp, sl, dst, dp, 3); |
|
243 } |
|
244 int b2 = sa[sp + 1]; |
|
245 int b3 = sa[sp + 2]; |
|
246 if (isMalformed3(b1, b2, b3)) |
|
247 return malformed(src, sp, dst, dp, 3); |
|
248 da[dp++] = (char) |
|
249 ((b1 << 12) ^ |
|
250 (b2 << 6) ^ |
|
251 (b3 ^ |
|
252 (((byte) 0xE0 << 12) ^ |
|
253 ((byte) 0x80 << 6) ^ |
|
254 ((byte) 0x80 << 0)))); |
|
255 sp += 3; |
|
256 } else { |
|
257 return malformed(src, sp, dst, dp, 1); |
|
258 } |
|
259 } |
|
260 return xflow(src, sp, sl, dst, dp, 0); |
|
261 } |
|
262 |
|
263 private CoderResult decodeBufferLoop(ByteBuffer src, |
|
264 CharBuffer dst) |
|
265 { |
|
266 int mark = src.position(); |
|
267 int limit = src.limit(); |
|
268 while (mark < limit) { |
|
269 int b1 = src.get(); |
|
270 if (b1 >= 0) { |
|
271 // 1 byte, 7 bits: 0xxxxxxx |
|
272 if (dst.remaining() < 1) |
|
273 return xflow(src, mark, 1); // overflow |
|
274 dst.put((char) b1); |
|
275 mark++; |
|
276 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { |
|
277 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx |
|
278 if (limit - mark < 2|| dst.remaining() < 1) |
|
279 return xflow(src, mark, 2); |
|
280 int b2 = src.get(); |
|
281 if (isNotContinuation(b2)) |
|
282 return malformedForLength(src, mark, 1); |
|
283 dst.put((char) (((b1 << 6) ^ b2) |
|
284 ^ |
|
285 (((byte) 0xC0 << 6) ^ |
|
286 ((byte) 0x80 << 0)))); |
|
287 mark += 2; |
|
288 } else if ((b1 >> 4) == -2) { |
|
289 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx |
|
290 int srcRemaining = limit - mark; |
|
291 if (srcRemaining < 3 || dst.remaining() < 1) { |
|
292 if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) |
|
293 return malformedForLength(src, mark, 1); |
|
294 return xflow(src, mark, 3); |
|
295 } |
|
296 int b2 = src.get(); |
|
297 int b3 = src.get(); |
|
298 if (isMalformed3(b1, b2, b3)) |
|
299 return malformed(src, mark, 3); |
|
300 dst.put((char) |
|
301 ((b1 << 12) ^ |
|
302 (b2 << 6) ^ |
|
303 (b3 ^ |
|
304 (((byte) 0xE0 << 12) ^ |
|
305 ((byte) 0x80 << 6) ^ |
|
306 ((byte) 0x80 << 0))))); |
|
307 mark += 3; |
|
308 } else { |
|
309 return malformed(src, mark, 1); |
|
310 } |
|
311 } |
|
312 return xflow(src, mark, 0); |
|
313 } |
|
314 |
|
315 protected CoderResult decodeLoop(ByteBuffer src, |
|
316 CharBuffer dst) |
|
317 { |
|
318 if (src.hasArray() && dst.hasArray()) |
|
319 return decodeArrayLoop(src, dst); |
|
320 else |
|
321 return decodeBufferLoop(src, dst); |
|
322 } |
|
323 |
|
324 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) |
|
325 { |
|
326 if (bb == null) |
|
327 bb = ByteBuffer.wrap(ba); |
|
328 bb.position(sp); |
|
329 return bb; |
|
330 } |
|
331 |
|
332 // returns -1 if there is/are malformed byte(s) and the |
|
333 // "action" for malformed input is not REPLACE. |
|
334 public int decode(byte[] sa, int sp, int len, char[] da) { |
|
335 final int sl = sp + len; |
|
336 int dp = 0; |
|
337 int dlASCII = Math.min(len, da.length); |
|
338 ByteBuffer bb = null; // only necessary if malformed |
|
339 |
|
340 // ASCII only optimized loop |
|
341 while (dp < dlASCII && sa[sp] >= 0) |
|
342 da[dp++] = (char) sa[sp++]; |
|
343 |
|
344 while (sp < sl) { |
|
345 int b1 = sa[sp++]; |
|
346 if (b1 >= 0) { |
|
347 // 1 byte, 7 bits: 0xxxxxxx |
|
348 da[dp++] = (char) b1; |
|
349 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { |
|
350 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx |
|
351 if (sp < sl) { |
|
352 int b2 = sa[sp++]; |
|
353 if (isNotContinuation(b2)) { |
|
354 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
355 return -1; |
|
356 da[dp++] = replacement().charAt(0); |
|
357 sp--; // malformedN(bb, 2) always returns 1 |
|
358 } else { |
|
359 da[dp++] = (char) (((b1 << 6) ^ b2)^ |
|
360 (((byte) 0xC0 << 6) ^ |
|
361 ((byte) 0x80 << 0))); |
|
362 } |
|
363 continue; |
|
364 } |
|
365 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
366 return -1; |
|
367 da[dp++] = replacement().charAt(0); |
|
368 return dp; |
|
369 } else if ((b1 >> 4) == -2) { |
|
370 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx |
|
371 if (sp + 1 < sl) { |
|
372 int b2 = sa[sp++]; |
|
373 int b3 = sa[sp++]; |
|
374 if (isMalformed3(b1, b2, b3)) { |
|
375 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
376 return -1; |
|
377 da[dp++] = replacement().charAt(0); |
|
378 sp -=3; |
|
379 bb = getByteBuffer(bb, sa, sp); |
|
380 sp += malformedN(bb, 3).length(); |
|
381 } else { |
|
382 da[dp++] = (char)((b1 << 12) ^ |
|
383 (b2 << 6) ^ |
|
384 (b3 ^ |
|
385 (((byte) 0xE0 << 12) ^ |
|
386 ((byte) 0x80 << 6) ^ |
|
387 ((byte) 0x80 << 0)))); |
|
388 } |
|
389 continue; |
|
390 } |
|
391 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
392 return -1; |
|
393 if (sp < sl && isMalformed3_2(b1, sa[sp])) { |
|
394 da[dp++] = replacement().charAt(0); |
|
395 continue; |
|
396 |
|
397 } |
|
398 da[dp++] = replacement().charAt(0); |
|
399 return dp; |
|
400 } else { |
|
401 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
402 return -1; |
|
403 da[dp++] = replacement().charAt(0); |
|
404 } |
|
405 } |
|
406 return dp; |
|
407 } |
|
408 } |
|
409 |
|
410 private static class Encoder extends CharsetEncoder |
|
411 implements ArrayEncoder { |
|
412 |
|
413 private Encoder(Charset cs) { |
|
414 super(cs, 1.1f, 3.0f); |
|
415 } |
|
416 |
|
417 public boolean canEncode(char c) { |
|
418 return !Character.isSurrogate(c); |
|
419 } |
|
420 |
|
421 public boolean isLegalReplacement(byte[] repl) { |
|
422 return ((repl.length == 1 && repl[0] >= 0) || |
|
423 super.isLegalReplacement(repl)); |
|
424 } |
|
425 |
|
426 private static CoderResult overflow(CharBuffer src, int sp, |
|
427 ByteBuffer dst, int dp) { |
|
428 updatePositions(src, sp, dst, dp); |
|
429 return CoderResult.OVERFLOW; |
|
430 } |
|
431 |
|
432 private static CoderResult overflow(CharBuffer src, int mark) { |
|
433 src.position(mark); |
|
434 return CoderResult.OVERFLOW; |
|
435 } |
|
436 |
|
437 private static void to3Bytes(byte[] da, int dp, char c) { |
|
438 da[dp] = (byte)(0xe0 | ((c >> 12))); |
|
439 da[dp + 1] = (byte)(0x80 | ((c >> 6) & 0x3f)); |
|
440 da[dp + 2] = (byte)(0x80 | (c & 0x3f)); |
|
441 } |
|
442 |
|
443 private static void to3Bytes(ByteBuffer dst, char c) { |
|
444 dst.put((byte)(0xe0 | ((c >> 12)))); |
|
445 dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); |
|
446 dst.put((byte)(0x80 | (c & 0x3f))); |
|
447 } |
|
448 |
|
449 private Surrogate.Parser sgp; |
|
450 private char[] c2; |
|
451 private CoderResult encodeArrayLoop(CharBuffer src, |
|
452 ByteBuffer dst) |
|
453 { |
|
454 char[] sa = src.array(); |
|
455 int sp = src.arrayOffset() + src.position(); |
|
456 int sl = src.arrayOffset() + src.limit(); |
|
457 |
|
458 byte[] da = dst.array(); |
|
459 int dp = dst.arrayOffset() + dst.position(); |
|
460 int dl = dst.arrayOffset() + dst.limit(); |
|
461 int dlASCII = dp + Math.min(sl - sp, dl - dp); |
|
462 |
|
463 // ASCII only loop |
|
464 while (dp < dlASCII && sa[sp] < '\u0080') |
|
465 da[dp++] = (byte) sa[sp++]; |
|
466 while (sp < sl) { |
|
467 char c = sa[sp]; |
|
468 if (c < 0x80) { |
|
469 // Have at most seven bits |
|
470 if (dp >= dl) |
|
471 return overflow(src, sp, dst, dp); |
|
472 da[dp++] = (byte)c; |
|
473 } else if (c < 0x800) { |
|
474 // 2 bytes, 11 bits |
|
475 if (dl - dp < 2) |
|
476 return overflow(src, sp, dst, dp); |
|
477 da[dp++] = (byte)(0xc0 | (c >> 6)); |
|
478 da[dp++] = (byte)(0x80 | (c & 0x3f)); |
|
479 } else if (Character.isSurrogate(c)) { |
|
480 // Have a surrogate pair |
|
481 if (sgp == null) |
|
482 sgp = new Surrogate.Parser(); |
|
483 int uc = sgp.parse(c, sa, sp, sl); |
|
484 if (uc < 0) { |
|
485 updatePositions(src, sp, dst, dp); |
|
486 return sgp.error(); |
|
487 } |
|
488 if (dl - dp < 6) |
|
489 return overflow(src, sp, dst, dp); |
|
490 to3Bytes(da, dp, Character.highSurrogate(uc)); |
|
491 dp += 3; |
|
492 to3Bytes(da, dp, Character.lowSurrogate(uc)); |
|
493 dp += 3; |
|
494 sp++; // 2 chars |
|
495 } else { |
|
496 // 3 bytes, 16 bits |
|
497 if (dl - dp < 3) |
|
498 return overflow(src, sp, dst, dp); |
|
499 to3Bytes(da, dp, c); |
|
500 dp += 3; |
|
501 } |
|
502 sp++; |
|
503 } |
|
504 updatePositions(src, sp, dst, dp); |
|
505 return CoderResult.UNDERFLOW; |
|
506 } |
|
507 |
|
508 private CoderResult encodeBufferLoop(CharBuffer src, |
|
509 ByteBuffer dst) |
|
510 { |
|
511 int mark = src.position(); |
|
512 while (src.hasRemaining()) { |
|
513 char c = src.get(); |
|
514 if (c < 0x80) { |
|
515 // Have at most seven bits |
|
516 if (!dst.hasRemaining()) |
|
517 return overflow(src, mark); |
|
518 dst.put((byte)c); |
|
519 } else if (c < 0x800) { |
|
520 // 2 bytes, 11 bits |
|
521 if (dst.remaining() < 2) |
|
522 return overflow(src, mark); |
|
523 dst.put((byte)(0xc0 | (c >> 6))); |
|
524 dst.put((byte)(0x80 | (c & 0x3f))); |
|
525 } else if (Character.isSurrogate(c)) { |
|
526 // Have a surrogate pair |
|
527 if (sgp == null) |
|
528 sgp = new Surrogate.Parser(); |
|
529 int uc = sgp.parse(c, src); |
|
530 if (uc < 0) { |
|
531 src.position(mark); |
|
532 return sgp.error(); |
|
533 } |
|
534 if (dst.remaining() < 6) |
|
535 return overflow(src, mark); |
|
536 to3Bytes(dst, Character.highSurrogate(uc)); |
|
537 to3Bytes(dst, Character.lowSurrogate(uc)); |
|
538 mark++; // 2 chars |
|
539 } else { |
|
540 // 3 bytes, 16 bits |
|
541 if (dst.remaining() < 3) |
|
542 return overflow(src, mark); |
|
543 to3Bytes(dst, c); |
|
544 } |
|
545 mark++; |
|
546 } |
|
547 src.position(mark); |
|
548 return CoderResult.UNDERFLOW; |
|
549 } |
|
550 |
|
551 protected final CoderResult encodeLoop(CharBuffer src, |
|
552 ByteBuffer dst) |
|
553 { |
|
554 if (src.hasArray() && dst.hasArray()) |
|
555 return encodeArrayLoop(src, dst); |
|
556 else |
|
557 return encodeBufferLoop(src, dst); |
|
558 } |
|
559 |
|
560 // returns -1 if there is malformed char(s) and the |
|
561 // "action" for malformed input is not REPLACE. |
|
562 public int encode(char[] sa, int sp, int len, byte[] da) { |
|
563 int sl = sp + len; |
|
564 int dp = 0; |
|
565 int dlASCII = dp + Math.min(len, da.length); |
|
566 |
|
567 // ASCII only optimized loop |
|
568 while (dp < dlASCII && sa[sp] < '\u0080') |
|
569 da[dp++] = (byte) sa[sp++]; |
|
570 |
|
571 while (sp < sl) { |
|
572 char c = sa[sp++]; |
|
573 if (c < 0x80) { |
|
574 // Have at most seven bits |
|
575 da[dp++] = (byte)c; |
|
576 } else if (c < 0x800) { |
|
577 // 2 bytes, 11 bits |
|
578 da[dp++] = (byte)(0xc0 | (c >> 6)); |
|
579 da[dp++] = (byte)(0x80 | (c & 0x3f)); |
|
580 } else if (Character.isSurrogate(c)) { |
|
581 if (sgp == null) |
|
582 sgp = new Surrogate.Parser(); |
|
583 int uc = sgp.parse(c, sa, sp - 1, sl); |
|
584 if (uc < 0) { |
|
585 if (malformedInputAction() != CodingErrorAction.REPLACE) |
|
586 return -1; |
|
587 da[dp++] = replacement()[0]; |
|
588 } else { |
|
589 to3Bytes(da, dp, Character.highSurrogate(uc)); |
|
590 dp += 3; |
|
591 to3Bytes(da, dp, Character.lowSurrogate(uc)); |
|
592 dp += 3; |
|
593 sp++; // 2 chars |
|
594 } |
|
595 } else { |
|
596 // 3 bytes, 16 bits |
|
597 to3Bytes(da, dp, c); |
|
598 dp += 3; |
|
599 } |
|
600 } |
|
601 return dp; |
|
602 } |
|
603 } |
|
604 } |