|
1 /* |
|
2 * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package sun.nio.cs.ext; |
|
27 |
|
28 import java.nio.ByteBuffer; |
|
29 import java.nio.CharBuffer; |
|
30 import java.nio.charset.Charset; |
|
31 import java.nio.charset.CharsetDecoder; |
|
32 import java.nio.charset.CharsetEncoder; |
|
33 import java.nio.charset.CoderResult; |
|
34 import java.nio.charset.CodingErrorAction; |
|
35 import sun.nio.cs.DelegatableDecoder; |
|
36 import sun.nio.cs.DoubleByte; |
|
37 import sun.nio.cs.HistoricallyNamedCharset; |
|
38 import sun.nio.cs.Surrogate; |
|
39 import sun.nio.cs.US_ASCII; |
|
40 import sun.nio.cs.*; |
|
41 import static sun.nio.cs.CharsetMapping.*; |
|
42 |
|
43 /* |
|
44 * Implementation notes: |
|
45 * |
|
46 * (1)"Standard based" (ASCII, JIS_X_0201 and JIS_X_0208) ISO2022-JP charset |
|
47 * is provided by the base implementation of this class. |
|
48 * |
|
49 * Three Microsoft ISO2022-JP variants, MS50220, MS50221 and MSISO2022JP |
|
50 * are provided via subclasses. |
|
51 * |
|
52 * (2)MS50220 and MS50221 are assumed to work the same way as Microsoft |
|
53 * CP50220 and CP50221's 7-bit implementation works by using CP5022X |
|
54 * specific JIS0208 and JIS0212 mapping tables (generated via Microsoft's |
|
55 * MultiByteToWideChar/WideCharToMultiByte APIs). The only difference |
|
56 * between these 2 classes is that MS50220 does not support singlebyte |
|
57 * halfwidth kana (Uff61-Uff9f) shiftin mechanism when "encoding", instead |
|
58 * these halfwidth kana characters are converted to their fullwidth JIS0208 |
|
59 * counterparts. |
|
60 * |
|
61 * The difference between the standard JIS_X_0208 and JIS_X_0212 mappings |
|
62 * and the CP50220/50221 specific are |
|
63 * |
|
64 * 0208 mapping: |
|
65 * 1)0x213d <-> U2015 (compared to U2014) |
|
66 * 2)One way mappings for 5 characters below |
|
67 * u2225 (ms) -> 0x2142 <-> u2016 (jis) |
|
68 * uff0d (ms) -> 0x215d <-> u2212 (jis) |
|
69 * uffe0 (ms) -> 0x2171 <-> u00a2 (jis) |
|
70 * uffe1 (ms) -> 0x2172 <-> u00a3 (jis) |
|
71 * uffe2 (ms) -> 0x224c <-> u00ac (jis) |
|
72 * //should consider 0xff5e -> 0x2141 <-> U301c? |
|
73 * 3)NEC Row13 0x2d21-0x2d79 |
|
74 * 4)85-94 ku <-> UE000,UE3AB (includes NEC selected |
|
75 * IBM kanji in 89-92ku) |
|
76 * 5)UFF61-UFF9f -> Fullwidth 0208 KANA |
|
77 * |
|
78 * 0212 mapping: |
|
79 * 1)0x2237 <-> UFF5E (Fullwidth Tilde) |
|
80 * 2)0x2271 <-> U2116 (Numero Sign) |
|
81 * 3)85-94 ku <-> UE3AC - UE757 |
|
82 * |
|
83 * (3)MSISO2022JP uses a JIS0208 mapping generated from MS932DB.b2c |
|
84 * and MS932DB.c2b by converting the SJIS codepoints back to their |
|
85 * JIS0208 counterparts. With the exception of |
|
86 * |
|
87 * (a)Codepoints with a resulting JIS0208 codepoints beyond 0x7e00 are |
|
88 * dropped (this includes the IBM Extended Kanji/Non-kanji from 0x9321 |
|
89 * to 0x972c) |
|
90 * (b)The Unicode codepoints that the IBM Extended Kanji/Non-kanji are |
|
91 * mapped to (in MS932) are mapped back to NEC selected IBM Kanji/ |
|
92 * Non-kanji area at 0x7921-0x7c7e. |
|
93 * |
|
94 * Compared to JIS_X_0208 mapping, this MS932 based mapping has |
|
95 |
|
96 * (a)different mappings for 7 JIS codepoints |
|
97 * 0x213d <-> U2015 |
|
98 * 0x2141 <-> UFF5E |
|
99 * 0x2142 <-> U2225 |
|
100 * 0x215d <-> Uff0d |
|
101 * 0x2171 <-> Uffe0 |
|
102 * 0x2172 <-> Uffe1 |
|
103 * 0x224c <-> Uffe2 |
|
104 * (b)added one-way c2b mappings for |
|
105 * U00b8 -> 0x2124 |
|
106 * U00b7 -> 0x2126 |
|
107 * U00af -> 0x2131 |
|
108 * U00ab -> 0x2263 |
|
109 * U00bb -> 0x2264 |
|
110 * U3094 -> 0x2574 |
|
111 * U00b5 -> 0x264c |
|
112 * (c)NEC Row 13 |
|
113 * (d)NEC selected IBM extended Kanji/Non-kanji |
|
114 * These codepoints are mapped to the same Unicode codepoints as |
|
115 * the MS932 does, while MS50220/50221 maps them to the Unicode |
|
116 * private area. |
|
117 * |
|
118 * # There is also an interesting difference when compared to MS5022X |
|
119 * 0208 mapping for JIS codepoint "0x2D60", MS932 maps it to U301d |
|
120 * but MS5022X maps it to U301e, obviously MS5022X is wrong, but... |
|
121 */ |
|
122 |
|
123 public class ISO2022_JP |
|
124 extends Charset |
|
125 implements HistoricallyNamedCharset |
|
126 { |
|
127 private static final int ASCII = 0; // ESC ( B |
|
128 private static final int JISX0201_1976 = 1; // ESC ( J |
|
129 private static final int JISX0208_1978 = 2; // ESC $ @ |
|
130 private static final int JISX0208_1983 = 3; // ESC $ B |
|
131 private static final int JISX0212_1990 = 4; // ESC $ ( D |
|
132 private static final int JISX0201_1976_KANA = 5; // ESC ( I |
|
133 private static final int SHIFTOUT = 6; |
|
134 |
|
135 private static final int ESC = 0x1b; |
|
136 private static final int SO = 0x0e; |
|
137 private static final int SI = 0x0f; |
|
138 |
|
/**
 * Creates the charset under its canonical name "ISO-2022-JP", using the
 * alias list that {@code ExtendedCharsets} holds for that name.
 */
public ISO2022_JP() {
    super("ISO-2022-JP",
          ExtendedCharsets.aliasesFor("ISO-2022-JP"));
}
|
143 |
|
/**
 * Creates a charset with a caller-supplied identity. Both arguments are
 * handed unchanged to the {@link Charset} constructor.
 *
 * @param canonicalName canonical name to register the charset under
 * @param aliases       alias names for the charset
 */
protected ISO2022_JP(String canonicalName,
                     String[] aliases) {
    super(canonicalName, aliases);
}
|
148 |
|
149 public String historicalName() { |
|
150 return "ISO2022JP"; |
|
151 } |
|
152 |
|
153 public boolean contains(Charset cs) { |
|
154 return ((cs instanceof JIS_X_0201) |
|
155 || (cs instanceof US_ASCII) |
|
156 || (cs instanceof JIS_X_0208) |
|
157 || (cs instanceof ISO2022_JP)); |
|
158 } |
|
159 |
|
160 public CharsetDecoder newDecoder() { |
|
161 return new Decoder(this); |
|
162 } |
|
163 |
|
164 public CharsetEncoder newEncoder() { |
|
165 return new Encoder(this); |
|
166 } |
|
167 |
|
168 protected boolean doSBKANA() { |
|
169 return true; |
|
170 } |
|
171 |
|
172 static class Decoder extends CharsetDecoder |
|
173 implements DelegatableDecoder { |
|
174 |
|
175 final static DoubleByte.Decoder DEC0208 = |
|
176 (DoubleByte.Decoder)new JIS_X_0208().newDecoder(); |
|
177 |
|
178 private int currentState; |
|
179 private int previousState; |
|
180 |
|
181 private DoubleByte.Decoder dec0208; |
|
182 private DoubleByte.Decoder dec0212; |
|
183 |
|
184 private Decoder(Charset cs) { |
|
185 this(cs, DEC0208, null); |
|
186 } |
|
187 |
|
188 protected Decoder(Charset cs, |
|
189 DoubleByte.Decoder dec0208, |
|
190 DoubleByte.Decoder dec0212) { |
|
191 super(cs, 0.5f, 1.0f); |
|
192 this.dec0208 = dec0208; |
|
193 this.dec0212 = dec0212; |
|
194 currentState = ASCII; |
|
195 previousState = ASCII; |
|
196 } |
|
197 |
|
198 public void implReset() { |
|
199 currentState = ASCII; |
|
200 previousState = ASCII; |
|
201 } |
|
202 |
|
203 private CoderResult decodeArrayLoop(ByteBuffer src, |
|
204 CharBuffer dst) |
|
205 { |
|
206 int inputSize = 0; |
|
207 int b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|
208 char c = UNMAPPABLE_DECODING; |
|
209 byte[] sa = src.array(); |
|
210 int sp = src.arrayOffset() + src.position(); |
|
211 int sl = src.arrayOffset() + src.limit(); |
|
212 assert (sp <= sl); |
|
213 sp = (sp <= sl ? sp : sl); |
|
214 |
|
215 char[] da = dst.array(); |
|
216 int dp = dst.arrayOffset() + dst.position(); |
|
217 int dl = dst.arrayOffset() + dst.limit(); |
|
218 assert (dp <= dl); |
|
219 dp = (dp <= dl ? dp : dl); |
|
220 |
|
221 try { |
|
222 while (sp < sl) { |
|
223 b1 = sa[sp] & 0xff; |
|
224 inputSize = 1; |
|
225 if ((b1 & 0x80) != 0) { |
|
226 return CoderResult.malformedForLength(inputSize); |
|
227 } |
|
228 if (b1 == ESC || b1 == SO || b1 == SI) { |
|
229 if (b1 == ESC) { |
|
230 if (sp + inputSize + 2 > sl) |
|
231 return CoderResult.UNDERFLOW; |
|
232 b2 = sa[sp + inputSize++] & 0xff; |
|
233 if (b2 == '(') { |
|
234 b3 = sa[sp + inputSize++] & 0xff; |
|
235 if (b3 == 'B'){ |
|
236 currentState = ASCII; |
|
237 } else if (b3 == 'J'){ |
|
238 currentState = JISX0201_1976; |
|
239 } else if (b3 == 'I'){ |
|
240 currentState = JISX0201_1976_KANA; |
|
241 } else { |
|
242 return CoderResult.malformedForLength(inputSize); |
|
243 } |
|
244 } else if (b2 == '$'){ |
|
245 b3 = sa[sp + inputSize++] & 0xff; |
|
246 if (b3 == '@'){ |
|
247 currentState = JISX0208_1978; |
|
248 } else if (b3 == 'B'){ |
|
249 currentState = JISX0208_1983; |
|
250 } else if (b3 == '(' && dec0212 != null) { |
|
251 if (sp + inputSize + 1 > sl) |
|
252 return CoderResult.UNDERFLOW; |
|
253 b4 = sa[sp + inputSize++] & 0xff; |
|
254 if (b4 == 'D') { |
|
255 currentState = JISX0212_1990; |
|
256 } else { |
|
257 return CoderResult.malformedForLength(inputSize); |
|
258 } |
|
259 } else { |
|
260 return CoderResult.malformedForLength(inputSize); |
|
261 } |
|
262 } else { |
|
263 return CoderResult.malformedForLength(inputSize); |
|
264 } |
|
265 } else if (b1 == SO) { |
|
266 previousState = currentState; |
|
267 currentState = SHIFTOUT; |
|
268 } else if (b1 == SI) { |
|
269 currentState = previousState; |
|
270 } |
|
271 sp += inputSize; |
|
272 continue; |
|
273 } |
|
274 if (dp + 1 > dl) |
|
275 return CoderResult.OVERFLOW; |
|
276 |
|
277 switch (currentState){ |
|
278 case ASCII: |
|
279 da[dp++] = (char)(b1 & 0xff); |
|
280 break; |
|
281 case JISX0201_1976: |
|
282 switch (b1) { |
|
283 case 0x5c: // Yen/tilde substitution |
|
284 da[dp++] = '\u00a5'; |
|
285 break; |
|
286 case 0x7e: |
|
287 da[dp++] = '\u203e'; |
|
288 break; |
|
289 default: |
|
290 da[dp++] = (char)b1; |
|
291 break; |
|
292 } |
|
293 break; |
|
294 case JISX0208_1978: |
|
295 case JISX0208_1983: |
|
296 if (sp + inputSize + 1 > sl) |
|
297 return CoderResult.UNDERFLOW; |
|
298 b2 = sa[sp + inputSize++] & 0xff; |
|
299 c = dec0208.decodeDouble(b1,b2); |
|
300 if (c == UNMAPPABLE_DECODING) |
|
301 return CoderResult.unmappableForLength(inputSize); |
|
302 da[dp++] = c; |
|
303 break; |
|
304 case JISX0212_1990: |
|
305 if (sp + inputSize + 1 > sl) |
|
306 return CoderResult.UNDERFLOW; |
|
307 b2 = sa[sp + inputSize++] & 0xff; |
|
308 c = dec0212.decodeDouble(b1,b2); |
|
309 if (c == UNMAPPABLE_DECODING) |
|
310 return CoderResult.unmappableForLength(inputSize); |
|
311 da[dp++] = c; |
|
312 break; |
|
313 case JISX0201_1976_KANA: |
|
314 case SHIFTOUT: |
|
315 if (b1 > 0x60) { |
|
316 return CoderResult.malformedForLength(inputSize); |
|
317 } |
|
318 da[dp++] = (char)(b1 + 0xff40); |
|
319 break; |
|
320 } |
|
321 sp += inputSize; |
|
322 } |
|
323 return CoderResult.UNDERFLOW; |
|
324 } finally { |
|
325 src.position(sp - src.arrayOffset()); |
|
326 dst.position(dp - dst.arrayOffset()); |
|
327 } |
|
328 } |
|
329 |
|
330 private CoderResult decodeBufferLoop(ByteBuffer src, |
|
331 CharBuffer dst) |
|
332 { |
|
333 int mark = src.position(); |
|
334 int b1 = 0, b2 = 0, b3 = 0, b4=0; |
|
335 char c = UNMAPPABLE_DECODING; |
|
336 int inputSize = 0; |
|
337 try { |
|
338 while (src.hasRemaining()) { |
|
339 b1 = src.get() & 0xff; |
|
340 inputSize = 1; |
|
341 if ((b1 & 0x80) != 0) |
|
342 return CoderResult.malformedForLength(inputSize); |
|
343 if (b1 == ESC || b1 == SO || b1 == SI) { |
|
344 if (b1 == ESC) { // ESC |
|
345 if (src.remaining() < 2) |
|
346 return CoderResult.UNDERFLOW; |
|
347 b2 = src.get() & 0xff; |
|
348 inputSize++; |
|
349 if (b2 == '(') { |
|
350 b3 = src.get() & 0xff; |
|
351 inputSize++; |
|
352 if (b3 == 'B'){ |
|
353 currentState = ASCII; |
|
354 } else if (b3 == 'J'){ |
|
355 currentState = JISX0201_1976; |
|
356 } else if (b3 == 'I'){ |
|
357 currentState = JISX0201_1976_KANA; |
|
358 } else { |
|
359 return CoderResult.malformedForLength(inputSize); |
|
360 } |
|
361 } else if (b2 == '$'){ |
|
362 b3 = src.get() & 0xff; |
|
363 inputSize++; |
|
364 if (b3 == '@'){ |
|
365 currentState = JISX0208_1978; |
|
366 } else if (b3 == 'B'){ |
|
367 currentState = JISX0208_1983; |
|
368 } else if (b3 == '(' && dec0212 != null) { |
|
369 if (!src.hasRemaining()) |
|
370 return CoderResult.UNDERFLOW; |
|
371 b4 = src.get() & 0xff; |
|
372 inputSize++; |
|
373 if (b4 == 'D') { |
|
374 currentState = JISX0212_1990; |
|
375 } else { |
|
376 return CoderResult.malformedForLength(inputSize); |
|
377 } |
|
378 } else { |
|
379 return CoderResult.malformedForLength(inputSize); |
|
380 } |
|
381 } else { |
|
382 return CoderResult.malformedForLength(inputSize); |
|
383 } |
|
384 } else if (b1 == SO) { |
|
385 previousState = currentState; |
|
386 currentState = SHIFTOUT; |
|
387 } else if (b1 == SI) { // shift back in |
|
388 currentState = previousState; |
|
389 } |
|
390 mark += inputSize; |
|
391 continue; |
|
392 } |
|
393 if (!dst.hasRemaining()) |
|
394 return CoderResult.OVERFLOW; |
|
395 |
|
396 switch (currentState){ |
|
397 case ASCII: |
|
398 dst.put((char)(b1 & 0xff)); |
|
399 break; |
|
400 case JISX0201_1976: |
|
401 switch (b1) { |
|
402 case 0x5c: // Yen/tilde substitution |
|
403 dst.put('\u00a5'); |
|
404 break; |
|
405 case 0x7e: |
|
406 dst.put('\u203e'); |
|
407 break; |
|
408 default: |
|
409 dst.put((char)b1); |
|
410 break; |
|
411 } |
|
412 break; |
|
413 case JISX0208_1978: |
|
414 case JISX0208_1983: |
|
415 if (!src.hasRemaining()) |
|
416 return CoderResult.UNDERFLOW; |
|
417 b2 = src.get() & 0xff; |
|
418 inputSize++; |
|
419 c = dec0208.decodeDouble(b1,b2); |
|
420 if (c == UNMAPPABLE_DECODING) |
|
421 return CoderResult.unmappableForLength(inputSize); |
|
422 dst.put(c); |
|
423 break; |
|
424 case JISX0212_1990: |
|
425 if (!src.hasRemaining()) |
|
426 return CoderResult.UNDERFLOW; |
|
427 b2 = src.get() & 0xff; |
|
428 inputSize++; |
|
429 c = dec0212.decodeDouble(b1,b2); |
|
430 if (c == UNMAPPABLE_DECODING) |
|
431 return CoderResult.unmappableForLength(inputSize); |
|
432 dst.put(c); |
|
433 break; |
|
434 case JISX0201_1976_KANA: |
|
435 case SHIFTOUT: |
|
436 if (b1 > 0x60) { |
|
437 return CoderResult.malformedForLength(inputSize); |
|
438 } |
|
439 dst.put((char)(b1 + 0xff40)); |
|
440 break; |
|
441 } |
|
442 mark += inputSize; |
|
443 } |
|
444 return CoderResult.UNDERFLOW; |
|
445 } finally { |
|
446 src.position(mark); |
|
447 } |
|
448 } |
|
449 |
|
450 // Make some protected methods public for use by JISAutoDetect |
|
451 public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
|
452 if (src.hasArray() && dst.hasArray()) |
|
453 return decodeArrayLoop(src, dst); |
|
454 else |
|
455 return decodeBufferLoop(src, dst); |
|
456 } |
|
457 |
|
458 public CoderResult implFlush(CharBuffer out) { |
|
459 return super.implFlush(out); |
|
460 } |
|
461 } |
|
462 |
|
463 static class Encoder extends CharsetEncoder { |
|
464 |
|
465 final static DoubleByte.Encoder ENC0208 = |
|
466 (DoubleByte.Encoder)new JIS_X_0208().newEncoder(); |
|
467 |
|
468 private static byte[] repl = { (byte)0x21, (byte)0x29 }; |
|
469 private int currentMode = ASCII; |
|
470 private int replaceMode = JISX0208_1983; |
|
471 private DoubleByte.Encoder enc0208; |
|
472 private DoubleByte.Encoder enc0212; |
|
473 private boolean doSBKANA; |
|
474 |
|
475 private Encoder(Charset cs) { |
|
476 this(cs, ENC0208, null, true); |
|
477 } |
|
478 |
|
479 Encoder(Charset cs, |
|
480 DoubleByte.Encoder enc0208, |
|
481 DoubleByte.Encoder enc0212, |
|
482 boolean doSBKANA) { |
|
483 super(cs, 4.0f, (enc0212 != null)? 9.0f : 8.0f, repl); |
|
484 this.enc0208 = enc0208; |
|
485 this.enc0212 = enc0212; |
|
486 this.doSBKANA = doSBKANA; |
|
487 } |
|
488 |
|
489 protected int encodeSingle(char inputChar) { |
|
490 return -1; |
|
491 } |
|
492 |
|
493 protected void implReset() { |
|
494 currentMode = ASCII; |
|
495 } |
|
496 |
|
497 protected void implReplaceWith(byte[] newReplacement) { |
|
498 /* It's almost impossible to decide which charset they belong |
|
499 to. The best thing we can do here is to "guess" based on |
|
500 the length of newReplacement. |
|
501 */ |
|
502 if (newReplacement.length == 1) { |
|
503 replaceMode = ASCII; |
|
504 } else if (newReplacement.length == 2) { |
|
505 replaceMode = JISX0208_1983; |
|
506 } |
|
507 } |
|
508 |
|
509 protected CoderResult implFlush(ByteBuffer out) { |
|
510 if (currentMode != ASCII) { |
|
511 if (out.remaining() < 3) |
|
512 return CoderResult.OVERFLOW; |
|
513 out.put((byte)0x1b); |
|
514 out.put((byte)0x28); |
|
515 out.put((byte)0x42); |
|
516 currentMode = ASCII; |
|
517 } |
|
518 return CoderResult.UNDERFLOW; |
|
519 } |
|
520 |
|
521 public boolean canEncode(char c) { |
|
522 return ((c <= '\u007F') || |
|
523 (c >= 0xFF61 && c <= 0xFF9F) || |
|
524 (c == '\u00A5') || |
|
525 (c == '\u203E') || |
|
526 enc0208.canEncode(c) || |
|
527 (enc0212!=null && enc0212.canEncode(c))); |
|
528 } |
|
529 |
|
530 private final Surrogate.Parser sgp = new Surrogate.Parser(); |
|
531 |
|
532 private CoderResult encodeArrayLoop(CharBuffer src, |
|
533 ByteBuffer dst) |
|
534 { |
|
535 char[] sa = src.array(); |
|
536 int sp = src.arrayOffset() + src.position(); |
|
537 int sl = src.arrayOffset() + src.limit(); |
|
538 assert (sp <= sl); |
|
539 sp = (sp <= sl ? sp : sl); |
|
540 byte[] da = dst.array(); |
|
541 int dp = dst.arrayOffset() + dst.position(); |
|
542 int dl = dst.arrayOffset() + dst.limit(); |
|
543 assert (dp <= dl); |
|
544 dp = (dp <= dl ? dp : dl); |
|
545 |
|
546 try { |
|
547 while (sp < sl) { |
|
548 char c = sa[sp]; |
|
549 if (c <= '\u007F') { |
|
550 if (currentMode != ASCII) { |
|
551 if (dl - dp < 3) |
|
552 return CoderResult.OVERFLOW; |
|
553 da[dp++] = (byte)0x1b; |
|
554 da[dp++] = (byte)0x28; |
|
555 da[dp++] = (byte)0x42; |
|
556 currentMode = ASCII; |
|
557 } |
|
558 if (dl - dp < 1) |
|
559 return CoderResult.OVERFLOW; |
|
560 da[dp++] = (byte)c; |
|
561 } else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
562 //a single byte kana |
|
563 if (currentMode != JISX0201_1976_KANA) { |
|
564 if (dl - dp < 3) |
|
565 return CoderResult.OVERFLOW; |
|
566 da[dp++] = (byte)0x1b; |
|
567 da[dp++] = (byte)0x28; |
|
568 da[dp++] = (byte)0x49; |
|
569 currentMode = JISX0201_1976_KANA; |
|
570 } |
|
571 if (dl - dp < 1) |
|
572 return CoderResult.OVERFLOW; |
|
573 da[dp++] = (byte)(c - 0xff40); |
|
574 } else if (c == '\u00A5' || c == '\u203E') { |
|
575 //backslash or tilde |
|
576 if (currentMode != JISX0201_1976) { |
|
577 if (dl - dp < 3) |
|
578 return CoderResult.OVERFLOW; |
|
579 da[dp++] = (byte)0x1b; |
|
580 da[dp++] = (byte)0x28; |
|
581 da[dp++] = (byte)0x4a; |
|
582 currentMode = JISX0201_1976; |
|
583 } |
|
584 if (dl - dp < 1) |
|
585 return CoderResult.OVERFLOW; |
|
586 da[dp++] = (c == '\u00A5')?(byte)0x5C:(byte)0x7e; |
|
587 } else { |
|
588 int index = enc0208.encodeChar(c); |
|
589 if (index != UNMAPPABLE_ENCODING) { |
|
590 if (currentMode != JISX0208_1983) { |
|
591 if (dl - dp < 3) |
|
592 return CoderResult.OVERFLOW; |
|
593 da[dp++] = (byte)0x1b; |
|
594 da[dp++] = (byte)0x24; |
|
595 da[dp++] = (byte)0x42; |
|
596 currentMode = JISX0208_1983; |
|
597 } |
|
598 if (dl - dp < 2) |
|
599 return CoderResult.OVERFLOW; |
|
600 da[dp++] = (byte)(index >> 8); |
|
601 da[dp++] = (byte)(index & 0xff); |
|
602 } else if (enc0212 != null && |
|
603 (index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
|
604 if (currentMode != JISX0212_1990) { |
|
605 if (dl - dp < 4) |
|
606 return CoderResult.OVERFLOW; |
|
607 da[dp++] = (byte)0x1b; |
|
608 da[dp++] = (byte)0x24; |
|
609 da[dp++] = (byte)0x28; |
|
610 da[dp++] = (byte)0x44; |
|
611 currentMode = JISX0212_1990; |
|
612 } |
|
613 if (dl - dp < 2) |
|
614 return CoderResult.OVERFLOW; |
|
615 da[dp++] = (byte)(index >> 8); |
|
616 da[dp++] = (byte)(index & 0xff); |
|
617 } else { |
|
618 if (Character.isSurrogate(c) && sgp.parse(c, sa, sp, sl) < 0) |
|
619 return sgp.error(); |
|
620 if (unmappableCharacterAction() |
|
621 == CodingErrorAction.REPLACE |
|
622 && currentMode != replaceMode) { |
|
623 if (dl - dp < 3) |
|
624 return CoderResult.OVERFLOW; |
|
625 if (replaceMode == ASCII) { |
|
626 da[dp++] = (byte)0x1b; |
|
627 da[dp++] = (byte)0x28; |
|
628 da[dp++] = (byte)0x42; |
|
629 } else { |
|
630 da[dp++] = (byte)0x1b; |
|
631 da[dp++] = (byte)0x24; |
|
632 da[dp++] = (byte)0x42; |
|
633 } |
|
634 currentMode = replaceMode; |
|
635 } |
|
636 if (Character.isSurrogate(c)) |
|
637 return sgp.unmappableResult(); |
|
638 return CoderResult.unmappableForLength(1); |
|
639 } |
|
640 } |
|
641 sp++; |
|
642 } |
|
643 return CoderResult.UNDERFLOW; |
|
644 } finally { |
|
645 src.position(sp - src.arrayOffset()); |
|
646 dst.position(dp - dst.arrayOffset()); |
|
647 } |
|
648 } |
|
649 |
|
650 private CoderResult encodeBufferLoop(CharBuffer src, |
|
651 ByteBuffer dst) |
|
652 { |
|
653 int mark = src.position(); |
|
654 try { |
|
655 while (src.hasRemaining()) { |
|
656 char c = src.get(); |
|
657 |
|
658 if (c <= '\u007F') { |
|
659 if (currentMode != ASCII) { |
|
660 if (dst.remaining() < 3) |
|
661 return CoderResult.OVERFLOW; |
|
662 dst.put((byte)0x1b); |
|
663 dst.put((byte)0x28); |
|
664 dst.put((byte)0x42); |
|
665 currentMode = ASCII; |
|
666 } |
|
667 if (dst.remaining() < 1) |
|
668 return CoderResult.OVERFLOW; |
|
669 dst.put((byte)c); |
|
670 } else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
671 //Is it a single byte kana? |
|
672 if (currentMode != JISX0201_1976_KANA) { |
|
673 if (dst.remaining() < 3) |
|
674 return CoderResult.OVERFLOW; |
|
675 dst.put((byte)0x1b); |
|
676 dst.put((byte)0x28); |
|
677 dst.put((byte)0x49); |
|
678 currentMode = JISX0201_1976_KANA; |
|
679 } |
|
680 if (dst.remaining() < 1) |
|
681 return CoderResult.OVERFLOW; |
|
682 dst.put((byte)(c - 0xff40)); |
|
683 } else if (c == '\u00a5' || c == '\u203E') { |
|
684 if (currentMode != JISX0201_1976) { |
|
685 if (dst.remaining() < 3) |
|
686 return CoderResult.OVERFLOW; |
|
687 dst.put((byte)0x1b); |
|
688 dst.put((byte)0x28); |
|
689 dst.put((byte)0x4a); |
|
690 currentMode = JISX0201_1976; |
|
691 } |
|
692 if (dst.remaining() < 1) |
|
693 return CoderResult.OVERFLOW; |
|
694 dst.put((c == '\u00A5')?(byte)0x5C:(byte)0x7e); |
|
695 } else { |
|
696 int index = enc0208.encodeChar(c); |
|
697 if (index != UNMAPPABLE_ENCODING) { |
|
698 if (currentMode != JISX0208_1983) { |
|
699 if (dst.remaining() < 3) |
|
700 return CoderResult.OVERFLOW; |
|
701 dst.put((byte)0x1b); |
|
702 dst.put((byte)0x24); |
|
703 dst.put((byte)0x42); |
|
704 currentMode = JISX0208_1983; |
|
705 } |
|
706 if (dst.remaining() < 2) |
|
707 return CoderResult.OVERFLOW; |
|
708 dst.put((byte)(index >> 8)); |
|
709 dst.put((byte)(index & 0xff)); |
|
710 } else if (enc0212 != null && |
|
711 (index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
|
712 if (currentMode != JISX0212_1990) { |
|
713 if (dst.remaining() < 4) |
|
714 return CoderResult.OVERFLOW; |
|
715 dst.put((byte)0x1b); |
|
716 dst.put((byte)0x24); |
|
717 dst.put((byte)0x28); |
|
718 dst.put((byte)0x44); |
|
719 currentMode = JISX0212_1990; |
|
720 } |
|
721 if (dst.remaining() < 2) |
|
722 return CoderResult.OVERFLOW; |
|
723 dst.put((byte)(index >> 8)); |
|
724 dst.put((byte)(index & 0xff)); |
|
725 } else { |
|
726 if (Character.isSurrogate(c) && sgp.parse(c, src) < 0) |
|
727 return sgp.error(); |
|
728 if (unmappableCharacterAction() == CodingErrorAction.REPLACE |
|
729 && currentMode != replaceMode) { |
|
730 if (dst.remaining() < 3) |
|
731 return CoderResult.OVERFLOW; |
|
732 if (replaceMode == ASCII) { |
|
733 dst.put((byte)0x1b); |
|
734 dst.put((byte)0x28); |
|
735 dst.put((byte)0x42); |
|
736 } else { |
|
737 dst.put((byte)0x1b); |
|
738 dst.put((byte)0x24); |
|
739 dst.put((byte)0x42); |
|
740 } |
|
741 currentMode = replaceMode; |
|
742 } |
|
743 if (Character.isSurrogate(c)) |
|
744 return sgp.unmappableResult(); |
|
745 return CoderResult.unmappableForLength(1); |
|
746 } |
|
747 } |
|
748 mark++; |
|
749 } |
|
750 return CoderResult.UNDERFLOW; |
|
751 } finally { |
|
752 src.position(mark); |
|
753 } |
|
754 } |
|
755 |
|
756 protected CoderResult encodeLoop(CharBuffer src, |
|
757 ByteBuffer dst) |
|
758 { |
|
759 if (src.hasArray() && dst.hasArray()) |
|
760 return encodeArrayLoop(src, dst); |
|
761 else |
|
762 return encodeBufferLoop(src, dst); |
|
763 } |
|
764 } |
|
765 } |