author | stefank |
Mon, 25 Aug 2014 09:10:13 +0200 | |
changeset 26314 | f8bc1966fb30 |
parent 25859 | 3317bb8137f4 |
child 28969 | f980bee32887 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
14342
8435a30053c1
7197491: update copyright year to match last edit in jdk8 jdk repository
alanb
parents:
13366
diff
changeset
|
2 |
* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package sun.nio.cs.ext; |
|
27 |
||
28 |
import java.nio.ByteBuffer; |
|
29 |
import java.nio.CharBuffer; |
|
30 |
import java.nio.charset.Charset; |
|
31 |
import java.nio.charset.CharsetDecoder; |
|
32 |
import java.nio.charset.CharsetEncoder; |
|
33 |
import java.nio.charset.CoderResult; |
|
34 |
import java.nio.charset.CodingErrorAction; |
|
35 |
import sun.nio.cs.HistoricallyNamedCharset; |
|
36 |
import sun.nio.cs.Surrogate; |
|
37 |
import sun.nio.cs.US_ASCII; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
38 |
import static sun.nio.cs.CharsetMapping.*; |
2 | 39 |
|
40 |
/* |
|
41 |
* Implementation notes: |
|
42 |
* |
|
43 |
* (1)"Standard based" (ASCII, JIS_X_0201 and JIS_X_0208) ISO2022-JP charset |
|
44 |
* is provided by the base implementation of this class. |
|
45 |
* |
|
46 |
* Three Microsoft ISO2022-JP variants, MS50220, MS50221 and MSISO2022JP |
|
47 |
* are provided via subclasses. |
|
48 |
* |
|
49 |
* (2)MS50220 and MS50221 are assumed to work the same way as Microsoft |
|
50 |
* CP50220 and CP50221's 7-bit implementation works by using CP5022X |
|
51 |
* specific JIS0208 and JIS0212 mapping tables (generated via Microsoft's |
|
52 |
* MultiByteToWideChar/WideCharToMultiByte APIs). The only difference |
|
53 |
* between these 2 classes is that MS50220 does not support singlebyte |
|
54 |
* halfwidth kana (Uff61-Uff9f) shiftin mechanism when "encoding", instead |
|
55 |
* these halfwidth kana characters are converted to their fullwidth JIS0208 |
|
56 |
* counterparts. |
|
57 |
* |
|
58 |
* The difference between the standard JIS_X_0208 and JIS_X_0212 mappings |
|
59 |
* and the CP50220/50221 specific are |
|
60 |
* |
|
61 |
* 0208 mapping: |
|
62 |
* 1)0x213d <-> U2015 (compared to U2014) |
|
63 |
* 2)One way mappings for 5 characters below |
|
64 |
* u2225 (ms) -> 0x2142 <-> u2016 (jis) |
|
65 |
* uff0d (ms) -> 0x215d <-> u2212 (jis) |
|
66 |
* uffe0 (ms) -> 0x2171 <-> u00a2 (jis) |
|
67 |
* uffe1 (ms) -> 0x2172 <-> u00a3 (jis) |
|
68 |
* uffe2 (ms) -> 0x224c <-> u00ac (jis) |
|
69 |
* //should consider 0xff5e -> 0x2141 <-> U301c? |
|
70 |
* 3)NEC Row13 0x2d21-0x2d79 |
|
71 |
* 4)85-94 ku <-> UE000,UE3AB (includes NEC selected |
|
72 |
* IBM kanji in 89-92ku) |
|
73 |
* 5)UFF61-UFF9f -> Fullwidth 0208 KANA |
|
74 |
* |
|
75 |
* 0212 mapping: |
|
76 |
* 1)0x2237 <-> UFF5E (Fullwidth Tilde) |
|
77 |
* 2)0x2271 <-> U2116 (Numero Sign) |
|
78 |
* 3)85-94 ku <-> UE3AC - UE757 |
|
79 |
* |
|
80 |
* (3)MSISO2022JP uses a JIS0208 mapping generated from MS932DB.b2c |
|
81 |
* and MS932DB.c2b by converting the SJIS codepoints back to their |
|
82 |
* JIS0208 counterparts. With the exception of |
|
83 |
* |
|
84 |
* (a)Codepoints with a resulting JIS0208 codepoints beyond 0x7e00 are |
|
85 |
* dropped (this includes the IBM Extended Kanji/Non-kanji from 0x9321 |
|
86 |
* to 0x972c) |
|
87 |
* (b)The Unicode codepoints that the IBM Extended Kanji/Non-kanji are |
|
88 |
* mapped to (in MS932) are mapped back to NEC selected IBM Kanji/ |
|
89 |
* Non-kanji area at 0x7921-0x7c7e. |
|
90 |
* |
|
91 |
* Compared to JIS_X_0208 mapping, this MS932 based mapping has |
|
92 |
||
93 |
* (a)different mappings for 7 JIS codepoints |
|
94 |
* 0x213d <-> U2015 |
|
95 |
* 0x2141 <-> UFF5E |
|
96 |
* 0x2142 <-> U2225 |
|
97 |
* 0x215d <-> Uff0d |
|
98 |
* 0x2171 <-> Uffe0 |
|
99 |
* 0x2172 <-> Uffe1 |
|
100 |
* 0x224c <-> Uffe2 |
|
101 |
* (b)added one-way c2b mappings for |
|
102 |
* U00b8 -> 0x2124 |
|
103 |
* U00b7 -> 0x2126 |
|
104 |
* U00af -> 0x2131 |
|
105 |
* U00ab -> 0x2263 |
|
106 |
* U00bb -> 0x2264 |
|
107 |
* U3094 -> 0x2574 |
|
108 |
* U00b5 -> 0x264c |
|
109 |
* (c)NEC Row 13 |
|
110 |
* (d)NEC selected IBM extended Kanji/Non-kanji |
|
111 |
* These codepoints are mapped to the same Unicode codepoints as |
|
112 |
* the MS932 does, while MS50220/50221 maps them to the Unicode |
|
113 |
* private area. |
|
114 |
* |
|
115 |
* # There is also an interesting difference when compared to MS5022X |
|
116 |
* 0208 mapping for JIS codepoint "0x2D60", MS932 maps it to U301d |
|
117 |
* but MS5022X maps it to U301e, obviously MS5022X is wrong, but... |
|
118 |
*/ |
|
119 |
||
120 |
public class ISO2022_JP |
|
121 |
extends Charset |
|
122 |
implements HistoricallyNamedCharset |
|
123 |
{ |
|
124 |
private static final int ASCII = 0; // ESC ( B |
|
125 |
private static final int JISX0201_1976 = 1; // ESC ( J |
|
126 |
private static final int JISX0208_1978 = 2; // ESC $ @ |
|
127 |
private static final int JISX0208_1983 = 3; // ESC $ B |
|
128 |
private static final int JISX0212_1990 = 4; // ESC $ ( D |
|
129 |
private static final int JISX0201_1976_KANA = 5; // ESC ( I |
|
130 |
private static final int SHIFTOUT = 6; |
|
131 |
||
132 |
private static final int ESC = 0x1b; |
|
133 |
private static final int SO = 0x0e; |
|
134 |
private static final int SI = 0x0f; |
|
135 |
||
136 |
/**
 * Creates the charset under the canonical name {@code "ISO-2022-JP"} with
 * the aliases that {@code ExtendedCharsets} reports for that name.
 */
public ISO2022_JP() {
    // The two-argument constructor forwards both values to Charset unchanged.
    this("ISO-2022-JP", ExtendedCharsets.aliasesFor("ISO-2022-JP"));
}
|
140 |
||
141 |
/**
 * Creates a charset with an explicit name and alias list; both arguments
 * are passed straight through to the {@link Charset} constructor.
 *
 * @param canonicalName canonical name of the charset
 * @param aliases       alias names of the charset
 */
protected ISO2022_JP(String canonicalName, String[] aliases) {
    super(canonicalName, aliases);
}
|
145 |
||
146 |
public String historicalName() { |
|
147 |
return "ISO2022JP"; |
|
148 |
} |
|
149 |
||
150 |
public boolean contains(Charset cs) { |
|
151 |
return ((cs instanceof JIS_X_0201) |
|
152 |
|| (cs instanceof US_ASCII) |
|
153 |
|| (cs instanceof JIS_X_0208) |
|
154 |
|| (cs instanceof ISO2022_JP)); |
|
155 |
} |
|
156 |
||
157 |
public CharsetDecoder newDecoder() { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
158 |
return new Decoder(this); |
2 | 159 |
} |
160 |
||
161 |
public CharsetEncoder newEncoder() { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
162 |
return new Encoder(this); |
2 | 163 |
} |
164 |
||
165 |
protected boolean doSBKANA() { |
|
166 |
return true; |
|
167 |
} |
|
168 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
169 |
static class Decoder extends CharsetDecoder |
2 | 170 |
implements DelegatableDecoder { |
171 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
172 |
final static DoubleByte.Decoder DEC0208 = |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
173 |
(DoubleByte.Decoder)new JIS_X_0208().newDecoder(); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
174 |
|
2 | 175 |
private int currentState; |
176 |
private int previousState; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
177 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
178 |
private DoubleByte.Decoder dec0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
179 |
private DoubleByte.Decoder dec0212; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
180 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
181 |
/**
 * Builds a decoder that uses the shared {@code DEC0208} table and has no
 * JIS X 0212 decoder.
 *
 * @param cs the charset this decoder belongs to
 */
private Decoder(Charset cs) {
    this(cs, DEC0208, /* dec0212 */ null);
}
2 | 184 |
|
185 |
/**
 * Builds a decoder with explicit two-byte tables. The decoder starts with
 * both the current and the saved state set to ASCII.
 *
 * @param cs      the charset this decoder belongs to
 * @param dec0208 decoder used in the two JIS X 0208 states
 * @param dec0212 decoder used in the JIS X 0212 state, or {@code null} to
 *                leave that state unavailable
 */
protected Decoder(Charset cs,
                  DoubleByte.Decoder dec0208,
                  DoubleByte.Decoder dec0212)
{
    // 0.5 average and 1.0 maximum chars per input byte.
    super(cs, 0.5f, 1.0f);
    this.dec0208 = dec0208;
    this.dec0212 = dec0212;
    currentState = previousState = ASCII;
}
|
194 |
||
195 |
public void implReset() { |
|
196 |
currentState = ASCII; |
|
197 |
previousState = ASCII; |
|
198 |
} |
|
199 |
||
200 |
private CoderResult decodeArrayLoop(ByteBuffer src, |
|
201 |
CharBuffer dst) |
|
202 |
{ |
|
203 |
int inputSize = 0; |
|
204 |
int b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
205 |
char c = UNMAPPABLE_DECODING; |
2 | 206 |
byte[] sa = src.array(); |
207 |
int sp = src.arrayOffset() + src.position(); |
|
208 |
int sl = src.arrayOffset() + src.limit(); |
|
209 |
assert (sp <= sl); |
|
210 |
sp = (sp <= sl ? sp : sl); |
|
211 |
||
212 |
char[] da = dst.array(); |
|
213 |
int dp = dst.arrayOffset() + dst.position(); |
|
214 |
int dl = dst.arrayOffset() + dst.limit(); |
|
215 |
assert (dp <= dl); |
|
216 |
dp = (dp <= dl ? dp : dl); |
|
217 |
||
218 |
try { |
|
219 |
while (sp < sl) { |
|
220 |
b1 = sa[sp] & 0xff; |
|
221 |
inputSize = 1; |
|
222 |
if ((b1 & 0x80) != 0) { |
|
223 |
return CoderResult.malformedForLength(inputSize); |
|
224 |
} |
|
225 |
if (b1 == ESC || b1 == SO || b1 == SI) { |
|
226 |
if (b1 == ESC) { |
|
227 |
if (sp + inputSize + 2 > sl) |
|
228 |
return CoderResult.UNDERFLOW; |
|
229 |
b2 = sa[sp + inputSize++] & 0xff; |
|
230 |
if (b2 == '(') { |
|
231 |
b3 = sa[sp + inputSize++] & 0xff; |
|
232 |
if (b3 == 'B'){ |
|
233 |
currentState = ASCII; |
|
234 |
} else if (b3 == 'J'){ |
|
235 |
currentState = JISX0201_1976; |
|
236 |
} else if (b3 == 'I'){ |
|
237 |
currentState = JISX0201_1976_KANA; |
|
238 |
} else { |
|
239 |
return CoderResult.malformedForLength(inputSize); |
|
240 |
} |
|
241 |
} else if (b2 == '$'){ |
|
242 |
b3 = sa[sp + inputSize++] & 0xff; |
|
243 |
if (b3 == '@'){ |
|
244 |
currentState = JISX0208_1978; |
|
245 |
} else if (b3 == 'B'){ |
|
246 |
currentState = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
247 |
} else if (b3 == '(' && dec0212 != null) { |
2 | 248 |
if (sp + inputSize + 1 > sl) |
249 |
return CoderResult.UNDERFLOW; |
|
250 |
b4 = sa[sp + inputSize++] & 0xff; |
|
251 |
if (b4 == 'D') { |
|
252 |
currentState = JISX0212_1990; |
|
253 |
} else { |
|
254 |
return CoderResult.malformedForLength(inputSize); |
|
255 |
} |
|
256 |
} else { |
|
257 |
return CoderResult.malformedForLength(inputSize); |
|
258 |
} |
|
259 |
} else { |
|
260 |
return CoderResult.malformedForLength(inputSize); |
|
261 |
} |
|
262 |
} else if (b1 == SO) { |
|
263 |
previousState = currentState; |
|
264 |
currentState = SHIFTOUT; |
|
265 |
} else if (b1 == SI) { |
|
266 |
currentState = previousState; |
|
267 |
} |
|
268 |
sp += inputSize; |
|
269 |
continue; |
|
270 |
} |
|
271 |
if (dp + 1 > dl) |
|
272 |
return CoderResult.OVERFLOW; |
|
273 |
||
274 |
switch (currentState){ |
|
275 |
case ASCII: |
|
276 |
da[dp++] = (char)(b1 & 0xff); |
|
277 |
break; |
|
278 |
case JISX0201_1976: |
|
279 |
switch (b1) { |
|
280 |
case 0x5c: // Yen/tilde substitution |
|
281 |
da[dp++] = '\u00a5'; |
|
282 |
break; |
|
283 |
case 0x7e: |
|
284 |
da[dp++] = '\u203e'; |
|
285 |
break; |
|
286 |
default: |
|
287 |
da[dp++] = (char)b1; |
|
288 |
break; |
|
289 |
} |
|
290 |
break; |
|
291 |
case JISX0208_1978: |
|
292 |
case JISX0208_1983: |
|
293 |
if (sp + inputSize + 1 > sl) |
|
294 |
return CoderResult.UNDERFLOW; |
|
295 |
b2 = sa[sp + inputSize++] & 0xff; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
296 |
c = dec0208.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
297 |
if (c == UNMAPPABLE_DECODING) |
2 | 298 |
return CoderResult.unmappableForLength(inputSize); |
299 |
da[dp++] = c; |
|
300 |
break; |
|
301 |
case JISX0212_1990: |
|
302 |
if (sp + inputSize + 1 > sl) |
|
303 |
return CoderResult.UNDERFLOW; |
|
304 |
b2 = sa[sp + inputSize++] & 0xff; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
305 |
c = dec0212.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
306 |
if (c == UNMAPPABLE_DECODING) |
2 | 307 |
return CoderResult.unmappableForLength(inputSize); |
308 |
da[dp++] = c; |
|
309 |
break; |
|
310 |
case JISX0201_1976_KANA: |
|
311 |
case SHIFTOUT: |
|
312 |
if (b1 > 0x60) { |
|
313 |
return CoderResult.malformedForLength(inputSize); |
|
314 |
} |
|
315 |
da[dp++] = (char)(b1 + 0xff40); |
|
316 |
break; |
|
317 |
} |
|
318 |
sp += inputSize; |
|
319 |
} |
|
320 |
return CoderResult.UNDERFLOW; |
|
321 |
} finally { |
|
322 |
src.position(sp - src.arrayOffset()); |
|
323 |
dst.position(dp - dst.arrayOffset()); |
|
324 |
} |
|
325 |
} |
|
326 |
||
327 |
private CoderResult decodeBufferLoop(ByteBuffer src, |
|
328 |
CharBuffer dst) |
|
329 |
{ |
|
330 |
int mark = src.position(); |
|
331 |
int b1 = 0, b2 = 0, b3 = 0, b4=0; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
332 |
char c = UNMAPPABLE_DECODING; |
2 | 333 |
int inputSize = 0; |
334 |
try { |
|
335 |
while (src.hasRemaining()) { |
|
336 |
b1 = src.get() & 0xff; |
|
337 |
inputSize = 1; |
|
338 |
if ((b1 & 0x80) != 0) |
|
339 |
return CoderResult.malformedForLength(inputSize); |
|
340 |
if (b1 == ESC || b1 == SO || b1 == SI) { |
|
341 |
if (b1 == ESC) { // ESC |
|
342 |
if (src.remaining() < 2) |
|
343 |
return CoderResult.UNDERFLOW; |
|
344 |
b2 = src.get() & 0xff; |
|
345 |
inputSize++; |
|
346 |
if (b2 == '(') { |
|
347 |
b3 = src.get() & 0xff; |
|
348 |
inputSize++; |
|
349 |
if (b3 == 'B'){ |
|
350 |
currentState = ASCII; |
|
351 |
} else if (b3 == 'J'){ |
|
352 |
currentState = JISX0201_1976; |
|
353 |
} else if (b3 == 'I'){ |
|
354 |
currentState = JISX0201_1976_KANA; |
|
355 |
} else { |
|
356 |
return CoderResult.malformedForLength(inputSize); |
|
357 |
} |
|
358 |
} else if (b2 == '$'){ |
|
359 |
b3 = src.get() & 0xff; |
|
360 |
inputSize++; |
|
361 |
if (b3 == '@'){ |
|
362 |
currentState = JISX0208_1978; |
|
363 |
} else if (b3 == 'B'){ |
|
364 |
currentState = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
365 |
} else if (b3 == '(' && dec0212 != null) { |
2 | 366 |
if (!src.hasRemaining()) |
367 |
return CoderResult.UNDERFLOW; |
|
368 |
b4 = src.get() & 0xff; |
|
369 |
inputSize++; |
|
370 |
if (b4 == 'D') { |
|
371 |
currentState = JISX0212_1990; |
|
372 |
} else { |
|
373 |
return CoderResult.malformedForLength(inputSize); |
|
374 |
} |
|
375 |
} else { |
|
376 |
return CoderResult.malformedForLength(inputSize); |
|
377 |
} |
|
378 |
} else { |
|
379 |
return CoderResult.malformedForLength(inputSize); |
|
380 |
} |
|
381 |
} else if (b1 == SO) { |
|
382 |
previousState = currentState; |
|
383 |
currentState = SHIFTOUT; |
|
384 |
} else if (b1 == SI) { // shift back in |
|
385 |
currentState = previousState; |
|
386 |
} |
|
387 |
mark += inputSize; |
|
388 |
continue; |
|
389 |
} |
|
390 |
if (!dst.hasRemaining()) |
|
391 |
return CoderResult.OVERFLOW; |
|
392 |
||
393 |
switch (currentState){ |
|
394 |
case ASCII: |
|
395 |
dst.put((char)(b1 & 0xff)); |
|
396 |
break; |
|
397 |
case JISX0201_1976: |
|
398 |
switch (b1) { |
|
399 |
case 0x5c: // Yen/tilde substitution |
|
400 |
dst.put('\u00a5'); |
|
401 |
break; |
|
402 |
case 0x7e: |
|
403 |
dst.put('\u203e'); |
|
404 |
break; |
|
405 |
default: |
|
406 |
dst.put((char)b1); |
|
407 |
break; |
|
408 |
} |
|
409 |
break; |
|
410 |
case JISX0208_1978: |
|
411 |
case JISX0208_1983: |
|
412 |
if (!src.hasRemaining()) |
|
413 |
return CoderResult.UNDERFLOW; |
|
414 |
b2 = src.get() & 0xff; |
|
415 |
inputSize++; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
416 |
c = dec0208.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
417 |
if (c == UNMAPPABLE_DECODING) |
2 | 418 |
return CoderResult.unmappableForLength(inputSize); |
419 |
dst.put(c); |
|
420 |
break; |
|
421 |
case JISX0212_1990: |
|
422 |
if (!src.hasRemaining()) |
|
423 |
return CoderResult.UNDERFLOW; |
|
424 |
b2 = src.get() & 0xff; |
|
425 |
inputSize++; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
426 |
c = dec0212.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
427 |
if (c == UNMAPPABLE_DECODING) |
2 | 428 |
return CoderResult.unmappableForLength(inputSize); |
429 |
dst.put(c); |
|
430 |
break; |
|
431 |
case JISX0201_1976_KANA: |
|
432 |
case SHIFTOUT: |
|
433 |
if (b1 > 0x60) { |
|
434 |
return CoderResult.malformedForLength(inputSize); |
|
435 |
} |
|
436 |
dst.put((char)(b1 + 0xff40)); |
|
437 |
break; |
|
438 |
} |
|
439 |
mark += inputSize; |
|
440 |
} |
|
441 |
return CoderResult.UNDERFLOW; |
|
442 |
} finally { |
|
443 |
src.position(mark); |
|
444 |
} |
|
445 |
} |
|
446 |
||
447 |
// Make some protected methods public for use by JISAutoDetect |
|
448 |
public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
|
449 |
if (src.hasArray() && dst.hasArray()) |
|
450 |
return decodeArrayLoop(src, dst); |
|
451 |
else |
|
452 |
return decodeBufferLoop(src, dst); |
|
453 |
} |
|
454 |
||
455 |
public CoderResult implFlush(CharBuffer out) { |
|
456 |
return super.implFlush(out); |
|
457 |
} |
|
458 |
} |
|
459 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
460 |
static class Encoder extends CharsetEncoder { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
461 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
462 |
final static DoubleByte.Encoder ENC0208 = |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
463 |
(DoubleByte.Encoder)new JIS_X_0208().newEncoder(); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
464 |
|
2 | 465 |
private static byte[] repl = { (byte)0x21, (byte)0x29 }; |
466 |
private int currentMode = ASCII; |
|
467 |
private int replaceMode = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
468 |
private DoubleByte.Encoder enc0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
469 |
private DoubleByte.Encoder enc0212; |
2 | 470 |
private boolean doSBKANA; |
471 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
472 |
/**
 * Builds an encoder that uses the shared {@code ENC0208} table, has no
 * JIS X 0212 encoder, and writes halfwidth kana as single bytes.
 *
 * @param cs the charset this encoder belongs to
 */
private Encoder(Charset cs) {
    this(cs, ENC0208, /* enc0212 */ null, /* doSBKANA */ true);
}
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
475 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
476 |
/**
 * Builds an encoder with explicit two-byte tables.
 *
 * @param cs       the charset this encoder belongs to
 * @param enc0208  encoder tried first for two-byte characters
 * @param enc0212  encoder tried when {@code enc0208} cannot map a
 *                 character, or {@code null} for none
 * @param doSBKANA whether U+FF61..U+FF9F are written as single-byte kana
 */
Encoder(Charset cs,
        DoubleByte.Encoder enc0208,
        DoubleByte.Encoder enc0212,
        boolean doSBKANA)
{
    // Average 4 bytes per char; the maximum is 9 when a JIS X 0212
    // encoder is present and 8 otherwise.
    super(cs, 4.0f, (enc0212 != null) ? 9.0f : 8.0f, repl);
    this.enc0208  = enc0208;
    this.enc0212  = enc0212;
    this.doSBKANA = doSBKANA;
}
|
485 |
||
486 |
protected int encodeSingle(char inputChar) { |
|
487 |
return -1; |
|
488 |
} |
|
489 |
||
490 |
protected void implReset() { |
|
491 |
currentMode = ASCII; |
|
492 |
} |
|
493 |
||
494 |
protected void implReplaceWith(byte[] newReplacement) { |
|
495 |
/* It's almost impossible to decide which charset they belong |
|
496 |
to. The best thing we can do here is to "guess" based on |
|
497 |
the length of newReplacement. |
|
498 |
*/ |
|
499 |
if (newReplacement.length == 1) { |
|
500 |
replaceMode = ASCII; |
|
501 |
} else if (newReplacement.length == 2) { |
|
502 |
replaceMode = JISX0208_1983; |
|
503 |
} |
|
504 |
} |
|
505 |
||
506 |
protected CoderResult implFlush(ByteBuffer out) { |
|
507 |
if (currentMode != ASCII) { |
|
508 |
if (out.remaining() < 3) |
|
509 |
return CoderResult.OVERFLOW; |
|
510 |
out.put((byte)0x1b); |
|
511 |
out.put((byte)0x28); |
|
512 |
out.put((byte)0x42); |
|
513 |
currentMode = ASCII; |
|
514 |
} |
|
515 |
return CoderResult.UNDERFLOW; |
|
516 |
} |
|
517 |
||
518 |
public boolean canEncode(char c) { |
|
519 |
return ((c <= '\u007F') || |
|
520 |
(c >= 0xFF61 && c <= 0xFF9F) || |
|
521 |
(c == '\u00A5') || |
|
522 |
(c == '\u203E') || |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
523 |
enc0208.canEncode(c) || |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
524 |
(enc0212!=null && enc0212.canEncode(c))); |
2 | 525 |
} |
526 |
||
527 |
private final Surrogate.Parser sgp = new Surrogate.Parser(); |
|
528 |
||
529 |
private CoderResult encodeArrayLoop(CharBuffer src, |
|
530 |
ByteBuffer dst) |
|
531 |
{ |
|
532 |
char[] sa = src.array(); |
|
533 |
int sp = src.arrayOffset() + src.position(); |
|
534 |
int sl = src.arrayOffset() + src.limit(); |
|
535 |
assert (sp <= sl); |
|
536 |
sp = (sp <= sl ? sp : sl); |
|
537 |
byte[] da = dst.array(); |
|
538 |
int dp = dst.arrayOffset() + dst.position(); |
|
539 |
int dl = dst.arrayOffset() + dst.limit(); |
|
540 |
assert (dp <= dl); |
|
541 |
dp = (dp <= dl ? dp : dl); |
|
542 |
||
543 |
try { |
|
544 |
while (sp < sl) { |
|
545 |
char c = sa[sp]; |
|
546 |
if (c <= '\u007F') { |
|
547 |
if (currentMode != ASCII) { |
|
548 |
if (dl - dp < 3) |
|
549 |
return CoderResult.OVERFLOW; |
|
550 |
da[dp++] = (byte)0x1b; |
|
551 |
da[dp++] = (byte)0x28; |
|
552 |
da[dp++] = (byte)0x42; |
|
553 |
currentMode = ASCII; |
|
554 |
} |
|
555 |
if (dl - dp < 1) |
|
556 |
return CoderResult.OVERFLOW; |
|
557 |
da[dp++] = (byte)c; |
|
558 |
} else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
559 |
//a single byte kana |
|
560 |
if (currentMode != JISX0201_1976_KANA) { |
|
561 |
if (dl - dp < 3) |
|
562 |
return CoderResult.OVERFLOW; |
|
563 |
da[dp++] = (byte)0x1b; |
|
564 |
da[dp++] = (byte)0x28; |
|
565 |
da[dp++] = (byte)0x49; |
|
566 |
currentMode = JISX0201_1976_KANA; |
|
567 |
} |
|
568 |
if (dl - dp < 1) |
|
569 |
return CoderResult.OVERFLOW; |
|
570 |
da[dp++] = (byte)(c - 0xff40); |
|
571 |
} else if (c == '\u00A5' || c == '\u203E') { |
|
572 |
//backslash or tilde |
|
573 |
if (currentMode != JISX0201_1976) { |
|
574 |
if (dl - dp < 3) |
|
575 |
return CoderResult.OVERFLOW; |
|
576 |
da[dp++] = (byte)0x1b; |
|
577 |
da[dp++] = (byte)0x28; |
|
578 |
da[dp++] = (byte)0x4a; |
|
579 |
currentMode = JISX0201_1976; |
|
580 |
} |
|
581 |
if (dl - dp < 1) |
|
582 |
return CoderResult.OVERFLOW; |
|
583 |
da[dp++] = (c == '\u00A5')?(byte)0x5C:(byte)0x7e; |
|
584 |
} else { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
585 |
int index = enc0208.encodeChar(c); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
586 |
if (index != UNMAPPABLE_ENCODING) { |
2 | 587 |
if (currentMode != JISX0208_1983) { |
588 |
if (dl - dp < 3) |
|
589 |
return CoderResult.OVERFLOW; |
|
590 |
da[dp++] = (byte)0x1b; |
|
591 |
da[dp++] = (byte)0x24; |
|
592 |
da[dp++] = (byte)0x42; |
|
593 |
currentMode = JISX0208_1983; |
|
594 |
} |
|
595 |
if (dl - dp < 2) |
|
596 |
return CoderResult.OVERFLOW; |
|
597 |
da[dp++] = (byte)(index >> 8); |
|
598 |
da[dp++] = (byte)(index & 0xff); |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
599 |
} else if (enc0212 != null && |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
600 |
(index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
2 | 601 |
if (currentMode != JISX0212_1990) { |
602 |
if (dl - dp < 4) |
|
603 |
return CoderResult.OVERFLOW; |
|
604 |
da[dp++] = (byte)0x1b; |
|
605 |
da[dp++] = (byte)0x24; |
|
606 |
da[dp++] = (byte)0x28; |
|
607 |
da[dp++] = (byte)0x44; |
|
608 |
currentMode = JISX0212_1990; |
|
609 |
} |
|
610 |
if (dl - dp < 2) |
|
611 |
return CoderResult.OVERFLOW; |
|
612 |
da[dp++] = (byte)(index >> 8); |
|
613 |
da[dp++] = (byte)(index & 0xff); |
|
614 |
} else { |
|
3714 | 615 |
if (Character.isSurrogate(c) && sgp.parse(c, sa, sp, sl) < 0) |
2 | 616 |
return sgp.error(); |
617 |
if (unmappableCharacterAction() |
|
618 |
== CodingErrorAction.REPLACE |
|
619 |
&& currentMode != replaceMode) { |
|
620 |
if (dl - dp < 3) |
|
621 |
return CoderResult.OVERFLOW; |
|
622 |
if (replaceMode == ASCII) { |
|
623 |
da[dp++] = (byte)0x1b; |
|
624 |
da[dp++] = (byte)0x28; |
|
625 |
da[dp++] = (byte)0x42; |
|
626 |
} else { |
|
627 |
da[dp++] = (byte)0x1b; |
|
628 |
da[dp++] = (byte)0x24; |
|
629 |
da[dp++] = (byte)0x42; |
|
630 |
} |
|
631 |
currentMode = replaceMode; |
|
632 |
} |
|
3714 | 633 |
if (Character.isSurrogate(c)) |
2 | 634 |
return sgp.unmappableResult(); |
635 |
return CoderResult.unmappableForLength(1); |
|
636 |
} |
|
637 |
} |
|
638 |
sp++; |
|
639 |
} |
|
640 |
return CoderResult.UNDERFLOW; |
|
641 |
} finally { |
|
642 |
src.position(sp - src.arrayOffset()); |
|
643 |
dst.position(dp - dst.arrayOffset()); |
|
644 |
} |
|
645 |
} |
|
646 |
||
647 |
private CoderResult encodeBufferLoop(CharBuffer src, |
|
648 |
ByteBuffer dst) |
|
649 |
{ |
|
650 |
int mark = src.position(); |
|
651 |
try { |
|
652 |
while (src.hasRemaining()) { |
|
653 |
char c = src.get(); |
|
654 |
||
655 |
if (c <= '\u007F') { |
|
656 |
if (currentMode != ASCII) { |
|
657 |
if (dst.remaining() < 3) |
|
658 |
return CoderResult.OVERFLOW; |
|
659 |
dst.put((byte)0x1b); |
|
660 |
dst.put((byte)0x28); |
|
661 |
dst.put((byte)0x42); |
|
662 |
currentMode = ASCII; |
|
663 |
} |
|
664 |
if (dst.remaining() < 1) |
|
665 |
return CoderResult.OVERFLOW; |
|
666 |
dst.put((byte)c); |
|
667 |
} else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
668 |
//Is it a single byte kana? |
|
669 |
if (currentMode != JISX0201_1976_KANA) { |
|
670 |
if (dst.remaining() < 3) |
|
671 |
return CoderResult.OVERFLOW; |
|
672 |
dst.put((byte)0x1b); |
|
673 |
dst.put((byte)0x28); |
|
674 |
dst.put((byte)0x49); |
|
675 |
currentMode = JISX0201_1976_KANA; |
|
676 |
} |
|
677 |
if (dst.remaining() < 1) |
|
678 |
return CoderResult.OVERFLOW; |
|
679 |
dst.put((byte)(c - 0xff40)); |
|
680 |
} else if (c == '\u00a5' || c == '\u203E') { |
|
681 |
if (currentMode != JISX0201_1976) { |
|
682 |
if (dst.remaining() < 3) |
|
683 |
return CoderResult.OVERFLOW; |
|
684 |
dst.put((byte)0x1b); |
|
685 |
dst.put((byte)0x28); |
|
686 |
dst.put((byte)0x4a); |
|
687 |
currentMode = JISX0201_1976; |
|
688 |
} |
|
689 |
if (dst.remaining() < 1) |
|
690 |
return CoderResult.OVERFLOW; |
|
691 |
dst.put((c == '\u00A5')?(byte)0x5C:(byte)0x7e); |
|
692 |
} else { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
693 |
int index = enc0208.encodeChar(c); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
694 |
if (index != UNMAPPABLE_ENCODING) { |
2 | 695 |
if (currentMode != JISX0208_1983) { |
696 |
if (dst.remaining() < 3) |
|
697 |
return CoderResult.OVERFLOW; |
|
698 |
dst.put((byte)0x1b); |
|
699 |
dst.put((byte)0x24); |
|
700 |
dst.put((byte)0x42); |
|
701 |
currentMode = JISX0208_1983; |
|
702 |
} |
|
703 |
if (dst.remaining() < 2) |
|
704 |
return CoderResult.OVERFLOW; |
|
705 |
dst.put((byte)(index >> 8)); |
|
706 |
dst.put((byte)(index & 0xff)); |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
707 |
} else if (enc0212 != null && |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
708 |
(index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
2 | 709 |
if (currentMode != JISX0212_1990) { |
710 |
if (dst.remaining() < 4) |
|
711 |
return CoderResult.OVERFLOW; |
|
712 |
dst.put((byte)0x1b); |
|
713 |
dst.put((byte)0x24); |
|
714 |
dst.put((byte)0x28); |
|
715 |
dst.put((byte)0x44); |
|
716 |
currentMode = JISX0212_1990; |
|
717 |
} |
|
718 |
if (dst.remaining() < 2) |
|
719 |
return CoderResult.OVERFLOW; |
|
720 |
dst.put((byte)(index >> 8)); |
|
721 |
dst.put((byte)(index & 0xff)); |
|
722 |
} else { |
|
3714 | 723 |
if (Character.isSurrogate(c) && sgp.parse(c, src) < 0) |
2 | 724 |
return sgp.error(); |
725 |
if (unmappableCharacterAction() == CodingErrorAction.REPLACE |
|
726 |
&& currentMode != replaceMode) { |
|
727 |
if (dst.remaining() < 3) |
|
728 |
return CoderResult.OVERFLOW; |
|
729 |
if (replaceMode == ASCII) { |
|
730 |
dst.put((byte)0x1b); |
|
731 |
dst.put((byte)0x28); |
|
732 |
dst.put((byte)0x42); |
|
733 |
} else { |
|
734 |
dst.put((byte)0x1b); |
|
735 |
dst.put((byte)0x24); |
|
736 |
dst.put((byte)0x42); |
|
737 |
} |
|
738 |
currentMode = replaceMode; |
|
739 |
} |
|
3714 | 740 |
if (Character.isSurrogate(c)) |
2 | 741 |
return sgp.unmappableResult(); |
742 |
return CoderResult.unmappableForLength(1); |
|
743 |
} |
|
744 |
} |
|
745 |
mark++; |
|
746 |
} |
|
747 |
return CoderResult.UNDERFLOW; |
|
748 |
} finally { |
|
749 |
src.position(mark); |
|
750 |
} |
|
751 |
} |
|
752 |
||
753 |
protected CoderResult encodeLoop(CharBuffer src, |
|
754 |
ByteBuffer dst) |
|
755 |
{ |
|
756 |
if (src.hasArray() && dst.hasArray()) |
|
757 |
return encodeArrayLoop(src, dst); |
|
758 |
else |
|
759 |
return encodeBufferLoop(src, dst); |
|
760 |
} |
|
761 |
} |
|
762 |
} |