author | jwilhelm |
Thu, 03 Oct 2019 07:10:36 +0200 | |
changeset 58449 | e606e9b6ba7a |
parent 52378 | fb71a4bc010d |
permissions | -rw-r--r-- |
2 | 1 |
/* |
14342
8435a30053c1
7197491: update copyright year to match last edit in jdk8 jdk repository
alanb
parents:
13366
diff
changeset
|
2 |
* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package sun.nio.cs.ext; |
|
27 |
||
28 |
import java.nio.ByteBuffer; |
|
29 |
import java.nio.CharBuffer; |
|
30 |
import java.nio.charset.Charset; |
|
31 |
import java.nio.charset.CharsetDecoder; |
|
32 |
import java.nio.charset.CharsetEncoder; |
|
33 |
import java.nio.charset.CoderResult; |
|
34 |
import java.nio.charset.CodingErrorAction; |
|
28969
f980bee32887
8073152: Update Standard/ExtendedCharsets to work with module system
sherman
parents:
25859
diff
changeset
|
35 |
import sun.nio.cs.DelegatableDecoder; |
f980bee32887
8073152: Update Standard/ExtendedCharsets to work with module system
sherman
parents:
25859
diff
changeset
|
36 |
import sun.nio.cs.DoubleByte; |
2 | 37 |
import sun.nio.cs.HistoricallyNamedCharset; |
38 |
import sun.nio.cs.Surrogate; |
|
39 |
import sun.nio.cs.US_ASCII; |
|
28969
f980bee32887
8073152: Update Standard/ExtendedCharsets to work with module system
sherman
parents:
25859
diff
changeset
|
40 |
import sun.nio.cs.*; |
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
41 |
import static sun.nio.cs.CharsetMapping.*; |
2 | 42 |
|
43 |
/* |
|
44 |
* Implementation notes: |
|
45 |
* |
|
46 |
* (1)"Standard based" (ASCII, JIS_X_0201 and JIS_X_0208) ISO2022-JP charset |
|
47 |
* is provided by the base implementation of this class. |
|
48 |
* |
|
49 |
* Three Microsoft ISO2022-JP variants, MS50220, MS50221 and MSISO2022JP |
|
50 |
* are provided via subclasses. |
|
51 |
* |
|
52 |
* (2)MS50220 and MS50221 are assumed to work the same way as Microsoft |
|
53 |
* CP50220 and CP50221's 7-bit implementation works by using CP5022X |
|
54 |
* specific JIS0208 and JIS0212 mapping tables (generated via Microsoft's |
|
55 |
* MultiByteToWideChar/WideCharToMultiByte APIs). The only difference |
|
56 |
* between these 2 classes is that MS50220 does not support singlebyte |
|
57 |
* halfwidth kana (Uff61-Uff9f) shiftin mechanism when "encoding", instead |
|
58 |
* these halfwidth kana characters are converted to their fullwidth JIS0208 |
|
59 |
* counterparts. |
|
60 |
* |
|
61 |
* The difference between the standard JIS_X_0208 and JIS_X_0212 mappings |
|
62 |
* and the CP50220/50221 specific are |
|
63 |
* |
|
64 |
* 0208 mapping: |
|
65 |
* 1)0x213d <-> U2015 (compared to U2014) |
|
66 |
* 2)One way mappings for 5 characters below |
|
67 |
* u2225 (ms) -> 0x2142 <-> u2016 (jis) |
|
68 |
* uff0d (ms) -> 0x215d <-> u2212 (jis) |
|
69 |
* uffe0 (ms) -> 0x2171 <-> u00a2 (jis) |
|
70 |
* uffe1 (ms) -> 0x2172 <-> u00a3 (jis) |
|
71 |
* uffe2 (ms) -> 0x224c <-> u00ac (jis) |
|
72 |
* //should consider 0xff5e -> 0x2141 <-> U301c? |
|
73 |
* 3)NEC Row13 0x2d21-0x2d79 |
|
74 |
* 4)85-94 ku <-> UE000,UE3AB (includes NEC selected |
|
75 |
* IBM kanji in 89-92ku) |
|
76 |
* 5)UFF61-UFF9f -> Fullwidth 0208 KANA |
|
77 |
* |
|
78 |
* 0212 mapping: |
|
79 |
* 1)0x2237 <-> UFF5E (Fullwidth Tilde) |
|
80 |
* 2)0x2271 <-> U2116 (Numero Sign) |
|
81 |
* 3)85-94 ku <-> UE3AC - UE757 |
|
82 |
* |
|
83 |
* (3)MSISO2022JP uses a JIS0208 mapping generated from MS932DB.b2c |
|
84 |
* and MS932DB.c2b by converting the SJIS codepoints back to their |
|
85 |
* JIS0208 counterparts. With the exception of |
|
86 |
* |
|
87 |
* (a)Codepoints with a resulting JIS0208 codepoint beyond 0x7e00 are |
|
88 |
* dropped (this includes the IBM Extended Kanji/Non-kanji from 0x9321 |
|
89 |
* to 0x972c) |
|
90 |
* (b)The Unicode codepoints that the IBM Extended Kanji/Non-kanji are |
|
91 |
* mapped to (in MS932) are mapped back to NEC selected IBM Kanji/ |
|
92 |
* Non-kanji area at 0x7921-0x7c7e. |
|
93 |
* |
|
94 |
* Compared to JIS_X_0208 mapping, this MS932 based mapping has |
|
95 |
||
96 |
* (a)different mappings for 7 JIS codepoints |
|
97 |
* 0x213d <-> U2015 |
|
98 |
* 0x2141 <-> UFF5E |
|
99 |
* 0x2142 <-> U2225 |
|
100 |
* 0x215d <-> Uff0d |
|
101 |
* 0x2171 <-> Uffe0 |
|
102 |
* 0x2172 <-> Uffe1 |
|
103 |
* 0x224c <-> Uffe2 |
|
104 |
* (b)added one-way c2b mappings for |
|
105 |
* U00b8 -> 0x2124 |
|
106 |
* U00b7 -> 0x2126 |
|
107 |
* U00af -> 0x2131 |
|
108 |
* U00ab -> 0x2263 |
|
109 |
* U00bb -> 0x2264 |
|
110 |
* U3094 -> 0x2574 |
|
111 |
* U00b5 -> 0x264c |
|
112 |
* (c)NEC Row 13 |
|
113 |
* (d)NEC selected IBM extended Kanji/Non-kanji |
|
114 |
* These codepoints are mapped to the same Unicode codepoints as |
|
115 |
* the MS932 does, while MS50220/50221 maps them to the Unicode |
|
116 |
* private area. |
|
117 |
* |
|
118 |
* # There is also an interesting difference when compared to MS5022X |
|
119 |
* 0208 mapping for JIS codepoint "0x2D60", MS932 maps it to U301d |
|
120 |
* but MS5022X maps it to U301e, obviously MS5022X is wrong, but... |
|
121 |
*/ |
|
122 |
||
123 |
public class ISO2022_JP |
|
124 |
extends Charset |
|
125 |
implements HistoricallyNamedCharset |
|
126 |
{ |
|
127 |
private static final int ASCII = 0; // ESC ( B |
|
128 |
private static final int JISX0201_1976 = 1; // ESC ( J |
|
129 |
private static final int JISX0208_1978 = 2; // ESC $ @ |
|
130 |
private static final int JISX0208_1983 = 3; // ESC $ B |
|
131 |
private static final int JISX0212_1990 = 4; // ESC $ ( D |
|
132 |
private static final int JISX0201_1976_KANA = 5; // ESC ( I |
|
133 |
private static final int SHIFTOUT = 6; |
|
134 |
||
135 |
private static final int ESC = 0x1b; |
|
136 |
private static final int SO = 0x0e; |
|
137 |
private static final int SI = 0x0f; |
|
138 |
||
139 |
public ISO2022_JP() { |
|
140 |
super("ISO-2022-JP", |
|
141 |
ExtendedCharsets.aliasesFor("ISO-2022-JP")); |
|
142 |
} |
|
143 |
||
144 |
protected ISO2022_JP(String canonicalName, |
|
145 |
String[] aliases) { |
|
146 |
super(canonicalName, aliases); |
|
147 |
} |
|
148 |
||
149 |
public String historicalName() { |
|
150 |
return "ISO2022JP"; |
|
151 |
} |
|
152 |
||
153 |
public boolean contains(Charset cs) { |
|
154 |
return ((cs instanceof JIS_X_0201) |
|
155 |
|| (cs instanceof US_ASCII) |
|
156 |
|| (cs instanceof JIS_X_0208) |
|
157 |
|| (cs instanceof ISO2022_JP)); |
|
158 |
} |
|
159 |
||
160 |
public CharsetDecoder newDecoder() { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
161 |
return new Decoder(this); |
2 | 162 |
} |
163 |
||
164 |
public CharsetEncoder newEncoder() { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
165 |
return new Encoder(this); |
2 | 166 |
} |
167 |
||
168 |
protected boolean doSBKANA() { |
|
169 |
return true; |
|
170 |
} |
|
171 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
172 |
static class Decoder extends CharsetDecoder |
2 | 173 |
implements DelegatableDecoder { |
174 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
175 |
final static DoubleByte.Decoder DEC0208 = |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
176 |
(DoubleByte.Decoder)new JIS_X_0208().newDecoder(); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
177 |
|
2 | 178 |
private int currentState; |
179 |
private int previousState; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
180 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
181 |
private DoubleByte.Decoder dec0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
182 |
private DoubleByte.Decoder dec0212; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
183 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
184 |
private Decoder(Charset cs) { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
185 |
this(cs, DEC0208, null); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
186 |
} |
2 | 187 |
|
188 |
protected Decoder(Charset cs, |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
189 |
DoubleByte.Decoder dec0208, |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
190 |
DoubleByte.Decoder dec0212) { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
191 |
super(cs, 0.5f, 1.0f); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
192 |
this.dec0208 = dec0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
193 |
this.dec0212 = dec0212; |
2 | 194 |
currentState = ASCII; |
195 |
previousState = ASCII; |
|
196 |
} |
|
197 |
||
198 |
public void implReset() { |
|
199 |
currentState = ASCII; |
|
200 |
previousState = ASCII; |
|
201 |
} |
|
202 |
||
203 |
private CoderResult decodeArrayLoop(ByteBuffer src, |
|
204 |
CharBuffer dst) |
|
205 |
{ |
|
206 |
int inputSize = 0; |
|
207 |
int b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
208 |
char c = UNMAPPABLE_DECODING; |
2 | 209 |
byte[] sa = src.array(); |
210 |
int sp = src.arrayOffset() + src.position(); |
|
211 |
int sl = src.arrayOffset() + src.limit(); |
|
212 |
assert (sp <= sl); |
|
213 |
sp = (sp <= sl ? sp : sl); |
|
214 |
||
215 |
char[] da = dst.array(); |
|
216 |
int dp = dst.arrayOffset() + dst.position(); |
|
217 |
int dl = dst.arrayOffset() + dst.limit(); |
|
218 |
assert (dp <= dl); |
|
219 |
dp = (dp <= dl ? dp : dl); |
|
220 |
||
221 |
try { |
|
222 |
while (sp < sl) { |
|
223 |
b1 = sa[sp] & 0xff; |
|
224 |
inputSize = 1; |
|
225 |
if ((b1 & 0x80) != 0) { |
|
226 |
return CoderResult.malformedForLength(inputSize); |
|
227 |
} |
|
228 |
if (b1 == ESC || b1 == SO || b1 == SI) { |
|
229 |
if (b1 == ESC) { |
|
230 |
if (sp + inputSize + 2 > sl) |
|
231 |
return CoderResult.UNDERFLOW; |
|
232 |
b2 = sa[sp + inputSize++] & 0xff; |
|
233 |
if (b2 == '(') { |
|
234 |
b3 = sa[sp + inputSize++] & 0xff; |
|
235 |
if (b3 == 'B'){ |
|
236 |
currentState = ASCII; |
|
237 |
} else if (b3 == 'J'){ |
|
238 |
currentState = JISX0201_1976; |
|
239 |
} else if (b3 == 'I'){ |
|
240 |
currentState = JISX0201_1976_KANA; |
|
241 |
} else { |
|
242 |
return CoderResult.malformedForLength(inputSize); |
|
243 |
} |
|
244 |
} else if (b2 == '$'){ |
|
245 |
b3 = sa[sp + inputSize++] & 0xff; |
|
246 |
if (b3 == '@'){ |
|
247 |
currentState = JISX0208_1978; |
|
248 |
} else if (b3 == 'B'){ |
|
249 |
currentState = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
250 |
} else if (b3 == '(' && dec0212 != null) { |
2 | 251 |
if (sp + inputSize + 1 > sl) |
252 |
return CoderResult.UNDERFLOW; |
|
253 |
b4 = sa[sp + inputSize++] & 0xff; |
|
254 |
if (b4 == 'D') { |
|
255 |
currentState = JISX0212_1990; |
|
256 |
} else { |
|
257 |
return CoderResult.malformedForLength(inputSize); |
|
258 |
} |
|
259 |
} else { |
|
260 |
return CoderResult.malformedForLength(inputSize); |
|
261 |
} |
|
262 |
} else { |
|
263 |
return CoderResult.malformedForLength(inputSize); |
|
264 |
} |
|
265 |
} else if (b1 == SO) { |
|
266 |
previousState = currentState; |
|
267 |
currentState = SHIFTOUT; |
|
268 |
} else if (b1 == SI) { |
|
269 |
currentState = previousState; |
|
270 |
} |
|
271 |
sp += inputSize; |
|
272 |
continue; |
|
273 |
} |
|
274 |
if (dp + 1 > dl) |
|
275 |
return CoderResult.OVERFLOW; |
|
276 |
||
277 |
switch (currentState){ |
|
278 |
case ASCII: |
|
279 |
da[dp++] = (char)(b1 & 0xff); |
|
280 |
break; |
|
281 |
case JISX0201_1976: |
|
282 |
switch (b1) { |
|
283 |
case 0x5c: // Yen/tilde substitution |
|
284 |
da[dp++] = '\u00a5'; |
|
285 |
break; |
|
286 |
case 0x7e: |
|
287 |
da[dp++] = '\u203e'; |
|
288 |
break; |
|
289 |
default: |
|
290 |
da[dp++] = (char)b1; |
|
291 |
break; |
|
292 |
} |
|
293 |
break; |
|
294 |
case JISX0208_1978: |
|
295 |
case JISX0208_1983: |
|
296 |
if (sp + inputSize + 1 > sl) |
|
297 |
return CoderResult.UNDERFLOW; |
|
298 |
b2 = sa[sp + inputSize++] & 0xff; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
299 |
c = dec0208.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
300 |
if (c == UNMAPPABLE_DECODING) |
2 | 301 |
return CoderResult.unmappableForLength(inputSize); |
302 |
da[dp++] = c; |
|
303 |
break; |
|
304 |
case JISX0212_1990: |
|
305 |
if (sp + inputSize + 1 > sl) |
|
306 |
return CoderResult.UNDERFLOW; |
|
307 |
b2 = sa[sp + inputSize++] & 0xff; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
308 |
c = dec0212.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
309 |
if (c == UNMAPPABLE_DECODING) |
2 | 310 |
return CoderResult.unmappableForLength(inputSize); |
311 |
da[dp++] = c; |
|
312 |
break; |
|
313 |
case JISX0201_1976_KANA: |
|
314 |
case SHIFTOUT: |
|
52378
fb71a4bc010d
8211382: ISO2022JP and GB18030 NIO converter issues
itakiguchi
parents:
47216
diff
changeset
|
315 |
if (b1 > 0x5f) { |
2 | 316 |
return CoderResult.malformedForLength(inputSize); |
317 |
} |
|
318 |
da[dp++] = (char)(b1 + 0xff40); |
|
319 |
break; |
|
320 |
} |
|
321 |
sp += inputSize; |
|
322 |
} |
|
323 |
return CoderResult.UNDERFLOW; |
|
324 |
} finally { |
|
325 |
src.position(sp - src.arrayOffset()); |
|
326 |
dst.position(dp - dst.arrayOffset()); |
|
327 |
} |
|
328 |
} |
|
329 |
||
330 |
private CoderResult decodeBufferLoop(ByteBuffer src, |
|
331 |
CharBuffer dst) |
|
332 |
{ |
|
333 |
int mark = src.position(); |
|
334 |
int b1 = 0, b2 = 0, b3 = 0, b4=0; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
335 |
char c = UNMAPPABLE_DECODING; |
2 | 336 |
int inputSize = 0; |
337 |
try { |
|
338 |
while (src.hasRemaining()) { |
|
339 |
b1 = src.get() & 0xff; |
|
340 |
inputSize = 1; |
|
341 |
if ((b1 & 0x80) != 0) |
|
342 |
return CoderResult.malformedForLength(inputSize); |
|
343 |
if (b1 == ESC || b1 == SO || b1 == SI) { |
|
344 |
if (b1 == ESC) { // ESC |
|
345 |
if (src.remaining() < 2) |
|
346 |
return CoderResult.UNDERFLOW; |
|
347 |
b2 = src.get() & 0xff; |
|
348 |
inputSize++; |
|
349 |
if (b2 == '(') { |
|
350 |
b3 = src.get() & 0xff; |
|
351 |
inputSize++; |
|
352 |
if (b3 == 'B'){ |
|
353 |
currentState = ASCII; |
|
354 |
} else if (b3 == 'J'){ |
|
355 |
currentState = JISX0201_1976; |
|
356 |
} else if (b3 == 'I'){ |
|
357 |
currentState = JISX0201_1976_KANA; |
|
358 |
} else { |
|
359 |
return CoderResult.malformedForLength(inputSize); |
|
360 |
} |
|
361 |
} else if (b2 == '$'){ |
|
362 |
b3 = src.get() & 0xff; |
|
363 |
inputSize++; |
|
364 |
if (b3 == '@'){ |
|
365 |
currentState = JISX0208_1978; |
|
366 |
} else if (b3 == 'B'){ |
|
367 |
currentState = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
368 |
} else if (b3 == '(' && dec0212 != null) { |
2 | 369 |
if (!src.hasRemaining()) |
370 |
return CoderResult.UNDERFLOW; |
|
371 |
b4 = src.get() & 0xff; |
|
372 |
inputSize++; |
|
373 |
if (b4 == 'D') { |
|
374 |
currentState = JISX0212_1990; |
|
375 |
} else { |
|
376 |
return CoderResult.malformedForLength(inputSize); |
|
377 |
} |
|
378 |
} else { |
|
379 |
return CoderResult.malformedForLength(inputSize); |
|
380 |
} |
|
381 |
} else { |
|
382 |
return CoderResult.malformedForLength(inputSize); |
|
383 |
} |
|
384 |
} else if (b1 == SO) { |
|
385 |
previousState = currentState; |
|
386 |
currentState = SHIFTOUT; |
|
387 |
} else if (b1 == SI) { // shift back in |
|
388 |
currentState = previousState; |
|
389 |
} |
|
390 |
mark += inputSize; |
|
391 |
continue; |
|
392 |
} |
|
393 |
if (!dst.hasRemaining()) |
|
394 |
return CoderResult.OVERFLOW; |
|
395 |
||
396 |
switch (currentState){ |
|
397 |
case ASCII: |
|
398 |
dst.put((char)(b1 & 0xff)); |
|
399 |
break; |
|
400 |
case JISX0201_1976: |
|
401 |
switch (b1) { |
|
402 |
case 0x5c: // Yen/tilde substitution |
|
403 |
dst.put('\u00a5'); |
|
404 |
break; |
|
405 |
case 0x7e: |
|
406 |
dst.put('\u203e'); |
|
407 |
break; |
|
408 |
default: |
|
409 |
dst.put((char)b1); |
|
410 |
break; |
|
411 |
} |
|
412 |
break; |
|
413 |
case JISX0208_1978: |
|
414 |
case JISX0208_1983: |
|
415 |
if (!src.hasRemaining()) |
|
416 |
return CoderResult.UNDERFLOW; |
|
417 |
b2 = src.get() & 0xff; |
|
418 |
inputSize++; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
419 |
c = dec0208.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
420 |
if (c == UNMAPPABLE_DECODING) |
2 | 421 |
return CoderResult.unmappableForLength(inputSize); |
422 |
dst.put(c); |
|
423 |
break; |
|
424 |
case JISX0212_1990: |
|
425 |
if (!src.hasRemaining()) |
|
426 |
return CoderResult.UNDERFLOW; |
|
427 |
b2 = src.get() & 0xff; |
|
428 |
inputSize++; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
429 |
c = dec0212.decodeDouble(b1,b2); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
430 |
if (c == UNMAPPABLE_DECODING) |
2 | 431 |
return CoderResult.unmappableForLength(inputSize); |
432 |
dst.put(c); |
|
433 |
break; |
|
434 |
case JISX0201_1976_KANA: |
|
435 |
case SHIFTOUT: |
|
52378
fb71a4bc010d
8211382: ISO2022JP and GB18030 NIO converter issues
itakiguchi
parents:
47216
diff
changeset
|
436 |
if (b1 > 0x5f) { |
2 | 437 |
return CoderResult.malformedForLength(inputSize); |
438 |
} |
|
439 |
dst.put((char)(b1 + 0xff40)); |
|
440 |
break; |
|
441 |
} |
|
442 |
mark += inputSize; |
|
443 |
} |
|
444 |
return CoderResult.UNDERFLOW; |
|
445 |
} finally { |
|
446 |
src.position(mark); |
|
447 |
} |
|
448 |
} |
|
449 |
||
450 |
// Make some protected methods public for use by JISAutoDetect |
|
451 |
public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
|
452 |
if (src.hasArray() && dst.hasArray()) |
|
453 |
return decodeArrayLoop(src, dst); |
|
454 |
else |
|
455 |
return decodeBufferLoop(src, dst); |
|
456 |
} |
|
457 |
||
458 |
public CoderResult implFlush(CharBuffer out) { |
|
459 |
return super.implFlush(out); |
|
460 |
} |
|
461 |
} |
|
462 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
463 |
static class Encoder extends CharsetEncoder { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
464 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
465 |
final static DoubleByte.Encoder ENC0208 = |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
466 |
(DoubleByte.Encoder)new JIS_X_0208().newEncoder(); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
467 |
|
2 | 468 |
private static byte[] repl = { (byte)0x21, (byte)0x29 }; |
469 |
private int currentMode = ASCII; |
|
470 |
private int replaceMode = JISX0208_1983; |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
471 |
private DoubleByte.Encoder enc0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
472 |
private DoubleByte.Encoder enc0212; |
2 | 473 |
private boolean doSBKANA; |
474 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
475 |
private Encoder(Charset cs) { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
476 |
this(cs, ENC0208, null, true); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
477 |
} |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
478 |
|
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
479 |
Encoder(Charset cs, |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
480 |
DoubleByte.Encoder enc0208, |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
481 |
DoubleByte.Encoder enc0212, |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
482 |
boolean doSBKANA) { |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
483 |
super(cs, 4.0f, (enc0212 != null)? 9.0f : 8.0f, repl); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
484 |
this.enc0208 = enc0208; |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
485 |
this.enc0212 = enc0212; |
2 | 486 |
this.doSBKANA = doSBKANA; |
487 |
} |
|
488 |
||
489 |
protected int encodeSingle(char inputChar) { |
|
490 |
return -1; |
|
491 |
} |
|
492 |
||
493 |
protected void implReset() { |
|
494 |
currentMode = ASCII; |
|
495 |
} |
|
496 |
||
497 |
protected void implReplaceWith(byte[] newReplacement) { |
|
498 |
/* It's almost impossible to decide which charset they belong |
|
499 |
to. The best thing we can do here is to "guess" based on |
|
500 |
the length of newReplacement. |
|
501 |
*/ |
|
502 |
if (newReplacement.length == 1) { |
|
503 |
replaceMode = ASCII; |
|
504 |
} else if (newReplacement.length == 2) { |
|
505 |
replaceMode = JISX0208_1983; |
|
506 |
} |
|
507 |
} |
|
508 |
||
509 |
protected CoderResult implFlush(ByteBuffer out) { |
|
510 |
if (currentMode != ASCII) { |
|
511 |
if (out.remaining() < 3) |
|
512 |
return CoderResult.OVERFLOW; |
|
513 |
out.put((byte)0x1b); |
|
514 |
out.put((byte)0x28); |
|
515 |
out.put((byte)0x42); |
|
516 |
currentMode = ASCII; |
|
517 |
} |
|
518 |
return CoderResult.UNDERFLOW; |
|
519 |
} |
|
520 |
||
521 |
public boolean canEncode(char c) { |
|
522 |
return ((c <= '\u007F') || |
|
523 |
(c >= 0xFF61 && c <= 0xFF9F) || |
|
524 |
(c == '\u00A5') || |
|
525 |
(c == '\u203E') || |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
526 |
enc0208.canEncode(c) || |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
527 |
(enc0212!=null && enc0212.canEncode(c))); |
2 | 528 |
} |
529 |
||
530 |
private final Surrogate.Parser sgp = new Surrogate.Parser(); |
|
531 |
||
532 |
private CoderResult encodeArrayLoop(CharBuffer src, |
|
533 |
ByteBuffer dst) |
|
534 |
{ |
|
535 |
char[] sa = src.array(); |
|
536 |
int sp = src.arrayOffset() + src.position(); |
|
537 |
int sl = src.arrayOffset() + src.limit(); |
|
538 |
assert (sp <= sl); |
|
539 |
sp = (sp <= sl ? sp : sl); |
|
540 |
byte[] da = dst.array(); |
|
541 |
int dp = dst.arrayOffset() + dst.position(); |
|
542 |
int dl = dst.arrayOffset() + dst.limit(); |
|
543 |
assert (dp <= dl); |
|
544 |
dp = (dp <= dl ? dp : dl); |
|
545 |
||
546 |
try { |
|
547 |
while (sp < sl) { |
|
548 |
char c = sa[sp]; |
|
549 |
if (c <= '\u007F') { |
|
550 |
if (currentMode != ASCII) { |
|
551 |
if (dl - dp < 3) |
|
552 |
return CoderResult.OVERFLOW; |
|
553 |
da[dp++] = (byte)0x1b; |
|
554 |
da[dp++] = (byte)0x28; |
|
555 |
da[dp++] = (byte)0x42; |
|
556 |
currentMode = ASCII; |
|
557 |
} |
|
558 |
if (dl - dp < 1) |
|
559 |
return CoderResult.OVERFLOW; |
|
560 |
da[dp++] = (byte)c; |
|
561 |
} else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
562 |
//a single byte kana |
|
563 |
if (currentMode != JISX0201_1976_KANA) { |
|
564 |
if (dl - dp < 3) |
|
565 |
return CoderResult.OVERFLOW; |
|
566 |
da[dp++] = (byte)0x1b; |
|
567 |
da[dp++] = (byte)0x28; |
|
568 |
da[dp++] = (byte)0x49; |
|
569 |
currentMode = JISX0201_1976_KANA; |
|
570 |
} |
|
571 |
if (dl - dp < 1) |
|
572 |
return CoderResult.OVERFLOW; |
|
573 |
da[dp++] = (byte)(c - 0xff40); |
|
574 |
} else if (c == '\u00A5' || c == '\u203E') { |
|
575 |
//backslash or tilde |
|
576 |
if (currentMode != JISX0201_1976) { |
|
577 |
if (dl - dp < 3) |
|
578 |
return CoderResult.OVERFLOW; |
|
579 |
da[dp++] = (byte)0x1b; |
|
580 |
da[dp++] = (byte)0x28; |
|
581 |
da[dp++] = (byte)0x4a; |
|
582 |
currentMode = JISX0201_1976; |
|
583 |
} |
|
584 |
if (dl - dp < 1) |
|
585 |
return CoderResult.OVERFLOW; |
|
586 |
da[dp++] = (c == '\u00A5')?(byte)0x5C:(byte)0x7e; |
|
587 |
} else { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
588 |
int index = enc0208.encodeChar(c); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
589 |
if (index != UNMAPPABLE_ENCODING) { |
2 | 590 |
if (currentMode != JISX0208_1983) { |
591 |
if (dl - dp < 3) |
|
592 |
return CoderResult.OVERFLOW; |
|
593 |
da[dp++] = (byte)0x1b; |
|
594 |
da[dp++] = (byte)0x24; |
|
595 |
da[dp++] = (byte)0x42; |
|
596 |
currentMode = JISX0208_1983; |
|
597 |
} |
|
598 |
if (dl - dp < 2) |
|
599 |
return CoderResult.OVERFLOW; |
|
600 |
da[dp++] = (byte)(index >> 8); |
|
601 |
da[dp++] = (byte)(index & 0xff); |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
602 |
} else if (enc0212 != null && |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
603 |
(index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
2 | 604 |
if (currentMode != JISX0212_1990) { |
605 |
if (dl - dp < 4) |
|
606 |
return CoderResult.OVERFLOW; |
|
607 |
da[dp++] = (byte)0x1b; |
|
608 |
da[dp++] = (byte)0x24; |
|
609 |
da[dp++] = (byte)0x28; |
|
610 |
da[dp++] = (byte)0x44; |
|
611 |
currentMode = JISX0212_1990; |
|
612 |
} |
|
613 |
if (dl - dp < 2) |
|
614 |
return CoderResult.OVERFLOW; |
|
615 |
da[dp++] = (byte)(index >> 8); |
|
616 |
da[dp++] = (byte)(index & 0xff); |
|
617 |
} else { |
|
3714 | 618 |
if (Character.isSurrogate(c) && sgp.parse(c, sa, sp, sl) < 0) |
2 | 619 |
return sgp.error(); |
620 |
if (unmappableCharacterAction() |
|
621 |
== CodingErrorAction.REPLACE |
|
622 |
&& currentMode != replaceMode) { |
|
623 |
if (dl - dp < 3) |
|
624 |
return CoderResult.OVERFLOW; |
|
625 |
if (replaceMode == ASCII) { |
|
626 |
da[dp++] = (byte)0x1b; |
|
627 |
da[dp++] = (byte)0x28; |
|
628 |
da[dp++] = (byte)0x42; |
|
629 |
} else { |
|
630 |
da[dp++] = (byte)0x1b; |
|
631 |
da[dp++] = (byte)0x24; |
|
632 |
da[dp++] = (byte)0x42; |
|
633 |
} |
|
634 |
currentMode = replaceMode; |
|
635 |
} |
|
3714 | 636 |
if (Character.isSurrogate(c)) |
2 | 637 |
return sgp.unmappableResult(); |
638 |
return CoderResult.unmappableForLength(1); |
|
639 |
} |
|
640 |
} |
|
641 |
sp++; |
|
642 |
} |
|
643 |
return CoderResult.UNDERFLOW; |
|
644 |
} finally { |
|
645 |
src.position(sp - src.arrayOffset()); |
|
646 |
dst.position(dp - dst.arrayOffset()); |
|
647 |
} |
|
648 |
} |
|
649 |
||
650 |
private CoderResult encodeBufferLoop(CharBuffer src, |
|
651 |
ByteBuffer dst) |
|
652 |
{ |
|
653 |
int mark = src.position(); |
|
654 |
try { |
|
655 |
while (src.hasRemaining()) { |
|
656 |
char c = src.get(); |
|
657 |
||
658 |
if (c <= '\u007F') { |
|
659 |
if (currentMode != ASCII) { |
|
660 |
if (dst.remaining() < 3) |
|
661 |
return CoderResult.OVERFLOW; |
|
662 |
dst.put((byte)0x1b); |
|
663 |
dst.put((byte)0x28); |
|
664 |
dst.put((byte)0x42); |
|
665 |
currentMode = ASCII; |
|
666 |
} |
|
667 |
if (dst.remaining() < 1) |
|
668 |
return CoderResult.OVERFLOW; |
|
669 |
dst.put((byte)c); |
|
670 |
} else if (c >= 0xff61 && c <= 0xff9f && doSBKANA) { |
|
671 |
//Is it a single byte kana? |
|
672 |
if (currentMode != JISX0201_1976_KANA) { |
|
673 |
if (dst.remaining() < 3) |
|
674 |
return CoderResult.OVERFLOW; |
|
675 |
dst.put((byte)0x1b); |
|
676 |
dst.put((byte)0x28); |
|
677 |
dst.put((byte)0x49); |
|
678 |
currentMode = JISX0201_1976_KANA; |
|
679 |
} |
|
680 |
if (dst.remaining() < 1) |
|
681 |
return CoderResult.OVERFLOW; |
|
682 |
dst.put((byte)(c - 0xff40)); |
|
683 |
} else if (c == '\u00a5' || c == '\u203E') { |
|
684 |
if (currentMode != JISX0201_1976) { |
|
685 |
if (dst.remaining() < 3) |
|
686 |
return CoderResult.OVERFLOW; |
|
687 |
dst.put((byte)0x1b); |
|
688 |
dst.put((byte)0x28); |
|
689 |
dst.put((byte)0x4a); |
|
690 |
currentMode = JISX0201_1976; |
|
691 |
} |
|
692 |
if (dst.remaining() < 1) |
|
693 |
return CoderResult.OVERFLOW; |
|
694 |
dst.put((c == '\u00A5')?(byte)0x5C:(byte)0x7e); |
|
695 |
} else { |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
696 |
int index = enc0208.encodeChar(c); |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
697 |
if (index != UNMAPPABLE_ENCODING) { |
2 | 698 |
if (currentMode != JISX0208_1983) { |
699 |
if (dst.remaining() < 3) |
|
700 |
return CoderResult.OVERFLOW; |
|
701 |
dst.put((byte)0x1b); |
|
702 |
dst.put((byte)0x24); |
|
703 |
dst.put((byte)0x42); |
|
704 |
currentMode = JISX0208_1983; |
|
705 |
} |
|
706 |
if (dst.remaining() < 2) |
|
707 |
return CoderResult.OVERFLOW; |
|
708 |
dst.put((byte)(index >> 8)); |
|
709 |
dst.put((byte)(index & 0xff)); |
|
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
710 |
} else if (enc0212 != null && |
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
711 |
(index = enc0212.encodeChar(c)) != UNMAPPABLE_ENCODING) { |
2 | 712 |
if (currentMode != JISX0212_1990) { |
713 |
if (dst.remaining() < 4) |
|
714 |
return CoderResult.OVERFLOW; |
|
715 |
dst.put((byte)0x1b); |
|
716 |
dst.put((byte)0x24); |
|
717 |
dst.put((byte)0x28); |
|
718 |
dst.put((byte)0x44); |
|
719 |
currentMode = JISX0212_1990; |
|
720 |
} |
|
721 |
if (dst.remaining() < 2) |
|
722 |
return CoderResult.OVERFLOW; |
|
723 |
dst.put((byte)(index >> 8)); |
|
724 |
dst.put((byte)(index & 0xff)); |
|
725 |
} else { |
|
3714 | 726 |
if (Character.isSurrogate(c) && sgp.parse(c, src) < 0) |
2 | 727 |
return sgp.error(); |
728 |
if (unmappableCharacterAction() == CodingErrorAction.REPLACE |
|
729 |
&& currentMode != replaceMode) { |
|
730 |
if (dst.remaining() < 3) |
|
731 |
return CoderResult.OVERFLOW; |
|
732 |
if (replaceMode == ASCII) { |
|
733 |
dst.put((byte)0x1b); |
|
734 |
dst.put((byte)0x28); |
|
735 |
dst.put((byte)0x42); |
|
736 |
} else { |
|
737 |
dst.put((byte)0x1b); |
|
738 |
dst.put((byte)0x24); |
|
739 |
dst.put((byte)0x42); |
|
740 |
} |
|
741 |
currentMode = replaceMode; |
|
742 |
} |
|
3714 | 743 |
if (Character.isSurrogate(c)) |
2 | 744 |
return sgp.unmappableResult(); |
745 |
return CoderResult.unmappableForLength(1); |
|
746 |
} |
|
747 |
} |
|
748 |
mark++; |
|
749 |
} |
|
750 |
return CoderResult.UNDERFLOW; |
|
751 |
} finally { |
|
752 |
src.position(mark); |
|
753 |
} |
|
754 |
} |
|
755 |
||
756 |
protected CoderResult encodeLoop(CharBuffer src, |
|
757 |
ByteBuffer dst) |
|
758 |
{ |
|
759 |
if (src.hasArray() && dst.hasArray()) |
|
760 |
return encodeArrayLoop(src, dst); |
|
761 |
else |
|
762 |
return encodeBufferLoop(src, dst); |
|
763 |
} |
|
764 |
} |
|
765 |
} |