author | mgronlun |
Mon, 09 Sep 2019 13:26:35 +0200 | |
branch | JEP-349-branch |
changeset 58049 | 10ecdb5d3574 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
5506 | 2 |
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
/* |
|
27 |
*/ |
|
28 |
||
29 |
package sun.nio.cs.ext; |
|
30 |
||
31 |
import java.nio.ByteBuffer; |
|
32 |
import java.nio.CharBuffer; |
|
33 |
import java.nio.charset.Charset; |
|
34 |
import java.nio.charset.CharsetDecoder; |
|
35 |
import java.nio.charset.CharsetEncoder; |
|
36 |
import java.nio.charset.CoderResult; |
|
37 |
import java.nio.charset.CharacterCodingException; |
|
28969
f980bee32887
8073152: Update Standard/ExtendedCharsets to work with module system
sherman
parents:
25859
diff
changeset
|
38 |
import sun.nio.cs.DoubleByte; |
2 | 39 |
import sun.nio.cs.HistoricallyNamedCharset; |
40 |
import sun.nio.cs.US_ASCII; |
|
28969
f980bee32887
8073152: Update Standard/ExtendedCharsets to work with module system
sherman
parents:
25859
diff
changeset
|
41 |
import sun.nio.cs.*; |
2 | 42 |
|
43 |
public class ISO2022_CN |
|
44 |
extends Charset |
|
45 |
implements HistoricallyNamedCharset |
|
46 |
{ |
|
47 |
private static final byte ISO_ESC = 0x1b; |
|
48 |
private static final byte ISO_SI = 0x0f; |
|
49 |
private static final byte ISO_SO = 0x0e; |
|
50 |
private static final byte ISO_SS2_7 = 0x4e; |
|
51 |
private static final byte ISO_SS3_7 = 0x4f; |
|
52 |
private static final byte MSB = (byte)0x80; |
|
53 |
private static final char REPLACE_CHAR = '\uFFFD'; |
|
54 |
||
55 |
private static final byte SODesigGB = 0; |
|
56 |
private static final byte SODesigCNS = 1; |
|
57 |
||
58 |
public ISO2022_CN() { |
|
59 |
super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN")); |
|
60 |
} |
|
61 |
||
62 |
public String historicalName() { |
|
63 |
return "ISO2022CN"; |
|
64 |
} |
|
65 |
||
66 |
public boolean contains(Charset cs) { |
|
67 |
return ((cs instanceof EUC_CN) // GB2312-80 repertoire |
|
68 |
|| (cs instanceof US_ASCII) |
|
69 |
|| (cs instanceof EUC_TW) // CNS11643 repertoire |
|
70 |
|| (cs instanceof ISO2022_CN)); |
|
71 |
} |
|
72 |
||
73 |
public CharsetDecoder newDecoder() { |
|
74 |
return new Decoder(this); |
|
75 |
} |
|
76 |
||
77 |
public CharsetEncoder newEncoder() { |
|
78 |
throw new UnsupportedOperationException(); |
|
79 |
} |
|
80 |
||
81 |
public boolean canEncode() { |
|
82 |
return false; |
|
83 |
} |
|
84 |
||
85 |
static class Decoder extends CharsetDecoder { |
|
86 |
private boolean shiftOut; |
|
87 |
private byte currentSODesig; |
|
88 |
||
89 |
private static final Charset gb2312 = new EUC_CN(); |
|
90 |
private static final Charset cns = new EUC_TW(); |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2913
diff
changeset
|
91 |
private final DoubleByte.Decoder gb2312Decoder; |
2 | 92 |
private final EUC_TW.Decoder cnsDecoder; |
93 |
||
94 |
Decoder(Charset cs) { |
|
95 |
super(cs, 1.0f, 1.0f); |
|
96 |
shiftOut = false; |
|
97 |
currentSODesig = SODesigGB; |
|
3052
5c9886498f31
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding
sherman
parents:
2913
diff
changeset
|
98 |
gb2312Decoder = (DoubleByte.Decoder)gb2312.newDecoder(); |
2 | 99 |
cnsDecoder = (EUC_TW.Decoder)cns.newDecoder(); |
100 |
} |
|
101 |
||
102 |
protected void implReset() { |
|
103 |
shiftOut= false; |
|
104 |
currentSODesig = SODesigGB; |
|
105 |
} |
|
106 |
||
107 |
private char cnsDecode(byte byte1, byte byte2, byte SS) { |
|
108 |
byte1 |= MSB; |
|
109 |
byte2 |= MSB; |
|
2913
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
110 |
int p = 0; |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
111 |
if (SS == ISO_SS2_7) |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
112 |
p = 1; //plane 2, index -- 1 |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
113 |
else if (SS == ISO_SS3_7) |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
114 |
p = 2; //plane 3, index -- 2 |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
115 |
else |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
116 |
return REPLACE_CHAR; //never happen. |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
117 |
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
118 |
byte2 & 0xff, |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
119 |
p); |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
120 |
if (ret == null || ret.length == 2) |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
121 |
return REPLACE_CHAR; |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
122 |
return ret[0]; |
2 | 123 |
} |
124 |
||
125 |
private char SODecode(byte byte1, byte byte2, byte SOD) { |
|
126 |
byte1 |= MSB; |
|
127 |
byte2 |= MSB; |
|
128 |
if (SOD == SODesigGB) { |
|
129 |
return gb2312Decoder.decodeDouble(byte1 & 0xff, |
|
130 |
byte2 & 0xff); |
|
131 |
} else { // SOD == SODesigCNS |
|
2913
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
132 |
char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
133 |
byte2 & 0xff, |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
134 |
0); |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
135 |
if (ret == null) |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
136 |
return REPLACE_CHAR; |
39a9cc073b84
6831794: charset EUC_TW is 12.6% of the total size of charsets.jar
sherman
parents:
2
diff
changeset
|
137 |
return ret[0]; |
2 | 138 |
} |
139 |
} |
|
140 |
||
141 |
private CoderResult decodeBufferLoop(ByteBuffer src, |
|
142 |
CharBuffer dst) |
|
143 |
{ |
|
144 |
int mark = src.position(); |
|
145 |
byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|
146 |
int inputSize = 0; |
|
147 |
char c = REPLACE_CHAR; |
|
148 |
try { |
|
149 |
while (src.hasRemaining()) { |
|
150 |
b1 = src.get(); |
|
151 |
inputSize = 1; |
|
152 |
||
153 |
while (b1 == ISO_ESC || |
|
154 |
b1 == ISO_SO || |
|
155 |
b1 == ISO_SI) { |
|
156 |
if (b1 == ISO_ESC) { // ESC |
|
157 |
currentSODesig = SODesigGB; |
|
158 |
||
159 |
if (src.remaining() < 1) |
|
160 |
return CoderResult.UNDERFLOW; |
|
161 |
||
162 |
b2 = src.get(); |
|
163 |
inputSize++; |
|
164 |
||
165 |
if ((b2 & (byte)0x80) != 0) |
|
166 |
return CoderResult.malformedForLength(inputSize); |
|
167 |
||
168 |
if (b2 == (byte)0x24) { |
|
169 |
if (src.remaining() < 1) |
|
170 |
return CoderResult.UNDERFLOW; |
|
171 |
||
172 |
b3 = src.get(); |
|
173 |
inputSize++; |
|
174 |
||
175 |
if ((b3 & (byte)0x80) != 0) |
|
176 |
return CoderResult.malformedForLength(inputSize); |
|
177 |
if (b3 == 'A'){ // "$A" |
|
178 |
currentSODesig = SODesigGB; |
|
179 |
} else if (b3 == ')') { |
|
180 |
if (src.remaining() < 1) |
|
181 |
return CoderResult.UNDERFLOW; |
|
182 |
b4 = src.get(); |
|
183 |
inputSize++; |
|
184 |
if (b4 == 'A'){ // "$)A" |
|
185 |
currentSODesig = SODesigGB; |
|
186 |
} else if (b4 == 'G'){ // "$)G" |
|
187 |
currentSODesig = SODesigCNS; |
|
188 |
} else { |
|
189 |
return CoderResult.malformedForLength(inputSize); |
|
190 |
} |
|
191 |
} else if (b3 == '*') { |
|
192 |
if (src.remaining() < 1) |
|
193 |
return CoderResult.UNDERFLOW; |
|
194 |
b4 = src.get(); |
|
195 |
inputSize++; |
|
196 |
if (b4 != 'H') { // "$*H" |
|
197 |
//SS2Desig -> CNS-P1 |
|
198 |
return CoderResult.malformedForLength(inputSize); |
|
199 |
} |
|
200 |
} else if (b3 == '+') { |
|
201 |
if (src.remaining() < 1) |
|
202 |
return CoderResult.UNDERFLOW; |
|
203 |
b4 = src.get(); |
|
204 |
inputSize++; |
|
205 |
if (b4 != 'I'){ // "$+I" |
|
206 |
//SS3Desig -> CNS-P2. |
|
207 |
return CoderResult.malformedForLength(inputSize); |
|
208 |
} |
|
209 |
} else { |
|
210 |
return CoderResult.malformedForLength(inputSize); |
|
211 |
} |
|
212 |
} else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { |
|
213 |
if (src.remaining() < 2) |
|
214 |
return CoderResult.UNDERFLOW; |
|
215 |
b3 = src.get(); |
|
216 |
b4 = src.get(); |
|
217 |
inputSize += 2; |
|
218 |
if (dst.remaining() < 1) |
|
219 |
return CoderResult.OVERFLOW; |
|
220 |
//SS2->CNS-P2, SS3->CNS-P3 |
|
221 |
c = cnsDecode(b3, b4, b2); |
|
222 |
if (c == REPLACE_CHAR) |
|
223 |
return CoderResult.unmappableForLength(inputSize); |
|
224 |
dst.put(c); |
|
225 |
} else { |
|
226 |
return CoderResult.malformedForLength(inputSize); |
|
227 |
} |
|
228 |
} else if (b1 == ISO_SO) { |
|
229 |
shiftOut = true; |
|
230 |
} else if (b1 == ISO_SI) { // shift back in |
|
231 |
shiftOut = false; |
|
232 |
} |
|
233 |
mark += inputSize; |
|
234 |
if (src.remaining() < 1) |
|
235 |
return CoderResult.UNDERFLOW; |
|
236 |
b1 = src.get(); |
|
237 |
inputSize = 1; |
|
238 |
} |
|
239 |
||
240 |
if (dst.remaining() < 1) |
|
241 |
return CoderResult.OVERFLOW; |
|
242 |
||
243 |
if (!shiftOut) { |
|
244 |
dst.put((char)(b1 & 0xff)); //clear the upper byte |
|
245 |
mark += inputSize; |
|
246 |
} else { |
|
247 |
if (src.remaining() < 1) |
|
248 |
return CoderResult.UNDERFLOW; |
|
249 |
b2 = src.get(); |
|
250 |
inputSize++; |
|
251 |
c = SODecode(b1, b2, currentSODesig); |
|
252 |
if (c == REPLACE_CHAR) |
|
253 |
return CoderResult.unmappableForLength(inputSize); |
|
254 |
dst.put(c); |
|
255 |
mark += inputSize; |
|
256 |
} |
|
257 |
} |
|
258 |
return CoderResult.UNDERFLOW; |
|
259 |
} finally { |
|
260 |
src.position(mark); |
|
261 |
} |
|
262 |
} |
|
263 |
||
264 |
private CoderResult decodeArrayLoop(ByteBuffer src, |
|
265 |
CharBuffer dst) |
|
266 |
{ |
|
267 |
int inputSize = 0; |
|
268 |
byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; |
|
269 |
char c = REPLACE_CHAR; |
|
270 |
||
271 |
byte[] sa = src.array(); |
|
272 |
int sp = src.arrayOffset() + src.position(); |
|
273 |
int sl = src.arrayOffset() + src.limit(); |
|
274 |
assert (sp <= sl); |
|
275 |
sp = (sp <= sl ? sp : sl); |
|
276 |
||
277 |
char[] da = dst.array(); |
|
278 |
int dp = dst.arrayOffset() + dst.position(); |
|
279 |
int dl = dst.arrayOffset() + dst.limit(); |
|
280 |
assert (dp <= dl); |
|
281 |
dp = (dp <= dl ? dp : dl); |
|
282 |
||
283 |
try { |
|
284 |
while (sp < sl) { |
|
285 |
b1 = sa[sp]; |
|
286 |
inputSize = 1; |
|
287 |
||
288 |
while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) { |
|
289 |
if (b1 == ISO_ESC) { // ESC |
|
290 |
currentSODesig = SODesigGB; |
|
291 |
||
292 |
if (sp + 2 > sl) |
|
293 |
return CoderResult.UNDERFLOW; |
|
294 |
||
295 |
b2 = sa[sp + 1]; |
|
296 |
inputSize++; |
|
297 |
||
298 |
if ((b2 & (byte)0x80) != 0) |
|
299 |
return CoderResult.malformedForLength(inputSize); |
|
300 |
if (b2 == (byte)0x24) { |
|
301 |
if (sp + 3 > sl) |
|
302 |
return CoderResult.UNDERFLOW; |
|
303 |
||
304 |
b3 = sa[sp + 2]; |
|
305 |
inputSize++; |
|
306 |
||
307 |
if ((b3 & (byte)0x80) != 0) |
|
308 |
return CoderResult.malformedForLength(inputSize); |
|
309 |
if (b3 == 'A'){ // "$A" |
|
310 |
/* <ESC>$A is not a legal designator sequence for |
|
311 |
ISO2022_CN, it is listed as an escape sequence |
|
312 |
for GB2312 in ISO2022-JP-2. Keep it here just for |
|
313 |
the sake of "compatibility". |
|
314 |
*/ |
|
315 |
currentSODesig = SODesigGB; |
|
316 |
} else if (b3 == ')') { |
|
317 |
if (sp + 4 > sl) |
|
318 |
return CoderResult.UNDERFLOW; |
|
319 |
b4 = sa[sp + 3]; |
|
320 |
inputSize++; |
|
321 |
||
322 |
if (b4 == 'A'){ // "$)A" |
|
323 |
currentSODesig = SODesigGB; |
|
324 |
} else if (b4 == 'G'){ // "$)G" |
|
325 |
currentSODesig = SODesigCNS; |
|
326 |
} else { |
|
327 |
return CoderResult.malformedForLength(inputSize); |
|
328 |
} |
|
329 |
} else if (b3 == '*') { |
|
330 |
if (sp + 4 > sl) |
|
331 |
return CoderResult.UNDERFLOW; |
|
332 |
b4 = sa[sp + 3]; |
|
333 |
inputSize++; |
|
334 |
if (b4 != 'H'){ // "$*H" |
|
335 |
return CoderResult.malformedForLength(inputSize); |
|
336 |
} |
|
337 |
} else if (b3 == '+') { |
|
338 |
if (sp + 4 > sl) |
|
339 |
return CoderResult.UNDERFLOW; |
|
340 |
b4 = sa[sp + 3]; |
|
341 |
inputSize++; |
|
342 |
if (b4 != 'I'){ // "$+I" |
|
343 |
return CoderResult.malformedForLength(inputSize); |
|
344 |
} |
|
345 |
} else { |
|
346 |
return CoderResult.malformedForLength(inputSize); |
|
347 |
} |
|
348 |
} else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { |
|
349 |
if (sp + 4 > sl) { |
|
350 |
return CoderResult.UNDERFLOW; |
|
351 |
} |
|
352 |
b3 = sa[sp + 2]; |
|
353 |
b4 = sa[sp + 3]; |
|
354 |
if (dl - dp < 1) { |
|
355 |
return CoderResult.OVERFLOW; |
|
356 |
} |
|
357 |
inputSize += 2; |
|
358 |
c = cnsDecode(b3, b4, b2); |
|
359 |
if (c == REPLACE_CHAR) |
|
360 |
return CoderResult.unmappableForLength(inputSize); |
|
361 |
da[dp++] = c; |
|
362 |
} else { |
|
363 |
return CoderResult.malformedForLength(inputSize); |
|
364 |
} |
|
365 |
} else if (b1 == ISO_SO) { |
|
366 |
shiftOut = true; |
|
367 |
} else if (b1 == ISO_SI) { // shift back in |
|
368 |
shiftOut = false; |
|
369 |
} |
|
370 |
sp += inputSize; |
|
371 |
if (sp + 1 > sl) |
|
372 |
return CoderResult.UNDERFLOW; |
|
373 |
b1 = sa[sp]; |
|
374 |
inputSize = 1; |
|
375 |
} |
|
376 |
||
377 |
if (dl - dp < 1) { |
|
378 |
return CoderResult.OVERFLOW; |
|
379 |
} |
|
380 |
||
381 |
if (!shiftOut) { |
|
382 |
da[dp++] = (char)(b1 & 0xff); //clear the upper byte |
|
383 |
} else { |
|
384 |
if (sp + 2 > sl) |
|
385 |
return CoderResult.UNDERFLOW; |
|
386 |
b2 = sa[sp + 1]; |
|
387 |
inputSize++; |
|
388 |
c = SODecode(b1, b2, currentSODesig); |
|
389 |
if (c == REPLACE_CHAR) |
|
390 |
return CoderResult.unmappableForLength(inputSize); |
|
391 |
da[dp++] = c; |
|
392 |
} |
|
393 |
sp += inputSize; |
|
394 |
} |
|
395 |
return CoderResult.UNDERFLOW; |
|
396 |
} finally { |
|
397 |
src.position(sp - src.arrayOffset()); |
|
398 |
dst.position(dp - dst.arrayOffset()); |
|
399 |
} |
|
400 |
} |
|
401 |
||
402 |
protected CoderResult decodeLoop(ByteBuffer src, |
|
403 |
CharBuffer dst) |
|
404 |
{ |
|
405 |
if (src.hasArray() && dst.hasArray()) |
|
406 |
return decodeArrayLoop(src, dst); |
|
407 |
else |
|
408 |
return decodeBufferLoop(src, dst); |
|
409 |
} |
|
410 |
} |
|
411 |
} |