/*
 * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
25 |
|
26 /* |
|
27 */ |
|
28 |
|
29 package sun.io; |
|
30 |
|
31 import sun.nio.cs.ext.GB18030; |
|
32 |
|
33 public class ByteToCharGB18030 extends ByteToCharGB18030DB { |
|
34 |
|
35 private static final int GB18030_SINGLE_BYTE = 1; |
|
36 private static final int GB18030_DOUBLE_BYTE = 2; |
|
37 private static final int GB18030_FOUR_BYTE = 3; |
|
38 private static short[] decoderIndex1; |
|
39 private static String[] decoderIndex2; |
|
40 |
|
41 private int currentState; |
|
42 private int savedSize; |
|
43 private byte[] savedBytes; |
|
44 |
|
45 public ByteToCharGB18030() { |
|
46 super(); |
|
47 GB18030 nioCoder = new GB18030(); |
|
48 savedBytes = new byte[3]; |
|
49 currentState = GB18030_DOUBLE_BYTE; |
|
50 decoderIndex1 = nioCoder.getDecoderIndex1(); |
|
51 decoderIndex2 = nioCoder.getDecoderIndex2(); |
|
52 savedSize = 0; |
|
53 } |
|
54 |
|
55 public short[] getOuter() { |
|
56 return(index1); |
|
57 } |
|
58 |
|
59 public String[] getInner() { |
|
60 return(index2); |
|
61 } |
|
62 |
|
63 public short[] getDBIndex1() { |
|
64 return(super.index1); |
|
65 } |
|
66 |
|
67 public String[] getDBIndex2() { |
|
68 return(super.index2); |
|
69 } |
|
70 |
|
71 public int flush(char [] output, int outStart, int outEnd) |
|
72 throws MalformedInputException |
|
73 { |
|
74 if (savedSize != 0) { |
|
75 savedSize = 0; |
|
76 currentState = GB18030_DOUBLE_BYTE; |
|
77 badInputLength = 0; |
|
78 throw new MalformedInputException(); |
|
79 } |
|
80 byteOff = charOff = 0; |
|
81 return 0; |
|
82 } |
|
83 |
|
84 |
|
85 /** |
|
86 * Character conversion |
|
87 */ |
|
88 public int convert(byte[] input, int inOff, int inEnd, |
|
89 char[] output, int outOff, int outEnd) |
|
90 throws UnknownCharacterException, MalformedInputException, |
|
91 ConversionBufferFullException |
|
92 { |
|
93 int inputSize = 0; |
|
94 char outputChar = '\uFFFD'; |
|
95 // readOff keeps the actual buffer's pointer. |
|
96 // byteOff keeps original buffer's pointer. |
|
97 int readOff = byteOff = inOff; |
|
98 |
|
99 if (savedSize != 0) { |
|
100 // Filter illegal bytes when they are detected in saved |
|
101 // partial input from a previous conversion attempt. |
|
102 if (((savedBytes[0] & 0xFF) < 0x81 || savedBytes[0] > 0xFE) || |
|
103 (savedSize > 1 && |
|
104 (savedBytes[1] & 0xFF) < 0x30 ) || |
|
105 (savedSize > 2 && |
|
106 ((savedBytes[2] & 0xFF) < 0x81 || |
|
107 (savedBytes[2] & 0xFF) > 0xFE ))) { |
|
108 badInputLength = 0; |
|
109 throw new MalformedInputException(); |
|
110 } |
|
111 |
|
112 byte[] newBuf = new byte[inEnd - inOff + savedSize]; |
|
113 for (int i = 0; i < savedSize; i++) { |
|
114 newBuf[i] = savedBytes[i]; |
|
115 } |
|
116 System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff); |
|
117 byteOff -= savedSize; |
|
118 input = newBuf; |
|
119 inOff = 0; |
|
120 inEnd = newBuf.length; |
|
121 savedSize = 0; |
|
122 } |
|
123 |
|
124 charOff = outOff; |
|
125 readOff = inOff; |
|
126 |
|
127 while(readOff < inEnd) { |
|
128 int byte1 = 0 , byte2 = 0, byte3 = 0, byte4 = 0; |
|
129 |
|
130 // Is there room in the output buffer for the result? |
|
131 if (charOff >= outEnd) { |
|
132 throw new ConversionBufferFullException(); |
|
133 } |
|
134 |
|
135 // Get the input byte |
|
136 byte1 = input[readOff++] & 0xFF; |
|
137 inputSize = 1; |
|
138 |
|
139 if ((byte1 & (byte)0x80) == 0){ // US-ASCII range |
|
140 outputChar = (char)byte1; |
|
141 currentState = GB18030_SINGLE_BYTE; |
|
142 } |
|
143 |
|
144 else if (byte1 < 0x81 || byte1 > 0xfe) { |
|
145 if (subMode) |
|
146 outputChar = subChars[0]; |
|
147 else { |
|
148 badInputLength = 1; |
|
149 throw new UnknownCharacterException(); |
|
150 } |
|
151 } |
|
152 else { |
|
153 // Either 2 or 4 byte sequence follows |
|
154 // If an underrun is detected save for later |
|
155 // replay. |
|
156 |
|
157 if (readOff + inputSize > inEnd) { |
|
158 savedBytes[0]=(byte)byte1; |
|
159 savedSize = 1; |
|
160 break; |
|
161 } |
|
162 |
|
163 byte2 = input[readOff++] & 0xFF; |
|
164 inputSize = 2; |
|
165 |
|
166 if (byte2 < 0x30) { |
|
167 badInputLength = 1; |
|
168 throw new MalformedInputException(); |
|
169 } |
|
170 else if (byte2 >= 0x30 && byte2 <= 0x39) { |
|
171 currentState = GB18030_FOUR_BYTE; |
|
172 inputSize = 4; |
|
173 |
|
174 if (readOff + 2 > inEnd) { |
|
175 if (readOff + 1 > inEnd) { |
|
176 savedBytes[0] = (byte)byte1; |
|
177 savedBytes[1] = (byte)byte2; |
|
178 savedSize = 2; |
|
179 } |
|
180 else { |
|
181 savedBytes[0] = (byte)byte1; |
|
182 savedBytes[1] = (byte)byte2; |
|
183 savedBytes[2] = input[readOff++]; |
|
184 savedSize = 3; |
|
185 } |
|
186 break; |
|
187 } |
|
188 byte3 = input[readOff++] & 0xFF; |
|
189 if (byte3 < 0x81 || byte3 > 0xfe) { |
|
190 badInputLength = 3; |
|
191 throw new MalformedInputException(); |
|
192 } |
|
193 |
|
194 byte4 = input[readOff++] & 0xFF; |
|
195 |
|
196 if (byte4 < 0x30 || byte4 > 0x39) { |
|
197 badInputLength = 4; |
|
198 throw new MalformedInputException(); |
|
199 } |
|
200 } |
|
201 else if (byte2 == 0x7f || byte2 == 0xff || |
|
202 (byte2 < 0x40 )) { |
|
203 badInputLength = 2; |
|
204 throw new MalformedInputException(); |
|
205 } |
|
206 else |
|
207 currentState = GB18030_DOUBLE_BYTE; |
|
208 } |
|
209 |
|
210 switch (currentState){ |
|
211 case GB18030_SINGLE_BYTE: |
|
212 output[charOff++] = (char)(byte1); |
|
213 break; |
|
214 case GB18030_DOUBLE_BYTE: |
|
215 output[charOff++] = super.getUnicode(byte1, byte2); |
|
216 break; |
|
217 case GB18030_FOUR_BYTE: |
|
218 int offset = (((byte1 - 0x81) * 10 + |
|
219 (byte2 - 0x30)) * 126 + |
|
220 byte3 - 0x81) * 10 + byte4 - 0x30; |
|
221 int hiByte = (offset >>8) & 0xFF; |
|
222 int lowByte = (offset & 0xFF); |
|
223 |
|
224 // Mixture of table lookups and algorithmic calculation |
|
225 // of character values. |
|
226 |
|
227 // BMP Ranges |
|
228 |
|
229 if (offset <= 0x4A62) |
|
230 output[charOff++] = getChar(offset); |
|
231 else if (offset > 0x4A62 && offset <= 0x82BC) |
|
232 output[charOff++] = (char) (offset + 0x5543); |
|
233 else if (offset >= 0x82BD && offset <= 0x830D) |
|
234 output[charOff++] = getChar(offset); |
|
235 else if (offset >= 0x830D && offset <= 0x93A8) |
|
236 output[charOff++] = (char) (offset + 0x6557); |
|
237 else if (offset >= 0x93A9 && offset <= 0x99FB) |
|
238 output[charOff++] = getChar(offset); |
|
239 // Supplemental UCS planes handled via surrogates |
|
240 else if (offset >= 0x2E248 && offset < 0x12E248) { |
|
241 if (offset >= 0x12E248) { |
|
242 if (subMode) |
|
243 return subChars[0]; |
|
244 else { |
|
245 badInputLength = 4; |
|
246 throw new UnknownCharacterException(); |
|
247 } |
|
248 } |
|
249 |
|
250 if (charOff +2 > outEnd) { |
|
251 throw new ConversionBufferFullException(); |
|
252 } |
|
253 offset -= 0x1e248; |
|
254 char highSurr = (char) ((offset - 0x10000) / 0x400 + 0xD800); |
|
255 char lowSurr = (char) ((offset - 0x10000) % 0x400 + 0xDC00); |
|
256 output[charOff++] = highSurr; |
|
257 output[charOff++] = lowSurr; |
|
258 } |
|
259 else { |
|
260 badInputLength = 4; |
|
261 throw new MalformedInputException(); |
|
262 } |
|
263 break; |
|
264 } |
|
265 byteOff += inputSize; |
|
266 } |
|
267 |
|
268 byteOff += savedSize; |
|
269 return charOff - outOff; |
|
270 } |
|
271 |
|
272 public void reset() { |
|
273 byteOff = charOff = 0; |
|
274 currentState = GB18030_DOUBLE_BYTE; |
|
275 savedSize = 0; |
|
276 } |
|
277 |
|
278 public String getCharacterEncoding() { |
|
279 return "GB18030"; |
|
280 } |
|
281 |
|
282 private char getChar(int offset) throws UnknownCharacterException { |
|
283 int byte1 = (offset >>8) & 0xFF; |
|
284 int byte2 = (offset & 0xFF); |
|
285 int start = 0, end = 0xFF; |
|
286 |
|
287 if (((byte1 < 0) || (byte1 > getOuter().length)) |
|
288 || ((byte2 < start) || (byte2 > end))) { |
|
289 if (subMode) |
|
290 return subChars[0]; |
|
291 else { |
|
292 badInputLength = 1; |
|
293 throw new UnknownCharacterException(); |
|
294 } |
|
295 } |
|
296 |
|
297 int n = (decoderIndex1[byte1] & 0xf) * (end - start + 1) + (byte2 - start); |
|
298 return decoderIndex2[decoderIndex1[byte1] >> 4].charAt(n); |
|
299 } |
|
300 } |
|