1 /* |
|
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 package sun.io; |
|
26 |
|
27 import java.io.UnsupportedEncodingException; |
|
28 import sun.nio.cs.ext.JISAutoDetect; |
|
29 |
|
30 public class ByteToCharJISAutoDetect extends ByteToCharConverter { |
|
31 |
|
32 private final static int EUCJP_MASK = 0x01; |
|
33 private final static int SJIS2B_MASK = 0x02; |
|
34 private final static int SJIS1B_MASK = 0x04; |
|
35 private final static int EUCJP_KANA1_MASK = 0x08; |
|
36 private final static int EUCJP_KANA2_MASK = 0x10; |
|
37 private final static byte[] maskTable1 = JISAutoDetect.getByteMask1(); |
|
38 private final static byte[] maskTable2 = JISAutoDetect.getByteMask2(); |
|
39 |
|
40 private final static int SS2 = 0x8e; |
|
41 private final static int SS3 = 0x8f; |
|
42 |
|
43 // SJISName is set to either "SJIS" or "MS932" |
|
44 private String SJISName; |
|
45 private String EUCJPName; |
|
46 |
|
47 private String convName = null; |
|
48 private ByteToCharConverter detectedConv = null; |
|
49 private ByteToCharConverter defaultConv = null; |
|
50 |
|
51 public ByteToCharJISAutoDetect() { |
|
52 super(); |
|
53 SJISName = CharacterEncoding.getSJISName(); |
|
54 EUCJPName = CharacterEncoding.getEUCJPName(); |
|
55 defaultConv = new ByteToCharISO8859_1(); |
|
56 defaultConv.subChars = subChars; |
|
57 defaultConv.subMode = subMode; |
|
58 } |
|
59 |
|
60 public int flush(char [] output, int outStart, int outEnd) |
|
61 throws MalformedInputException, ConversionBufferFullException |
|
62 { |
|
63 badInputLength = 0; |
|
64 if(detectedConv != null) |
|
65 return detectedConv.flush(output, outStart, outEnd); |
|
66 else |
|
67 return defaultConv.flush(output, outStart, outEnd); |
|
68 } |
|
69 |
|
70 |
|
71 /** |
|
72 * Character conversion |
|
73 */ |
|
74 public int convert(byte[] input, int inOff, int inEnd, |
|
75 char[] output, int outOff, int outEnd) |
|
76 throws UnknownCharacterException, MalformedInputException, |
|
77 ConversionBufferFullException |
|
78 { |
|
79 int num = 0; |
|
80 |
|
81 charOff = outOff; |
|
82 byteOff = inOff; |
|
83 |
|
84 try { |
|
85 if (detectedConv == null) { |
|
86 int euckana = 0; |
|
87 int ss2count = 0; |
|
88 int firstmask = 0; |
|
89 int secondmask = 0; |
|
90 int cnt; |
|
91 boolean nonAsciiFound = false; |
|
92 |
|
93 for (cnt = inOff; cnt < inEnd; cnt++) { |
|
94 firstmask = 0; |
|
95 secondmask = 0; |
|
96 int byte1 = input[cnt]&0xff; |
|
97 int byte2; |
|
98 |
|
99 // TODO: should check valid escape sequences! |
|
100 if (byte1 == 0x1b) { |
|
101 convName = "ISO2022JP"; |
|
102 break; |
|
103 } |
|
104 |
|
105 // Try to convert all leading ASCII characters. |
|
106 if ((nonAsciiFound == false) && (byte1 < 0x80)) { |
|
107 if (charOff >= outEnd) |
|
108 throw new ConversionBufferFullException(); |
|
109 output[charOff++] = (char) byte1; |
|
110 byteOff++; |
|
111 num++; |
|
112 continue; |
|
113 } |
|
114 |
|
115 // We can no longer convert ASCII. |
|
116 nonAsciiFound = true; |
|
117 |
|
118 firstmask = maskTable1[byte1]; |
|
119 if (byte1 == SS2) |
|
120 ss2count++; |
|
121 |
|
122 if (firstmask != 0) { |
|
123 if (cnt+1 < inEnd) { |
|
124 byte2 = input[++cnt] & 0xff; |
|
125 secondmask = maskTable2[byte2]; |
|
126 int mask = firstmask & secondmask; |
|
127 if (mask == EUCJP_MASK) { |
|
128 convName = EUCJPName; |
|
129 break; |
|
130 } |
|
131 if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK) |
|
132 || (JISAutoDetect.canBeSJIS1B(firstmask) && secondmask == 0)) { |
|
133 convName = SJISName; |
|
134 break; |
|
135 } |
|
136 |
|
137 // If the first byte is a SS3 and the third byte |
|
138 // is not an EUC byte, it should be SJIS. |
|
139 // Otherwise, we can't determine it yet, but it's |
|
140 // very likely SJIS. So we don't take the EUCJP CS3 |
|
141 // character boundary. If we tried both |
|
142 // possibilities here, it might be able to be |
|
143 // determined correctly. |
|
144 if ((byte1 == SS3) && JISAutoDetect.canBeEUCJP(secondmask)) { |
|
145 if (cnt+1 < inEnd) { |
|
146 int nextbyte = input[cnt+1] & 0xff; |
|
147 if (! JISAutoDetect.canBeEUCJP(maskTable2[nextbyte])) |
|
148 convName = SJISName; |
|
149 } else |
|
150 convName = SJISName; |
|
151 } |
|
152 if (JISAutoDetect.canBeEUCKana(firstmask, secondmask)) |
|
153 euckana++; |
|
154 } else { |
|
155 if ((firstmask & SJIS1B_MASK) != 0) { |
|
156 convName = SJISName; |
|
157 break; |
|
158 } |
|
159 } |
|
160 } |
|
161 } |
|
162 |
|
163 if (nonAsciiFound && (convName == null)) { |
|
164 if ((euckana > 1) || (ss2count > 1)) |
|
165 convName = EUCJPName; |
|
166 else |
|
167 convName = SJISName; |
|
168 } |
|
169 |
|
170 if (convName != null) { |
|
171 try { |
|
172 detectedConv = ByteToCharConverter.getConverter(convName); |
|
173 detectedConv.subChars = subChars; |
|
174 detectedConv.subMode = subMode; |
|
175 } catch (UnsupportedEncodingException e){ |
|
176 detectedConv = null; |
|
177 convName = null; |
|
178 } |
|
179 } |
|
180 } |
|
181 } catch (ConversionBufferFullException bufferFullException) { |
|
182 throw bufferFullException; |
|
183 } catch (Exception e) { |
|
184 // If we fail to detect the converter needed for any reason, |
|
185 // use the default converter. |
|
186 detectedConv = defaultConv; |
|
187 } |
|
188 |
|
189 // If we've converted all ASCII characters, then return. |
|
190 if (byteOff == inEnd) { |
|
191 return num; |
|
192 } |
|
193 |
|
194 if(detectedConv != null) { |
|
195 try { |
|
196 num += detectedConv.convert(input, inOff + num, inEnd, |
|
197 output, outOff + num, outEnd); |
|
198 } finally { |
|
199 charOff = detectedConv.nextCharIndex(); |
|
200 byteOff = detectedConv.nextByteIndex(); |
|
201 badInputLength = detectedConv.badInputLength; |
|
202 } |
|
203 } else { |
|
204 try { |
|
205 num += defaultConv.convert(input, inOff + num, inEnd, |
|
206 output, outOff + num, outEnd); |
|
207 } finally { |
|
208 charOff = defaultConv.nextCharIndex(); |
|
209 byteOff = defaultConv.nextByteIndex(); |
|
210 badInputLength = defaultConv.badInputLength; |
|
211 } |
|
212 } |
|
213 return num; |
|
214 } |
|
215 |
|
216 public void reset() { |
|
217 if(detectedConv != null) { |
|
218 detectedConv.reset(); |
|
219 detectedConv = null; |
|
220 convName = null; |
|
221 } else |
|
222 defaultConv.reset(); |
|
223 charOff = byteOff = 0; |
|
224 } |
|
225 |
|
226 public String getCharacterEncoding() { |
|
227 return "JISAutoDetect"; |
|
228 } |
|
229 |
|
230 public String toString() { |
|
231 String s = getCharacterEncoding(); |
|
232 if (detectedConv != null) { |
|
233 s += "[" + detectedConv.getCharacterEncoding() + "]"; |
|
234 } else { |
|
235 s += "[unknown]"; |
|
236 } |
|
237 return s; |
|
238 } |
|
239 } |
|