author | ohair |
Tue, 28 Dec 2010 15:53:50 -0800 | |
changeset 7668 | d4a77089c587 |
parent 5785 | 5dfabe612d10 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
7668 | 2 |
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
package sun.io; |
|
26 |
||
27 |
import java.io.UnsupportedEncodingException; |
|
28 |
import sun.nio.cs.ext.JISAutoDetect; |
|
29 |
||
30 |
public class ByteToCharJISAutoDetect extends ByteToCharConverter { |
|
31 |
||
32 |
private final static int EUCJP_MASK = 0x01; |
|
33 |
private final static int SJIS2B_MASK = 0x02; |
|
34 |
private final static int SJIS1B_MASK = 0x04; |
|
35 |
private final static int EUCJP_KANA1_MASK = 0x08; |
|
36 |
private final static int EUCJP_KANA2_MASK = 0x10; |
|
5785
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
37 |
private final static byte[] maskTable1 = JISAutoDetect.getByteMask1(); |
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
38 |
private final static byte[] maskTable2 = JISAutoDetect.getByteMask2(); |
2 | 39 |
|
40 |
private final static int SS2 = 0x8e; |
|
41 |
private final static int SS3 = 0x8f; |
|
42 |
||
43 |
// SJISName is set to either "SJIS" or "MS932" |
|
44 |
private String SJISName; |
|
45 |
private String EUCJPName; |
|
46 |
||
47 |
private String convName = null; |
|
48 |
private ByteToCharConverter detectedConv = null; |
|
49 |
private ByteToCharConverter defaultConv = null; |
|
50 |
||
51 |
public ByteToCharJISAutoDetect() { |
|
52 |
super(); |
|
53 |
SJISName = CharacterEncoding.getSJISName(); |
|
54 |
EUCJPName = CharacterEncoding.getEUCJPName(); |
|
55 |
defaultConv = new ByteToCharISO8859_1(); |
|
56 |
defaultConv.subChars = subChars; |
|
57 |
defaultConv.subMode = subMode; |
|
58 |
} |
|
59 |
||
60 |
public int flush(char [] output, int outStart, int outEnd) |
|
61 |
throws MalformedInputException, ConversionBufferFullException |
|
62 |
{ |
|
63 |
badInputLength = 0; |
|
64 |
if(detectedConv != null) |
|
65 |
return detectedConv.flush(output, outStart, outEnd); |
|
66 |
else |
|
67 |
return defaultConv.flush(output, outStart, outEnd); |
|
68 |
} |
|
69 |
||
70 |
||
71 |
/** |
|
72 |
* Character conversion |
|
73 |
*/ |
|
74 |
public int convert(byte[] input, int inOff, int inEnd, |
|
75 |
char[] output, int outOff, int outEnd) |
|
76 |
throws UnknownCharacterException, MalformedInputException, |
|
77 |
ConversionBufferFullException |
|
78 |
{ |
|
79 |
int num = 0; |
|
80 |
||
81 |
charOff = outOff; |
|
82 |
byteOff = inOff; |
|
83 |
||
84 |
try { |
|
85 |
if (detectedConv == null) { |
|
86 |
int euckana = 0; |
|
87 |
int ss2count = 0; |
|
88 |
int firstmask = 0; |
|
89 |
int secondmask = 0; |
|
90 |
int cnt; |
|
91 |
boolean nonAsciiFound = false; |
|
92 |
||
93 |
for (cnt = inOff; cnt < inEnd; cnt++) { |
|
94 |
firstmask = 0; |
|
95 |
secondmask = 0; |
|
96 |
int byte1 = input[cnt]&0xff; |
|
97 |
int byte2; |
|
98 |
||
99 |
// TODO: should check valid escape sequences! |
|
100 |
if (byte1 == 0x1b) { |
|
101 |
convName = "ISO2022JP"; |
|
102 |
break; |
|
103 |
} |
|
104 |
||
105 |
// Try to convert all leading ASCII characters. |
|
106 |
if ((nonAsciiFound == false) && (byte1 < 0x80)) { |
|
107 |
if (charOff >= outEnd) |
|
108 |
throw new ConversionBufferFullException(); |
|
109 |
output[charOff++] = (char) byte1; |
|
110 |
byteOff++; |
|
111 |
num++; |
|
112 |
continue; |
|
113 |
} |
|
114 |
||
115 |
// We can no longer convert ASCII. |
|
116 |
nonAsciiFound = true; |
|
117 |
||
118 |
firstmask = maskTable1[byte1]; |
|
119 |
if (byte1 == SS2) |
|
120 |
ss2count++; |
|
121 |
||
122 |
if (firstmask != 0) { |
|
123 |
if (cnt+1 < inEnd) { |
|
124 |
byte2 = input[++cnt] & 0xff; |
|
125 |
secondmask = maskTable2[byte2]; |
|
126 |
int mask = firstmask & secondmask; |
|
127 |
if (mask == EUCJP_MASK) { |
|
128 |
convName = EUCJPName; |
|
129 |
break; |
|
130 |
} |
|
131 |
if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK) |
|
5785
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
132 |
|| (JISAutoDetect.canBeSJIS1B(firstmask) && secondmask == 0)) { |
2 | 133 |
convName = SJISName; |
134 |
break; |
|
135 |
} |
|
136 |
||
137 |
// If the first byte is a SS3 and the third byte |
|
138 |
// is not an EUC byte, it should be SJIS. |
|
139 |
// Otherwise, we can't determine it yet, but it's |
|
140 |
// very likely SJIS. So we don't take the EUCJP CS3 |
|
141 |
// character boundary. If we tried both |
|
142 |
// possibilities here, it might be able to be |
|
143 |
// determined correctly. |
|
5785
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
144 |
if ((byte1 == SS3) && JISAutoDetect.canBeEUCJP(secondmask)) { |
2 | 145 |
if (cnt+1 < inEnd) { |
146 |
int nextbyte = input[cnt+1] & 0xff; |
|
5785
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
147 |
if (! JISAutoDetect.canBeEUCJP(maskTable2[nextbyte])) |
2 | 148 |
convName = SJISName; |
149 |
} else |
|
150 |
convName = SJISName; |
|
151 |
} |
|
5785
5dfabe612d10
6959197: When building with JAVAC_MAX_WARNINGS=true, the build fails in sun/nio/cs due to the use of -Werror
andrew
parents:
5506
diff
changeset
|
152 |
if (JISAutoDetect.canBeEUCKana(firstmask, secondmask)) |
2 | 153 |
euckana++; |
154 |
} else { |
|
155 |
if ((firstmask & SJIS1B_MASK) != 0) { |
|
156 |
convName = SJISName; |
|
157 |
break; |
|
158 |
} |
|
159 |
} |
|
160 |
} |
|
161 |
} |
|
162 |
||
163 |
if (nonAsciiFound && (convName == null)) { |
|
164 |
if ((euckana > 1) || (ss2count > 1)) |
|
165 |
convName = EUCJPName; |
|
166 |
else |
|
167 |
convName = SJISName; |
|
168 |
} |
|
169 |
||
170 |
if (convName != null) { |
|
171 |
try { |
|
172 |
detectedConv = ByteToCharConverter.getConverter(convName); |
|
173 |
detectedConv.subChars = subChars; |
|
174 |
detectedConv.subMode = subMode; |
|
175 |
} catch (UnsupportedEncodingException e){ |
|
176 |
detectedConv = null; |
|
177 |
convName = null; |
|
178 |
} |
|
179 |
} |
|
180 |
} |
|
181 |
} catch (ConversionBufferFullException bufferFullException) { |
|
182 |
throw bufferFullException; |
|
183 |
} catch (Exception e) { |
|
184 |
// If we fail to detect the converter needed for any reason, |
|
185 |
// use the default converter. |
|
186 |
detectedConv = defaultConv; |
|
187 |
} |
|
188 |
||
189 |
// If we've converted all ASCII characters, then return. |
|
190 |
if (byteOff == inEnd) { |
|
191 |
return num; |
|
192 |
} |
|
193 |
||
194 |
if(detectedConv != null) { |
|
195 |
try { |
|
196 |
num += detectedConv.convert(input, inOff + num, inEnd, |
|
197 |
output, outOff + num, outEnd); |
|
198 |
} finally { |
|
199 |
charOff = detectedConv.nextCharIndex(); |
|
200 |
byteOff = detectedConv.nextByteIndex(); |
|
201 |
badInputLength = detectedConv.badInputLength; |
|
202 |
} |
|
203 |
} else { |
|
204 |
try { |
|
205 |
num += defaultConv.convert(input, inOff + num, inEnd, |
|
206 |
output, outOff + num, outEnd); |
|
207 |
} finally { |
|
208 |
charOff = defaultConv.nextCharIndex(); |
|
209 |
byteOff = defaultConv.nextByteIndex(); |
|
210 |
badInputLength = defaultConv.badInputLength; |
|
211 |
} |
|
212 |
} |
|
213 |
return num; |
|
214 |
} |
|
215 |
||
216 |
public void reset() { |
|
217 |
if(detectedConv != null) { |
|
218 |
detectedConv.reset(); |
|
219 |
detectedConv = null; |
|
220 |
convName = null; |
|
221 |
} else |
|
222 |
defaultConv.reset(); |
|
223 |
charOff = byteOff = 0; |
|
224 |
} |
|
225 |
||
226 |
public String getCharacterEncoding() { |
|
227 |
return "JISAutoDetect"; |
|
228 |
} |
|
229 |
||
230 |
public String toString() { |
|
231 |
String s = getCharacterEncoding(); |
|
232 |
if (detectedConv != null) { |
|
233 |
s += "[" + detectedConv.getCharacterEncoding() + "]"; |
|
234 |
} else { |
|
235 |
s += "[unknown]"; |
|
236 |
} |
|
237 |
return s; |
|
238 |
} |
|
239 |
} |