|
1 /* |
|
2 * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Sun designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Sun in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
|
22 * CA 95054 USA or visit www.sun.com if you need additional information or |
|
23 * have any questions. |
|
24 */ |
|
25 |
|
26 package sun.io; |
|
27 |
|
28 import java.io.*; |
|
29 |
|
30 |
|
31 /** |
|
32 * An abstract base class for subclasses which convert Unicode |
|
33 * characters into an external encoding. |
|
34 * |
|
35 * @author Asmus Freytag |
|
36 * @author Lloyd Honomichl, Novell, Inc. |
|
37 * |
|
38 * @deprecated Replaced by {@link java.nio.charset}. THIS API WILL BE |
|
39 * REMOVED IN J2SE 1.6. |
|
40 */ |
|
41 @Deprecated |
|
42 public abstract class CharToByteConverter { |
|
43 |
|
44 /** |
|
45 * Substitution mode flag. |
|
46 */ |
|
47 protected boolean subMode = true; |
|
48 |
|
49 /** |
|
50 * Bytes to substitute for unmappable input. |
|
51 */ |
|
52 protected byte[] subBytes = { (byte)'?' }; |
|
53 |
|
54 /** |
|
55 * Offset of next character to be converted. |
|
56 */ |
|
57 protected int charOff; |
|
58 |
|
59 /** |
|
60 * Offset of next byte to be output. |
|
61 */ |
|
62 protected int byteOff; |
|
63 |
|
64 /** |
|
65 * Length of bad input that caused conversion to stop. |
|
66 */ |
|
67 protected int badInputLength; |
|
68 |
|
69 /** |
|
70 * Create an instance of the default CharToByteConverter subclass. |
|
71 */ |
|
72 public static CharToByteConverter getDefault() { |
|
73 Object cvt; |
|
74 cvt = Converters.newDefaultConverter(Converters.CHAR_TO_BYTE); |
|
75 return (CharToByteConverter)cvt; |
|
76 } |
|
77 |
|
78 /** |
|
79 * Returns appropriate CharToByteConverter subclass instance. |
|
80 * @param string represets encoding |
|
81 */ |
|
82 public static CharToByteConverter getConverter(String encoding) |
|
83 throws UnsupportedEncodingException |
|
84 { |
|
85 Object cvt; |
|
86 cvt = Converters.newConverter(Converters.CHAR_TO_BYTE, encoding); |
|
87 return (CharToByteConverter)cvt; |
|
88 } |
|
89 |
|
90 /** |
|
91 * Returns the character set id for the conversion. |
|
92 */ |
|
93 public abstract String getCharacterEncoding(); |
|
94 |
|
95 /** |
|
96 * Converts an array of Unicode characters into an array of bytes |
|
97 * in the target character encoding. This method allows a buffer by |
|
98 * buffer conversion of a data stream. The state of the conversion is |
|
99 * saved between calls to convert. If a call to convert results in |
|
100 * an exception, the conversion may be continued by calling convert again |
|
101 * with suitably modified parameters. All conversions should be finished |
|
102 * with a call to the flush method. |
|
103 * |
|
104 * @return the number of bytes written to output. |
|
105 * @param input array containing Unicode characters to be converted. |
|
106 * @param inStart begin conversion at this offset in input array. |
|
107 * @param inEnd stop conversion at this offset in input array (exclusive). |
|
108 * @param output byte array to receive conversion result. |
|
109 * @param outStart start writing to output array at this offset. |
|
110 * @param outEnd stop writing to output array at this offset (exclusive). |
|
111 * @exception MalformedInputException if the input buffer contains any |
|
112 * sequence of chars that is illegal in Unicode (principally unpaired |
|
113 * surrogates and \uFFFF or \uFFFE). After this exception is thrown, |
|
114 * the method nextCharIndex can be called to obtain the index of the |
|
115 * first invalid input character. The MalformedInputException can |
|
116 * be queried for the length of the invalid input. |
|
117 * @exception UnknownCharacterException for any character that |
|
118 * that cannot be converted to the external character encoding. Thrown |
|
119 * only when converter is not in substitution mode. |
|
120 * @exception ConversionBufferFullException if output array is filled prior |
|
121 * to converting all the input. |
|
122 */ |
|
123 public abstract int convert(char[] input, int inStart, int inEnd, |
|
124 byte[] output, int outStart, int outEnd) |
|
125 throws MalformedInputException, |
|
126 UnknownCharacterException, |
|
127 ConversionBufferFullException; |
|
128 |
|
129 /* |
|
130 * Converts any array of characters, including malformed surrogate |
|
131 * pairs, into an array of bytes in the target character encoding. |
|
132 * A precondition is that substitution mode is turned on. This method |
|
133 * allows a buffer by buffer conversion of a data stream. |
|
134 * The state of the conversion is saved between calls to convert. |
|
135 * All conversions should be finished with a call to the flushAny method. |
|
136 * |
|
137 * @return the number of bytes written to output. |
|
138 * @param input array containing Unicode characters to be converted. |
|
139 * @param inStart begin conversion at this offset in input array. |
|
140 * @param inEnd stop conversion at this offset in input array (exclusive). |
|
141 * @param output byte array to receive conversion result. |
|
142 * @param outStart start writing to output array at this offset. |
|
143 * @param outEnd stop writing to output array at this offset (exclusive). |
|
144 * @exception ConversionBufferFullException if output array is filled prior |
|
145 * to converting all the input. |
|
146 */ |
|
147 public int convertAny(char[] input, int inStart, int inEnd, |
|
148 byte[] output, int outStart, int outEnd) |
|
149 throws ConversionBufferFullException |
|
150 { |
|
151 if (!subMode) { /* Precondition: subMode == true */ |
|
152 throw new IllegalStateException("Substitution mode is not on"); |
|
153 } |
|
154 /* Rely on the untested precondition that the indices are meaningful */ |
|
155 /* For safety, use the public interface to charOff and byteOff, but |
|
156 badInputLength is directly modified.*/ |
|
157 int localInOff = inStart; |
|
158 int localOutOff = outStart; |
|
159 while(localInOff < inEnd) { |
|
160 try { |
|
161 int discard = convert(input, localInOff, inEnd, |
|
162 output, localOutOff, outEnd); |
|
163 return (nextByteIndex() - outStart); |
|
164 } catch (MalformedInputException e) { |
|
165 byte[] s = subBytes; |
|
166 int subSize = s.length; |
|
167 localOutOff = nextByteIndex(); |
|
168 if ((localOutOff + subSize) > outEnd) |
|
169 throw new ConversionBufferFullException(); |
|
170 for (int i = 0; i < subSize; i++) |
|
171 output[localOutOff++] = s[i]; |
|
172 localInOff = nextCharIndex(); |
|
173 localInOff += badInputLength; |
|
174 badInputLength = 0; |
|
175 if (localInOff >= inEnd){ |
|
176 byteOff = localOutOff; |
|
177 return (byteOff - outStart); |
|
178 } |
|
179 continue; |
|
180 }catch (UnknownCharacterException e) { |
|
181 /* Should never occur, since subMode == true */ |
|
182 throw new Error("UnknownCharacterException thrown " |
|
183 + "in substititution mode", |
|
184 e); |
|
185 } |
|
186 } |
|
187 return (nextByteIndex() - outStart); |
|
188 } |
|
189 |
|
190 |
|
191 |
|
192 /** |
|
193 * Converts an array of Unicode characters into an array of bytes |
|
194 * in the target character encoding. Unlike convert, this method |
|
195 * does not do incremental conversion. It assumes that the given |
|
196 * input array contains all the characters to be converted. The |
|
197 * state of the converter is reset at the beginning of this method |
|
198 * and is left in the reset state on successful termination. |
|
199 * The converter is not reset if an exception is thrown. |
|
200 * This allows the caller to determine where the bad input |
|
201 * was encountered by calling nextCharIndex. |
|
202 * <p> |
|
203 * This method uses substitution mode when performing the conversion. |
|
204 * The method setSubstitutionBytes may be used to determine what |
|
205 * bytes are substituted. Even though substitution mode is used, |
|
206 * the state of the converter's substitution mode is not changed |
|
207 * at the end of this method. |
|
208 * |
|
209 * @return an array of bytes containing the converted characters. |
|
210 * @param input array containing Unicode characters to be converted. |
|
211 * @exception MalformedInputException if the input buffer contains any |
|
212 * sequence of chars that is illegal in Unicode (principally unpaired |
|
213 * surrogates and \uFFFF or \uFFFE). After this exception is thrown, |
|
214 * the method nextCharIndex can be called to obtain the index of the |
|
215 * first invalid input character and getBadInputLength can be called |
|
216 * to determine the length of the invalid input. |
|
217 * |
|
218 * @see #nextCharIndex |
|
219 * @see #setSubstitutionMode |
|
220 * @see #setSubstitutionBytes |
|
221 * @see #getBadInputLength |
|
222 */ |
|
223 public byte[] convertAll( char input[] ) throws MalformedInputException { |
|
224 reset(); |
|
225 boolean savedSubMode = subMode; |
|
226 subMode = true; |
|
227 |
|
228 byte[] output = new byte[ getMaxBytesPerChar() * input.length ]; |
|
229 |
|
230 try { |
|
231 int outputLength = convert( input, 0, input.length, |
|
232 output, 0, output.length ); |
|
233 outputLength += flush( output, nextByteIndex(), output.length ); |
|
234 |
|
235 byte [] returnedOutput = new byte[ outputLength ]; |
|
236 System.arraycopy( output, 0, returnedOutput, 0, outputLength ); |
|
237 return returnedOutput; |
|
238 } |
|
239 catch( ConversionBufferFullException e ) { |
|
240 //Not supposed to happen. If it does, getMaxBytesPerChar() lied. |
|
241 throw new |
|
242 InternalError("this.getMaxBytesPerChar returned bad value"); |
|
243 } |
|
244 catch( UnknownCharacterException e ) { |
|
245 // Not supposed to happen since we're in substitution mode. |
|
246 throw new InternalError(); |
|
247 } |
|
248 finally { |
|
249 subMode = savedSubMode; |
|
250 } |
|
251 } |
|
252 |
|
253 /** |
|
254 * Writes any remaining output to the output buffer and resets the |
|
255 * converter to its initial state. |
|
256 * |
|
257 * @param output byte array to receive flushed output. |
|
258 * @param outStart start writing to output array at this offset. |
|
259 * @param outEnd stop writing to output array at this offset (exclusive). |
|
260 * @exception MalformedInputException if the output to be flushed contained |
|
261 * a partial or invalid multibyte character sequence. Will occur if the |
|
262 * input buffer on the last call to convert ended with the first character |
|
263 * of a surrogate pair. flush will write what it can to the output buffer |
|
264 * and reset the converter before throwing this exception. An additional |
|
265 * call to flush is not required. |
|
266 * @exception ConversionBufferFullException if output array is filled |
|
267 * before all the output can be flushed. flush will write what it can |
|
268 * to the output buffer and remember its state. An additional call to |
|
269 * flush with a new output buffer will conclude the operation. |
|
270 */ |
|
271 public abstract int flush( byte[] output, int outStart, int outEnd ) |
|
272 throws MalformedInputException, ConversionBufferFullException; |
|
273 |
|
274 /** |
|
275 * Writes any remaining output to the output buffer and resets the |
|
276 * converter to its initial state. May only be called when substitution |
|
277 * mode is turned on, and never complains about malformed input (always |
|
278 * substitutes). |
|
279 * |
|
280 * @param output byte array to receive flushed output. |
|
281 * @param outStart start writing to output array at this offset. |
|
282 * @param outEnd stop writing to output array at this offset (exclusive). |
|
283 * @return number of bytes writter into output. |
|
284 * @exception ConversionBufferFullException if output array is filled |
|
285 * before all the output can be flushed. flush will write what it can |
|
286 * to the output buffer and remember its state. An additional call to |
|
287 * flush with a new output buffer will conclude the operation. |
|
288 */ |
|
289 public int flushAny( byte[] output, int outStart, int outEnd ) |
|
290 throws ConversionBufferFullException |
|
291 { |
|
292 if (!subMode) { /* Precondition: subMode == true */ |
|
293 throw new IllegalStateException("Substitution mode is not on"); |
|
294 } |
|
295 try { |
|
296 return flush(output, outStart, outEnd); |
|
297 } catch (MalformedInputException e) { |
|
298 /* Assume that if a malformed input exception has occurred, |
|
299 no useful data has been placed in the output buffer. |
|
300 i.e. there is no mixture of left over good + some bad data. |
|
301 Usually occurs with a trailing high surrogate pair element. |
|
302 Special cases occur in Cp970, 949c and 933 that seem |
|
303 to be covered, but may require further investigation */ |
|
304 int subSize = subBytes.length; |
|
305 byte[] s = subBytes; |
|
306 int outIndex = outStart; |
|
307 if ((outStart + subSize) > outEnd) |
|
308 throw new ConversionBufferFullException(); |
|
309 for (int i = 0; i < subSize; i++) |
|
310 output[outIndex++] = s[i]; |
|
311 byteOff = charOff = 0; // Reset the internal state. |
|
312 badInputLength = 0; |
|
313 return subSize; |
|
314 } |
|
315 } |
|
316 |
|
317 /** |
|
318 * Resets converter to its initial state. |
|
319 */ |
|
320 public abstract void reset(); |
|
321 |
|
322 /** |
|
323 * Returns true if the given character can be converted to the |
|
324 * target character encoding. |
|
325 * @return true if given character is translatable, false otherwise. |
|
326 * @param c character to test |
|
327 */ |
|
328 public boolean canConvert(char c) { |
|
329 try { |
|
330 //FIXME output buffer size should use getMaxBytesPerChar value. |
|
331 char[] input = new char[1]; |
|
332 byte[] output = new byte[3]; |
|
333 input[0] = c; |
|
334 convert(input, 0, 1, output, 0, 3); |
|
335 return true; |
|
336 } catch(CharConversionException e){ |
|
337 return false; |
|
338 } |
|
339 } |
|
340 |
|
341 /** |
|
342 * Returns the maximum number of bytes needed to convert a char. Useful |
|
343 * for calculating the maximum output buffer size needed for a particular |
|
344 * input buffer. |
|
345 */ |
|
346 public abstract int getMaxBytesPerChar(); |
|
347 |
|
348 /** |
|
349 * Returns the length, in chars, of the input which caused a |
|
350 * MalformedInputException. Always refers to the last |
|
351 * MalformedInputException thrown by the converter. If none have |
|
352 * ever been thrown, returns 0. |
|
353 */ |
|
354 public int getBadInputLength() { |
|
355 return badInputLength; |
|
356 } |
|
357 |
|
358 /** |
|
359 * Returns the index of the character just past |
|
360 * the last character successfully converted by the previous call |
|
361 * to convert. |
|
362 */ |
|
363 public int nextCharIndex() { |
|
364 return charOff; |
|
365 } |
|
366 |
|
367 /** |
|
368 * Returns the index of the byte just past the last byte written by |
|
369 * the previous call to convert. |
|
370 */ |
|
371 public int nextByteIndex() { |
|
372 return byteOff; |
|
373 } |
|
374 |
|
375 /** |
|
376 * Sets converter into substitution mode. In substitution mode, |
|
377 * the converter will replace untranslatable characters in the source |
|
378 * encoding with the substitution character set by setSubstitutionBytes. |
|
379 * When not in substitution mode, the converter will throw an |
|
380 * UnknownCharacterException when it encounters untranslatable input. |
|
381 * |
|
382 * @param doSub if true, enable substitution mode. |
|
383 * @see #setSubstitutionBytes |
|
384 */ |
|
385 public void setSubstitutionMode(boolean doSub) { |
|
386 subMode = doSub; |
|
387 } |
|
388 |
|
389 /** |
|
390 * Sets the substitution bytes to use when the converter is in |
|
391 * substitution mode. The given bytes should represent a valid |
|
392 * character in the target character encoding and must not be |
|
393 * longer than the value returned by getMaxBytesPerChar for this |
|
394 * converter. |
|
395 * |
|
396 * @param newSubBytes the substitution bytes |
|
397 * @exception IllegalArgumentException if given byte array is longer than |
|
398 * the value returned by the method getMaxBytesPerChar. |
|
399 * @see #setSubstitutionMode |
|
400 * @see #getMaxBytesPerChar |
|
401 */ |
|
402 public void setSubstitutionBytes( byte[] newSubBytes ) |
|
403 throws IllegalArgumentException |
|
404 { |
|
405 if( newSubBytes.length > getMaxBytesPerChar() ) { |
|
406 throw new IllegalArgumentException(); |
|
407 } |
|
408 |
|
409 subBytes = new byte[ newSubBytes.length ]; |
|
410 System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length ); |
|
411 } |
|
412 |
|
413 /** |
|
414 * Returns a string representation of the class. |
|
415 */ |
|
416 public String toString() { |
|
417 return "CharToByteConverter: " + getCharacterEncoding(); |
|
418 } |
|
419 } |