diff -r fd16c54261b3 -r 90ce3da70b43 jdk/src/share/classes/sun/io/CharToByteConverter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jdk/src/share/classes/sun/io/CharToByteConverter.java Sat Dec 01 00:00:00 2007 +0000 @@ -0,0 +1,419 @@ +/* + * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +package sun.io; + +import java.io.*; + + +/** + * An abstract base class for subclasses which convert Unicode + * characters into an external encoding. + * + * @author Asmus Freytag + * @author Lloyd Honomichl, Novell, Inc. + * + * @deprecated Replaced by {@link java.nio.charset}. THIS API WILL BE + * REMOVED IN J2SE 1.6. + */ +@Deprecated +public abstract class CharToByteConverter { + + /** + * Substitution mode flag. + */ + protected boolean subMode = true; + + /** + * Bytes to substitute for unmappable input. + */ + protected byte[] subBytes = { (byte)'?' }; + + /** + * Offset of next character to be converted. + */ + protected int charOff; + + /** + * Offset of next byte to be output. + */ + protected int byteOff; + + /** + * Length of bad input that caused conversion to stop. + */ + protected int badInputLength; + + /** + * Create an instance of the default CharToByteConverter subclass. + */ + public static CharToByteConverter getDefault() { + Object cvt; + cvt = Converters.newDefaultConverter(Converters.CHAR_TO_BYTE); + return (CharToByteConverter)cvt; + } + + /** + * Returns appropriate CharToByteConverter subclass instance. + * @param string represets encoding + */ + public static CharToByteConverter getConverter(String encoding) + throws UnsupportedEncodingException + { + Object cvt; + cvt = Converters.newConverter(Converters.CHAR_TO_BYTE, encoding); + return (CharToByteConverter)cvt; + } + + /** + * Returns the character set id for the conversion. + */ + public abstract String getCharacterEncoding(); + + /** + * Converts an array of Unicode characters into an array of bytes + * in the target character encoding. This method allows a buffer by + * buffer conversion of a data stream. The state of the conversion is + * saved between calls to convert. If a call to convert results in + * an exception, the conversion may be continued by calling convert again + * with suitably modified parameters. All conversions should be finished + * with a call to the flush method. + * + * @return the number of bytes written to output. + * @param input array containing Unicode characters to be converted. + * @param inStart begin conversion at this offset in input array. + * @param inEnd stop conversion at this offset in input array (exclusive). + * @param output byte array to receive conversion result. + * @param outStart start writing to output array at this offset. + * @param outEnd stop writing to output array at this offset (exclusive). + * @exception MalformedInputException if the input buffer contains any + * sequence of chars that is illegal in Unicode (principally unpaired + * surrogates and \uFFFF or \uFFFE). After this exception is thrown, + * the method nextCharIndex can be called to obtain the index of the + * first invalid input character. The MalformedInputException can + * be queried for the length of the invalid input. + * @exception UnknownCharacterException for any character that + * that cannot be converted to the external character encoding. Thrown + * only when converter is not in substitution mode. + * @exception ConversionBufferFullException if output array is filled prior + * to converting all the input. + */ + public abstract int convert(char[] input, int inStart, int inEnd, + byte[] output, int outStart, int outEnd) + throws MalformedInputException, + UnknownCharacterException, + ConversionBufferFullException; + + /* + * Converts any array of characters, including malformed surrogate + * pairs, into an array of bytes in the target character encoding. + * A precondition is that substitution mode is turned on. This method + * allows a buffer by buffer conversion of a data stream. + * The state of the conversion is saved between calls to convert. + * All conversions should be finished with a call to the flushAny method. + * + * @return the number of bytes written to output. + * @param input array containing Unicode characters to be converted. + * @param inStart begin conversion at this offset in input array. + * @param inEnd stop conversion at this offset in input array (exclusive). + * @param output byte array to receive conversion result. + * @param outStart start writing to output array at this offset. + * @param outEnd stop writing to output array at this offset (exclusive). + * @exception ConversionBufferFullException if output array is filled prior + * to converting all the input. + */ + public int convertAny(char[] input, int inStart, int inEnd, + byte[] output, int outStart, int outEnd) + throws ConversionBufferFullException + { + if (!subMode) { /* Precondition: subMode == true */ + throw new IllegalStateException("Substitution mode is not on"); + } + /* Rely on the untested precondition that the indices are meaningful */ + /* For safety, use the public interface to charOff and byteOff, but + badInputLength is directly modified.*/ + int localInOff = inStart; + int localOutOff = outStart; + while(localInOff < inEnd) { + try { + int discard = convert(input, localInOff, inEnd, + output, localOutOff, outEnd); + return (nextByteIndex() - outStart); + } catch (MalformedInputException e) { + byte[] s = subBytes; + int subSize = s.length; + localOutOff = nextByteIndex(); + if ((localOutOff + subSize) > outEnd) + throw new ConversionBufferFullException(); + for (int i = 0; i < subSize; i++) + output[localOutOff++] = s[i]; + localInOff = nextCharIndex(); + localInOff += badInputLength; + badInputLength = 0; + if (localInOff >= inEnd){ + byteOff = localOutOff; + return (byteOff - outStart); + } + continue; + }catch (UnknownCharacterException e) { + /* Should never occur, since subMode == true */ + throw new Error("UnknownCharacterException thrown " + + "in substititution mode", + e); + } + } + return (nextByteIndex() - outStart); + } + + + + /** + * Converts an array of Unicode characters into an array of bytes + * in the target character encoding. Unlike convert, this method + * does not do incremental conversion. It assumes that the given + * input array contains all the characters to be converted. The + * state of the converter is reset at the beginning of this method + * and is left in the reset state on successful termination. + * The converter is not reset if an exception is thrown. + * This allows the caller to determine where the bad input + * was encountered by calling nextCharIndex. + *

+ * This method uses substitution mode when performing the conversion. + * The method setSubstitutionBytes may be used to determine what + * bytes are substituted. Even though substitution mode is used, + * the state of the converter's substitution mode is not changed + * at the end of this method. + * + * @return an array of bytes containing the converted characters. + * @param input array containing Unicode characters to be converted. + * @exception MalformedInputException if the input buffer contains any + * sequence of chars that is illegal in Unicode (principally unpaired + * surrogates and \uFFFF or \uFFFE). After this exception is thrown, + * the method nextCharIndex can be called to obtain the index of the + * first invalid input character and getBadInputLength can be called + * to determine the length of the invalid input. + * + * @see #nextCharIndex + * @see #setSubstitutionMode + * @see #setSubstitutionBytes + * @see #getBadInputLength + */ + public byte[] convertAll( char input[] ) throws MalformedInputException { + reset(); + boolean savedSubMode = subMode; + subMode = true; + + byte[] output = new byte[ getMaxBytesPerChar() * input.length ]; + + try { + int outputLength = convert( input, 0, input.length, + output, 0, output.length ); + outputLength += flush( output, nextByteIndex(), output.length ); + + byte [] returnedOutput = new byte[ outputLength ]; + System.arraycopy( output, 0, returnedOutput, 0, outputLength ); + return returnedOutput; + } + catch( ConversionBufferFullException e ) { + //Not supposed to happen. If it does, getMaxBytesPerChar() lied. + throw new + InternalError("this.getMaxBytesPerChar returned bad value"); + } + catch( UnknownCharacterException e ) { + // Not supposed to happen since we're in substitution mode. + throw new InternalError(); + } + finally { + subMode = savedSubMode; + } + } + + /** + * Writes any remaining output to the output buffer and resets the + * converter to its initial state. + * + * @param output byte array to receive flushed output. + * @param outStart start writing to output array at this offset. + * @param outEnd stop writing to output array at this offset (exclusive). + * @exception MalformedInputException if the output to be flushed contained + * a partial or invalid multibyte character sequence. Will occur if the + * input buffer on the last call to convert ended with the first character + * of a surrogate pair. flush will write what it can to the output buffer + * and reset the converter before throwing this exception. An additional + * call to flush is not required. + * @exception ConversionBufferFullException if output array is filled + * before all the output can be flushed. flush will write what it can + * to the output buffer and remember its state. An additional call to + * flush with a new output buffer will conclude the operation. + */ + public abstract int flush( byte[] output, int outStart, int outEnd ) + throws MalformedInputException, ConversionBufferFullException; + + /** + * Writes any remaining output to the output buffer and resets the + * converter to its initial state. May only be called when substitution + * mode is turned on, and never complains about malformed input (always + * substitutes). + * + * @param output byte array to receive flushed output. + * @param outStart start writing to output array at this offset. + * @param outEnd stop writing to output array at this offset (exclusive). + * @return number of bytes writter into output. + * @exception ConversionBufferFullException if output array is filled + * before all the output can be flushed. flush will write what it can + * to the output buffer and remember its state. An additional call to + * flush with a new output buffer will conclude the operation. + */ + public int flushAny( byte[] output, int outStart, int outEnd ) + throws ConversionBufferFullException + { + if (!subMode) { /* Precondition: subMode == true */ + throw new IllegalStateException("Substitution mode is not on"); + } + try { + return flush(output, outStart, outEnd); + } catch (MalformedInputException e) { + /* Assume that if a malformed input exception has occurred, + no useful data has been placed in the output buffer. + i.e. there is no mixture of left over good + some bad data. + Usually occurs with a trailing high surrogate pair element. + Special cases occur in Cp970, 949c and 933 that seem + to be covered, but may require further investigation */ + int subSize = subBytes.length; + byte[] s = subBytes; + int outIndex = outStart; + if ((outStart + subSize) > outEnd) + throw new ConversionBufferFullException(); + for (int i = 0; i < subSize; i++) + output[outIndex++] = s[i]; + byteOff = charOff = 0; // Reset the internal state. + badInputLength = 0; + return subSize; + } + } + + /** + * Resets converter to its initial state. + */ + public abstract void reset(); + + /** + * Returns true if the given character can be converted to the + * target character encoding. + * @return true if given character is translatable, false otherwise. + * @param c character to test + */ + public boolean canConvert(char c) { + try { + //FIXME output buffer size should use getMaxBytesPerChar value. + char[] input = new char[1]; + byte[] output = new byte[3]; + input[0] = c; + convert(input, 0, 1, output, 0, 3); + return true; + } catch(CharConversionException e){ + return false; + } + } + + /** + * Returns the maximum number of bytes needed to convert a char. Useful + * for calculating the maximum output buffer size needed for a particular + * input buffer. + */ + public abstract int getMaxBytesPerChar(); + + /** + * Returns the length, in chars, of the input which caused a + * MalformedInputException. Always refers to the last + * MalformedInputException thrown by the converter. If none have + * ever been thrown, returns 0. + */ + public int getBadInputLength() { + return badInputLength; + } + + /** + * Returns the index of the character just past + * the last character successfully converted by the previous call + * to convert. + */ + public int nextCharIndex() { + return charOff; + } + + /** + * Returns the index of the byte just past the last byte written by + * the previous call to convert. + */ + public int nextByteIndex() { + return byteOff; + } + + /** + * Sets converter into substitution mode. In substitution mode, + * the converter will replace untranslatable characters in the source + * encoding with the substitution character set by setSubstitutionBytes. + * When not in substitution mode, the converter will throw an + * UnknownCharacterException when it encounters untranslatable input. + * + * @param doSub if true, enable substitution mode. + * @see #setSubstitutionBytes + */ + public void setSubstitutionMode(boolean doSub) { + subMode = doSub; + } + + /** + * Sets the substitution bytes to use when the converter is in + * substitution mode. The given bytes should represent a valid + * character in the target character encoding and must not be + * longer than the value returned by getMaxBytesPerChar for this + * converter. + * + * @param newSubBytes the substitution bytes + * @exception IllegalArgumentException if given byte array is longer than + * the value returned by the method getMaxBytesPerChar. + * @see #setSubstitutionMode + * @see #getMaxBytesPerChar + */ + public void setSubstitutionBytes( byte[] newSubBytes ) + throws IllegalArgumentException + { + if( newSubBytes.length > getMaxBytesPerChar() ) { + throw new IllegalArgumentException(); + } + + subBytes = new byte[ newSubBytes.length ]; + System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length ); + } + + /** + * Returns a string representation of the class. + */ + public String toString() { + return "CharToByteConverter: " + getCharacterEncoding(); + } +}