--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/io/CharToByteConverter.java Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,419 @@
+/*
+ * Copyright 1996-2004 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.io;
+
+import java.io.*;
+
+
+/**
+ * An abstract base class for subclasses which convert Unicode
+ * characters into an external encoding.
+ *
+ * @author Asmus Freytag
+ * @author Lloyd Honomichl, Novell, Inc.
+ *
+ * @deprecated Replaced by {@link java.nio.charset}. THIS API WILL BE
+ * REMOVED IN J2SE 1.6.
+ */
+@Deprecated
+public abstract class CharToByteConverter {
+
+ /**
+ * Substitution mode flag; true (the initial value) means substitution is on.
+ */
+ protected boolean subMode = true;
+
+ /**
+ * Bytes to substitute for unmappable input; initially a single '?' byte.
+ */
+ protected byte[] subBytes = { (byte)'?' };
+
+ /**
+ * Offset of next character to be converted; reported by nextCharIndex().
+ */
+ protected int charOff;
+
+ /**
+ * Offset of next byte to be output; reported by nextByteIndex().
+ */
+ protected int byteOff;
+
+ /**
+ * Length of bad input that caused conversion to stop; see getBadInputLength().
+ */
+ protected int badInputLength;
+
+ /**
+ * Creates an instance of the default CharToByteConverter subclass.
+ * @return the object built by Converters.newDefaultConverter, cast to this type
+ */
+ public static CharToByteConverter getDefault() {
+     return (CharToByteConverter)
+         Converters.newDefaultConverter(Converters.CHAR_TO_BYTE);
+ }
+
+ /**
+ * Returns the CharToByteConverter subclass instance for an encoding.
+ * @param encoding name of the target character encoding
+ * @exception UnsupportedEncodingException propagated from Converters.newConverter
+ */
+ public static CharToByteConverter getConverter(String encoding)
+     throws UnsupportedEncodingException
+ {
+     return (CharToByteConverter)
+         Converters.newConverter(Converters.CHAR_TO_BYTE, encoding);
+ }
+
+ /**
+ * Returns the character set id for the conversion; toString() reports it.
+ */
+ public abstract String getCharacterEncoding();
+
+ /**
+ * Converts an array of Unicode characters into an array of bytes
+ * in the target character encoding. This method allows a buffer by
+ * buffer conversion of a data stream. The state of the conversion is
+ * saved between calls to convert. If a call to convert results in
+ * an exception, the conversion may be continued by calling convert again
+ * with suitably modified parameters. All conversions should be finished
+ * with a call to the flush method.
+ *
+ * @return the number of bytes written to output.
+ * @param input array containing Unicode characters to be converted.
+ * @param inStart begin conversion at this offset in input array.
+ * @param inEnd stop conversion at this offset in input array (exclusive).
+ * @param output byte array to receive conversion result.
+ * @param outStart start writing to output array at this offset.
+ * @param outEnd stop writing to output array at this offset (exclusive).
+ * @exception MalformedInputException if the input buffer contains any
+ * sequence of chars that is illegal in Unicode (principally unpaired
+ * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
+ * the method nextCharIndex can be called to obtain the index of the
+ * first invalid input character. The MalformedInputException can
+ * be queried for the length of the invalid input.
+ * @exception UnknownCharacterException for any character that
+ * cannot be converted to the external character encoding. Thrown
+ * only when converter is not in substitution mode.
+ * @exception ConversionBufferFullException if output array is filled prior
+ * to converting all the input.
+ */
+ public abstract int convert(char[] input, int inStart, int inEnd,
+ byte[] output, int outStart, int outEnd)
+ throws MalformedInputException,
+ UnknownCharacterException,
+ ConversionBufferFullException;
+
+ /**
+ * Converts any array of characters, including malformed surrogate
+ * pairs, into an array of bytes in the target character encoding.
+ * A precondition is that substitution mode is turned on; each run of
+ * malformed input is replaced by the substitution bytes. The state of
+ * the conversion is saved between calls, so a data stream may be converted
+ * buffer by buffer. Finish with a call to the flushAny method.
+ * @return the number of bytes written to output (0 for an empty range).
+ * @param input array containing Unicode characters to be converted.
+ * @param inStart begin conversion at this offset in input array.
+ * @param inEnd stop conversion at this offset in input array (exclusive).
+ * @param output byte array to receive conversion result.
+ * @param outStart start writing to output array at this offset.
+ * @param outEnd stop writing to output array at this offset (exclusive).
+ * @exception ConversionBufferFullException if output array is filled prior
+ * to converting all the input.
+ * @exception IllegalStateException if substitution mode is not on.
+ */
+ public int convertAny(char[] input, int inStart, int inEnd,
+         byte[] output, int outStart, int outEnd)
+     throws ConversionBufferFullException
+ {
+     if (!subMode) { /* Precondition: subMode == true */
+         throw new IllegalStateException("Substitution mode is not on");
+     }
+     if (inStart >= inEnd) {
+         /* convert is never called: publish this call's offsets, not stale ones */
+         charOff = inStart;
+         byteOff = outStart;
+         return 0;
+     }
+     /* Rely on the untested precondition that the indices are meaningful */
+     int inPos = inStart;
+     int outPos = outStart;
+     while (true) {
+         try {
+             convert(input, inPos, inEnd, output, outPos, outEnd);
+             return nextByteIndex() - outStart;
+         } catch (MalformedInputException e) {
+             /* Substitute for the bad input, then resume just past it. */
+             byte[] sub = subBytes;
+             outPos = nextByteIndex();
+             if ((outPos + sub.length) > outEnd)
+                 throw new ConversionBufferFullException();
+             System.arraycopy(sub, 0, output, outPos, sub.length);
+             outPos += sub.length;
+             inPos = nextCharIndex() + badInputLength;
+             badInputLength = 0;
+             if (inPos >= inEnd) {
+                 byteOff = outPos;
+                 return byteOff - outStart;
+             }
+         } catch (UnknownCharacterException e) {
+             /* Should never occur, since subMode == true */
+             throw new Error("UnknownCharacterException thrown "
+                     + "in substititution mode",
+                     e);
+         }
+     }
+ }
+
+
+
+ /**
+ * Converts an array of Unicode characters into an array of bytes
+ * in the target character encoding. Unlike convert, this method
+ * does not do incremental conversion. It assumes that the given
+ * input array contains all the characters to be converted. The
+ * state of the converter is reset at the beginning of this method
+ * and is left in the reset state on successful termination.
+ * The converter is not reset if an exception is thrown.
+ * This allows the caller to determine where the bad input
+ * was encountered by calling nextCharIndex.
+ * <p>
+ * This method uses substitution mode when performing the conversion.
+ * The method setSubstitutionBytes may be used to determine what
+ * bytes are substituted. Even though substitution mode is used,
+ * the caller's substitution mode setting is restored before this
+ * method returns or throws.
+ *
+ * @return an array of bytes containing the converted characters.
+ * @param input array containing Unicode characters to be converted.
+ * @exception MalformedInputException if the input buffer contains any
+ * sequence of chars that is illegal in Unicode (principally unpaired
+ * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
+ * the method nextCharIndex can be called to obtain the index of the
+ * first invalid input character and getBadInputLength can be called
+ * to determine the length of the invalid input.
+ * @exception InternalError if the converter breaks its contract (cause attached).
+ * @see #nextCharIndex
+ * @see #setSubstitutionMode
+ * @see #setSubstitutionBytes
+ * @see #getBadInputLength
+ */
+ public byte[] convertAll( char input[] ) throws MalformedInputException {
+     reset();
+     boolean savedSubMode = subMode;
+     subMode = true;
+
+     byte[] output = new byte[ getMaxBytesPerChar() * input.length ];
+
+     try {
+         int outputLength = convert( input, 0, input.length,
+                                     output, 0, output.length );
+         outputLength += flush( output, nextByteIndex(), output.length );
+         byte [] returnedOutput = new byte[ outputLength ];
+         System.arraycopy( output, 0, returnedOutput, 0, outputLength );
+         return returnedOutput;
+     }
+     catch( ConversionBufferFullException e ) {
+         //Not supposed to happen. If it does, getMaxBytesPerChar() lied.
+         throw (InternalError) new InternalError(
+             "this.getMaxBytesPerChar returned bad value").initCause(e);
+     }
+     catch( UnknownCharacterException e ) {
+         // Not supposed to happen since we're in substitution mode.
+         throw (InternalError) new InternalError(
+             "UnknownCharacterException in substitution mode").initCause(e);
+     }
+     finally {
+         subMode = savedSubMode;
+     }
+ }
+
+ /**
+ * Writes any remaining output to the output buffer and resets the
+ * converter to its initial state.
+ * @return the number of bytes written to output.
+ * @param output byte array to receive flushed output.
+ * @param outStart start writing to output array at this offset.
+ * @param outEnd stop writing to output array at this offset (exclusive).
+ * @exception MalformedInputException if the output to be flushed contained
+ * a partial or invalid multibyte character sequence. Will occur if the
+ * input buffer on the last call to convert ended with the first character
+ * of a surrogate pair. flush will write what it can to the output buffer
+ * and reset the converter before throwing this exception. An additional
+ * call to flush is not required.
+ * @exception ConversionBufferFullException if output array is filled
+ * before all the output can be flushed. flush will write what it can
+ * to the output buffer and remember its state. An additional call to
+ * flush with a new output buffer will conclude the operation.
+ */
+ public abstract int flush( byte[] output, int outStart, int outEnd )
+ throws MalformedInputException, ConversionBufferFullException;
+
+ /**
+ * Flushes any remaining output into the output buffer and returns the
+ * converter to its initial state. Callable only while substitution mode
+ * is on; malformed pending input is never reported, the substitution
+ * bytes are written in its place.
+ * @exception IllegalStateException if substitution mode is not on.
+ * @param output byte array to receive flushed output.
+ * @param outStart start writing to output array at this offset.
+ * @param outEnd stop writing to output array at this offset (exclusive).
+ * @return number of bytes written into output.
+ * @exception ConversionBufferFullException if output array is filled
+ * before all the output can be flushed. flush will write what it can
+ * to the output buffer and remember its state. An additional call to
+ * flush with a new output buffer will conclude the operation.
+ */
+ public int flushAny( byte[] output, int outStart, int outEnd )
+     throws ConversionBufferFullException
+ {
+     /* Precondition: subMode == true */
+     if (subMode == false)
+         throw new IllegalStateException("Substitution mode is not on");
+     int written;
+     try {
+         written = flush(output, outStart, outEnd);
+     } catch (MalformedInputException malformed) {
+         /* Assumes that when flush reports malformed input it has put no
+            useful data in output (no mix of good and bad bytes), as with a
+            trailing high surrogate. Special cases in Cp970, 949c and 933
+            seem to be covered, but may require further investigation. */
+         written = subBytes.length;
+         byte[] replacement = subBytes;
+         if ((outStart + written) > outEnd)
+             throw new ConversionBufferFullException();
+         for (int k = 0; k < written; k++)
+             output[outStart + k] = replacement[k];
+         /* Reset the internal state. */
+         charOff = 0;
+         byteOff = 0;
+         badInputLength = 0;
+     }
+     return written;
+ }
+
+ /**
+ * Resets converter to its initial state; convertAll calls this first.
+ */
+ public abstract void reset();
+
+ /**
+ * Returns true if the given character can be converted to the
+ * target character encoding.
+ * @return true if given character is translatable, false otherwise.
+ * @param c character to test
+ */
+ public boolean canConvert(char c) {
+ try {
+ //FIXME output buffer size should use getMaxBytesPerChar value.
+ char[] input = new char[1];
+ byte[] output = new byte[3];
+ input[0] = c;
+ convert(input, 0, 1, output, 0, 3);
+ return true;
+ } catch(CharConversionException e){
+ return false;
+ }
+ }
+
+ /**
+ * Returns the maximum number of bytes needed to convert a char. Used by
+ * convertAll to size its output buffer and by setSubstitutionBytes as the
+ * upper bound on the length of the substitution bytes.
+ */
+ public abstract int getMaxBytesPerChar();
+
+ /**
+ * Returns the length, in chars, of the input which caused the most
+ * recent MalformedInputException thrown by the converter, or 0 if none
+ * has been thrown. convertAny and flushAny set the stored length back
+ * to 0 after they substitute for malformed input.
+ */
+ public int getBadInputLength() {
+ return badInputLength;
+ }
+
+ /**
+ * Returns the index of the character just past the last character
+ * successfully converted by the previous call to convert.
+ * @return the current value of charOff
+ */
+ public int nextCharIndex() {
+ return charOff;
+ }
+
+ /**
+ * Returns the index just past the last byte written by the previous convert.
+ * @return the current value of byteOff
+ */
+ public int nextByteIndex() {
+ return byteOff;
+ }
+
+ /**
+ * Enables or disables substitution mode. In substitution mode,
+ * the converter will replace untranslatable characters in the source
+ * encoding with the substitution bytes set by setSubstitutionBytes.
+ * When not in substitution mode, the converter will throw an
+ * UnknownCharacterException when it encounters untranslatable input,
+ * and convertAny and flushAny throw IllegalStateException.
+ * @param doSub if true, enable substitution mode; if false, disable it.
+ * @see #setSubstitutionBytes
+ */
+ public void setSubstitutionMode(boolean doSub) {
+ subMode = doSub;
+ }
+
+ /**
+ * Replaces the bytes written in place of input that cannot be
+ * converted while the converter is in substitution mode. The bytes
+ * should encode a valid character of the target character encoding,
+ * and there must be no more of them than getMaxBytesPerChar reports
+ * for this converter. The converter stores its own copy of the array.
+ *
+ * @param newSubBytes the substitution bytes
+ * @exception IllegalArgumentException if given byte array is longer than
+ * the value returned by the method getMaxBytesPerChar.
+ * @see #setSubstitutionMode
+ * @see #getMaxBytesPerChar
+ */
+ public void setSubstitutionBytes( byte[] newSubBytes )
+     throws IllegalArgumentException
+ {
+     final int requested = newSubBytes.length;
+     if( getMaxBytesPerChar() < requested ) {
+         throw new IllegalArgumentException();
+     }
+     // Store a private copy; the caller keeps ownership of its array.
+     subBytes = (byte[]) newSubBytes.clone();
+ }
+
+ /**
+ * Returns "CharToByteConverter: " followed by getCharacterEncoding().
+ */
+ public String toString() {
+ return "CharToByteConverter: " + getCharacterEncoding();
+ }
+}