jdk/src/share/classes/sun/io/CharToByteConverter.java
changeset 2 90ce3da70b43
child 5506 202f599c92aa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/sun/io/CharToByteConverter.java	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,419 @@
+/*
+ * Copyright 1996-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package sun.io;
+
+import java.io.*;
+
+
+/**
+ * An abstract base class for subclasses which convert Unicode
+ * characters into an external encoding.
+ *
+ * @author Asmus Freytag
+ * @author Lloyd Honomichl, Novell, Inc.
+ *
+ * @deprecated Replaced by {@link java.nio.charset}.  THIS API WILL BE
+ * REMOVED IN J2SE 1.6.
+ */
+@Deprecated
+public abstract class CharToByteConverter {
+
+    /**
+     * Substitution mode flag.  Initially true; changed through
+     * setSubstitutionMode and forced on for the duration of convertAll.
+     * convertAny and flushAny refuse to run while it is false.
+     */
+    protected boolean subMode = true;
+
+    /**
+     * Bytes to substitute for unmappable input.  Defaults to a single
+     * '?' byte and is replaced by setSubstitutionBytes.  convertAny and
+     * flushAny also write these bytes in place of malformed input.
+     */
+    protected byte[] subBytes = { (byte)'?' };
+
+    /**
+     * Offset of next character to be converted.  Reported by
+     * nextCharIndex.
+     */
+    protected int charOff;
+
+    /**
+     * Offset of next byte to be output.  Reported by nextByteIndex.
+     */
+    protected int byteOff;
+
+    /**
+     * Length of bad input that caused conversion to stop.  Reported by
+     * getBadInputLength.
+     */
+    protected int badInputLength;
+
+    /**
+     * Creates an instance of the default CharToByteConverter subclass.
+     *
+     * @return the object produced by Converters.newDefaultConverter for
+     * the char-to-byte direction, cast to CharToByteConverter.
+     */
+    public static CharToByteConverter getDefault() {
+        return (CharToByteConverter)
+            Converters.newDefaultConverter(Converters.CHAR_TO_BYTE);
+    }
+
+    /**
+     * Returns the CharToByteConverter subclass instance appropriate for
+     * the named encoding.
+     *
+     * @param encoding name of the target character encoding.
+     * @return the object produced by Converters.newConverter for the
+     * char-to-byte direction and the given encoding, cast to
+     * CharToByteConverter.
+     * @exception UnsupportedEncodingException declared by this method;
+     * presumably raised by the Converters lookup when no converter
+     * exists for the encoding.
+     */
+    public static CharToByteConverter getConverter(String encoding)
+        throws UnsupportedEncodingException
+    {
+        return (CharToByteConverter)
+            Converters.newConverter(Converters.CHAR_TO_BYTE, encoding);
+    }
+
+    /**
+     * Returns the character set id for the conversion.
+     *
+     * @return the character set id; toString appends it to the
+     * "CharToByteConverter: " label.
+     */
+    public abstract String getCharacterEncoding();
+
+    /**
+     * Converts an array of Unicode characters into an array of bytes
+     * in the target character encoding.  This method allows a buffer by
+     * buffer conversion of a data stream.  The state of the conversion is
+     * saved between calls to convert.  If a call to convert results in
+     * an exception, the conversion may be continued by calling convert again
+     * with suitably modified parameters.  All conversions should be finished
+     * with a call to the flush method.
+     *
+     * @return the number of bytes written to output.
+     * @param input array containing Unicode characters to be converted.
+     * @param inStart begin conversion at this offset in input array.
+     * @param inEnd stop conversion at this offset in input array (exclusive).
+     * @param output byte array to receive conversion result.
+     * @param outStart start writing to output array at this offset.
+     * @param outEnd stop writing to output array at this offset (exclusive).
+     * @exception MalformedInputException if the input buffer contains any
+     * sequence of chars that is illegal in Unicode (principally unpaired
+     * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
+     * the method nextCharIndex can be called to obtain the index of the
+     * first invalid input character.  The MalformedInputException can
+     * be queried for the length of the invalid input.
+     * @exception UnknownCharacterException for any character that
+     * cannot be converted to the external character encoding. Thrown
+     * only when converter is not in substitution mode.
+     * @exception ConversionBufferFullException if output array is filled prior
+     * to converting all the input.
+     */
+    public abstract int convert(char[] input, int inStart, int inEnd,
+                                byte[] output, int outStart, int outEnd)
+        throws MalformedInputException,
+               UnknownCharacterException,
+               ConversionBufferFullException;
+
+    /**
+     * Converts any array of characters, including malformed surrogate
+     * pairs, into an array of bytes in the target character encoding.
+     * A precondition is that substitution mode is turned on. This method
+     * allows a buffer by buffer conversion of a data stream.
+     * The state of the conversion is saved between calls to convert.
+     * All conversions should be finished with a call to the flushAny method.
+     * <p>
+     * Each time convert reports malformed input, one copy of the
+     * substitution bytes is written in its place and conversion resumes
+     * just past the bad input.  When there is nothing to convert
+     * (inStart is not less than inEnd) the method returns 0 and leaves
+     * nextByteIndex at outStart.
+     *
+     * @return the number of bytes written to output.
+     * @param input array containing Unicode characters to be converted.
+     * @param inStart begin conversion at this offset in input array.
+     * @param inEnd stop conversion at this offset in input array (exclusive).
+     * @param output byte array to receive conversion result.
+     * @param outStart start writing to output array at this offset.
+     * @param outEnd stop writing to output array at this offset (exclusive).
+     * @exception ConversionBufferFullException if output array is filled prior
+     * to converting all the input.
+     * @exception IllegalStateException if substitution mode is not on.
+     */
+    public int convertAny(char[] input, int inStart, int inEnd,
+                          byte[] output, int outStart, int outEnd)
+        throws ConversionBufferFullException
+    {
+        if (!subMode) {             /* Precondition: subMode == true */
+            throw new IllegalStateException("Substitution mode is not on");
+        }
+        /* Rely on the untested precondition that the indices are meaningful */
+        int localInOff = inStart;
+        int localOutOff = outStart;
+        while (localInOff < inEnd) {
+            try {
+                /* convert() publishes its progress through charOff and
+                   byteOff, so its return value is not needed here. */
+                convert(input, localInOff, inEnd,
+                        output, localOutOff, outEnd);
+                return (nextByteIndex() - outStart);
+            } catch (MalformedInputException e) {
+                /* Write the substitution bytes directly after whatever
+                   convert() produced before it stopped. */
+                byte[] s = subBytes;
+                int subSize = s.length;
+                localOutOff = nextByteIndex();
+                if ((localOutOff + subSize) > outEnd)
+                    throw new ConversionBufferFullException();
+                System.arraycopy(s, 0, output, localOutOff, subSize);
+                localOutOff += subSize;
+                /* Resume just past the malformed input. */
+                localInOff = nextCharIndex() + badInputLength;
+                badInputLength = 0;
+            } catch (UnknownCharacterException e) {
+                /* Should never occur, since subMode == true */
+                throw new Error("UnknownCharacterException thrown "
+                                + "in substititution mode",
+                                e);
+            }
+        }
+        /* Reached when the input was empty to begin with, or when it ended
+           in malformed input that has just been substituted.  In both
+           cases convert() did not record the final output position, so
+           publish it here instead of reporting a stale byteOff. */
+        byteOff = localOutOff;
+        return (localOutOff - outStart);
+    }
+
+
+
+    /**
+     * Converts an array of Unicode characters into an array of bytes
+     * in the target character encoding.  Unlike convert, this method
+     * does not do incremental conversion.  It assumes that the given
+     * input array contains all the characters to be converted. The
+     * state of the converter is reset at the beginning of this method
+     * and is left in the reset state on successful termination.
+     * The converter is not reset if an exception is thrown.
+     * This allows the caller to determine where the bad input
+     * was encountered by calling nextCharIndex.
+     * <p>
+     * This method uses substitution mode when performing the conversion.
+     * The method setSubstitutionBytes may be used to determine what
+     * bytes are substituted.  Even though substitution mode is used,
+     * the state of the converter's substitution mode is not changed
+     * at the end of this method, whether it returns or throws.
+     *
+     * @return an array of bytes containing the converted characters.
+     * @param input array containing Unicode characters to be converted.
+     * @exception MalformedInputException if the input buffer contains any
+     * sequence of chars that is illegal in Unicode (principally unpaired
+     * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
+     * the method nextCharIndex can be called to obtain the index of the
+     * first invalid input character and getBadInputLength can be called
+     * to determine the length of the invalid input.
+     *
+     * @see   #nextCharIndex
+     * @see   #setSubstitutionMode
+     * @see   #setSubstitutionBytes
+     * @see   #getBadInputLength
+     */
+    public byte[] convertAll( char[] input ) throws MalformedInputException {
+        reset();
+        boolean savedSubMode = subMode;
+        subMode = true;
+
+        try {
+            // Allocate inside the try so that a failure here (for example
+            // a null input) still restores the caller's substitution mode.
+            byte[] output = new byte[ getMaxBytesPerChar() * input.length ];
+
+            int outputLength = convert( input, 0, input.length,
+                                        output, 0, output.length );
+            outputLength += flush( output, nextByteIndex(), output.length );
+
+            // Trim the worst-case buffer to the bytes actually produced.
+            byte[] returnedOutput = new byte[ outputLength ];
+            System.arraycopy( output, 0, returnedOutput, 0, outputLength );
+            return returnedOutput;
+        }
+        catch( ConversionBufferFullException e ) {
+            //Not supposed to happen.  If it does, getMaxBytesPerChar() lied.
+            InternalError err =
+                new InternalError("this.getMaxBytesPerChar returned bad value");
+            err.initCause(e);
+            throw err;
+        }
+        catch( UnknownCharacterException e ) {
+            // Not supposed to happen since we're in substitution mode.
+            InternalError err = new InternalError(
+                "UnknownCharacterException thrown in substitution mode");
+            err.initCause(e);
+            throw err;
+        }
+        finally {
+            subMode = savedSubMode;
+        }
+    }
+
+    /**
+     * Writes any remaining output to the output buffer and resets the
+     * converter to its initial state.
+     *
+     * @return the number of bytes written to output.
+     * @param output byte array to receive flushed output.
+     * @param outStart start writing to output array at this offset.
+     * @param outEnd stop writing to output array at this offset (exclusive).
+     * @exception MalformedInputException if the output to be flushed contained
+     * a partial or invalid multibyte character sequence.  Will occur if the
+     * input buffer on the last call to convert ended with the first character
+     * of a surrogate pair. flush will write what it can to the output buffer
+     * and reset the converter before throwing this exception.  An additional
+     * call to flush is not required.
+     * @exception ConversionBufferFullException if output array is filled
+     * before all the output can be flushed. flush will write what it can
+     * to the output buffer and remember its state.  An additional call to
+     * flush with a new output buffer will conclude the operation.
+     */
+    public abstract int flush( byte[] output, int outStart, int outEnd )
+        throws MalformedInputException, ConversionBufferFullException;
+
+    /**
+     * Writes any remaining output to the output buffer and resets the
+     * converter to its initial state. May only be called when substitution
+     * mode is turned on, and never complains about malformed input (always
+     * substitutes).
+     *
+     * @param output byte array to receive flushed output.
+     * @param outStart start writing to output array at this offset.
+     * @param outEnd stop writing to output array at this offset (exclusive).
+     * @return number of bytes written into output.
+     * @exception ConversionBufferFullException if output array is filled
+     * before all the output can be flushed. flush will write what it can
+     * to the output buffer and remember its state.  An additional call to
+     * flush with a new output buffer will conclude the operation.
+     * @exception IllegalStateException if substitution mode is not on.
+     */
+    public int flushAny( byte[] output, int outStart, int outEnd )
+        throws ConversionBufferFullException
+    {
+        if (!subMode) {             /* Precondition: subMode == true */
+            throw new IllegalStateException("Substitution mode is not on");
+        }
+        try {
+            return flush(output, outStart, outEnd);
+        } catch (MalformedInputException e) {
+            /* Assume that if a malformed input exception has occurred,
+               no useful data has been placed in the output buffer.
+               i.e. there is no mixture of left over good + some bad data.
+               Usually occurs with a trailing high surrogate pair element.
+               Special cases occur in Cp970, 949c and 933 that seem
+               to be covered, but may require further investigation */
+            byte[] s = subBytes;
+            int subSize = s.length;
+            if ((outStart + subSize) > outEnd)
+                throw new ConversionBufferFullException();
+            /* The substitution bytes become the entire flushed output. */
+            System.arraycopy(s, 0, output, outStart, subSize);
+            byteOff = charOff = 0; // Reset the internal state.
+            badInputLength = 0;
+            return subSize;
+        }
+    }
+
+    /**
+     * Resets converter to its initial state.  convertAll calls this
+     * before it starts converting.
+     */
+    public abstract void reset();
+
+    /**
+     * Returns true if the given character can be converted to the
+     * target character encoding.
+     * @return true if given character is translatable, false otherwise.
+     * @param c character to test
+     */
+    public boolean canConvert(char c) {
+        try {
+            //FIXME output buffer size should use getMaxBytesPerChar value.
+            char[] input = new char[1];
+            byte[] output = new byte[3];
+            input[0] = c;
+            convert(input, 0, 1, output, 0, 3);
+            return true;
+        } catch(CharConversionException e){
+            return false;
+        }
+    }
+
+    /**
+     * Returns the maximum number of bytes needed to convert a char. Useful
+     * for calculating the maximum output buffer size needed for a particular
+     * input buffer.  convertAll sizes its output buffer as this value
+     * times the input length, and setSubstitutionBytes rejects arrays
+     * longer than this value.
+     *
+     * @return the maximum number of bytes needed to convert a char.
+     */
+    public abstract int getMaxBytesPerChar();
+
+    /**
+     * Returns the length, in chars, of the input which caused a
+     * MalformedInputException.  Always refers to the last
+     * MalformedInputException thrown by the converter.  If none have
+     * ever been thrown, returns 0.  Note that convertAny and flushAny
+     * set the stored length back to 0 after substituting for malformed
+     * input.
+     *
+     * @return the current value of badInputLength.
+     */
+    public int getBadInputLength() {
+        return badInputLength;
+    }
+
+    /**
+     * Returns the index of the character just past
+     * the last character successfully converted by the previous call
+     * to convert.  flushAny sets the stored index to 0 when it
+     * substitutes for malformed input.
+     *
+     * @return the current value of charOff.
+     */
+    public int nextCharIndex() {
+        return charOff;
+    }
+
+    /**
+     * Returns the index of the byte just past the last byte written by
+     * the previous call to convert.  convertAny and flushAny also
+     * assign the stored index directly when they substitute for
+     * malformed input.
+     *
+     * @return the current value of byteOff.
+     */
+    public int nextByteIndex() {
+        return byteOff;
+    }
+
+    /**
+     * Sets converter into substitution mode.  In substitution mode,
+     * the converter will replace untranslatable characters in the source
+     * encoding with the substitution character set by setSubstitutionBytes.
+     * When not in substitution mode, the converter will throw an
+     * UnknownCharacterException when it encounters untranslatable input.
+     * Substitution mode is on by default, and convertAny and flushAny
+     * throw IllegalStateException while it is off.
+     *
+     * @param doSub if true, enable substitution mode.
+     * @see #setSubstitutionBytes
+     */
+    public void setSubstitutionMode(boolean doSub) {
+        subMode = doSub;
+    }
+
+    /**
+     * Replaces the bytes written in place of untranslatable input while
+     * the converter is in substitution mode.  The given bytes should
+     * represent a valid character in the target character encoding and
+     * must not be longer than the value returned by getMaxBytesPerChar
+     * for this converter.  The converter keeps its own copy of the array.
+     *
+     * @param newSubBytes the substitution bytes
+     * @exception IllegalArgumentException if given byte array is longer than
+     *    the value returned by the method getMaxBytesPerChar.
+     * @see #setSubstitutionMode
+     * @see #getMaxBytesPerChar
+     */
+    public void setSubstitutionBytes( byte[] newSubBytes )
+        throws IllegalArgumentException
+    {
+        final int requested = newSubBytes.length;
+        if( requested > getMaxBytesPerChar() ) {
+            throw new IllegalArgumentException();
+        }
+
+        // Store a private copy so later changes to the caller's array
+        // cannot alter the substitution bytes.
+        subBytes = newSubBytes.clone();
+    }
+
+    /**
+     * Returns a string representation of the class.
+     *
+     * @return the text "CharToByteConverter: " followed by the value of
+     * getCharacterEncoding.
+     */
+    @Override
+    public String toString() {
+        return "CharToByteConverter: " + getCharacterEncoding();
+    }
+}