jdk/src/share/classes/sun/io/CharToByteConverter.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1996, 2004, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 
       
    28 import java.io.*;
       
    29 
       
    30 
       
    31 /**
       
    32  * An abstract base class for subclasses which convert Unicode
       
    33  * characters into an external encoding.
       
    34  *
       
    35  * @author Asmus Freytag
       
    36  * @author Lloyd Honomichl, Novell, Inc.
       
    37  *
       
    38  * @deprecated Replaced by {@link java.nio.charset}.  THIS API WILL BE
       
    39  * REMOVED IN J2SE 1.6.
       
    40  */
       
    41 @Deprecated
       
    42 public abstract class CharToByteConverter {
       
    43 
       
    44     /**
       
    45      * Substitution mode flag.
       
    46      */
       
    47     protected boolean subMode = true;
       
    48 
       
    49     /**
       
    50      * Bytes to substitute for unmappable input.
       
    51      */
       
    52     protected byte[] subBytes = { (byte)'?' };
       
    53 
       
    54     /**
       
    55      * Offset of next character to be converted.
       
    56      */
       
    57     protected int charOff;
       
    58 
       
    59     /**
       
    60      * Offset of next byte to be output.
       
    61      */
       
    62     protected int byteOff;
       
    63 
       
    64     /**
       
    65      * Length of bad input that caused conversion to stop.
       
    66      */
       
    67     protected int badInputLength;
       
    68 
       
    69     /**
       
    70      * Create an instance of the default CharToByteConverter subclass.
       
    71      */
       
    72     public static CharToByteConverter getDefault() {
       
    73         Object cvt;
       
    74         cvt = Converters.newDefaultConverter(Converters.CHAR_TO_BYTE);
       
    75         return (CharToByteConverter)cvt;
       
    76     }
       
    77 
       
    78     /**
       
    79      * Returns appropriate CharToByteConverter subclass instance.
       
    80      * @param string represets encoding
       
    81      */
       
    82     public static CharToByteConverter getConverter(String encoding)
       
    83         throws UnsupportedEncodingException
       
    84     {
       
    85         Object cvt;
       
    86         cvt = Converters.newConverter(Converters.CHAR_TO_BYTE, encoding);
       
    87         return (CharToByteConverter)cvt;
       
    88     }
       
    89 
       
    90     /**
       
    91      * Returns the character set id for the conversion.
       
    92      */
       
    93     public abstract String getCharacterEncoding();
       
    94 
       
    95     /**
       
    96      * Converts an array of Unicode characters into an array of bytes
       
    97      * in the target character encoding.  This method allows a buffer by
       
    98      * buffer conversion of a data stream.  The state of the conversion is
       
    99      * saved between calls to convert.  If a call to convert results in
       
   100      * an exception, the conversion may be continued by calling convert again
       
   101      * with suitably modified parameters.  All conversions should be finished
       
   102      * with a call to the flush method.
       
   103      *
       
   104      * @return the number of bytes written to output.
       
   105      * @param input array containing Unicode characters to be converted.
       
   106      * @param inStart begin conversion at this offset in input array.
       
   107      * @param inEnd stop conversion at this offset in input array (exclusive).
       
   108      * @param output byte array to receive conversion result.
       
   109      * @param outStart start writing to output array at this offset.
       
   110      * @param outEnd stop writing to output array at this offset (exclusive).
       
   111      * @exception MalformedInputException if the input buffer contains any
       
   112      * sequence of chars that is illegal in Unicode (principally unpaired
       
   113      * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
       
   114      * the method nextCharIndex can be called to obtain the index of the
       
   115      * first invalid input character.  The MalformedInputException can
       
   116      * be queried for the length of the invalid input.
       
   117      * @exception UnknownCharacterException for any character that
       
   118      * that cannot be converted to the external character encoding. Thrown
       
   119      * only when converter is not in substitution mode.
       
   120      * @exception ConversionBufferFullException if output array is filled prior
       
   121      * to converting all the input.
       
   122      */
       
   123     public abstract int convert(char[] input, int inStart, int inEnd,
       
   124                                 byte[] output, int outStart, int outEnd)
       
   125         throws MalformedInputException,
       
   126                UnknownCharacterException,
       
   127                ConversionBufferFullException;
       
   128 
       
   129     /*
       
   130      * Converts any array of characters, including malformed surrogate
       
   131      * pairs, into an array of bytes in the target character encoding.
       
   132      * A precondition is that substitution mode is turned on. This method
       
   133      * allows a buffer by buffer conversion of a data stream.
       
   134      * The state of the conversion is saved between calls to convert.
       
   135      * All conversions should be finished with a call to the flushAny method.
       
   136      *
       
   137      * @return the number of bytes written to output.
       
   138      * @param input array containing Unicode characters to be converted.
       
   139      * @param inStart begin conversion at this offset in input array.
       
   140      * @param inEnd stop conversion at this offset in input array (exclusive).
       
   141      * @param output byte array to receive conversion result.
       
   142      * @param outStart start writing to output array at this offset.
       
   143      * @param outEnd stop writing to output array at this offset (exclusive).
       
   144      * @exception ConversionBufferFullException if output array is filled prior
       
   145      * to converting all the input.
       
   146      */
       
   147     public int convertAny(char[] input, int inStart, int inEnd,
       
   148                           byte[] output, int outStart, int outEnd)
       
   149         throws ConversionBufferFullException
       
   150     {
       
   151         if (!subMode) {             /* Precondition: subMode == true */
       
   152             throw new IllegalStateException("Substitution mode is not on");
       
   153         }
       
   154         /* Rely on the untested precondition that the indices are meaningful */
       
   155         /* For safety, use the public interface to charOff and byteOff, but
       
   156            badInputLength is directly modified.*/
       
   157         int localInOff = inStart;
       
   158         int localOutOff = outStart;
       
   159         while(localInOff < inEnd) {
       
   160             try {
       
   161                 int discard = convert(input, localInOff, inEnd,
       
   162                                       output, localOutOff, outEnd);
       
   163                 return (nextByteIndex() - outStart);
       
   164             } catch (MalformedInputException e) {
       
   165                 byte[] s = subBytes;
       
   166                 int subSize = s.length;
       
   167                 localOutOff = nextByteIndex();
       
   168                 if ((localOutOff + subSize) > outEnd)
       
   169                     throw new ConversionBufferFullException();
       
   170                 for (int i = 0; i < subSize; i++)
       
   171                     output[localOutOff++] = s[i];
       
   172                 localInOff = nextCharIndex();
       
   173                 localInOff += badInputLength;
       
   174                 badInputLength = 0;
       
   175                 if (localInOff >= inEnd){
       
   176                     byteOff = localOutOff;
       
   177                     return (byteOff - outStart);
       
   178                 }
       
   179                 continue;
       
   180             }catch (UnknownCharacterException e) {
       
   181                 /* Should never occur, since subMode == true */
       
   182                 throw new Error("UnknownCharacterException thrown "
       
   183                                 + "in substititution mode",
       
   184                                 e);
       
   185             }
       
   186         }
       
   187         return (nextByteIndex() - outStart);
       
   188     }
       
   189 
       
   190 
       
   191 
       
   192     /**
       
   193      * Converts an array of Unicode characters into an array of bytes
       
   194      * in the target character encoding.  Unlike convert, this method
       
   195      * does not do incremental conversion.  It assumes that the given
       
   196      * input array contains all the characters to be converted. The
       
   197      * state of the converter is reset at the beginning of this method
       
   198      * and is left in the reset state on successful termination.
       
   199      * The converter is not reset if an exception is thrown.
       
   200      * This allows the caller to determine where the bad input
       
   201      * was encountered by calling nextCharIndex.
       
   202      * <p>
       
   203      * This method uses substitution mode when performing the conversion.
       
   204      * The method setSubstitutionBytes may be used to determine what
       
   205      * bytes are substituted.  Even though substitution mode is used,
       
   206      * the state of the converter's substitution mode is not changed
       
   207      * at the end of this method.
       
   208      *
       
   209      * @return an array of bytes containing the converted characters.
       
   210      * @param input array containing Unicode characters to be converted.
       
   211      * @exception MalformedInputException if the input buffer contains any
       
   212      * sequence of chars that is illegal in Unicode (principally unpaired
       
   213      * surrogates and \uFFFF or \uFFFE). After this exception is thrown,
       
   214      * the method nextCharIndex can be called to obtain the index of the
       
   215      * first invalid input character and getBadInputLength can be called
       
   216      * to determine the length of the invalid input.
       
   217      *
       
   218      * @see   #nextCharIndex
       
   219      * @see   #setSubstitutionMode
       
   220      * @see   #setSubstitutionBytes
       
   221      * @see   #getBadInputLength
       
   222      */
       
   223     public byte[] convertAll( char input[] ) throws MalformedInputException {
       
   224         reset();
       
   225         boolean savedSubMode = subMode;
       
   226         subMode = true;
       
   227 
       
   228         byte[] output = new byte[ getMaxBytesPerChar() * input.length ];
       
   229 
       
   230         try {
       
   231             int outputLength = convert( input, 0, input.length,
       
   232                                         output, 0, output.length );
       
   233             outputLength += flush( output, nextByteIndex(), output.length );
       
   234 
       
   235             byte [] returnedOutput = new byte[ outputLength ];
       
   236             System.arraycopy( output, 0, returnedOutput, 0, outputLength );
       
   237             return returnedOutput;
       
   238         }
       
   239         catch( ConversionBufferFullException e ) {
       
   240             //Not supposed to happen.  If it does, getMaxBytesPerChar() lied.
       
   241             throw new
       
   242                 InternalError("this.getMaxBytesPerChar returned bad value");
       
   243         }
       
   244         catch( UnknownCharacterException e ) {
       
   245             // Not supposed to happen since we're in substitution mode.
       
   246             throw new InternalError();
       
   247         }
       
   248         finally {
       
   249             subMode = savedSubMode;
       
   250         }
       
   251     }
       
   252 
       
   253     /**
       
   254      * Writes any remaining output to the output buffer and resets the
       
   255      * converter to its initial state.
       
   256      *
       
   257      * @param output byte array to receive flushed output.
       
   258      * @param outStart start writing to output array at this offset.
       
   259      * @param outEnd stop writing to output array at this offset (exclusive).
       
   260      * @exception MalformedInputException if the output to be flushed contained
       
   261      * a partial or invalid multibyte character sequence.  Will occur if the
       
   262      * input buffer on the last call to convert ended with the first character
       
   263      * of a surrogate pair. flush will write what it can to the output buffer
       
   264      * and reset the converter before throwing this exception.  An additional
       
   265      * call to flush is not required.
       
   266      * @exception ConversionBufferFullException if output array is filled
       
   267      * before all the output can be flushed. flush will write what it can
       
   268      * to the output buffer and remember its state.  An additional call to
       
   269      * flush with a new output buffer will conclude the operation.
       
   270      */
       
   271     public abstract int flush( byte[] output, int outStart, int outEnd )
       
   272         throws MalformedInputException, ConversionBufferFullException;
       
   273 
       
   274     /**
       
   275      * Writes any remaining output to the output buffer and resets the
       
   276      * converter to its initial state. May only be called when substitution
       
   277      * mode is turned on, and never complains about malformed input (always
       
   278      * substitutes).
       
   279      *
       
   280      * @param output byte array to receive flushed output.
       
   281      * @param outStart start writing to output array at this offset.
       
   282      * @param outEnd stop writing to output array at this offset (exclusive).
       
   283      * @return number of bytes writter into output.
       
   284      * @exception ConversionBufferFullException if output array is filled
       
   285      * before all the output can be flushed. flush will write what it can
       
   286      * to the output buffer and remember its state.  An additional call to
       
   287      * flush with a new output buffer will conclude the operation.
       
   288      */
       
   289     public int flushAny( byte[] output, int outStart, int outEnd )
       
   290         throws ConversionBufferFullException
       
   291     {
       
   292         if (!subMode) {             /* Precondition: subMode == true */
       
   293             throw new IllegalStateException("Substitution mode is not on");
       
   294         }
       
   295         try {
       
   296             return flush(output, outStart, outEnd);
       
   297         } catch (MalformedInputException e) {
       
   298             /* Assume that if a malformed input exception has occurred,
       
   299                no useful data has been placed in the output buffer.
       
   300                i.e. there is no mixture of left over good + some bad data.
       
   301                Usually occurs with a trailing high surrogate pair element.
       
   302                Special cases occur in Cp970, 949c and 933 that seem
       
   303                to be covered, but may require further investigation */
       
   304             int subSize = subBytes.length;
       
   305             byte[] s = subBytes;
       
   306             int outIndex = outStart;
       
   307             if ((outStart + subSize) > outEnd)
       
   308                 throw new ConversionBufferFullException();
       
   309             for (int i = 0; i < subSize; i++)
       
   310                 output[outIndex++] = s[i];
       
   311             byteOff = charOff = 0; // Reset the internal state.
       
   312             badInputLength = 0;
       
   313             return subSize;
       
   314         }
       
   315     }
       
   316 
       
   317     /**
       
   318      * Resets converter to its initial state.
       
   319      */
       
   320     public abstract void reset();
       
   321 
       
   322     /**
       
   323      * Returns true if the given character can be converted to the
       
   324      * target character encoding.
       
   325      * @return true if given character is translatable, false otherwise.
       
   326      * @param c character to test
       
   327      */
       
   328     public boolean canConvert(char c) {
       
   329         try {
       
   330             //FIXME output buffer size should use getMaxBytesPerChar value.
       
   331             char[] input = new char[1];
       
   332             byte[] output = new byte[3];
       
   333             input[0] = c;
       
   334             convert(input, 0, 1, output, 0, 3);
       
   335             return true;
       
   336         } catch(CharConversionException e){
       
   337             return false;
       
   338         }
       
   339     }
       
   340 
       
   341     /**
       
   342      * Returns the maximum number of bytes needed to convert a char. Useful
       
   343      * for calculating the maximum output buffer size needed for a particular
       
   344      * input buffer.
       
   345      */
       
   346     public abstract int getMaxBytesPerChar();
       
   347 
       
   348     /**
       
   349      * Returns the length, in chars, of the input which caused a
       
   350      * MalformedInputException.  Always refers to the last
       
   351      * MalformedInputException thrown by the converter.  If none have
       
   352      * ever been thrown, returns 0.
       
   353      */
       
   354     public int getBadInputLength() {
       
   355         return badInputLength;
       
   356     }
       
   357 
       
   358     /**
       
   359      * Returns the index of the character just past
       
   360      * the last character successfully converted by the previous call
       
   361      * to convert.
       
   362      */
       
   363     public int nextCharIndex() {
       
   364         return charOff;
       
   365     }
       
   366 
       
   367     /**
       
   368      * Returns the index of the byte just past the last byte written by
       
   369      * the previous call to convert.
       
   370      */
       
   371     public int nextByteIndex() {
       
   372         return byteOff;
       
   373     }
       
   374 
       
   375     /**
       
   376      * Sets converter into substitution mode.  In substitution mode,
       
   377      * the converter will replace untranslatable characters in the source
       
   378      * encoding with the substitution character set by setSubstitutionBytes.
       
   379      * When not in substitution mode, the converter will throw an
       
   380      * UnknownCharacterException when it encounters untranslatable input.
       
   381      *
       
   382      * @param doSub if true, enable substitution mode.
       
   383      * @see #setSubstitutionBytes
       
   384      */
       
   385     public void setSubstitutionMode(boolean doSub) {
       
   386         subMode = doSub;
       
   387     }
       
   388 
       
   389     /**
       
   390      * Sets the substitution bytes to use when the converter is in
       
   391      * substitution mode.  The given bytes should represent a valid
       
   392      * character in the target character encoding and must not be
       
   393      * longer than the value returned by getMaxBytesPerChar for this
       
   394      * converter.
       
   395      *
       
   396      * @param newSubBytes the substitution bytes
       
   397      * @exception IllegalArgumentException if given byte array is longer than
       
   398      *    the value returned by the method getMaxBytesPerChar.
       
   399      * @see #setSubstitutionMode
       
   400      * @see #getMaxBytesPerChar
       
   401      */
       
   402     public void setSubstitutionBytes( byte[] newSubBytes )
       
   403         throws IllegalArgumentException
       
   404     {
       
   405         if( newSubBytes.length > getMaxBytesPerChar() ) {
       
   406             throw new IllegalArgumentException();
       
   407         }
       
   408 
       
   409         subBytes = new byte[ newSubBytes.length ];
       
   410         System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length );
       
   411     }
       
   412 
       
   413     /**
       
   414      * Returns a string representation of the class.
       
   415      */
       
   416     public String toString() {
       
   417         return "CharToByteConverter: " + getCharacterEncoding();
       
   418     }
       
   419 }