jdk/src/share/classes/sun/io/ByteToCharConverter.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1996, 2004, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 
       
    28 import java.io.*;
       
    29 
       
    30 
       
    31 /**
       
    32  * An abstract base class for subclasses which convert character data
       
    33  * in an external encoding into Unicode characters.
       
    34  *
       
    35  * @author Asmus Freytag
       
    36  * @author Lloyd Honomichl
       
    37  *
       
    38  * @deprecated Replaced by {@link java.nio.charset}.  THIS API WILL BE
       
    39  * REMOVED IN J2SE 1.6.
       
    40  */
       
    41 @Deprecated
       
    42 public abstract class ByteToCharConverter {
       
    43 
       
    44     /*
       
    45      * Substitution mode flag.
       
    46      */
       
    47     protected boolean subMode = true;
       
    48 
       
    49     /*
       
    50      * Characters to use for automatic substitution.
       
    51      */
       
    52     protected char[] subChars = { '\uFFFD' };
       
    53 
       
    54     /*
       
    55      * Offset of next character to be output
       
    56      */
       
    57     protected int charOff;
       
    58 
       
    59     /*
       
    60      * Offset of next byte to be converted
       
    61      */
       
    62     protected int byteOff;
       
    63 
       
    64     /*
       
    65      * Length of bad input that caused a MalformedInputException.
       
    66      */
       
    67     protected int badInputLength;
       
    68 
       
    69     /**
       
    70      * Create an instance of the default ByteToCharConverter subclass.
       
    71      */
       
    72     public static ByteToCharConverter getDefault() {
       
    73         Object cvt;
       
    74         cvt = Converters.newDefaultConverter(Converters.BYTE_TO_CHAR);
       
    75         return (ByteToCharConverter)cvt;
       
    76     }
       
    77 
       
    78     /**
       
    79      * Returns appropriate ByteToCharConverter subclass instance.
       
    80      * @param string represents encoding
       
    81      */
       
    82     public static ByteToCharConverter getConverter(String encoding)
       
    83         throws UnsupportedEncodingException
       
    84     {
       
    85         Object cvt;
       
    86         cvt = Converters.newConverter(Converters.BYTE_TO_CHAR, encoding);
       
    87         return (ByteToCharConverter)cvt;
       
    88     }
       
    89 
       
    90     /**
       
    91      * Returns the character set id for the conversion
       
    92      */
       
    93     public abstract String getCharacterEncoding();
       
    94 
       
    95     /**
       
    96      * Converts an array of bytes containing characters in an external
       
    97      * encoding into an array of Unicode characters.  This  method allows
       
    98      * a buffer by buffer conversion of a data stream.  The state of the
       
    99      * conversion is saved between calls to convert.  Among other things,
       
   100      * this means multibyte input sequences can be split between calls.
       
   101      * If a call to convert results in an exception, the conversion may be
       
   102      * continued by calling convert again with suitably modified parameters.
       
   103      * All conversions should be finished with a call to the flush method.
       
   104      *
       
   105      * @return the number of bytes written to output.
       
   106      * @param input byte array containing text to be converted.
       
   107      * @param inStart begin conversion at this offset in input array.
       
   108      * @param inEnd stop conversion at this offset in input array (exclusive).
       
   109      * @param output character array to receive conversion result.
       
   110      * @param outStart start writing to output array at this offset.
       
   111      * @param outEnd stop writing to output array at this offset (exclusive).
       
   112      * @exception MalformedInputException if the input buffer contains any
       
   113      * sequence of bytes that is illegal for the input character set.
       
   114      * @exception UnknownCharacterException for any character that
       
   115      * that cannot be converted to Unicode. Thrown only when converter
       
   116      * is not in substitution mode.
       
   117      * @exception ConversionBufferFullException if output array is filled prior
       
   118      * to converting all the input.
       
   119      */
       
   120     public abstract int convert(byte[] input, int inStart, int inEnd,
       
   121                                 char[] output, int outStart, int outEnd)
       
   122             throws MalformedInputException,
       
   123                    UnknownCharacterException,
       
   124                    ConversionBufferFullException;
       
   125 
       
   126     /**
       
   127      * Converts an array of bytes containing characters in an external
       
   128      * encoding into an array of Unicode characters.  Unlike convert,
       
   129      * this method does not do incremental conversion.  It assumes that
       
   130      * the given input array contains all the characters to be
       
   131      * converted. The state of the converter is reset at the beginning
       
   132      * of this method and is left in the reset state on successful
       
   133      * termination.  The converter is not reset if an exception is
       
   134      * thrown.  This allows the caller to determine where the bad input
       
   135      * was encountered by calling nextByteIndex.
       
   136      * <p>
       
   137      * This method uses substitution mode when performing the
       
   138      * conversion.  The method setSubstitutionChars may be used to
       
   139      * determine what characters are substituted.  Even though substitution
       
   140      * mode is used, the state of the converter's substitution mode is
       
   141      * not changed at the end of this method.
       
   142      *
       
   143      * @return an array of chars containing the converted characters.
       
   144      * @param input array containing Unicode characters to be converted.
       
   145      * @exception MalformedInputException if the input buffer contains any
       
   146      * sequence of chars that is illegal in the input character encoding.
       
   147      * After this exception is thrown,
       
   148      * the method nextByteIndex can be called to obtain the index of the
       
   149      * first invalid input byte and getBadInputLength can be called
       
   150      * to determine the length of the invalid input.
       
   151      *
       
   152      * @see   #nextByteIndex
       
   153      * @see   #setSubstitutionMode
       
   154      * @see   sun.io.CharToByteConverter#setSubstitutionBytes(byte[])
       
   155      * @see   #getBadInputLength
       
   156      */
       
   157     public char[] convertAll( byte input[] ) throws MalformedInputException {
       
   158         reset();
       
   159         boolean savedSubMode = subMode;
       
   160         subMode = true;
       
   161 
       
   162         char[] output = new char[ getMaxCharsPerByte() * input.length ];
       
   163 
       
   164         try {
       
   165             int outputLength = convert( input, 0, input.length,
       
   166                                         output, 0, output.length );
       
   167             outputLength += flush( output, outputLength, output.length );
       
   168 
       
   169             char [] returnedOutput = new char[ outputLength ];
       
   170             System.arraycopy( output, 0, returnedOutput, 0, outputLength );
       
   171             return returnedOutput;
       
   172         }
       
   173         catch( ConversionBufferFullException e ) {
       
   174             //Not supposed to happen.  If it does, getMaxCharsPerByte() lied.
       
   175             throw new
       
   176                 InternalError("this.getMaxCharsBerByte returned bad value");
       
   177         }
       
   178         catch( UnknownCharacterException e ) {
       
   179             // Not supposed to happen since we're in substitution mode.
       
   180             throw new InternalError();
       
   181         }
       
   182         finally {
       
   183             subMode = savedSubMode;
       
   184         }
       
   185     }
       
   186 
       
   187     /**
       
   188      * Writes any remaining output to the output buffer and resets the
       
   189      * converter to its initial state.
       
   190      *
       
   191      * @param output char array to receive flushed output.
       
   192      * @param outStart start writing to output array at this offset.
       
   193      * @param outEnd stop writing to output array at this offset (exclusive).
       
   194      * @exception MalformedInputException if the output to be flushed contained
       
   195      * a partial or invalid multibyte character sequence.  flush will
       
   196      * write what it can to the output buffer and reset the converter before
       
   197      * throwing this exception.  An additional call to flush is not required.
       
   198      * @exception ConversionBufferFullException if output array is filled
       
   199      * before all the output can be flushed. flush will write what it can
       
   200      * to the output buffer and remember its state.  An additional call to
       
   201      * flush with a new output buffer will conclude the operation.
       
   202      */
       
   203     public abstract int flush( char[] output, int outStart, int outEnd )
       
   204         throws MalformedInputException, ConversionBufferFullException;
       
   205 
       
   206     /**
       
   207      * Resets converter to its initial state.
       
   208      */
       
   209     public abstract void reset();
       
   210 
       
   211     /**
       
   212      * Returns the maximum number of characters needed to convert a byte. Useful
       
   213      * for calculating the maximum output buffer size needed for a particular
       
   214      * input buffer.
       
   215      */
       
   216     public int getMaxCharsPerByte() {
       
   217         // Until UTF-16, this will do for every encoding
       
   218         return 1;
       
   219     }
       
   220 
       
   221     /**
       
   222      * Returns the length, in bytes, of the input which caused a
       
   223      * MalformedInputException.  Always refers to the last
       
   224      * MalformedInputException thrown by the converter.  If none have
       
   225      * ever been thrown, returns 0.
       
   226      */
       
   227     public int getBadInputLength() {
       
   228         return badInputLength;
       
   229     }
       
   230 
       
   231     /**
       
   232      * Returns the index of the  character just past the last character
       
   233      * written by the previous call to convert.
       
   234      */
       
   235     public int nextCharIndex() {
       
   236         return charOff;
       
   237     }
       
   238 
       
   239     /**
       
   240      * Returns the index of the byte just past the last byte successfully
       
   241      * converted by the previous call to convert.
       
   242      */
       
   243     public int nextByteIndex() {
       
   244         return byteOff;
       
   245     }
       
   246 
       
   247     /**
       
   248      * Sets converter into substitution mode.  In substitution mode,
       
   249      * the converter will replace untranslatable characters in the source
       
   250      * encoding with the substitution character set by setSubstitionChars.
       
   251      * When not in substitution mode, the converter will throw an
       
   252      * UnknownCharacterException when it encounters untranslatable input.
       
   253      *
       
   254      * @param doSub if true, enable substitution mode.
       
   255      * @see #setSubstitutionChars
       
   256      */
       
   257     public void setSubstitutionMode(boolean doSub) {
       
   258         subMode = doSub;
       
   259     }
       
   260 
       
   261     /**
       
   262      * Sets the substitution characters to use when the converter is in
       
   263      * substitution mode.  The given chars must not be
       
   264      * longer than the value returned by getMaxCharsPerByte for this
       
   265      * converter.
       
   266      *
       
   267      * @param newSubBytes the substitution bytes
       
   268      * @exception IllegalArgumentException if given byte array is longer than
       
   269      *    the value returned by the method getMaxBytesPerChar.
       
   270      * @see #setSubstitutionMode
       
   271      * @see #getMaxBytesPerChar
       
   272      */
       
   273     /**
       
   274      * sets the substitution character to use
       
   275      * @param c the substitution character
       
   276      */
       
   277     public void setSubstitutionChars(char[] c)
       
   278         throws IllegalArgumentException
       
   279     {
       
   280         if( c.length > getMaxCharsPerByte() ) {
       
   281             throw new IllegalArgumentException();
       
   282         }
       
   283 
       
   284         subChars = new char[ c.length ];
       
   285         System.arraycopy( c, 0, subChars, 0, c.length );
       
   286     }
       
   287 
       
   288     /**
       
   289      * returns a string representation of the character conversion
       
   290      */
       
   291     public String toString() {
       
   292         return "ByteToCharConverter: " + getCharacterEncoding();
       
   293     }
       
   294 }