jdk/src/share/classes/sun/io/CharToByteSingleByte.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1996, 2008, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 
       
    28 import static sun.nio.cs.CharsetMapping.*;
       
    29 
       
    30 /**
       
    31 * A table driven conversion from char to byte for single byte
       
    32 * character sets.  Tables will reside in the class CharToByteYYYYY,
       
    33 * where YYYYY is a unique character set identifier
       
    34 
       
    35     < TBD: Tables are of the form... >
       
    36 
       
    37 *
       
    38 * @author Lloyd Honomichl
       
    39 * @author Asmus Freytag
       
    40 * @version 8/28/96
       
    41 */
       
    42 
       
    43 public abstract class CharToByteSingleByte extends CharToByteConverter {
       
    44 
       
    45     /*
       
    46      * 1st level index, provided by subclass
       
    47      */
       
    48     protected char[] index1;
       
    49 
       
    50     /*
       
    51      * 2nd level index, provided by subclass
       
    52      */
       
    53     protected char[] index2;
       
    54 
       
    55     /*
       
    56      * Mask to isolate bits for 1st level index, from subclass
       
    57      */
       
    58     protected int   mask1;
       
    59 
       
    60     /*
       
    61      * Mask to isolate bits for 2nd level index, from subclass
       
    62      */
       
    63     protected int   mask2;
       
    64 
       
    65     /*
       
    66      * Shift to isolate bits for 1st level index, from subclass
       
    67      */
       
    68     protected int   shift;
       
    69 
       
    70     private char highHalfZoneCode;
       
    71 
       
    72     public char[] getIndex1() {
       
    73         return index1;
       
    74     }
       
    75 
       
    76     public char[] getIndex2() {
       
    77         return index2;
       
    78     }
       
    79     public int flush(byte[] output, int outStart, int outEnd)
       
    80         throws MalformedInputException
       
    81     {
       
    82         if (highHalfZoneCode != 0) {
       
    83             highHalfZoneCode = 0;
       
    84             badInputLength = 0;
       
    85             throw new MalformedInputException();
       
    86         }
       
    87         byteOff = charOff = 0;
       
    88         return 0;
       
    89     }
       
    90 
       
    91     /**
       
    92      * Converts characters to sequences of bytes.
       
    93      * Conversions that result in Exceptions can be restarted by calling
       
    94      * convert again, with appropriately modified parameters.
       
    95      * @return the characters written to output.
       
    96      * @param input char array containing text in Unicode
       
    97      * @param inStart offset in input array
       
    98      * @param inEnd offset of last byte to be converted
       
    99      * @param output byte array to receive conversion result
       
   100      * @param outStart starting offset
       
   101      * @param outEnd offset of last byte to be written to
       
   102      * @throw MalformedInputException for any sequence of chars that is
       
   103      * illegal in Unicode (principally unpaired surrogates
       
   104      * and \uFFFF or \uFFFE), including any partial surrogate pair
       
   105      * which occurs at the end of an input buffer.
       
   106      * @throw UnsupportedCharacterException for any character that
       
   107      * that cannot be converted to the external character set.
       
   108      */
       
   109     public int convert(char[] input, int inOff, int inEnd,
       
   110                        byte[] output, int outOff, int outEnd)
       
   111         throws MalformedInputException,
       
   112                UnknownCharacterException,
       
   113                ConversionBufferFullException
       
   114     {
       
   115         char    inputChar;          // Input character to be converted
       
   116         byte[]  outputByte;         // Output byte written to output
       
   117         int     inputSize;          // Size of input
       
   118         int     outputSize;         // Size of output
       
   119 
       
   120         byte[]  tmpArray = new byte[1];
       
   121 
       
   122         // Record beginning offsets
       
   123         charOff = inOff;
       
   124         byteOff = outOff;
       
   125 
       
   126         if (highHalfZoneCode != 0) {
       
   127             inputChar = highHalfZoneCode;
       
   128             highHalfZoneCode = 0;
       
   129             if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
       
   130                 // This is legal UTF16 sequence.
       
   131                 badInputLength = 1;
       
   132                 throw new UnknownCharacterException();
       
   133             } else {
       
   134                 // This is illegal UTF16 sequence.
       
   135                 badInputLength = 0;
       
   136                 throw new MalformedInputException();
       
   137             }
       
   138         }
       
   139 
       
   140         // Loop until we hit the end of the input
       
   141         while(charOff < inEnd) {
       
   142 
       
   143             outputByte = tmpArray;
       
   144 
       
   145             // Get the input character
       
   146             inputChar = input[charOff];
       
   147 
       
   148             // Default output size
       
   149             outputSize = 1;
       
   150 
       
   151             // Assume this is a simple character
       
   152             inputSize = 1;
       
   153 
       
   154             // Is this a high surrogate?
       
   155             if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
       
   156                 // Is this the last character in the input?
       
   157                 if (charOff + 1 >= inEnd) {
       
   158                     highHalfZoneCode = inputChar;
       
   159                     break;
       
   160                 }
       
   161 
       
   162                 // Is there a low surrogate following?
       
   163                 inputChar = input[charOff + 1];
       
   164                 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   165                     // We have a valid surrogate pair.  Too bad we don't map
       
   166                     //  surrogates.  Is substitution enabled?
       
   167                     if (subMode) {
       
   168                         outputByte = subBytes;
       
   169                         outputSize = subBytes.length;
       
   170                         inputSize = 2;
       
   171                     } else {
       
   172                         badInputLength = 2;
       
   173                         throw new UnknownCharacterException();
       
   174                     }
       
   175                 } else {
       
   176                     // We have a malformed surrogate pair
       
   177                     badInputLength = 1;
       
   178                     throw new MalformedInputException();
       
   179                 }
       
   180             }
       
   181 
       
   182             // Is this an unaccompanied low surrogate?
       
   183             else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   184                 badInputLength = 1;
       
   185                 throw new MalformedInputException();
       
   186             }
       
   187 
       
   188             // Not part of a surrogate, so look it up
       
   189             else {
       
   190                 // Get output using two level lookup
       
   191                 outputByte[0] = getNative(inputChar);
       
   192 
       
   193                 // Might this character be unmappable?
       
   194                 if (outputByte[0] == 0) {
       
   195                     // If outputByte is zero because the input was zero
       
   196                     //  then this character is actually mappable
       
   197                     if (input[charOff] != '\u0000') {
       
   198                         // We have an unmappable character
       
   199                         // Is substitution enabled?
       
   200                         if (subMode) {
       
   201                             outputByte = subBytes;
       
   202                             outputSize = subBytes.length;
       
   203                         } else {
       
   204                             badInputLength = 1;
       
   205                             throw new UnknownCharacterException();
       
   206                         }
       
   207                     }
       
   208                 }
       
   209             }
       
   210 
       
   211             // If we don't have room for the output, throw an exception
       
   212             if (byteOff + outputSize > outEnd)
       
   213                 throw new ConversionBufferFullException();
       
   214 
       
   215             // Put the byte in the output buffer
       
   216             for (int i = 0; i < outputSize; i++) {
       
   217                 output[byteOff++] = outputByte[i];
       
   218             }
       
   219             charOff += inputSize;
       
   220 
       
   221         }
       
   222 
       
   223         // Return the length written to the output buffer
       
   224         return byteOff - outOff;
       
   225     }
       
   226 
       
   227     /**
       
   228      * the maximum number of bytes needed to hold a converted char
       
   229      * @returns the maximum number of bytes needed for a converted char
       
   230      */
       
   231     public int getMaxBytesPerChar() {
       
   232         return 1;
       
   233     }
       
   234 
       
   235     int encodeChar(char ch) {
       
   236         char index = index1[ch >> 8];
       
   237         if (index == UNMAPPABLE_ENCODING)
       
   238             return UNMAPPABLE_ENCODING;
       
   239         return index2[index + (ch & 0xff)];
       
   240     }
       
   241 
       
   242     public byte getNative(char inputChar) {
       
   243         int b = encodeChar(inputChar);
       
   244         if (b == UNMAPPABLE_ENCODING)
       
   245             return 0;
       
   246         return (byte)b;
       
   247     }
       
   248 
       
   249     /**
       
   250      *  Resets the converter.
       
   251      * Call this method to reset the converter to its initial state
       
   252      */
       
   253     public void reset() {
       
   254         byteOff = charOff = 0;
       
   255         highHalfZoneCode = 0;
       
   256     }
       
   257 
       
   258     /**
       
   259      * Return whether a character is mappable or not
       
   260      * @return true if a character is mappable
       
   261      */
       
   262     public boolean canConvert(char ch) {
       
   263         return encodeChar(ch) != UNMAPPABLE_ENCODING;
       
   264     }
       
   265 }