jdk/src/share/classes/sun/io/CharToByteEUC.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1997, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 package sun.io;
       
    26 
       
    27 public abstract class CharToByteEUC extends CharToByteConverter
       
    28 {
       
    29 
       
    30     private char highHalfZoneCode;
       
    31     private byte[] outputByte;
       
    32 
       
    33     protected short  index1[];
       
    34     protected String index2;
       
    35     protected String index2a;
       
    36     protected String index2b;
       
    37     protected String index2c;
       
    38     protected int    mask1;
       
    39     protected int    mask2;
       
    40     protected int    shift;
       
    41 
       
    42     private byte[] workByte = new byte[4];
       
    43 
       
    44     /**
       
    45       * flush out any residual data and reset the buffer state
       
    46       */
       
    47     public int flush(byte [] output, int outStart, int outEnd)
       
    48         throws MalformedInputException, ConversionBufferFullException
       
    49     {
       
    50 
       
    51        if (highHalfZoneCode != 0) {
       
    52           reset();
       
    53           badInputLength = 0;
       
    54           throw new MalformedInputException();
       
    55        }
       
    56 
       
    57        reset();
       
    58        return 0;
       
    59     }
       
    60 
       
    61     /**
       
    62      * Character conversion
       
    63      */
       
    64     public int convert(char[] input, int inOff, int inEnd,
       
    65                        byte[] output, int outOff, int outEnd)
       
    66         throws UnknownCharacterException, MalformedInputException,
       
    67                ConversionBufferFullException
       
    68     {
       
    69         char    inputChar;
       
    70         int     inputSize;
       
    71 
       
    72         byteOff = outOff;
       
    73         charOff = inOff;
       
    74 
       
    75         while(charOff < inEnd) {
       
    76 
       
    77            outputByte = workByte;
       
    78 
       
    79            int     index;
       
    80            int     theBytes;
       
    81            int     spaceNeeded;
       
    82            boolean allZeroes = true;
       
    83            int     i;
       
    84 
       
    85 
       
    86            if (highHalfZoneCode == 0) {
       
    87               inputChar = input[charOff];
       
    88               inputSize = 1;
       
    89            } else {
       
    90               inputChar = highHalfZoneCode;
       
    91               inputSize = 0;
       
    92               highHalfZoneCode = 0;
       
    93            }
       
    94 
       
    95 
       
    96            // Is this a high surrogate?
       
    97            if(inputChar >= '\ud800' && inputChar <= '\udbff') {
       
    98               // Is this the last character of the input?
       
    99               if (charOff + inputSize >= inEnd) {
       
   100                  highHalfZoneCode = inputChar;
       
   101                  charOff += inputSize;
       
   102                  break;
       
   103               }
       
   104 
       
   105               // Is there a low surrogate following?
       
   106               inputChar = input[charOff + inputSize];
       
   107               if (inputChar >= '\udc00' && inputChar <= '\udfff') {
       
   108 
       
   109                  // We have a valid surrogate pair.  Too bad we don't do
       
   110                  // surrogates.  Is substitution enabled?
       
   111                  if (subMode) {
       
   112                     outputByte = subBytes;
       
   113                     inputSize++;
       
   114                  } else {
       
   115                     badInputLength = 2;
       
   116                     throw new UnknownCharacterException();
       
   117                  }
       
   118               } else {
       
   119 
       
   120                  // We have a malformed surrogate pair
       
   121                  badInputLength = 1;
       
   122                  throw new MalformedInputException();
       
   123               }
       
   124            }
       
   125 
       
   126            // Is this an unaccompanied low surrogate?
       
   127            else
       
   128               if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   129                  badInputLength = 1;
       
   130                  throw new MalformedInputException();
       
   131               } else {
       
   132 
       
   133                  String theChars;
       
   134                  char   aChar;
       
   135 
       
   136                  // We have a valid character, get the bytes for it
       
   137                  index = index1[((inputChar & mask1) >> shift)] + (inputChar & mask2);
       
   138 
       
   139                  if (index < 7500)
       
   140                    theChars = index2;
       
   141                  else
       
   142                    if (index < 15000) {
       
   143                      index = index - 7500;
       
   144                      theChars = index2a;
       
   145                    }
       
   146                    else
       
   147                      if (index < 22500){
       
   148                        index = index - 15000;
       
   149                        theChars = index2b;
       
   150                      }
       
   151                      else {
       
   152                        index = index - 22500;
       
   153                        theChars = index2c;
       
   154                      }
       
   155 
       
   156                  aChar = theChars.charAt(2*index);
       
   157                  outputByte[0] = (byte)((aChar & 0xff00)>>8);
       
   158                  outputByte[1] = (byte)(aChar & 0x00ff);
       
   159                  aChar = theChars.charAt(2*index + 1);
       
   160                  outputByte[2] = (byte)((aChar & 0xff00)>>8);
       
   161                  outputByte[3] = (byte)(aChar & 0x00ff);
       
   162               }
       
   163 
       
   164            // if there was no mapping - look for substitution characters
       
   165 
       
   166            for (i = 0; i < outputByte.length; i++) {
       
   167              if (outputByte[i] != 0x00) {
       
   168                allZeroes = false;
       
   169                break;
       
   170              }
       
   171            }
       
   172 
       
   173            if (allZeroes && inputChar != '\u0000')
       
   174            {
       
   175               if (subMode) {
       
   176                  outputByte = subBytes;
       
   177               } else {
       
   178                 badInputLength = 1;
       
   179                 throw new UnknownCharacterException();
       
   180               }
       
   181            }
       
   182 
       
   183            int oindex = 0;
       
   184            for (spaceNeeded = outputByte.length; spaceNeeded > 1; spaceNeeded--){
       
   185              if (outputByte[oindex++] != 0x00 )
       
   186                break;
       
   187            }
       
   188 
       
   189            if (byteOff + spaceNeeded > outEnd)
       
   190               throw new ConversionBufferFullException();
       
   191 
       
   192 
       
   193            for (i = outputByte.length - spaceNeeded; i < outputByte.length; i++) {
       
   194               output[byteOff++] = outputByte[i];
       
   195            }
       
   196 
       
   197            charOff += inputSize;
       
   198         }
       
   199 
       
   200         return byteOff - outOff;
       
   201     }
       
   202 
       
   203     /**
       
   204      * Resets converter to its initial state.
       
   205      */
       
   206     public void reset() {
       
   207        charOff = byteOff = 0;
       
   208        highHalfZoneCode = 0;
       
   209     }
       
   210 
       
   211     /**
       
   212      * Returns the maximum number of bytes needed to convert a char.
       
   213      */
       
   214     public int getMaxBytesPerChar() {
       
   215         return 2;
       
   216     }
       
   217 
       
   218 
       
   219     /**
       
   220      * Returns true if the given character can be converted to the
       
   221      * target character encoding.
       
   222      */
       
   223     public boolean canConvert(char ch) {
       
   224        int    index;
       
   225        String theChars;
       
   226 
       
   227        index = index1[((ch & mask1) >> shift)] + (ch & mask2);
       
   228 
       
   229        if (index < 7500)
       
   230          theChars = index2;
       
   231        else
       
   232          if (index < 15000) {
       
   233            index = index - 7500;
       
   234            theChars = index2a;
       
   235          }
       
   236          else
       
   237            if (index < 22500){
       
   238              index = index - 15000;
       
   239              theChars = index2b;
       
   240            }
       
   241            else {
       
   242              index = index - 22500;
       
   243              theChars = index2c;
       
   244            }
       
   245 
       
   246        if (theChars.charAt(2*index) != '\u0000' ||
       
   247                     theChars.charAt(2*index + 1) != '\u0000')
       
   248          return (true);
       
   249 
       
   250        // only return true if input char was unicode null - all others are
       
   251        //     undefined
       
   252        return( ch == '\u0000');
       
   253 
       
   254     }
       
   255 
       
   256 }