jdk/src/share/classes/sun/io/CharToByteISO2022JP.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 import java.io.*;
       
    28 
       
    29 public class CharToByteISO2022JP extends CharToByteJIS0208 {
       
    30 
       
    31     private static final int ASCII = 0;                 // ESC ( B
       
    32     private static final int JISX0201_1976 = 1;         // ESC ( J
       
    33     private static final int JISX0208_1978 = 2;         // ESC $ @
       
    34     private static final int JISX0208_1983 = 3;         // ESC $ B
       
    35     private static final int JISX0201_1976_KANA = 4;    // ESC ( I
       
    36 
       
    37     private char highHalfZoneCode;
       
    38     private boolean flushed = true;
       
    39 
       
    40     // JIS is state full encoding, so currentMode keep the
       
    41     // current codeset
       
    42     private int currentMode = ASCII;
       
    43 
       
    44     /**
       
    45      * Bytes for substitute for unmappable input.
       
    46      */
       
    47     // XXX: Assumes subBytes are ASCII string. Need to change Escape sequence
       
    48     // for other character sets.
       
    49     protected byte[] subBytesEscape = { (byte)0x1b, (byte)0x28, (byte)0x42 }; // ESC ( B
       
    50     protected int subBytesMode = ASCII;
       
    51 
       
    52     public int flush(byte[] output, int outStart, int outEnd)
       
    53         throws MalformedInputException, ConversionBufferFullException
       
    54     {
       
    55         if (highHalfZoneCode != 0) {
       
    56             highHalfZoneCode = 0;
       
    57             badInputLength = 0;
       
    58             throw new MalformedInputException();
       
    59         }
       
    60 
       
    61         if (!flushed && (currentMode != ASCII)) {
       
    62             if (outEnd - outStart < 3) {
       
    63                 throw new ConversionBufferFullException();
       
    64             }
       
    65             output[outStart]     = (byte)0x1b;
       
    66             output[outStart + 1] = (byte)0x28;
       
    67             output[outStart + 2] = (byte)0x42;
       
    68             byteOff += 3;
       
    69             byteOff = charOff = 0;
       
    70             flushed = true;
       
    71             currentMode = ASCII;
       
    72             return 3;
       
    73         }
       
    74         return 0;
       
    75     }
       
    76 
       
    77     public int convert(char[] input, int inOff, int inEnd,
       
    78                        byte[] output, int outOff, int outEnd)
       
    79         throws MalformedInputException, UnknownCharacterException,
       
    80                ConversionBufferFullException
       
    81 
       
    82     {
       
    83         char    inputChar;          // Input character to be converted
       
    84         int     inputSize;          // Size of the input
       
    85         int     outputSize;         // Size of the output
       
    86 
       
    87         // Buffer for output bytes
       
    88         byte[]  tmpArray = new byte[6];
       
    89         byte[]  outputByte;
       
    90 
       
    91         flushed = false;
       
    92 
       
    93         // Make copies of input and output indexes
       
    94         charOff = inOff;
       
    95         byteOff = outOff;
       
    96 
       
    97         if (highHalfZoneCode != 0) {
       
    98             inputChar = highHalfZoneCode;
       
    99             highHalfZoneCode = 0;
       
   100             if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
       
   101                 // This is legal UTF16 sequence.
       
   102                 badInputLength = 1;
       
   103                 throw new UnknownCharacterException();
       
   104             } else {
       
   105                 // This is illegal UTF16 sequence.
       
   106                 badInputLength = 0;
       
   107                 throw new MalformedInputException();
       
   108             }
       
   109         }
       
   110 
       
   111         // Loop until we run out of input
       
   112         while(charOff < inEnd) {
       
   113             outputByte = tmpArray;
       
   114             int newMode = currentMode; // Trace character mode changing
       
   115 
       
   116             // Get the input character
       
   117             inputChar = input[charOff];
       
   118             inputSize = 1;
       
   119             outputSize = 1;
       
   120 
       
   121             // Is this a high surrogate?
       
   122             if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
       
   123                 // Is this the last character of the input?
       
   124                 if (charOff + 1 >= inEnd) {
       
   125                     highHalfZoneCode = inputChar;
       
   126                     break;
       
   127                 }
       
   128 
       
   129                 // Is there a low surrogate following?
       
   130                 inputChar = input[charOff + 1];
       
   131                 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   132                     // We have a valid surrogate pair.  Too bad we don't do
       
   133                     // surrogates.  Is substitution enabled?
       
   134                     if (subMode) {
       
   135                         if (currentMode != subBytesMode) {
       
   136                             System.arraycopy(subBytesEscape, 0, outputByte, 0,
       
   137                                              subBytesEscape.length);
       
   138                             outputSize = subBytesEscape.length;
       
   139                             System.arraycopy(subBytes, 0, outputByte,
       
   140                                              outputSize, subBytes.length);
       
   141                             outputSize += subBytes.length;
       
   142                             newMode = subBytesMode;
       
   143                         } else {
       
   144                             outputByte = subBytes;
       
   145                             outputSize = subBytes.length;
       
   146                         }
       
   147                         inputSize = 2;
       
   148                     } else {
       
   149                         badInputLength = 2;
       
   150                         throw new UnknownCharacterException();
       
   151                     }
       
   152                 } else {
       
   153                     // We have a malformed surrogate pair
       
   154                     badInputLength = 1;
       
   155                     throw new MalformedInputException();
       
   156                 }
       
   157             }
       
   158 
       
   159             // Is this an unaccompanied low surrogate?
       
   160             else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
       
   161                 badInputLength = 1;
       
   162                 throw new MalformedInputException();
       
   163             } else {
       
   164                 // Not part of a surrogate
       
   165 
       
   166                 // Does this map to the Roman range?
       
   167                 if (inputChar <= '\u007F') {
       
   168                     if (currentMode != ASCII) {
       
   169                         outputByte[0] = (byte)0x1b;
       
   170                         outputByte[1] = (byte)0x28;
       
   171                         outputByte[2] = (byte)0x42;
       
   172                         outputByte[3] = (byte)inputChar;
       
   173                         outputSize = 4;
       
   174                         newMode = ASCII;
       
   175                     } else {
       
   176                         outputByte[0] = (byte)inputChar;
       
   177                         outputSize = 1;
       
   178                     }
       
   179                 }
       
   180                 // Is it a single byte kana?
       
   181                 else if (inputChar >= 0xFF61 && inputChar <= 0xFF9F) {
       
   182                     if (currentMode != JISX0201_1976_KANA) {
       
   183                         outputByte[0] = (byte)0x1b;
       
   184                         outputByte[1] = (byte)0x28;
       
   185                         outputByte[2] = (byte)0x49;
       
   186                         outputByte[3] = (byte)(inputChar - 0xff40);
       
   187                         outputSize = 4;
       
   188                         newMode = JISX0201_1976_KANA;
       
   189                     } else {
       
   190                         outputByte[0] = (byte)(inputChar - 0xff40);
       
   191                         outputSize = 1;
       
   192                     }
       
   193                 }
       
   194                 // Is it a yen sign?
       
   195                 else if (inputChar == '\u00A5') {
       
   196                     if (currentMode != JISX0201_1976) {
       
   197                         outputByte[0] = (byte)0x1b;
       
   198                         outputByte[1] = (byte)0x28;
       
   199                         outputByte[2] = (byte)0x4a;
       
   200                         outputByte[3] = (byte)0x5c;
       
   201                         outputSize = 4;
       
   202                         newMode = JISX0201_1976;
       
   203                     } else {
       
   204                         outputByte[0] = (byte)0x5C;
       
   205                         outputSize = 1;
       
   206                     }
       
   207                 }
       
   208                 // Is it a tilde?
       
   209                 else if (inputChar == '\u203E')
       
   210                     {
       
   211                         if (currentMode != JISX0201_1976) {
       
   212                             outputByte[0] = (byte)0x1b;
       
   213                             outputByte[1] = (byte)0x28;
       
   214                             outputByte[2] = (byte)0x4a;
       
   215                             outputByte[3] = (byte)0x7e;
       
   216                             outputSize = 4;
       
   217                             newMode = JISX0201_1976;
       
   218                         } else {
       
   219                             outputByte[0] = (byte)0x7e;
       
   220                             outputSize = 1;
       
   221                         }
       
   222                     }
       
   223                 // Is it a JIS-X-0208 character?
       
   224                 else {
       
   225                     int index = getNative(inputChar);
       
   226                     if (index != 0) {
       
   227                         if (currentMode != JISX0208_1983) {
       
   228                             outputByte[0] = (byte)0x1b;
       
   229                             outputByte[1] = (byte)0x24;
       
   230                             outputByte[2] = (byte)0x42;
       
   231                             outputByte[3] = (byte)(index >> 8);
       
   232                             outputByte[4] = (byte)(index & 0xff);
       
   233                             outputSize = 5;
       
   234                             newMode = JISX0208_1983;
       
   235                         } else {
       
   236                             outputByte[0] = (byte)(index >> 8);
       
   237                             outputByte[1] = (byte)(index & 0xff);
       
   238                             outputSize = 2;
       
   239                         }
       
   240                     }
       
   241                     // It doesn't map to JIS-0208!
       
   242                     else {
       
   243                         if (subMode) {
       
   244                             if (currentMode != subBytesMode) {
       
   245                                 System.arraycopy(subBytesEscape, 0, outputByte, 0,
       
   246                                                  subBytesEscape.length);
       
   247                                 outputSize = subBytesEscape.length;
       
   248                                 System.arraycopy(subBytes, 0, outputByte,
       
   249                                                  outputSize, subBytes.length);
       
   250                                 outputSize += subBytes.length;
       
   251                                 newMode = subBytesMode;
       
   252                             } else {
       
   253                                 outputByte = subBytes;
       
   254                                 outputSize = subBytes.length;
       
   255                             }
       
   256                         } else {
       
   257                             badInputLength = 1;
       
   258                             throw new UnknownCharacterException();
       
   259                         }
       
   260                     }
       
   261                 }
       
   262             }
       
   263 
       
   264             // Is there room in the output buffer?
       
   265             // XXX: The code assumes output buffer can hold at least 5 bytes,
       
   266             // in this coverter case. However, there is no way for apps to
       
   267             // see how many bytes will be necessary for next call.
       
   268             // getMaxBytesPerChar() should be overriden in every subclass of
       
   269             // CharToByteConverter and reflect real value (5 for this).
       
   270             if (byteOff + outputSize > outEnd)
       
   271                 throw new ConversionBufferFullException();
       
   272 
       
   273             // Put the output into the buffer
       
   274             for ( int i = 0 ; i < outputSize ; i++ )
       
   275                 output[byteOff++] = outputByte[i];
       
   276 
       
   277             // Advance the input pointer
       
   278             charOff += inputSize;
       
   279 
       
   280             // We can successfuly output the characters, changes
       
   281             // current mode. Fix for 4251646.
       
   282             currentMode = newMode;
       
   283         }
       
   284 
       
   285         // return mode ASCII at the end
       
   286         if (currentMode != ASCII){
       
   287             if (byteOff + 3 > outEnd)
       
   288                 throw new ConversionBufferFullException();
       
   289 
       
   290             output[byteOff++] = 0x1b;
       
   291             output[byteOff++] = 0x28;
       
   292             output[byteOff++] = 0x42;
       
   293             currentMode = ASCII;
       
   294         }
       
   295 
       
   296         // Return the length written to the output buffer
       
   297         return byteOff-outOff;
       
   298     }
       
   299 
       
   300     // Reset
       
   301     public void reset() {
       
   302         highHalfZoneCode = 0;
       
   303         byteOff = charOff = 0;
       
   304         currentMode = ASCII;
       
   305     }
       
   306 
       
   307     /**
       
   308      * returns the maximum number of bytes needed to convert a char
       
   309      */
       
   310     public int getMaxBytesPerChar() {
       
   311         return 8;
       
   312     }
       
   313 
       
   314     // Return the character set ID
       
   315     public String getCharacterEncoding() {
       
   316         return "ISO2022JP";
       
   317     }
       
   318 
       
   319 }