jdk/src/share/classes/sun/io/ByteToCharJISAutoDetect.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 package sun.io;
       
    26 
       
    27 import java.io.UnsupportedEncodingException;
       
    28 import sun.nio.cs.ext.JISAutoDetect;
       
    29 
       
    30 public class ByteToCharJISAutoDetect extends ByteToCharConverter {
       
    31 
       
    32     private final static int EUCJP_MASK = 0x01;
       
    33     private final static int SJIS2B_MASK = 0x02;
       
    34     private final static int SJIS1B_MASK = 0x04;
       
    35     private final static int EUCJP_KANA1_MASK = 0x08;
       
    36     private final static int EUCJP_KANA2_MASK = 0x10;
       
    37     private final static byte[] maskTable1 = JISAutoDetect.getByteMask1();
       
    38     private final static byte[] maskTable2 = JISAutoDetect.getByteMask2();
       
    39 
       
    40     private final static int SS2 = 0x8e;
       
    41     private final static int SS3 = 0x8f;
       
    42 
       
    43     // SJISName is set to either "SJIS" or "MS932"
       
    44     private String SJISName;
       
    45     private String EUCJPName;
       
    46 
       
    47     private String convName = null;
       
    48     private ByteToCharConverter detectedConv = null;
       
    49     private ByteToCharConverter defaultConv = null;
       
    50 
       
    51     public ByteToCharJISAutoDetect() {
       
    52         super();
       
    53         SJISName = CharacterEncoding.getSJISName();
       
    54         EUCJPName = CharacterEncoding.getEUCJPName();
       
    55         defaultConv = new ByteToCharISO8859_1();
       
    56         defaultConv.subChars = subChars;
       
    57         defaultConv.subMode = subMode;
       
    58     }
       
    59 
       
    60     public int flush(char [] output, int outStart, int outEnd)
       
    61         throws MalformedInputException, ConversionBufferFullException
       
    62     {
       
    63         badInputLength = 0;
       
    64         if(detectedConv != null)
       
    65              return detectedConv.flush(output, outStart, outEnd);
       
    66         else
       
    67              return defaultConv.flush(output, outStart, outEnd);
       
    68     }
       
    69 
       
    70 
       
    71     /**
       
    72      * Character conversion
       
    73      */
       
    74     public int convert(byte[] input, int inOff, int inEnd,
       
    75                        char[] output, int outOff, int outEnd)
       
    76         throws UnknownCharacterException, MalformedInputException,
       
    77                ConversionBufferFullException
       
    78     {
       
    79         int num = 0;
       
    80 
       
    81         charOff = outOff;
       
    82         byteOff = inOff;
       
    83 
       
    84         try {
       
    85             if (detectedConv == null) {
       
    86                 int euckana = 0;
       
    87                 int ss2count = 0;
       
    88                 int firstmask = 0;
       
    89                 int secondmask = 0;
       
    90                 int cnt;
       
    91                 boolean nonAsciiFound = false;
       
    92 
       
    93                 for (cnt = inOff; cnt < inEnd; cnt++) {
       
    94                     firstmask = 0;
       
    95                     secondmask = 0;
       
    96                     int byte1 = input[cnt]&0xff;
       
    97                     int byte2;
       
    98 
       
    99                     // TODO: should check valid escape sequences!
       
   100                     if (byte1 == 0x1b) {
       
   101                         convName = "ISO2022JP";
       
   102                         break;
       
   103                     }
       
   104 
       
   105                     // Try to convert all leading ASCII characters.
       
   106                     if ((nonAsciiFound == false) && (byte1 < 0x80)) {
       
   107                         if (charOff >= outEnd)
       
   108                             throw new ConversionBufferFullException();
       
   109                         output[charOff++] = (char) byte1;
       
   110                         byteOff++;
       
   111                         num++;
       
   112                         continue;
       
   113                     }
       
   114 
       
   115                     // We can no longer convert ASCII.
       
   116                     nonAsciiFound = true;
       
   117 
       
   118                     firstmask = maskTable1[byte1];
       
   119                     if (byte1 == SS2)
       
   120                         ss2count++;
       
   121 
       
   122                     if (firstmask != 0) {
       
   123                         if (cnt+1 < inEnd) {
       
   124                             byte2 = input[++cnt] & 0xff;
       
   125                             secondmask = maskTable2[byte2];
       
   126                             int mask = firstmask & secondmask;
       
   127                             if (mask == EUCJP_MASK) {
       
   128                                 convName = EUCJPName;
       
   129                                 break;
       
   130                             }
       
   131                             if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK)
       
   132                                 || (JISAutoDetect.canBeSJIS1B(firstmask) && secondmask == 0)) {
       
   133                                 convName = SJISName;
       
   134                                 break;
       
   135                             }
       
   136 
       
   137                             // If the first byte is a SS3 and the third byte
       
   138                             // is not an EUC byte, it should be SJIS.
       
   139                             // Otherwise, we can't determine it yet, but it's
       
   140                             // very likely SJIS. So we don't take the EUCJP CS3
       
   141                             // character boundary. If we tried both
       
   142                             // possibilities here, it might be able to be
       
   143                             // determined correctly.
       
   144                             if ((byte1 == SS3) && JISAutoDetect.canBeEUCJP(secondmask)) {
       
   145                                 if (cnt+1 < inEnd) {
       
   146                                     int nextbyte = input[cnt+1] & 0xff;
       
   147                                     if (! JISAutoDetect.canBeEUCJP(maskTable2[nextbyte]))
       
   148                                         convName = SJISName;
       
   149                                 } else
       
   150                                     convName = SJISName;
       
   151                             }
       
   152                             if (JISAutoDetect.canBeEUCKana(firstmask, secondmask))
       
   153                                 euckana++;
       
   154                         } else {
       
   155                             if ((firstmask & SJIS1B_MASK) != 0) {
       
   156                                 convName = SJISName;
       
   157                                 break;
       
   158                             }
       
   159                         }
       
   160                     }
       
   161                 }
       
   162 
       
   163                 if (nonAsciiFound && (convName == null)) {
       
   164                     if ((euckana > 1) || (ss2count > 1))
       
   165                         convName = EUCJPName;
       
   166                     else
       
   167                         convName = SJISName;
       
   168                 }
       
   169 
       
   170                 if (convName != null) {
       
   171                     try {
       
   172                         detectedConv = ByteToCharConverter.getConverter(convName);
       
   173                         detectedConv.subChars = subChars;
       
   174                         detectedConv.subMode = subMode;
       
   175                     } catch (UnsupportedEncodingException e){
       
   176                         detectedConv = null;
       
   177                         convName = null;
       
   178                     }
       
   179                 }
       
   180             }
       
   181         } catch (ConversionBufferFullException bufferFullException) {
       
   182                 throw bufferFullException;
       
   183         } catch (Exception e) {
       
   184             // If we fail to detect the converter needed for any reason,
       
   185             // use the default converter.
       
   186             detectedConv = defaultConv;
       
   187         }
       
   188 
       
   189         // If we've converted all ASCII characters, then return.
       
   190         if (byteOff == inEnd) {
       
   191             return num;
       
   192         }
       
   193 
       
   194         if(detectedConv != null) {
       
   195             try {
       
   196                 num += detectedConv.convert(input, inOff + num, inEnd,
       
   197                                             output, outOff + num, outEnd);
       
   198             } finally {
       
   199                 charOff = detectedConv.nextCharIndex();
       
   200                 byteOff = detectedConv.nextByteIndex();
       
   201                 badInputLength = detectedConv.badInputLength;
       
   202             }
       
   203         } else {
       
   204             try {
       
   205                 num += defaultConv.convert(input, inOff + num, inEnd,
       
   206                                            output, outOff + num, outEnd);
       
   207             } finally {
       
   208                 charOff = defaultConv.nextCharIndex();
       
   209                 byteOff = defaultConv.nextByteIndex();
       
   210                 badInputLength = defaultConv.badInputLength;
       
   211             }
       
   212         }
       
   213         return num;
       
   214     }
       
   215 
       
   216     public void reset() {
       
   217         if(detectedConv != null) {
       
   218              detectedConv.reset();
       
   219              detectedConv = null;
       
   220              convName = null;
       
   221         } else
       
   222              defaultConv.reset();
       
   223         charOff = byteOff = 0;
       
   224     }
       
   225 
       
   226     public String getCharacterEncoding() {
       
   227         return "JISAutoDetect";
       
   228     }
       
   229 
       
   230     public String toString() {
       
   231         String s = getCharacterEncoding();
       
   232         if (detectedConv != null) {
       
   233             s += "[" + detectedConv.getCharacterEncoding() + "]";
       
   234         } else {
       
   235             s += "[unknown]";
       
   236         }
       
   237         return s;
       
   238     }
       
   239 }