jdk/src/share/classes/sun/io/ByteToCharUnicode.java
changeset 10372 2f6d68f22eae
parent 10321 64f7ee2f31dd
parent 10371 7da2112e4236
child 10373 d4c5e59b82f8
equal deleted inserted replaced
10321:64f7ee2f31dd 10372:2f6d68f22eae
     1 /*
       
     2  * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.io;
       
    27 import java.io.*;
       
    28 
       
    29 
       
    30 /**
       
    31  * Convert byte arrays containing Unicode characters into arrays of actual
       
    32  * Unicode characters.  This class may be used directly, in which case it
       
    33  * expects the input byte array to begin with a byte-order mark, or it may be
       
    34  * subclassed in order to preset the byte order and mark behavior.
       
    35  *
       
    36  * <p> Whether or not a mark is expected, if a mark that does not match the
       
    37  * established byte order is later discovered then a
       
    38  * <tt>MalformedInputException</tt> will be thrown by the <tt>convert</tt>
       
    39  * method.  If a correct mark is seen later in the input stream, it is passed
       
    40  * through as a character.
       
    41  *
       
    42  * @see ByteToCharUnicodeLittle
       
    43  * @see ByteToCharUnicodeLittleUnmarked
       
    44  * @see ByteToCharUnicodeBig
       
    45  * @see ByteToCharUnicodeBigUnmarked
       
    46  *
       
    47  * @author      Mark Reinhold
       
    48  */
       
    49 
       
    50 public class ByteToCharUnicode extends ByteToCharConverter {
       
    51 
       
    52     static final char BYTE_ORDER_MARK = (char) 0xfeff;
       
    53     static final char REVERSED_MARK = (char) 0xfffe;
       
    54 
       
    55     static final int AUTO = 0;
       
    56     static final int BIG = 1;
       
    57     static final int LITTLE = 2;
       
    58 
       
    59     int originalByteOrder;      /* Byte order specified at creation */
       
    60     int byteOrder;              /* Byte order in use */
       
    61     boolean usesMark;           /* Look for a mark and interpret it */
       
    62 
       
    63     /**
       
    64      * Creates a Unicode byte-to-char converter that expects the first pair of
       
    65      * input bytes to be a byte-order mark, which will be interpreted and
       
    66      * discarded.  If the first pair of bytes is not such a mark then a
       
    67      * <tt>MalformedInputException</tt> will be thrown by the convert method.
       
    68      */
       
    69     public ByteToCharUnicode() {
       
    70         originalByteOrder = byteOrder = AUTO;
       
    71         usesMark = true;
       
    72     }
       
    73 
       
    74     /**
       
    75      * Creates a Unicode byte-to-char converter that uses the given byte order
       
    76      * and may or may not insist upon an initial byte-order mark.
       
    77      */
       
    78     protected ByteToCharUnicode(int bo, boolean m) {
       
    79         originalByteOrder = byteOrder = bo;
       
    80         usesMark = m;
       
    81     }
       
    82 
       
    83     public String getCharacterEncoding() {
       
    84         switch (originalByteOrder) {
       
    85         case BIG:
       
    86             return usesMark ? "UnicodeBig" : "UnicodeBigUnmarked";
       
    87         case LITTLE:
       
    88             return usesMark ? "UnicodeLittle" : "UnicodeLittleUnmarked";
       
    89         default:
       
    90             return "Unicode";
       
    91         }
       
    92     }
       
    93 
       
    94     boolean started = false;
       
    95     int leftOverByte;
       
    96     boolean leftOver = false;
       
    97 
       
    98     public int convert(byte[] in, int inOff, int inEnd,
       
    99                        char[] out, int outOff, int outEnd)
       
   100         throws ConversionBufferFullException, MalformedInputException
       
   101     {
       
   102         byteOff = inOff;
       
   103         charOff = outOff;
       
   104 
       
   105         if (inOff >= inEnd)
       
   106             return 0;
       
   107 
       
   108         int b1, b2;
       
   109         int bc = 0;
       
   110         int inI = inOff, outI = outOff;
       
   111 
       
   112         if (leftOver) {
       
   113             b1 = leftOverByte & 0xff;
       
   114             leftOver = false;
       
   115         }
       
   116         else {
       
   117             b1 = in[inI++] & 0xff;
       
   118         }
       
   119         bc = 1;
       
   120 
       
   121         if (usesMark && !started) {     /* Read initial byte-order mark */
       
   122             if (inI < inEnd) {
       
   123                 b2 = in[inI++] & 0xff;
       
   124                 bc = 2;
       
   125 
       
   126                 char c = (char) ((b1 << 8) | b2);
       
   127                 int bo = AUTO;
       
   128 
       
   129                 if (c == BYTE_ORDER_MARK)
       
   130                     bo = BIG;
       
   131                 else if (c == REVERSED_MARK)
       
   132                     bo = LITTLE;
       
   133 
       
   134                 if (byteOrder == AUTO) {
       
   135                     if (bo == AUTO) {
       
   136                         badInputLength = bc;
       
   137                         throw new
       
   138                             MalformedInputException("Missing byte-order mark");
       
   139                     }
       
   140                     byteOrder = bo;
       
   141                     if (inI < inEnd) {
       
   142                         b1 = in[inI++] & 0xff;
       
   143                         bc = 1;
       
   144                     }
       
   145                 }
       
   146                 else if (bo == AUTO) {
       
   147                     inI--;
       
   148                     bc = 1;
       
   149                 }
       
   150                 else if (byteOrder == bo) {
       
   151                     if (inI < inEnd) {
       
   152                         b1 = in[inI++] & 0xff;
       
   153                         bc = 1;
       
   154                     }
       
   155                 }
       
   156                 else {
       
   157                     badInputLength = bc;
       
   158                     throw new
       
   159                         MalformedInputException("Incorrect byte-order mark");
       
   160                 }
       
   161 
       
   162                 started = true;
       
   163             }
       
   164         }
       
   165 
       
   166         /* Loop invariant: (b1 contains the next input byte) && (bc == 1) */
       
   167         while (inI < inEnd) {
       
   168             b2 = in[inI++] & 0xff;
       
   169             bc = 2;
       
   170 
       
   171             char c;
       
   172             if (byteOrder == BIG)
       
   173                 c = (char) ((b1 << 8) | b2);
       
   174             else
       
   175                 c = (char) ((b2 << 8) | b1);
       
   176 
       
   177             if (c == REVERSED_MARK)
       
   178                 throw new
       
   179                     MalformedInputException("Reversed byte-order mark");
       
   180 
       
   181             if (outI >= outEnd)
       
   182                 throw new ConversionBufferFullException();
       
   183             out[outI++] = c;
       
   184             byteOff = inI;
       
   185             charOff = outI;
       
   186 
       
   187             if (inI < inEnd) {
       
   188                 b1 = in[inI++] & 0xff;
       
   189                 bc = 1;
       
   190             }
       
   191         }
       
   192 
       
   193         if (bc == 1) {
       
   194             leftOverByte = b1;
       
   195             byteOff = inI;
       
   196             leftOver = true;
       
   197         }
       
   198 
       
   199         return outI - outOff;
       
   200     }
       
   201 
       
   202     public void reset() {
       
   203         leftOver = false;
       
   204         byteOff = charOff = 0;
       
   205         started = false;
       
   206         byteOrder = originalByteOrder;
       
   207     }
       
   208 
       
   209     public int flush(char buf[], int off, int len)
       
   210         throws MalformedInputException
       
   211     {
       
   212         if (leftOver) {
       
   213             reset();
       
   214             throw new MalformedInputException();
       
   215         }
       
   216         byteOff = charOff = 0;
       
   217         return 0;
       
   218     }
       
   219 
       
   220 }