jdk/src/share/classes/sun/io/ByteToCharUTF8.java
author sherman
Fri, 19 Jun 2009 14:39:06 -0700
changeset 3052 5c9886498f31
parent 2 90ce3da70b43
child 4818 fd477db6c4ee
permissions -rw-r--r--
6299219: euro sign failed to be printed in Console on Localized Windows platform with GBK encoding 4891024: EUC-KR and JOHAB converters need to be updated to include two new characters 4287467: Character converter generator tool Summary: Migrated some of the doublebyte charsets to the new implementation. Reviewed-by: okutsu
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
     2
 * Copyright 1996-1997 Sun Microsystems, Inc.  All Rights Reserved.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Sun designates this
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
     9
 * by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    21
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    22
 * CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    23
 * have any questions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
package sun.io;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * see CharToByteUTF8.java about UTF-8 format
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
public class ByteToCharUTF8 extends ByteToCharConverter {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
    private int savedSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
    private byte[] savedBytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
    public ByteToCharUTF8() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
        super();
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
        savedSize = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
        savedBytes = new byte[5];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
    public int flush(char[] output, int outStart, int outEnd)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
        throws MalformedInputException
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
        if (savedSize != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
            savedSize = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
            badInputLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
            throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
        byteOff = charOff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
        return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
     * Character converson
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
    public int convert(byte[] input, int inOff, int inEnd,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
                       char[] output, int outOff, int outEnd)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
        throws MalformedInputException, ConversionBufferFullException
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
        int byte1, byte2, byte3, byte4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
        char[] outputChar = new char[2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
        int outputSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
        int byteOffAdjustment = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
        if (savedSize != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
            byte[] newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
            newBuf = new byte[inEnd - inOff + savedSize];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
            for (int i = 0; i < savedSize; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
                newBuf[i] = savedBytes[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
            System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
            input = newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
            inOff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
            inEnd = newBuf.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
            byteOffAdjustment = -savedSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
            savedSize = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
        charOff = outOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
        byteOff = inOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
        int startByteOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
        while(byteOff < inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
            startByteOff = byteOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
            byte1 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
            if ((byte1 & 0x80) == 0){
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
                outputChar[0] = (char)byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
                outputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
            } else if ((byte1 & 0xe0) == 0xc0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
                if (byteOff >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
                    savedSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
                    savedBytes[0] = (byte)byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
                byte2 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
                if ((byte2 & 0xc0) != 0x80) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
                    badInputLength = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
                    byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
                    throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
                outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
                outputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
            } else if ((byte1 & 0xf0) == 0xe0){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
                if (byteOff + 1 >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
                        savedBytes[0] = (byte)byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
                    if (byteOff >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
                        savedSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
                        savedSize = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
                        savedBytes[1] = (byte)input[byteOff++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
                byte2 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
                byte3 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
                if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
                    badInputLength = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
                    byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
                    throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
                outputChar[0] = (char)(((byte1 & 0x0f) << 12)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
                                       | ((byte2 & 0x3f) << 6)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
                                       | (byte3 & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
                outputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
            } else if ((byte1 & 0xf8) == 0xf0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
                if (byteOff + 2 >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
                    savedBytes[0] = (byte)byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
                    if (byteOff >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
                        savedSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
                    } else if (byteOff + 1 >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
                        savedSize = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
                        savedBytes[1] = (byte)input[byteOff++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
                        savedSize = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
                        savedBytes[1] = (byte)input[byteOff++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
                        savedBytes[2] = (byte)input[byteOff++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
                byte2 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
                byte3 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
                byte4 = input[byteOff++] & 0xff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
                if ((byte2 & 0xc0) != 0x80 ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
                    (byte3 & 0xc0) != 0x80 ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
                    (byte4 & 0xc0) != 0x80) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
                    badInputLength = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
                    byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
                    throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
                // this byte sequence is UTF16 character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
                int ucs4 = (int)(0x07 & byte1) << 18 |
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
                           (int)(0x3f & byte2) << 12 |
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
                           (int)(0x3f & byte3) <<  6 |
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
                           (int)(0x3f & byte4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
                outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
                outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
                outputSize = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
                badInputLength = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
                byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
                throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
            if (charOff + outputSize > outEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
                byteOff = startByteOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
                byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
                throw new ConversionBufferFullException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
            for (int i = 0; i < outputSize; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
                output[charOff + i] = outputChar[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
            charOff += outputSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        byteOff += byteOffAdjustment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        return charOff - outOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
     * Return the character set id
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
    public String getCharacterEncoding() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        return "UTF8";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
     *   Reset after finding bad input
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
    public void reset() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
        byteOff = charOff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
        savedSize = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
}