jdk/src/share/classes/sun/io/CharToByteUTF8.java
author duke
Sat, 01 Dec 2007 00:00:00 +0000
changeset 2 90ce3da70b43
child 5506 202f599c92aa
permissions -rw-r--r--
Initial load
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
     2
 * Copyright 1996-1997 Sun Microsystems, Inc.  All Rights Reserved.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Sun designates this
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
     9
 * by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    21
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    22
 * CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    23
 * have any questions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
package sun.io;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
 * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
 * The UTF-8 encoding is laid out as follows (# = number of bytes).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * # Bits   Bit pattern
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * 1    7   0xxxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * 2   11   110xxxxx 10xxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * 3   16   1110xxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 * 4   21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * 5   26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * 6   31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 *     UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
public class CharToByteUTF8 extends CharToByteConverter {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
    private char highHalfZoneCode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
    public int flush(byte[] output, int outStart, int outEnd)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
        throws MalformedInputException
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
        if (highHalfZoneCode != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
            highHalfZoneCode = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
            badInputLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
            throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
        byteOff = charOff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
        return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
     * Character conversion
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
    public int convert(char[] input, int inOff, int inEnd,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
                       byte[] output, int outOff, int outEnd)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
        throws ConversionBufferFullException, MalformedInputException
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
        char inputChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
        byte[] outputByte = new byte[6];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
        int inputSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
        int outputSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
        charOff = inOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
        byteOff = outOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
        if (highHalfZoneCode != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
            inputChar = highHalfZoneCode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
            highHalfZoneCode = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
            if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
                // This is legal UTF16 sequence.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
                int ucs4 = (highHalfZoneCode - 0xd800) * 0x400
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
                    + (input[inOff] - 0xdc00) + 0x10000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
                output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
                output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
                output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
                output[3] = (byte)(0x80 | (ucs4 & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
                charOff++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
                highHalfZoneCode = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
                // This is illegal UTF16 sequence.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
                badInputLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
                throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
        while(charOff < inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
            inputChar = input[charOff];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
            if (inputChar < 0x80) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
                outputByte[0] = (byte)inputChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
                inputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
                outputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
            } else if (inputChar < 0x800) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
                outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
                outputByte[1] = (byte)(0x80 | (inputChar & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
                inputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
                outputSize = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
            } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
                // this is <high-half zone code> in UTF-16
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
                if (charOff + 1 >= inEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
                    highHalfZoneCode = inputChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
                // check next char is valid <low-half zone code>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
                char lowChar = input[charOff + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
                if (lowChar < 0xdc00 || lowChar > 0xdfff) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
                    badInputLength = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
                    throw new MalformedInputException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
                int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
                    + 0x10000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
                outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
                outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
                outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
                outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
                outputSize = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
                inputSize = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
                outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
                outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
                outputByte[2] = (byte)(0x80 | (inputChar & 0x3f));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
                inputSize = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
                outputSize = 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
            if (byteOff + outputSize > outEnd) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
                throw new ConversionBufferFullException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
            for (int i = 0; i < outputSize; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
                output[byteOff++] = outputByte[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
            charOff += inputSize;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
        return byteOff - outOff;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
    public boolean canConvert(char ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
        return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    public int getMaxBytesPerChar() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
        return 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
    public void reset() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
        byteOff = charOff = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
        highHalfZoneCode = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    public String getCharacterEncoding() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
        return "UTF8";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
}