jdk/test/sun/nio/cs/TestUTF8.java
author never
Mon, 12 Jul 2010 22:27:18 -0700
changeset 5926 a36f90d986b6
parent 5506 202f599c92aa
child 9547 454881baaca0
permissions -rw-r--r--
6968385: malformed xml in sweeper logging Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1091
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
     1
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    23
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    24
/*
 * @test
 * @bug 4486841
 * @summary Test UTF-8 charset
 */
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    29
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    30
import java.nio.charset.*;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    31
import java.nio.*;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    32
import java.util.*;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    33
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    34
public class TestUTF8 {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    35
    static char[] decode(byte[] bb, String csn, boolean testDirect)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    36
        throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    37
        CharsetDecoder dec = Charset.forName(csn).newDecoder();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    38
        ByteBuffer bbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    39
        CharBuffer cbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    40
        if (testDirect) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    41
            bbf = ByteBuffer.allocateDirect(bb.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    42
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    43
            bbf.put(bb).flip();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    44
        } else {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    45
            bbf = ByteBuffer.wrap(bb);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    46
            cbf = CharBuffer.allocate(bb.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    47
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    48
        CoderResult cr = dec.decode(bbf, cbf, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    49
        if (cr != CoderResult.UNDERFLOW)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    50
            throw new RuntimeException("Decoding err: " + csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    51
        char[] cc = new char[cbf.position()];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    52
        cbf.flip(); cbf.get(cc);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    53
        return cc;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    54
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    55
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    56
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    57
    static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    58
        throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    59
        CharsetDecoder dec = Charset.forName(csn).newDecoder();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    60
        ByteBuffer bbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    61
        CharBuffer cbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    62
        if (testDirect) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    63
            bbf = ByteBuffer.allocateDirect(bb.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    64
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    65
            bbf.put(bb).flip();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    66
        } else {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    67
            bbf = ByteBuffer.wrap(bb);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    68
            cbf = CharBuffer.allocate(bb.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    69
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    70
        return dec.decode(bbf, cbf, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    71
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    72
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    73
    static byte[] encode(char[] cc, String csn, boolean testDirect)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    74
        throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    75
        ByteBuffer bbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    76
        CharBuffer cbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    77
        CharsetEncoder enc = Charset.forName(csn).newEncoder();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    78
        if (testDirect) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    79
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    80
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    81
            cbf.put(cc).flip();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    82
        } else {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    83
            bbf = ByteBuffer.allocate(cc.length * 4);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    84
            cbf = CharBuffer.wrap(cc);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    85
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    86
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    87
        CoderResult cr = enc.encode(cbf, bbf, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    88
        if (cr != CoderResult.UNDERFLOW)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    89
            throw new RuntimeException("Encoding err: " + csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    90
        byte[] bb = new byte[bbf.position()];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    91
        bbf.flip(); bbf.get(bb);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    92
        return bb;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    93
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    94
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    95
    static CoderResult encodeCR(char[] cc, String csn, boolean testDirect)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    96
        throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    97
        ByteBuffer bbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    98
        CharBuffer cbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
    99
        CharsetEncoder enc = Charset.forName(csn).newEncoder();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   100
        if (testDirect) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   101
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   102
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   103
            cbf.put(cc).flip();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   104
        } else {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   105
            bbf = ByteBuffer.allocate(cc.length * 4);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   106
            cbf = CharBuffer.wrap(cc);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   107
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   108
        return enc.encode(cbf, bbf, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   109
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   110
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   111
    static char[] getUTFChars() {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   112
        char[] cc = new char[0x10000 - 0xe000 + 0xd800 + //bmp
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   113
                             (0x110000 - 0x10000) * 2];    //supp
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   114
        int pos = 0;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   115
        int i = 0;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   116
        for (i = 0; i < 0xd800; i++)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   117
            cc[pos++] = (char)i;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   118
        for (i = 0xe000; i < 0x10000; i++)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   119
            cc[pos++] = (char)i;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   120
        for (i = 0x10000; i < 0x110000; i++) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   121
            pos += Character.toChars(i, cc, pos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   122
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   123
        return cc;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   124
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   125
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   126
    static int to3ByteUTF8(char c, byte[] bb, int pos) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   127
        bb[pos++] = (byte)(0xe0 | ((c >> 12)));
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   128
        bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   129
        bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   130
        return 3;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   131
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   132
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   133
    static void checkRoundtrip(String csn) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   134
        System.out.printf("    Check roundtrip <%s>...", csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   135
        char[] cc = getUTFChars();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   136
        byte[] bb = encode(cc, csn, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   137
        char[] ccO = decode(bb, csn, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   138
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   139
        if (!Arrays.equals(cc, ccO)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   140
            System.out.printf("    non-direct failed");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   141
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   142
        bb = encode(cc, csn, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   143
        ccO = decode(bb, csn, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   144
        if (!Arrays.equals(cc, ccO)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   145
            System.out.printf("    (direct) failed");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   146
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   147
        System.out.println();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   148
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   149
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   150
    static void check6ByteSurrs(String csn) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   151
        System.out.printf("    Check 6-byte Surrogates <%s>...%n", csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   152
        byte[] bb = new byte[(0x110000 - 0x10000) * 6];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   153
        char[] cc = new char[(0x110000 - 0x10000) * 2];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   154
        int bpos = 0;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   155
        int cpos = 0;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   156
        for (int i = 0x10000; i < 0x110000; i++) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   157
            Character.toChars(i, cc, cpos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   158
            bpos += to3ByteUTF8(cc[cpos], bb, bpos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   159
            bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   160
            cpos += 2;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   161
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   162
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   163
        char[] ccO = decode(bb, csn, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   164
        if (!Arrays.equals(cc, ccO)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   165
            System.out.printf("    decoding failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   166
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   167
        ccO = decode(bb, csn, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   168
        if (!Arrays.equals(cc, ccO)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   169
            System.out.printf("    decoding(direct) failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   170
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   171
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   172
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   173
    static void compare(String csn1, String csn2) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   174
        System.out.printf("    Diff <%s> <%s>...%n", csn1, csn2);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   175
        char[] cc = getUTFChars();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   176
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   177
        byte[] bb1 = encode(cc, csn1, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   178
        byte[] bb2 = encode(cc, csn2, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   179
        if (!Arrays.equals(bb1, bb2))
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   180
            System.out.printf("        encoding failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   181
        char[] cc1 = decode(bb1, csn1, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   182
        char[] cc2 = decode(bb1, csn2, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   183
        if (!Arrays.equals(cc1, cc2)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   184
            System.out.printf("        decoding failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   185
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   186
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   187
        bb1 = encode(cc, csn1, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   188
        bb2 = encode(cc, csn2, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   189
        if (!Arrays.equals(bb1, bb2))
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   190
            System.out.printf("        encoding (direct) failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   191
        cc1 = decode(bb1, csn1, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   192
        cc2 = decode(bb1, csn2, true);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   193
        if (!Arrays.equals(cc1, cc2)) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   194
            System.out.printf("        decoding (direct) failed%n");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   195
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   196
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   197
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   198
    // The first byte is the length of malformed bytes
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   199
    static byte[][] malformed = {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   200
        // One-byte sequences:
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   201
        {1, (byte)0xFF },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   202
        {1, (byte)0xC0 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   203
        {1, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   204
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   205
        {1, (byte)0xFF, (byte)0xFF}, // all ones
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   206
        {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   207
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   208
        // Two-byte sequences:
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   209
        {1, (byte)0xC0, (byte)0x80}, // invalid first byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   210
        {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   211
        {1, (byte)0xC2, (byte)0x00}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   212
        {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   213
        {1, (byte)0xD0, (byte)0x00}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   214
        {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   215
        {1, (byte)0xDF, (byte)0x00}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   216
        {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   217
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   218
        // Three-byte sequences
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   219
        {1, (byte)0xE0, (byte)0x80, (byte)0x80},  // 111x first byte first nibble
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   220
        {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   221
        {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   222
        {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   223
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   224
        {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   225
        {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   226
        {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   227
        {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   228
        {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   229
        {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   230
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   231
        // Four-byte sequences
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   232
        {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   233
        {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   234
        {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   235
        {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   236
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   237
        {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   238
        {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   239
        {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   240
        {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   241
        {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid third byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   242
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   243
        {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   244
        {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   245
        {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   246
        {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   247
        {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   248
        {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   249
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   250
        // Five-byte sequences
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   251
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid first byte
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   252
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   253
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   254
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   255
        {5, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   256
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   257
        {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   258
        {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   259
        {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   260
        {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   261
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   262
        // Six-byte sequences
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   263
        {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   264
        {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   265
        {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   266
        {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   267
        {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   268
        {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   269
        {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   270
        {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   271
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   272
    };
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   273
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   274
    static void checkMalformed(String csn) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   275
        boolean failed = false;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   276
        System.out.printf("    Check malformed <%s>...%n", csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   277
        for (boolean direct: new boolean[] {false, true}) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   278
            for (byte[] bins : malformed) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   279
                int mlen = bins[0];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   280
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   281
                CoderResult cr = decodeCR(bin, csn, direct);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   282
                String ashex = "";
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   283
                for (int i = 0; i < bin.length; i++) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   284
                    if (i > 0) ashex += " ";
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   285
                        ashex += Integer.toBinaryString((int)bin[i] & 0xff);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   286
                }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   287
                if (!cr.isMalformed()) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   288
                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   289
                    failed = true;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   290
                } else if (cr.length() != mlen) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   291
                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   292
                    failed = true;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   293
                }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   294
            }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   295
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   296
        if (failed)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   297
            throw new RuntimeException("Check malformed failed " + csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   298
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   299
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   300
    static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   301
        int inPos = flow[0];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   302
        int inLen = flow[1];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   303
        int outPos = flow[2];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   304
        int outLen = flow[3];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   305
        int expedInPos = flow[4];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   306
        int expedOutPos = flow[5];
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   307
        CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   308
                                          :CoderResult.OVERFLOW;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   309
        ByteBuffer bbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   310
        CharBuffer cbf;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   311
        if (direct) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   312
            bbf = ByteBuffer.allocateDirect(inPos + utf8s.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   313
            cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   314
        } else {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   315
            bbf = ByteBuffer.allocate(inPos + utf8s.length);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   316
            cbf = CharBuffer.allocate(outPos + outLen);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   317
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   318
        bbf.position(inPos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   319
        bbf.put(utf8s).flip().position(inPos).limit(inPos + inLen);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   320
        cbf.position(outPos);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   321
        dec.reset();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   322
        CoderResult cr = dec.decode(bbf, cbf, false);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   323
        if (cr != expedCR ||
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   324
            bbf.position() != expedInPos ||
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   325
            cbf.position() != expedOutPos) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   326
            System.out.printf("Expected(direct=%5b): [", direct);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   327
            for (int i:flow) System.out.print(" " + i);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   328
            System.out.println("]  CR=" + cr +
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   329
                               ", inPos=" + bbf.position() +
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   330
                               ", outPos=" + cbf.position());
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   331
            return false;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   332
        }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   333
        return true;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   334
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   335
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   336
    static void checkUnderOverflow(String csn) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   337
        System.out.printf("    Check under/overflow <%s>...%n", csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   338
        CharsetDecoder dec = Charset.forName(csn).newDecoder();
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   339
        boolean failed = false;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   340
        byte[] utf8s = new String("\u007f\u07ff\ue000\ud800\udc00").getBytes("UTF-8");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   341
        int    inlen = utf8s.length;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   342
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   343
        for (int inoff = 0; inoff < 20; inoff++) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   344
            for (int outoff = 0; outoff < 20; outoff++) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   345
        int[][] Flows = {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   346
            //inpos, inLen, outPos,  outLen, inPosEP,   outposEP,   under(0)/over(1)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   347
            {inoff,  inlen, outoff,  1,      inoff + 1, outoff + 1, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   348
            {inoff,  inlen, outoff,  2,      inoff + 3, outoff + 2, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   349
            {inoff,  inlen, outoff,  3,      inoff + 6, outoff + 3, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   350
            {inoff,  inlen, outoff,  4,      inoff + 6, outoff + 3, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   351
            {inoff,  inlen, outoff,  5,      inoff + 10,outoff + 5, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   352
             // underflow
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   353
            {inoff,  1,     outoff,  5,      inoff + 1, outoff + 1, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   354
            {inoff,  2,     outoff,  5,      inoff + 1, outoff + 1, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   355
            {inoff,  3,     outoff,  5,      inoff + 3, outoff + 2, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   356
            {inoff,  4,     outoff,  5,      inoff + 3, outoff + 2, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   357
            {inoff,  5,     outoff,  5,      inoff + 3, outoff + 2, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   358
            {inoff,  6,     outoff,  5,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   359
            {inoff,  7,     outoff,  5,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   360
            {inoff,  8,     outoff,  5,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   361
            {inoff,  9,     outoff,  5,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   362
            {inoff,  10,    outoff,  5,      inoff + 10,outoff + 5, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   363
             // 2-byte underflow/overflow
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   364
            {inoff,  2,     outoff,  1,      inoff + 1, outoff + 1, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   365
            {inoff,  3,     outoff,  1,      inoff + 1, outoff + 1, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   366
             // 3-byte underflow/overflow
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   367
            {inoff,  4,     outoff,  2,      inoff + 3, outoff + 2, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   368
            {inoff,  5,     outoff,  2,      inoff + 3, outoff + 2, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   369
            {inoff,  6,     outoff,  2,      inoff + 3, outoff + 2, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   370
             // 4-byte underflow/overflow
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   371
            {inoff,  7,     outoff,  4,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   372
            {inoff,  8,     outoff,  4,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   373
            {inoff,  9,     outoff,  4,      inoff + 6, outoff + 3, 0},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   374
            {inoff,  10,    outoff,  4,      inoff + 6, outoff + 3, 1},
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   375
        };
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   376
        for (boolean direct: new boolean[] {false, true}) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   377
            for (int[] flow: Flows) {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   378
                if (!check(dec, utf8s, direct, flow))
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   379
                    failed = true;
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   380
            }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   381
        }}}
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   382
        if (failed)
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   383
            throw new RuntimeException("Check under/overflow failed " + csn);
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   384
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   385
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   386
    public static void main(String[] args) throws Exception {
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   387
        checkRoundtrip("UTF-8");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   388
        check6ByteSurrs("UTF-8");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   389
        //compare("UTF-8", "UTF-8-OLD");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   390
        checkMalformed("UTF-8");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   391
        checkUnderOverflow("UTF-8");
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   392
    }
136d19d6c372 4486841: UTF-8 decoder should adhere to corrigendum to Unicode 3.0.1
sherman
parents:
diff changeset
   393
}