jdk/test/sun/nio/cs/TestEUC_TW.java
changeset 2914 1375969fd02e
child 2921 d9d491a5a169
equal deleted inserted replaced
2913:39a9cc073b84 2914:1375969fd02e
       
     1 /*
       
     2  * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    21  * have any questions.
       
    22  */
       
    23 
       
    24 /*
       
    25  * @test
       
    26  * @bug 6831794 6229811
       
    27  * @summary Test EUC_TW charset
       
    28  */
       
    29 
       
    30 import java.nio.charset.*;
       
    31 import java.nio.*;
       
    32 import java.util.*;
       
    33 
       
    34 public class TestEUC_TW {
       
    35 
       
    36     static class Time {
       
    37         long t;
       
    38     }
       
    39     static int iteration = 1000;
       
    40 
       
    41     static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
       
    42         throws Exception {
       
    43         String csn = cs.name();
       
    44         CharsetDecoder dec = cs.newDecoder();
       
    45         ByteBuffer bbf;
       
    46         CharBuffer cbf;
       
    47         if (testDirect) {
       
    48             bbf = ByteBuffer.allocateDirect(bb.length);
       
    49             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
       
    50             bbf.put(bb);
       
    51         } else {
       
    52             bbf = ByteBuffer.wrap(bb);
       
    53             cbf = CharBuffer.allocate(bb.length);
       
    54         }
       
    55         CoderResult cr = null;
       
    56         long t1 = System.nanoTime()/1000;
       
    57         for (int i = 0; i < iteration; i++) {
       
    58             bbf.rewind();
       
    59             cbf.clear();
       
    60             dec.reset();
       
    61             cr = dec.decode(bbf, cbf, true);
       
    62         }
       
    63         long t2 = System.nanoTime()/1000;
       
    64         if (t != null)
       
    65         t.t = (t2 - t1)/iteration;
       
    66         if (cr != CoderResult.UNDERFLOW) {
       
    67             System.out.println("DEC-----------------");
       
    68             int pos = bbf.position();
       
    69             System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
       
    70                               cr.toString(), pos,
       
    71                               bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
       
    72             throw new RuntimeException("Decoding err: " + csn);
       
    73         }
       
    74         char[] cc = new char[cbf.position()];
       
    75         cbf.flip(); cbf.get(cc);
       
    76         return cc;
       
    77 
       
    78     }
       
    79 
       
    80     static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
       
    81         throws Exception {
       
    82         CharsetDecoder dec = cs.newDecoder();
       
    83         ByteBuffer bbf;
       
    84         CharBuffer cbf;
       
    85         if (testDirect) {
       
    86             bbf = ByteBuffer.allocateDirect(bb.length);
       
    87             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
       
    88             bbf.put(bb).flip();
       
    89         } else {
       
    90             bbf = ByteBuffer.wrap(bb);
       
    91             cbf = CharBuffer.allocate(bb.length);
       
    92         }
       
    93         return dec.decode(bbf, cbf, true);
       
    94     }
       
    95 
       
    96     static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
       
    97         throws Exception {
       
    98         ByteBuffer bbf;
       
    99         CharBuffer cbf;
       
   100         CharsetEncoder enc = cs.newEncoder();
       
   101         String csn = cs.name();
       
   102         if (testDirect) {
       
   103             bbf = ByteBuffer.allocateDirect(cc.length * 4);
       
   104             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
       
   105             cbf.put(cc).flip();
       
   106         } else {
       
   107             bbf = ByteBuffer.allocate(cc.length * 4);
       
   108             cbf = CharBuffer.wrap(cc);
       
   109         }
       
   110         CoderResult cr = null;
       
   111         long t1 = System.nanoTime()/1000;
       
   112         for (int i = 0; i < iteration; i++) {
       
   113             cbf.rewind();
       
   114             bbf.clear();
       
   115             enc.reset();
       
   116             cr = enc.encode(cbf, bbf, true);
       
   117         }
       
   118         long t2 = System.nanoTime()/1000;
       
   119         if (t != null)
       
   120         t.t = (t2 - t1)/iteration;
       
   121         if (cr != CoderResult.UNDERFLOW) {
       
   122             System.out.println("ENC-----------------");
       
   123             int pos = cbf.position();
       
   124             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
       
   125                               cr.toString(), pos, cc[pos]&0xffff);
       
   126             throw new RuntimeException("Encoding err: " + csn);
       
   127         }
       
   128         byte[] bb = new byte[bbf.position()];
       
   129         bbf.flip(); bbf.get(bb);
       
   130         return bb;
       
   131     }
       
   132 
       
   133     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
       
   134         throws Exception {
       
   135         ByteBuffer bbf;
       
   136         CharBuffer cbf;
       
   137         CharsetEncoder enc = cs.newEncoder();
       
   138         if (testDirect) {
       
   139             bbf = ByteBuffer.allocateDirect(cc.length * 4);
       
   140             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
       
   141             cbf.put(cc).flip();
       
   142         } else {
       
   143             bbf = ByteBuffer.allocate(cc.length * 4);
       
   144             cbf = CharBuffer.wrap(cc);
       
   145         }
       
   146         return enc.encode(cbf, bbf, true);
       
   147     }
       
   148 
       
   149     static char[] getEUC_TWChars(boolean skipNR) {
       
   150         //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
       
   151         CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
       
   152         CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
       
   153         char[] cc = new char[0x20000];
       
   154         char[] c2 = new char[2];
       
   155         int pos = 0;
       
   156         int i = 0;
       
   157         //bmp
       
   158         for (i = 0; i < 0x10000; i++) {
       
   159             //SKIP these 3 NR codepoints if compared to EUC_TW
       
   160             if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
       
   161                 continue;
       
   162             if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
       
   163                 System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
       
   164                                   encOLD.canEncode((char)i),
       
   165                                   enc.canEncode((char)i));
       
   166                 throw new RuntimeException("canEncode() err!");
       
   167             }
       
   168 
       
   169             if (enc.canEncode((char)i)) {
       
   170                 cc[pos++] = (char)i;
       
   171             }
       
   172         }
       
   173 
       
   174         //supp
       
   175         CharBuffer cb = CharBuffer.wrap(new char[2]);
       
   176         for (i = 0x20000; i < 0x30000; i++) {
       
   177             Character.toChars(i, c2, 0);
       
   178             cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
       
   179 
       
   180             if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
       
   181                 throw new RuntimeException("canEncode() err!");
       
   182             }
       
   183 
       
   184             if (enc.canEncode(cb)) {
       
   185                 //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
       
   186                 cc[pos++] = c2[0];
       
   187                 cc[pos++] = c2[1];
       
   188             }
       
   189         }
       
   190 
       
   191         return Arrays.copyOf(cc, pos);
       
   192     }
       
   193 
       
   194     static void checkRoundtrip(Charset cs) throws Exception {
       
   195         char[] cc = getEUC_TWChars(false);
       
   196         System.out.printf("Check roundtrip <%s>...", cs.name());
       
   197         byte[] bb = encode(cc, cs, false, null);
       
   198         char[] ccO = decode(bb, cs, false, null);
       
   199 
       
   200         if (!Arrays.equals(cc, ccO)) {
       
   201             System.out.printf("    non-direct failed");
       
   202         }
       
   203         bb = encode(cc, cs, true, null);
       
   204         ccO = decode(bb, cs, true, null);
       
   205         if (!Arrays.equals(cc, ccO)) {
       
   206             System.out.printf("    (direct) failed");
       
   207         }
       
   208         System.out.println();
       
   209     }
       
   210 
       
   211     static void checkInit(String csn) throws Exception {
       
   212         System.out.printf("Check init <%s>...%n", csn);
       
   213         Charset.forName("Big5");    // load in the ExtendedCharsets
       
   214         long t1 = System.nanoTime()/1000;
       
   215         Charset cs = Charset.forName(csn);
       
   216         long t2 = System.nanoTime()/1000;
       
   217         System.out.printf("    charset     :%d%n", t2 - t1);
       
   218         t1 = System.nanoTime()/1000;
       
   219             cs.newDecoder();
       
   220         t2 = System.nanoTime()/1000;
       
   221         System.out.printf("    new Decoder :%d%n", t2 - t1);
       
   222 
       
   223         t1 = System.nanoTime()/1000;
       
   224             cs.newEncoder();
       
   225         t2 = System.nanoTime()/1000;
       
   226         System.out.printf("    new Encoder :%d%n", t2 - t1);
       
   227     }
       
   228 
       
   229     static void compare(Charset cs1, Charset cs2) throws Exception {
       
   230         char[] cc = getEUC_TWChars(true);
       
   231 
       
   232         String csn1 = cs1.name();
       
   233         String csn2 = cs2.name();
       
   234         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
       
   235 
       
   236         Time t1 = new Time();
       
   237         Time t2 = new Time();
       
   238 
       
   239         byte[] bb1 = encode(cc, cs1, false, t1);
       
   240         byte[] bb2 = encode(cc, cs2, false, t2);
       
   241 
       
   242         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
       
   243                           csn2, csn1,
       
   244                           t2.t, t1.t,
       
   245                           (double)(t2.t)/(t1.t));
       
   246         if (!Arrays.equals(bb1, bb2)) {
       
   247             System.out.printf("        encoding failed%n");
       
   248         }
       
   249 
       
   250         char[] cc2 = decode(bb1, cs2, false, t2);
       
   251         char[] cc1 = decode(bb1, cs1, false, t1);
       
   252         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
       
   253                           csn2, csn1,
       
   254                           t2.t, t1.t,
       
   255                           (double)(t2.t)/(t1.t));
       
   256         if (!Arrays.equals(cc1, cc2)) {
       
   257             System.out.printf("        decoding failed%n");
       
   258         }
       
   259 
       
   260         bb1 = encode(cc, cs1, true, t1);
       
   261         bb2 = encode(cc, cs2, true, t2);
       
   262 
       
   263         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
       
   264                           csn2, csn1,
       
   265                           t2.t, t1.t,
       
   266                           (double)(t2.t)/(t1.t));
       
   267 
       
   268         if (!Arrays.equals(bb1, bb2))
       
   269             System.out.printf("        encoding (direct) failed%n");
       
   270 
       
   271         cc1 = decode(bb1, cs1, true, t1);
       
   272         cc2 = decode(bb1, cs2, true, t2);
       
   273         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
       
   274                           csn2, csn1,
       
   275                           t2.t, t1.t,
       
   276                           (double)(t2.t)/(t1.t));
       
   277         if (!Arrays.equals(cc1, cc2)) {
       
   278             System.out.printf("        decoding (direct) failed%n");
       
   279         }
       
   280     }
       
   281 
       
   282     // The first byte is the length of malformed bytes
       
   283     static byte[][] malformed = {
       
   284         //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
       
   285     };
       
   286 
       
   287     static void checkMalformed(Charset cs) throws Exception {
       
   288         boolean failed = false;
       
   289         String csn = cs.name();
       
   290         System.out.printf("Check malformed <%s>...%n", csn);
       
   291         for (boolean direct: new boolean[] {false, true}) {
       
   292             for (byte[] bins : malformed) {
       
   293                 int mlen = bins[0];
       
   294                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
       
   295                 CoderResult cr = decodeCR(bin, cs, direct);
       
   296                 String ashex = "";
       
   297                 for (int i = 0; i < bin.length; i++) {
       
   298                     if (i > 0) ashex += " ";
       
   299                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
       
   300                 }
       
   301                 if (!cr.isMalformed()) {
       
   302                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
       
   303                     failed = true;
       
   304                 } else if (cr.length() != mlen) {
       
   305                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
       
   306                     failed = true;
       
   307                 }
       
   308             }
       
   309         }
       
   310         if (failed)
       
   311             throw new RuntimeException("Check malformed failed " + csn);
       
   312     }
       
   313 
       
   314     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
       
   315         int inPos = flow[0];
       
   316         int inLen = flow[1];
       
   317         int outPos = flow[2];
       
   318         int outLen = flow[3];
       
   319         int expedInPos = flow[4];
       
   320         int expedOutPos = flow[5];
       
   321         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
       
   322                                           :CoderResult.OVERFLOW;
       
   323         ByteBuffer bbf;
       
   324         CharBuffer cbf;
       
   325         if (direct) {
       
   326             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
       
   327             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
       
   328         } else {
       
   329             bbf = ByteBuffer.allocate(inPos + bytes.length);
       
   330             cbf = CharBuffer.allocate(outPos + outLen);
       
   331         }
       
   332         bbf.position(inPos);
       
   333         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
       
   334         cbf.position(outPos);
       
   335         dec.reset();
       
   336         CoderResult cr = dec.decode(bbf, cbf, false);
       
   337         if (cr != expedCR ||
       
   338             bbf.position() != expedInPos ||
       
   339             cbf.position() != expedOutPos) {
       
   340             System.out.printf("Expected(direct=%5b): [", direct);
       
   341             for (int i:flow) System.out.print(" " + i);
       
   342             System.out.println("]  CR=" + cr +
       
   343                                ", inPos=" + bbf.position() +
       
   344                                ", outPos=" + cbf.position());
       
   345             return false;
       
   346         }
       
   347         return true;
       
   348     }
       
   349 
       
   350     static void checkUnderOverflow(Charset cs) throws Exception {
       
   351         String csn = cs.name();
       
   352         System.out.printf("Check under/overflow <%s>...%n", csn);
       
   353         CharsetDecoder dec = cs.newDecoder();
       
   354         boolean failed = false;
       
   355         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
       
   356         //0   1 2   3         7         11
       
   357         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
       
   358         int    inlen = bytes.length;
       
   359 
       
   360         int MAXOFF = 20;
       
   361         for (int inoff = 0; inoff < MAXOFF; inoff++) {
       
   362             for (int outoff = 0; outoff < MAXOFF; outoff++) {
       
   363         int[][] Flows = {
       
   364             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
       
   365             //overflow
       
   366             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
       
   367             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
       
   368             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
       
   369             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
       
   370             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
       
   371             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
       
   372             //underflow
       
   373             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
       
   374             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
       
   375             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
       
   376             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
       
   377             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
       
   378             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
       
   379             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
       
   380             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
       
   381             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
       
   382             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
       
   383             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
       
   384             // 2-byte under/overflow
       
   385             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
       
   386             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
       
   387             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
       
   388             // 4-byte  under/overflow
       
   389             {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
       
   390             {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
       
   391             {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
       
   392             {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
       
   393             {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
       
   394             // 4-byte  under/overflow
       
   395             {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
       
   396             {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
       
   397             {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
       
   398             {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
       
   399             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
       
   400             // 4-byte/supp  under/overflow
       
   401             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
       
   402             {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
       
   403             {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
       
   404             {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
       
   405             {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
       
   406             {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
       
   407             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
       
   408         };
       
   409         for (boolean direct: new boolean[] {false, true}) {
       
   410             for (int[] flow: Flows) {
       
   411                 if (!check(dec, bytes, direct, flow))
       
   412                     failed = true;
       
   413             }
       
   414         }}}
       
   415         if (failed)
       
   416             throw new RuntimeException("Check under/overflow failed " + csn);
       
   417     }
       
   418 
       
   419     public static void main(String[] args) throws Exception {
       
   420         // be the first one
       
   421         //checkInit("EUC_TW_OLD");
       
   422         checkInit("EUC_TW");
       
   423         Charset euctw = Charset.forName("EUC_TW");
       
   424         checkRoundtrip(euctw);
       
   425         compare(euctw, new EUC_TW_OLD());
       
   426         checkMalformed(euctw);
       
   427         checkUnderOverflow(euctw);
       
   428     }
       
   429 }