jdk/make/tools/src/build/tools/charsetmapping/GenerateEUC_TW.java
changeset 5222 a430c36e9f2c
parent 5221 8d57da0c20f6
parent 5211 1d7f06d3f89d
child 5223 44158f6d3b94
equal deleted inserted replaced
5221:8d57da0c20f6 5222:a430c36e9f2c
     1 /*
       
     2  * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 package build.tools.charsetmapping;
       
    27 
       
    28 import java.io.*;
       
    29 import java.util.ArrayList;
       
    30 import java.util.Scanner;
       
    31 import java.util.Formatter;
       
    32 import java.util.regex.*;
       
    33 import java.nio.charset.*;
       
    34 import static build.tools.charsetmapping.CharsetMapping.*;
       
    35 
       
    36 public class GenerateEUC_TW {
       
    37 
       
    38     public static void genEUC_TW(String args[]) throws Exception {
       
    39         genClass(new FileInputStream(new File(args[0], "euc_tw.map")),
       
    40                  new PrintStream(new File(args[1], "EUC_TWMapping.java"), "ISO-8859-1"),
       
    41                  getCopyright(new File(args[3])));
       
    42     }
       
    43 
       
    44     private static String getCopyright(File f) throws IOException {
       
    45         Scanner s = new Scanner(f, "ISO-8859-1");
       
    46         StringBuilder sb = new StringBuilder();
       
    47         while (s.hasNextLine()) {
       
    48             String ln = s.nextLine();
       
    49             sb.append(ln + "\n");
       
    50             // assume we have the copyright as the first comment
       
    51             if (ln.matches("^\\s\\*\\/$"))
       
    52                 break;
       
    53         }
       
    54         s.close();
       
    55         return sb.toString();
       
    56     }
       
    57 
       
    58     private static char[] toCharArray(int[] db,
       
    59                                       int b1Min, int b1Max,
       
    60                                       int b2Min, int b2Max)
       
    61     {
       
    62         char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)];
       
    63         int off = 0;
       
    64         for (int b1 = b1Min; b1 <= b1Max; b1++) {
       
    65             for (int b2 = b2Min; b2 <= b2Max; b2++) {
       
    66                 ca[off++] = (char)(db[b1 * 256 + b2] & 0xffff);
       
    67             }
       
    68         }
       
    69         return ca;
       
    70     }
       
    71 
       
    72     private static void toChar(Formatter out, String fmt, char c) {
       
    73         switch (c) {
       
    74         case '\b':
       
    75             out.format("\\b"); break;
       
    76         case '\t':
       
    77             out.format("\\t"); break;
       
    78         case '\n':
       
    79             out.format("\\n"); break;
       
    80         case '\f':
       
    81             out.format("\\f"); break;
       
    82         case '\r':
       
    83             out.format("\\r"); break;
       
    84         case '\"':
       
    85             out.format("\\\""); break;
       
    86         case '\'':
       
    87             out.format("\\'"); break;
       
    88         case '\\':
       
    89             out.format("\\\\"); break;
       
    90         default:
       
    91             out.format(fmt, c & 0xffff);
       
    92         }
       
    93     }
       
    94 
       
    95     private static void toString(Formatter out, char[] date, String endStr)
       
    96     {
       
    97         int off = 0;
       
    98         int end = date.length;
       
    99         while (off < end) {
       
   100             out.format("        \"");
       
   101             for (int j = 0; j < 8 && off < end; j++) {
       
   102                 toChar(out, "\\u%04X", date[off++]);
       
   103             }
       
   104             if (off == end)
       
   105                out.format("\"%s%n", endStr);
       
   106             else
       
   107                out.format("\" +%n");
       
   108         }
       
   109     }
       
   110 
       
   111     private static char[] toCharArray(byte[] ba,
       
   112                                       int b1Min, int b1Max,
       
   113                                       int b2Min, int b2Max)
       
   114     {
       
   115         char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)];
       
   116         int off = 0;
       
   117         for (int b1 = b1Min; b1 <= b1Max; b1++) {
       
   118             int b2 = b2Min;
       
   119             while (b2 <= b2Max) {
       
   120                 ca[off++] = (char)(((ba[b1 * 256 + b2++] & 0xff) << 8) |
       
   121                                    (ba[b1 * 256 + b2++] & 0xff));
       
   122             }
       
   123         }
       
   124         return ca;
       
   125     }
       
   126 
       
   127     private static void toCharArray(Formatter out, char[] date) {
       
   128         int off = 0;
       
   129         int end = date.length;
       
   130         while (off < end) {
       
   131             out.format("        ");
       
   132             for (int j = 0; j < 8 && off < end; j++) {
       
   133                 toChar(out, "'\\u%04X',", date[off++]);
       
   134             }
       
   135             out.format("%n");
       
   136         }
       
   137     }
       
   138 
       
   139     private static int initC2BIndex(char[] index) {
       
   140         int off = 0;
       
   141         for (int i = 0; i < index.length; i++) {
       
   142             if (index[i] != 0) {
       
   143                 index[i] = (char)off;
       
   144                 off += 0x100;
       
   145             } else {
       
   146                 index[i] = CharsetMapping.UNMAPPABLE_ENCODING;
       
   147             }
       
   148         }
       
   149         return off;
       
   150     }
       
   151 
       
   152     private static Pattern euctw = Pattern.compile("(?:8ea)?(\\p{XDigit}++)\\s++(\\p{XDigit}++)?\\s*+.*");
       
   153 
       
   154     private static void genClass(InputStream is, PrintStream ps, String copyright)
       
   155         throws Exception
       
   156     {
       
   157         // ranges of byte1 and byte2, something should come from a "config" file
       
   158         int b1Min = 0xa1;
       
   159         int b1Max = 0xfe;
       
   160         int b2Min = 0xa1;
       
   161         int b2Max = 0xfe;
       
   162 
       
   163         try {
       
   164             int[][] db = new int[8][0x10000];        // doublebyte
       
   165             byte[]  suppFlag = new byte[0x10000];    // doublebyte
       
   166             char[]  indexC2B = new char[256];
       
   167             char[]  indexC2BSupp = new char[256];
       
   168 
       
   169             for (int i = 0; i < 8; i++)
       
   170                 for (int j = 0; j < 0x10000; j++)
       
   171                     db[i][j] = CharsetMapping.UNMAPPABLE_DECODING;
       
   172 
       
   173             CharsetMapping.Parser p = new CharsetMapping.Parser(is, euctw);
       
   174             CharsetMapping.Entry  e = null;
       
   175             while ((e = p.next()) != null) {
       
   176                 int plane = 0;
       
   177                 if (e.bs >= 0x10000) {
       
   178                     plane = ((e.bs >> 16) & 0xff) - 1;
       
   179                     if (plane >= 14)
       
   180                         plane = 7;
       
   181                     e.bs = e.bs & 0xffff;
       
   182                 }
       
   183                 db[plane][e.bs] = e.cp;
       
   184                 if (e.cp < 0x10000) {
       
   185                     indexC2B[e.cp>>8] = 1;
       
   186                 } else {
       
   187                     indexC2BSupp[(e.cp&0xffff)>>8] = 1;
       
   188                     suppFlag[e.bs] |= (1 << plane);
       
   189                 }
       
   190             }
       
   191 
       
   192             StringBuilder out = new StringBuilder();
       
   193             Formatter fm = new Formatter(out);
       
   194 
       
   195             fm.format(copyright);
       
   196             fm.format("%n// -- This file was mechanically generated: Do not edit! -- //%n");
       
   197             fm.format("package sun.nio.cs.ext;%n%n");
       
   198             fm.format("class EUC_TWMapping {%n%n");
       
   199 
       
   200             // boundaries
       
   201             fm.format("    final static int b1Min = 0x%x;%n", b1Min);
       
   202             fm.format("    final static int b1Max = 0x%x;%n", b1Max);
       
   203             fm.format("    final static int b2Min = 0x%x;%n", b2Min);
       
   204             fm.format("    final static int b2Max = 0x%x;%n", b2Max);
       
   205 
       
   206             // b2c tables
       
   207             fm.format("%n    final static String[] b2c = {%n");
       
   208             for (int plane = 0; plane < 8; plane++) {
       
   209                 fm.format("        // Plane %d%n", plane);
       
   210                 toString(fm, toCharArray(db[plane],
       
   211                                          b1Min, b1Max, b2Min, b2Max),
       
   212                          ",");
       
   213                 fm.format("%n");
       
   214             }
       
   215             fm.format("    };%n");
       
   216 
       
   217             // c2bIndex
       
   218             fm.format("%n    static final int C2BSIZE = 0x%x;%n",
       
   219                       initC2BIndex(indexC2B));
       
   220             fm.format("%n    static char[] c2bIndex = new char[] {%n");
       
   221             toCharArray(fm, indexC2B);
       
   222             fm.format("    };%n");
       
   223 
       
   224             // c2bIndexSupp
       
   225             fm.format("%n    static final int C2BSUPPSIZE = 0x%x;%n",
       
   226                       initC2BIndex(indexC2BSupp));
       
   227             fm.format("%n    static char[] c2bSuppIndex = new char[] {%n");
       
   228             toCharArray(fm, indexC2BSupp);
       
   229             fm.format("    };%n");
       
   230 
       
   231             // suppFlags
       
   232             fm.format("%n    static String b2cIsSuppStr =%n");
       
   233             toString(fm, toCharArray(suppFlag,
       
   234                                      b1Min, b1Max, b2Min, b2Max),
       
   235                      ";");
       
   236 
       
   237             fm.format("}");
       
   238             fm.close();
       
   239 
       
   240             ps.println(out.toString());
       
   241             ps.close();
       
   242         } catch (Exception x) {
       
   243             x.printStackTrace();
       
   244         }
       
   245     }
       
   246 }