jdk/make/tools/src/build/tools/charsetmapping/GenerateDBCS.java
changeset 5222 a430c36e9f2c
parent 5221 8d57da0c20f6
parent 5211 1d7f06d3f89d
child 5223 44158f6d3b94
equal deleted inserted replaced
5221:8d57da0c20f6 5222:a430c36e9f2c
     1 /*
       
     2  * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 package build.tools.charsetmapping;
       
    27 import java.io.*;
       
    28 import java.util.Arrays;
       
    29 import java.util.ArrayList;
       
    30 import java.util.Scanner;
       
    31 import java.util.Formatter;
       
    32 import java.util.regex.*;
       
    33 import java.nio.charset.*;
       
    34 import static build.tools.charsetmapping.CharsetMapping.*;
       
    35 
       
    36 public class GenerateDBCS {
       
    37     // Pattern used by this class to read in a mapping table (it is handed to
           // the Parser in genClass). A matching line consists of two hexadecimal
           // fields, each optionally prefixed with "0x", separated by whitespace,
           // optionally followed by whitespace and a "#" comment. Capturing group 1
           // is the first hex field, group 2 the second.
       
    38     static Pattern mPattern = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
       
    39     public static void genDBCS(String args[]) throws Exception {
       
    40 
       
    41         Scanner s = new Scanner(new File(args[0], args[2]));
       
    42         while (s.hasNextLine()) {
       
    43             String line = s.nextLine();
       
    44             if (line.startsWith("#") || line.length() == 0)
       
    45                 continue;
       
    46             String[] fields = line.split("\\s+");
       
    47             if (fields.length < 10) {
       
    48                 System.err.println("Misconfiged sbcs line <" + line + ">?");
       
    49                 continue;
       
    50             }
       
    51             String clzName = fields[0];
       
    52             String csName  = fields[1];
       
    53             String hisName = ("null".equals(fields[2]))?null:fields[2];
       
    54             String type = fields[3].toUpperCase();
       
    55             if ("BASIC".equals(type))
       
    56                 type = "";
       
    57             else
       
    58                 type = "_" + type;
       
    59             String pkgName  = fields[4];
       
    60             boolean isASCII = Boolean.valueOf(fields[5]);
       
    61             int    b1Min = toInteger(fields[6]);
       
    62             int    b1Max = toInteger(fields[7]);
       
    63             int    b2Min    = toInteger(fields[8]);
       
    64             int    b2Max    = toInteger(fields[9]);
       
    65             System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
       
    66             genClass(args[0], args[1], "DoubleByte-X.java.template",
       
    67                     clzName, csName, hisName, pkgName,
       
    68                     isASCII, type,
       
    69                     b1Min, b1Max, b2Min, b2Max);
       
    70         }
       
    71     }
       
    72 
       
    73     private static int toInteger(String s) {
       
    74         if (s.startsWith("0x") || s.startsWith("0X"))
       
    75             return Integer.valueOf(s.substring(2), 16);
       
    76         else
       
    77             return Integer.valueOf(s);
       
    78     }
       
    79 
       
    80     private static void outString(Formatter out,
       
    81                                   char[] cc, int off, int end,
       
    82                                   String closure)
       
    83     {
       
    84         while (off < end) {
       
    85             out.format("        \"");
       
    86             for (int j = 0; j < 8; j++) {
       
    87                 if (off == end)
       
    88                     break;
       
    89                 char c = cc[off++];
       
    90                 switch (c) {
       
    91                 case '\b':
       
    92                     out.format("\\b"); break;
       
    93                 case '\t':
       
    94                     out.format("\\t"); break;
       
    95                 case '\n':
       
    96                     out.format("\\n"); break;
       
    97                 case '\f':
       
    98                     out.format("\\f"); break;
       
    99                 case '\r':
       
   100                     out.format("\\r"); break;
       
   101                 case '\"':
       
   102                     out.format("\\\""); break;
       
   103                 case '\'':
       
   104                     out.format("\\'"); break;
       
   105                 case '\\':
       
   106                     out.format("\\\\"); break;
       
   107                 default:
       
   108                     out.format("\\u%04X", c & 0xffff);
       
   109                 }
       
   110             }
       
   111             if (off == end)
       
   112                 out.format("\" %s%n", closure);
       
   113             else
       
   114                 out.format("\" + %n");
       
   115         }
       
   116     }
       
   117 
       
   118     private static void outString(Formatter out,
       
   119                                   char[] db,
       
   120                                   int b1,
       
   121                                   int b2Min, int b2Max,
       
   122                                   String closure)
       
   123     {
       
   124         char[] cc = new char[b2Max - b2Min + 1];
       
   125         int off = 0;
       
   126         for (int b2 = b2Min; b2 <= b2Max; b2++) {
       
   127             cc[off++] = db[(b1 << 8) | b2];
       
   128         }
       
   129         outString(out, cc, 0, cc.length, closure);
       
   130     }
       
   131 
       
   132     private static void genClass(String srcDir, String dstDir, String template,
       
   133                                  String clzName,
       
   134                                  String csName,
       
   135                                  String hisName,
       
   136                                  String pkgName,
       
   137                                  boolean isASCII,
       
   138                                  String type,
       
   139                                  int b1Min, int b1Max,
       
   140                                  int b2Min, int b2Max)
       
   141         throws Exception
       
   142     {
       
   143 
       
   144         StringBuilder b2cSB = new StringBuilder();
       
   145         StringBuilder b2cNRSB = new StringBuilder();
       
   146         StringBuilder c2bNRSB = new StringBuilder();
       
   147 
       
   148         char[] db = new char[0x10000];
       
   149         char[] c2bIndex = new char[0x100];
       
   150         int c2bOff = 0x100;    // first 0x100 for unmappable segs
       
   151 
       
   152         Arrays.fill(db, UNMAPPABLE_DECODING);
       
   153         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
       
   154 
       
   155         char[] b2cIndex = new char[0x100];
       
   156         Arrays.fill(b2cIndex, UNMAPPABLE_DECODING);
       
   157 
       
   158         // (1)read in .map to parse all b->c entries
       
   159         FileInputStream in = new FileInputStream(new File(srcDir, clzName + ".map"));
       
   160         Parser p = new Parser(in, mPattern);
       
   161         Entry  e = null;
       
   162         while ((e = p.next()) != null) {
       
   163             db[e.bs] = (char)e.cp;
       
   164 
       
   165             if (e.bs > 0x100 &&    // db
       
   166                 b2cIndex[e.bs>>8] == UNMAPPABLE_DECODING) {
       
   167                 b2cIndex[e.bs>>8] = 1;
       
   168             }
       
   169 
       
   170             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
       
   171                 c2bOff += 0x100;
       
   172                 c2bIndex[e.cp>>8] = 1;
       
   173             }
       
   174         }
       
   175         Formatter fm = new Formatter(b2cSB);
       
   176         fm.format("%n    static final String b2cSBStr =%n");
       
   177         outString(fm, db, 0x00, 0x100,  ";");
       
   178 
       
   179         fm.format("%n        static final String[] b2cStr = {%n");
       
   180         for (int i = 0; i < 0x100; i++) {
       
   181             if (b2cIndex[i] == UNMAPPABLE_DECODING) {
       
   182                 fm.format("            null,%n");  //unmappable segments
       
   183             } else {
       
   184                 outString(fm, db, i, b2Min, b2Max, ",");
       
   185             }
       
   186         }
       
   187 
       
   188         fm.format("        };%n");
       
   189         fm.close();
       
   190 
       
   191         // (2)now parse the .nr file which includes "b->c" non-roundtrip entries
       
   192         File f = new File(srcDir, clzName + ".nr");
       
   193         if (f.exists()) {
       
   194             StringBuilder sb = new StringBuilder();
       
   195             in = new FileInputStream(f);
       
   196             p = new Parser(in, mPattern);
       
   197             e = null;
       
   198             while ((e = p.next()) != null) {
       
   199                 // A <b,c> pair
       
   200                 sb.append((char)e.bs);
       
   201                 sb.append((char)e.cp);
       
   202             }
       
   203             char[] nr = sb.toString().toCharArray();
       
   204             fm = new Formatter(b2cNRSB);
       
   205             fm.format("String b2cNR =%n");
       
   206             outString(fm, nr, 0, nr.length,  ";");
       
   207             fm.close();
       
   208         } else {
       
   209             b2cNRSB.append("String b2cNR = null;");
       
   210         }
       
   211 
       
   212         // (3)finally the .c2b file which includes c->b non-roundtrip entries
       
   213         f = new File(srcDir, clzName + ".c2b");
       
   214         if (f.exists()) {
       
   215             StringBuilder sb = new StringBuilder();
       
   216             in = new FileInputStream(f);
       
   217             p = new Parser(in, mPattern);
       
   218             e = null;
       
   219             while ((e = p.next()) != null) {
       
   220                 // A <b,c> pair
       
   221                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
       
   222                     c2bOff += 0x100;
       
   223                     c2bIndex[e.cp>>8] = 1;
       
   224                 }
       
   225                 sb.append((char)e.bs);
       
   226                 sb.append((char)e.cp);
       
   227             }
       
   228             char[] nr = sb.toString().toCharArray();
       
   229             fm = new Formatter(c2bNRSB);
       
   230             fm.format("String c2bNR =%n");
       
   231             outString(fm, nr, 0, nr.length,  ";");
       
   232             fm.close();
       
   233         } else {
       
   234             c2bNRSB.append("String c2bNR = null;");
       
   235         }
       
   236 
       
   237         // (4)it's time to generate the source file
       
   238         String b2c = b2cSB.toString();
       
   239         String b2cNR = b2cNRSB.toString();
       
   240         String c2bNR = c2bNRSB.toString();
       
   241 
       
   242         Scanner s = new Scanner(new File(srcDir, template));
       
   243         PrintStream out = new PrintStream(new FileOutputStream(
       
   244                               new File(dstDir, clzName + ".java")));
       
   245         if (hisName == null)
       
   246             hisName = "";
       
   247 
       
   248         while (s.hasNextLine()) {
       
   249             String line = s.nextLine();
       
   250             if (line.indexOf("$") == -1) {
       
   251                 out.println(line);
       
   252                 continue;
       
   253             }
       
   254             line = line.replace("$PACKAGE$" , pkgName)
       
   255                        .replace("$IMPLEMENTS$", (hisName == null)?
       
   256                                 "" : "implements HistoricallyNamedCharset")
       
   257                        .replace("$NAME_CLZ$", clzName)
       
   258                        .replace("$NAME_ALIASES$",
       
   259                                 "sun.nio.cs".equals(pkgName) ?
       
   260                                 "StandardCharsets.aliases_" + clzName :
       
   261                                 "ExtendedCharsets.aliasesFor(\"" + csName + "\")")
       
   262                        .replace("$NAME_CS$" , csName)
       
   263                        .replace("$CONTAINS$",
       
   264                                 "MS932".equals(clzName)?
       
   265                                 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof JIS_X_0201) || (cs instanceof " + clzName + "));":
       
   266                                 (isASCII ?
       
   267                                  "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));":
       
   268                                  "return (cs instanceof " + clzName + ");"))
       
   269                        .replace("$HISTORICALNAME$",
       
   270                                 (hisName == null)? "" :
       
   271                                 "    public String historicalName() { return \"" + hisName + "\"; }")
       
   272                        .replace("$DECTYPE$", type)
       
   273                        .replace("$ENCTYPE$", type)
       
   274                        .replace("$B1MIN$"   , "0x" + Integer.toString(b1Min, 16))
       
   275                        .replace("$B1MAX$"   , "0x" + Integer.toString(b1Max, 16))
       
   276                        .replace("$B2MIN$"   , "0x" + Integer.toString(b2Min, 16))
       
   277                        .replace("$B2MAX$"   , "0x" + Integer.toString(b2Max, 16))
       
   278                        .replace("$B2C$", b2c)
       
   279                        .replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16))
       
   280                        .replace("$NONROUNDTRIP_B2C$", b2cNR)
       
   281                        .replace("$NONROUNDTRIP_C2B$", c2bNR);
       
   282 
       
   283             out.println(line);
       
   284         }
       
   285         out.close();
       
   286     }
       
   287 }