jdk/make/src/classes/build/tools/generatecharacter/CharacterScript.java
changeset 21805 c7d7946239de
parent 12317 9670c1610c53
equal deleted inserted replaced
21804:07b686da11c4 21805:c7d7946239de
       
     1 package build.tools.generatecharacter;
       
     2 
       
     3 import java.util.regex.*;
       
     4 import java.util.*;
       
     5 import java.io.*;
       
     6 
       
     7 public class CharacterScript {
       
     8 
       
     9     // generate the code needed for j.l.C.UnicodeScript
       
    10     static void fortest(String fmt, Object... o) {
       
    11         //System.out.printf(fmt, o);
       
    12     }
       
    13 
       
    14     static void print(String fmt, Object... o) {
       
    15         System.out.printf(fmt, o);
       
    16     }
       
    17 
       
    18     static void debug(String fmt, Object... o) {
       
    19         //System.out.printf(fmt, o);
       
    20     }
       
    21 
       
    22     public static void main(String args[]){
       
    23         try {
       
    24             if (args.length != 1) {
       
    25                 System.out.println("java CharacterScript script.txt out");
       
    26                 System.exit(1);
       
    27             }
       
    28 
       
    29             int i, j;
       
    30             BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
       
    31             HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();
       
    32             String line = null;
       
    33 
       
    34             Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
       
    35 
       
    36             int prevS = -1;
       
    37             int prevE = -1;
       
    38             String prevN = null;
       
    39             int[][] scripts = new int[1024][3];
       
    40             int scriptSize = 0;
       
    41 
       
    42             while ((line = sbfr.readLine()) != null) {
       
    43                 if (line.length() <= 1 || line.charAt(0) == '#') {
       
    44                     continue;
       
    45                 }
       
    46                 m.reset(line);
       
    47                 if (m.matches()) {
       
    48                     int start = Integer.parseInt(m.group(1), 16);
       
    49                     int end = (m.group(2)==null)?start
       
    50                               :Integer.parseInt(m.group(2), 16);
       
    51                     String name = m.group(3);
       
    52                     if (name.equals(prevN) && start == prevE + 1) {
       
    53                         prevE = end;
       
    54                     } else {
       
    55                         if (prevS != -1) {
       
    56                             if (scriptMap.get(prevN) == null) {
       
    57                                 scriptMap.put(prevN, scriptMap.size());
       
    58                             }
       
    59                             scripts[scriptSize][0] = prevS;
       
    60                             scripts[scriptSize][1] = prevE;
       
    61                             scripts[scriptSize][2] = scriptMap.get(prevN);
       
    62                             scriptSize++;
       
    63                         }
       
    64                         debug("%x-%x\t%s%n", prevS, prevE, prevN);
       
    65                         prevS = start; prevE = end; prevN = name;
       
    66                     }
       
    67                 } else {
       
    68                     debug("Warning: Unrecognized line <%s>%n", line);
       
    69                 }
       
    70             }
       
    71 
       
    72             //last one.
       
    73             if (scriptMap.get(prevN) == null) {
       
    74                 scriptMap.put(prevN, scriptMap.size());
       
    75             }
       
    76             scripts[scriptSize][0] = prevS;
       
    77             scripts[scriptSize][1] = prevE;
       
    78             scripts[scriptSize][2] = scriptMap.get(prevN);
       
    79             scriptSize++;
       
    80 
       
    81             debug("%x-%x\t%s%n", prevS, prevE, prevN);
       
    82             debug("-----------------%n");
       
    83             debug("Total scripts=%s%n", scriptMap.size());
       
    84             debug("-----------------%n%n");
       
    85 
       
    86             String[] names = new String[scriptMap.size()];
       
    87             for (String name: scriptMap.keySet()) {
       
    88                 names[scriptMap.get(name).intValue()] = name;
       
    89             }
       
    90 
       
    91             for (j = 0; j < scriptSize; j++) {
       
    92                 for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {
       
    93                     String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);;
       
    94                     if (cp > 0xffff)
       
    95                         System.out.printf("%05X    %s%n", cp, name);
       
    96                     else
       
    97                         System.out.printf("%05X    %s%n", cp, name);
       
    98                 }
       
    99             }
       
   100 
       
   101             Arrays.sort(scripts, 0, scriptSize,
       
   102                         new Comparator<int[]>() {
       
   103                             public int compare(int[] a1, int[] a2) {
       
   104                                 return a1[0] - a2[0];
       
   105                             }
       
   106                             public boolean compare(Object obj) {
       
   107                                 return obj == this;
       
   108                             }
       
   109                          });
       
   110 
       
   111 
       
   112 
       
   113             // Consolidation: there are lots of "reserved" code points
       
   114             // embedded in those otherwise "sequential" blocks.
       
   115             // To make the lookup table smaller, we combine those
       
   116             // separated segments with the assumption that the lookup
       
   117             // implementation checks
       
   118             //    Character.getType() !=  Character.UNASSIGNED
       
   119             // first (return UNKNOWN for unassigned)
       
   120 
       
   121             ArrayList<int[]> list = new ArrayList();
       
   122             list.add(scripts[0]);
       
   123 
       
   124             int[] last = scripts[0];
       
   125             for (i = 1; i < scriptSize; i++) {
       
   126                 if (scripts[i][0] != (last[1] + 1)) {
       
   127 
       
   128                     boolean isNotUnassigned = false;
       
   129                     for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
       
   130                         if (Character.getType(cp) != Character.UNASSIGNED) {
       
   131                             isNotUnassigned = true;
       
   132                             debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
       
   133                             break;
       
   134                         }
       
   135                     }
       
   136                     if (isNotUnassigned) {
       
   137                         // surrogates only?
       
   138                         int[] a = new int[3];
       
   139                         a[0] = last[1] + 1;
       
   140                         a[1] = scripts[i][0] - 1;
       
   141                         a[2] = -1;  // unknown
       
   142                         list.add(a);
       
   143                     } else {
       
   144                         if (last[2] == scripts[i][2]) {
       
   145                             //combine
       
   146                             last[1] = scripts[i][1];
       
   147                             continue;
       
   148                         } else {
       
   149                             // expand last
       
   150                             last[1] = scripts[i][0] - 1;
       
   151                         }
       
   152                     }
       
   153                 }
       
   154                 list.add(scripts[i]);
       
   155                 last = scripts[i];
       
   156             }
       
   157 
       
   158             for (i = 0; i < list.size(); i++) {
       
   159                 int[] a = (int[])list.get(i);
       
   160                 String name = "UNKNOWN";
       
   161                 if (a[2] != -1)
       
   162                     name = names[a[2]].toUpperCase(Locale.US);
       
   163                 debug("0x%05x, 0x%05x  %s%n", a[0], a[1], name);
       
   164             }
       
   165             debug("--->total=%d%n", list.size());
       
   166 
       
   167 
       
   168             //////////////////OUTPUT//////////////////////////////////
       
   169             print("public class Scripts {%n%n");
       
   170             print("    public static enum UnicodeScript {%n");
       
   171             for (i = 0; i < names.length; i++) {
       
   172                 print("        /**%n         * Unicode script \"%s\".%n         */%n", names[i]);
       
   173                 print("        %s,%n%n",  names[i].toUpperCase(Locale.US));
       
   174             }
       
   175             print("        /**%n         * Unicode script \"Unknown\".%n         */%n        UNKNOWN;%n%n");
       
   176 
       
   177 
       
   178             // lookup table
       
   179             print("        private static final int[] scriptStarts = {%n");
       
   180             for (int[] a : list) {
       
   181                 String name = "UNKNOWN";
       
   182                 if (a[2] != -1)
       
   183                     name = names[a[2]].toUpperCase(Locale.US);
       
   184                 if (a[0] < 0x10000)
       
   185                     print("            0x%04X,   // %04X..%04X; %s%n",
       
   186                           a[0], a[0], a[1], name);
       
   187                 else
       
   188                     print("            0x%05X,  // %05X..%05X; %s%n",
       
   189                           a[0], a[0], a[1], name);
       
   190             }
       
   191             last = list.get(list.size() -1);
       
   192             if (last[1] != Character.MAX_CODE_POINT)
       
   193                 print("            0x%05X   // %05X..%06X; %s%n",
       
   194                       last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
       
   195                       "UNKNOWN");
       
   196             print("%n        };%n%n");
       
   197 
       
   198             print("        private static final UnicodeScript[] scripts = {%n");
       
   199             for (int[] a : list) {
       
   200                 String name = "UNKNOWN";
       
   201                 if (a[2] != -1)
       
   202                     name = names[a[2]].toUpperCase(Locale.US);
       
   203                 print("            %s,%n", name);
       
   204             }
       
   205 
       
   206             if (last[1] != Character.MAX_CODE_POINT)
       
   207                 print("            UNKNOWN%n");
       
   208             print("        };%n");
       
   209             print("    }%n");
       
   210             print("}%n");
       
   211 
       
   212         } catch (Exception e) {
       
   213             e.printStackTrace();
       
   214         }
       
   215     }
       
   216 }