--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/make/src/classes/build/tools/generatecharacter/CharacterScript.java Thu Nov 14 11:19:32 2013 +0100
@@ -0,0 +1,216 @@
+package build.tools.generatecharacter;
+
+import java.util.regex.*;
+import java.util.*;
+import java.io.*;
+
+public class CharacterScript {
+    // generate the code needed for j.l.C.UnicodeScript
+
+    /** Test-output hook; a no-op unless the printf below is re-enabled. */
+    static void fortest(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /** Writes formatted text to standard output. */
+    static void print(String fmt, Object... o) {
+        System.out.printf(fmt, o);
+    }
+
+    /** Debug-output hook; a no-op unless the printf below is re-enabled. */
+    static void debug(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /**
+     * Reads the script data file named by {@code args[0]} and prints to
+     * standard output a per-code-point script listing followed by the source
+     * of a {@code Scripts} class holding the {@code UnicodeScript} enum and
+     * its range lookup tables. Exits with status 1 on wrong usage or failure.
+     * @param args exactly one element: the path of the script data file
+     */
+    public static void main(String args[]){
+        if (args.length != 1) {
+            // exactly one argument is accepted, so do not advertise a second
+            System.out.println("java CharacterScript script.txt");
+            System.exit(1);
+        }
+        try {
+            Map<String,Integer> scriptMap = new HashMap<String,Integer>();
+            List<int[]> scripts = readRanges(args[0], scriptMap);
+            if (scripts.isEmpty()) {
+                throw new IllegalStateException("no script ranges in " + args[0]);
+            }
+            debug("-----------------%n");
+            debug("Total scripts=%s%n", scriptMap.size());
+            debug("-----------------%n%n");
+            // invert the name -> index map into an index -> name table
+            String[] names = new String[scriptMap.size()];
+            for (Map.Entry<String,Integer> entry : scriptMap.entrySet()) {
+                names[entry.getValue().intValue()] = entry.getKey();
+            }
+            // per-code-point listing, printed in file order (before sorting)
+            for (int[] r : scripts) {
+                String name = names[r[2]].toUpperCase(Locale.ENGLISH);
+                for (int cp = r[0]; cp <= r[1]; cp++) {
+                    System.out.printf("%05X %s%n", cp, name);
+                }
+            }
+            // starts are non-negative, so the subtraction cannot overflow
+            Collections.sort(scripts, new Comparator<int[]>() {
+                public int compare(int[] a1, int[] a2) {
+                    return a1[0] - a2[0];
+                }
+            });
+            List<int[]> list = consolidate(scripts);
+            for (int[] a : list) {
+                debug("0x%05x, 0x%05x %s%n", a[0], a[1], nameOf(a, names));
+            }
+            debug("--->total=%d%n", list.size());
+
+            //////////////////OUTPUT//////////////////////////////////
+            print("public class Scripts {%n%n");
+            print(" public static enum UnicodeScript {%n");
+            for (String name : names) {
+                print(" /**%n * Unicode script \"%s\".%n */%n", name);
+                print(" %s,%n%n", name.toUpperCase(Locale.US));
+            }
+            print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");
+
+            // lookup tables; code points past the last range map to UNKNOWN
+            int[] last = list.get(list.size() - 1);
+            boolean hasTail = last[1] != Character.MAX_CODE_POINT;
+            print(" private static final int[] scriptStarts = {%n");
+            for (int[] a : list) {
+                String fmt = (a[0] < 0x10000) ? " 0x%04X, // %04X..%04X; %s%n"
+                                              : " 0x%05X, // %05X..%05X; %s%n";
+                print(fmt, a[0], a[0], a[1], nameOf(a, names));
+            }
+            if (hasTail) {
+                print(" 0x%05X // %05X..%06X; %s%n",
+                      last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT, "UNKNOWN");
+            }
+            print("%n };%n%n");
+            print(" private static final UnicodeScript[] scripts = {%n");
+            for (int[] a : list) {
+                print(" %s,%n", nameOf(a, names));
+            }
+            if (hasTail) {
+                print(" UNKNOWN%n");
+            }
+            print(" };%n");
+            print(" }%n");
+            print("}%n");
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(1);     // report the failure through the exit status
+        }
+    }
+
+    /**
+     * Parses the data file into {start, end, scriptIndex} triples. Lines of
+     * at most one character, lines starting with '#' and lines not shaped
+     * like "XXXX[..YYYY] ; Name # comment" are skipped; a line continuing
+     * the previous range (same name, start == previous end + 1) extends it.
+     * @param file      path of the script data file
+     * @param scriptMap receives each script name with the index assigned to it
+     * @return the ranges in file order; empty if no line matched
+     * @throws IOException if the file cannot be opened or read
+     */
+    private static List<int[]> readRanges(String file, Map<String,Integer> scriptMap)
+            throws IOException {
+        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+        List<int[]> scripts = new ArrayList<int[]>();
+        int prevS = -1, prevE = -1;     // prevS stays -1 while no range is pending
+        String prevN = null;
+        BufferedReader sbfr = new BufferedReader(new FileReader(file));
+        try {
+            String line;
+            while ((line = sbfr.readLine()) != null) {
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (!m.matches()) {
+                    debug("Warning: Unrecognized line <%s>%n", line);
+                    continue;
+                }
+                int start = Integer.parseInt(m.group(1), 16);
+                int end = (m.group(2) == null) ? start : Integer.parseInt(m.group(2), 16);
+                String name = m.group(3);
+                if (name.equals(prevN) && start == prevE + 1) {
+                    prevE = end;    // extend the pending range
+                    continue;
+                }
+                if (prevS != -1) {
+                    scripts.add(newRange(prevS, prevE, prevN, scriptMap));
+                }
+                debug("%x-%x\t%s%n", prevS, prevE, prevN);
+                prevS = start; prevE = end; prevN = name;
+            }
+        } finally {
+            sbfr.close();   // always release the file handle
+        }
+        // flush the pending range; there is none when no line matched
+        if (prevS != -1) {
+            scripts.add(newRange(prevS, prevE, prevN, scriptMap));
+        }
+        debug("%x-%x\t%s%n", prevS, prevE, prevN);
+        return scripts;
+    }
+
+    /** Builds a range triple, giving {@code name} the next free index if new. */
+    private static int[] newRange(int start, int end, String name, Map<String,Integer> scriptMap) {
+        Integer idx = scriptMap.get(name);
+        if (idx == null) {
+            idx = Integer.valueOf(scriptMap.size());
+            scriptMap.put(name, idx);
+        }
+        return new int[] { start, end, idx.intValue() };
+    }
+
+    /** Upper-cased script name of a range; "UNKNOWN" for script index -1. */
+    private static String nameOf(int[] a, String[] names) {
+        return (a[2] == -1) ? "UNKNOWN" : names[a[2]].toUpperCase(Locale.US);
+    }
+
+    /**
+     * Consolidation: there are lots of "reserved" code points embedded in
+     * those otherwise "sequential" blocks. To make the lookup table smaller,
+     * we combine those separated segments with the assumption that the
+     * lookup implementation checks Character.getType() != Character.UNASSIGNED
+     * first (return UNKNOWN for unassigned).
+     * @param scripts non-empty ranges sorted by start; ends may be rewritten
+     * @return the consolidated ranges; script index -1 marks an inserted gap range
+     */
+    private static List<int[]> consolidate(List<int[]> scripts) {
+        List<int[]> list = new ArrayList<int[]>();
+        int[] last = scripts.get(0);
+        list.add(last);
+        for (int i = 1; i < scripts.size(); i++) {
+            int[] cur = scripts.get(i);
+            if (cur[0] != (last[1] + 1)) {
+                boolean isNotUnassigned = false;
+                for (int cp = last[1] + 1; cp < cur[0]; cp++) {
+                    if (Character.getType(cp) != Character.UNASSIGNED) {
+                        isNotUnassigned = true;
+                        debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
+                        break;
+                    }
+                }
+                if (isNotUnassigned) {
+                    // surrogates only?
+                    list.add(new int[] { last[1] + 1, cur[0] - 1, -1 });
+                } else if (last[2] == cur[2]) {
+                    last[1] = cur[1];       // combine
+                    continue;
+                } else {
+                    last[1] = cur[0] - 1;   // expand last
+                }
+            }
+            list.add(cur);
+            last = cur;
+        }
+        return list;
+    }
+}