author | ihse |
Thu, 14 Nov 2013 11:19:32 +0100 | |
changeset 21805 | c7d7946239de |
parent 12317 | jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java@9670c1610c53 |
permissions | -rw-r--r-- |
12317
9670c1610c53
7074397: Build infrastructure changes (makefile re-write)
ohair
parents:
5610
diff
changeset
|
1 |
package build.tools.generatecharacter; |
9670c1610c53
7074397: Build infrastructure changes (makefile re-write)
ohair
parents:
5610
diff
changeset
|
2 |
|
5610 | 3 |
import java.util.regex.*; |
4 |
import java.util.*; |
|
5 |
import java.io.*; |
|
6 |
||
7 |
public class CharacterScript {

    // generate the code needed for j.l.C.UnicodeScript

    /**
     * Matches one data line of the script file: a hexadecimal code point,
     * an optional "..XXXX" range end, then "; Name # comment".
     * Group 1 is the range start, group 2 the range end (null for a single
     * code point) and group 3 the script name.
     */
    private static final Pattern RANGE_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** Id stored in slot 2 of a range that belongs to no script. */
    private static final int UNKNOWN_ID = -1;

    /** Test tracing hook; disabled unless the line below is uncommented. */
    static void fortest(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /** Writes formatted text of the generated source to standard output. */
    static void print(String fmt, Object... o) {
        System.out.printf(fmt, o);
    }

    /** Debug tracing hook; disabled unless the line below is uncommented. */
    static void debug(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /**
     * Reads the script data file named by the single argument and writes to
     * standard output, in this order: one "codepoint NAME" line per code point
     * of every parsed range (in file order), then the source text of a
     * {@code Scripts} class containing the {@code UnicodeScript} enum and its
     * {@code scriptStarts} / {@code scripts} lookup tables.
     *
     * Any exception is reported with a stack trace on standard error.
     *
     * @param args exactly one element: the path of the script data file
     */
    public static void main(String args[]) {
        try {
            if (args.length != 1) {
                // Only the input file is accepted; the result goes to stdout.
                System.out.println("java CharacterScript script.txt");
                System.exit(1);
            }

            Map<String,Integer> scriptMap = new HashMap<String,Integer>();
            List<int[]> ranges = readRanges(args[0], scriptMap);

            debug("-----------------%n");
            debug("Total scripts=%s%n", scriptMap.size());
            debug("-----------------%n%n");

            // Invert the name -> id map into an id-indexed name table.
            String[] names = new String[scriptMap.size()];
            for (Map.Entry<String,Integer> entry : scriptMap.entrySet()) {
                names[entry.getValue().intValue()] = entry.getKey();
            }

            // Per-code-point dump, emitted in file order (i.e. before sorting).
            for (int[] range : ranges) {
                String name = names[range[2]].toUpperCase(Locale.ENGLISH);
                for (int cp = range[0]; cp <= range[1]; cp++) {
                    System.out.printf("%05X %s%n", cp, name);
                }
            }

            // Order the ranges by their first code point. Explicit comparison
            // instead of subtraction so the result can never overflow.
            Collections.sort(ranges, new Comparator<int[]>() {
                public int compare(int[] r1, int[] r2) {
                    return (r1[0] < r2[0]) ? -1 : ((r1[0] == r2[0]) ? 0 : 1);
                }
            });

            List<int[]> list = consolidate(ranges);

            for (int[] a : list) {
                debug("0x%05x, 0x%05x %s%n", a[0], a[1], scriptName(names, a[2]));
            }
            debug("--->total=%d%n", list.size());

            emit(names, list);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the script data file into a list of {start, end, scriptId}
     * triples, in file order. Consecutive lines that name the same script and
     * are numerically adjacent are merged into one range. Lines that are
     * empty, one character long, start with '#', or do not match
     * {@link #RANGE_LINE} are skipped.
     *
     * @param path      the file to read
     * @param scriptMap receives script name -> id; ids are assigned from 0 in
     *                  the order scripts are first stored
     * @return the parsed ranges; the list grows as needed
     * @throws IOException if the file cannot be read or has no data lines
     */
    private static List<int[]> readRanges(String path, Map<String,Integer> scriptMap)
            throws IOException {
        List<int[]> ranges = new ArrayList<int[]>();
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            Matcher m = RANGE_LINE.matcher("");
            int prevS = -1;
            int prevE = -1;
            String prevN = null;    // null until the first data line is seen
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    debug("Warning: Unrecognized line <%s>%n", line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                int end = (m.group(2) == null) ? start
                                               : Integer.parseInt(m.group(2), 16);
                String name = m.group(3);
                if (name.equals(prevN) && start == prevE + 1) {
                    // Same script, directly adjacent: extend the pending range.
                    prevE = end;
                    continue;
                }
                if (prevN != null) {
                    addRange(ranges, scriptMap, prevS, prevE, prevN);
                }
                debug("%x-%x\t%s%n", prevS, prevE, prevN);
                prevS = start;
                prevE = end;
                prevN = name;
            }
            if (prevN == null) {
                // Nothing parsed: fail with a clear message rather than
                // storing a bogus {-1, -1, null} range.
                throw new IOException("No script ranges found in " + path);
            }
            // Flush the last pending range.
            addRange(ranges, scriptMap, prevS, prevE, prevN);
            debug("%x-%x\t%s%n", prevS, prevE, prevN);
        } finally {
            reader.close();
        }
        return ranges;
    }

    /** Appends {start, end, id(name)}, assigning the next free id to a new name. */
    private static void addRange(List<int[]> ranges, Map<String,Integer> scriptMap,
                                 int start, int end, String name) {
        Integer id = scriptMap.get(name);
        if (id == null) {
            id = Integer.valueOf(scriptMap.size());
            scriptMap.put(name, id);
        }
        ranges.add(new int[] { start, end, id.intValue() });
    }

    /**
     * Consolidation: there are lots of "reserved" code points embedded in
     * those otherwise "sequential" blocks. To make the lookup table smaller,
     * we combine those separated segments with the assumption that the lookup
     * implementation checks Character.getType() != Character.UNASSIGNED first
     * (return UNKNOWN for unassigned).
     *
     * A gap containing an assigned code point becomes an explicit unknown
     * range. A fully unassigned gap is absorbed: the two neighbours are merged
     * when they share a script, otherwise the earlier range is stretched up to
     * the later one. The triples in {@code sorted} are modified in place.
     *
     * @param sorted ranges ordered by start; must not be empty
     * @return the consolidated table
     */
    private static List<int[]> consolidate(List<int[]> sorted) {
        List<int[]> list = new ArrayList<int[]>();
        int[] last = sorted.get(0);
        list.add(last);
        for (int i = 1; i < sorted.size(); i++) {
            int[] cur = sorted.get(i);
            if (cur[0] != last[1] + 1) {
                if (hasAssigned(last[1] + 1, cur[0])) {
                    // surrogates only?
                    list.add(new int[] { last[1] + 1, cur[0] - 1, UNKNOWN_ID });
                } else if (last[2] == cur[2]) {
                    // combine
                    last[1] = cur[1];
                    continue;
                } else {
                    // expand last
                    last[1] = cur[0] - 1;
                }
            }
            list.add(cur);
            last = cur;
        }
        return list;
    }

    /** Tells whether any code point in [from, to) is not Character.UNASSIGNED. */
    private static boolean hasAssigned(int from, int to) {
        for (int cp = from; cp < to; cp++) {
            if (Character.getType(cp) != Character.UNASSIGNED) {
                debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
                return true;
            }
        }
        return false;
    }

    /** Returns the upper-cased enum constant name for an id, "UNKNOWN" for -1. */
    private static String scriptName(String[] names, int id) {
        return (id == UNKNOWN_ID) ? "UNKNOWN" : names[id].toUpperCase(Locale.US);
    }

    /**
     * Prints the generated {@code Scripts} class: the enum constants (in id
     * order, followed by UNKNOWN), the {@code scriptStarts} table and the
     * parallel {@code scripts} table. When the last range stops short of
     * Character.MAX_CODE_POINT, a closing UNKNOWN entry is added to both.
     */
    private static void emit(String[] names, List<int[]> list) {
        //////////////////OUTPUT//////////////////////////////////
        print("public class Scripts {%n%n");
        print(" public static enum UnicodeScript {%n");
        for (int i = 0; i < names.length; i++) {
            print(" /**%n * Unicode script \"%s\".%n */%n", names[i]);
            print(" %s,%n%n", names[i].toUpperCase(Locale.US));
        }
        print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");

        // lookup table
        print(" private static final int[] scriptStarts = {%n");
        for (int[] a : list) {
            String name = scriptName(names, a[2]);
            if (a[0] < 0x10000) {
                print(" 0x%04X, // %04X..%04X; %s%n",
                      a[0], a[0], a[1], name);
            } else {
                print(" 0x%05X, // %05X..%05X; %s%n",
                      a[0], a[0], a[1], name);
            }
        }
        int[] last = list.get(list.size() - 1);
        boolean needsTail = last[1] != Character.MAX_CODE_POINT;
        if (needsTail) {
            print(" 0x%05X // %05X..%06X; %s%n",
                  last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
                  "UNKNOWN");
        }
        print("%n };%n%n");

        print(" private static final UnicodeScript[] scripts = {%n");
        for (int[] a : list) {
            print(" %s,%n", scriptName(names, a[2]));
        }
        if (needsTail) {
            print(" UNKNOWN%n");
        }
        print(" };%n");
        print(" }%n");
        print("}%n");
    }
}