author | pliden |
Thu, 26 Sep 2019 13:56:58 +0200 | |
changeset 58355 | de246fd65587 |
parent 58028 | 7ac4273bb49b |
permissions | -rw-r--r-- |
9535
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
1 |
/* |
58028
7ac4273bb49b
8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents:
47216
diff
changeset
|
2 |
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. |
9535
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
4 |
* |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. Oracle designates this |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
8 |
* particular file as subject to the "Classpath" exception as provided |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
10 |
* |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
15 |
* accompanied this code). |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
16 |
* |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
17 |
* You should have received a copy of the GNU General Public License version |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
20 |
* |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
22 |
* or visit www.oracle.com if you need additional information or have any |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
23 |
* questions. |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
24 |
*/ |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
25 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
26 |
package build.tools.generatecharacter; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
27 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
28 |
import java.util.regex.*; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
29 |
import java.util.*; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
30 |
import java.io.*; |
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
31 |
|
d930011fd275
7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff
changeset
|
32 |
/**
 * A PropList object contains the lists of code points that have
 * the same Unicode property defined in PropList.txt and
 * DerivedCoreProperties.txt
 *
 * <p>Each instance covers a single 16-bit plane: only entries whose first
 * code point lies in the requested plane are retained, and the stored
 * values are the low 16 bits of each code point (the offset within that
 * plane).
 *
 * @author Xueming Shen
 */
public class PropList {

    /**
     * Matches one data line of the form
     * {@code XXXX[..YYYY] ; Property_Name # comment}.
     * Group 1 is the first code point (hex), group 2 the optional last code
     * point of a range (hex), group 3 the property name.
     */
    private static final Pattern ENTRY = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*");

    /** Property name to plane-relative code points, in first-seen order. */
    private final Map<String, List<Integer>> propMap = new LinkedHashMap<>();

    /**
     * Parses a property file and keeps the entries that belong to one plane.
     *
     * @param file  the property file to read
     * @param plane the plane number; an entry is kept when the first code
     *              point of the entry, shifted right by 16, equals this value
     * @return a new PropList holding the matching entries
     * @throws IOException if the file cannot be opened or read
     */
    public static PropList readSpecFile(File file, int plane)
        throws IOException
    {
        return new PropList(file, plane);
    }

    /**
     * Returns the code points recorded for a property.
     *
     * @param name the property name
     * @return the backing list of plane-relative code points, or
     *         {@code null} if no entry with that name was recorded
     */
    public List<Integer> codepoints(String name) {
        return propMap.get(name);
    }

    /**
     * Returns the names of all recorded properties.
     *
     * @return the key set of the underlying map (a live view)
     */
    public Set<String> names() {
        return propMap.keySet();
    }

    /**
     * Copies every property of {@code pl} into this object. A property that
     * already exists here is replaced, not merged. The lists themselves are
     * shared with {@code pl}, not copied.
     *
     * @param pl the PropList whose properties are added
     */
    public void putAll(PropList pl) {
        propMap.putAll(pl.propMap);
    }

    /**
     * Reads {@code file} line by line and records the entries of the given
     * plane. Lines of length 0 or 1 and lines starting with {@code #} are
     * skipped; any other line that does not match the expected format is
     * reported on standard output and ignored.
     */
    private PropList(File file, int plane) throws IOException {
        Matcher m = ENTRY.matcher("");
        // try-with-resources: the reader is closed even when reading or
        // number parsing fails part-way through the file.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            int lineNo = 0;
            while ((line = reader.readLine()) != null) {
                lineNo++;
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                if ((start >> 16) != plane) {
                    continue;
                }
                String endText = m.group(2);
                int end = (endText == null) ? start
                                            : Integer.parseInt(endText, 16);

                // Keep only the offset within the plane.
                int first = start & 0xffff;
                int last = end & 0xffff;

                List<Integer> list =
                    propMap.computeIfAbsent(m.group(3), k -> new ArrayList<>());
                for (int cp = first; cp <= last; cp++) {
                    list.add(cp);
                }
            }
        }
    }

    /**
     * Command-line entry point: parses the file named by {@code args[0]} for
     * the plane given in {@code args[1]} (decoded with
     * {@link Integer#decode(String)}). The result is discarded; only parse
     * warnings are printed.
     *
     * @param args file path and plane number
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        readSpecFile(new File(args[0]), Integer.decode(args[1]));
    }
}