make/jdk/src/classes/build/tools/generatecharacter/PropList.java
author pliden
Thu, 26 Sep 2019 13:56:58 +0200
changeset 58355 de246fd65587
parent 58028 7ac4273bb49b
permissions -rw-r--r--
8231294: ZGC: vmTestbase/nsk/jvmti/ResourceExhausted/resexhausted002 fails Reviewed-by: shade, dholmes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     1
/*
58028
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
     2
 * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     4
 *
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    10
 *
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    15
 * accompanied this code).
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    16
 *
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    20
 *
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    23
 * questions.
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    24
 */
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    25
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    26
package build.tools.generatecharacter;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    27
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    28
import java.util.regex.*;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    29
import java.util.*;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    30
import java.io.*;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    31
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    32
/**
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    33
 * A PropList object contains the lists of code points that have
58028
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    34
 * the same Unicode property defined in PropList.txt and
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    35
 * DerivedCoreProperties.txt
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    36
 *
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    37
 * @author Xueming Shen
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    38
 */
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    39
public class PropList {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    40
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    41
    public static PropList readSpecFile(File file, int plane)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    42
        throws IOException
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    43
    {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    44
        return new PropList(file, plane);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    45
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    46
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    47
    public List<Integer> codepoints(String name) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    48
        return propMap.get(name);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    49
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    50
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    51
    public Set<String> names() {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    52
        return propMap.keySet();
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    53
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    54
58028
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    55
    public void putAll(PropList pl) {
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    56
        pl.names().stream()
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    57
            .forEach(name -> propMap.put(name, pl.codepoints(name)));
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    58
    }
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    59
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    60
    private Map<String, List<Integer>> propMap =
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    61
        new LinkedHashMap<String, List<Integer>>();
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    62
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    63
    private PropList(File file, int plane) throws IOException {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    64
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    65
        int i, j;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    66
        BufferedReader sbfr = new BufferedReader(new FileReader(file));
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    67
        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    68
        String line = null;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    69
        int lineNo = 0;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    70
        while ((line = sbfr.readLine()) != null) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    71
            lineNo++;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    72
            if (line.length() <= 1 || line.charAt(0) == '#') {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    73
                continue;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    74
            }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    75
            m.reset(line);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    76
            if (m.matches()) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    77
                int start = Integer.parseInt(m.group(1), 16);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    78
                if ((start >> 16) != plane)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    79
                    continue;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    80
                int end = (m.group(2)==null)?start
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    81
                          :Integer.parseInt(m.group(2), 16);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    82
                String name = m.group(3);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    83
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    84
                start &= 0xffff;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    85
                end &= 0xffff;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    86
58028
7ac4273bb49b 8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
naoto
parents: 47216
diff changeset
    87
                List<Integer> list = propMap.get(name);
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    88
                if (list == null) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    89
                    list = new ArrayList<Integer>();
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    90
                    propMap.put(name, list);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    91
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    92
                while (start <= end)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    93
                    list.add(start++);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    94
            } else {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    95
                System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    96
            }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    97
        }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    98
        sbfr.close();
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
    99
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   100
        //for (String name: propMap.keySet()) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   101
        //    System.out.printf("%s    %d%n", name, propMap.get(name).size());
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   102
        //}
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   103
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   104
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   105
    public static void main(String[] args) throws IOException {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   106
        readSpecFile(new File(args[0]), Integer.decode(args[1]));
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   107
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents:
diff changeset
   108
}