test/jdk/java/lang/Character/CheckScript.java
author mikael
Mon, 29 Jul 2019 09:59:04 -0700
changeset 57584 9d82a35b6ff7
parent 55013 8dae495a59e7
permissions -rw-r--r--
Merge
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     1
/*
55013
8dae495a59e7 8221431: Support for Unicode 12.1
naoto
parents: 47216
diff changeset
     2
 * Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     4
 *
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     7
 * published by the Free Software Foundation.
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     8
 *
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    13
 * accompanied this code).
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    14
 *
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    18
 *
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    21
 * questions.
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    22
 */
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    23
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    24
/**
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    25
 * @test
55013
8dae495a59e7 8221431: Support for Unicode 12.1
naoto
parents: 47216
diff changeset
    26
 * @bug 6945564 6959267 7033561 7070436 7198195 8032446 8072600 8221431
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    27
 * @summary  Check that j.l.Character.UnicodeScript agrees with the Unicode data files (Scripts.txt and PropertyValueAliases.txt)
55013
8dae495a59e7 8221431: Support for Unicode 12.1
naoto
parents: 47216
diff changeset
    28
 * @library /lib/testlibrary/java/lang
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    29
 */
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    30
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    31
import java.io.*;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    32
import java.util.*;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    33
import java.util.regex.*;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    34
import java.lang.Character.UnicodeScript;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    35
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    36
public class CheckScript {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    37
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    38
    public static void main(String[] args) throws Exception {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    39
        File fScripts;
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    40
        File fAliases;
7247
20bd166a1ad6 6959267: Support Unicode 6.0.0
peytoia
parents: 5610
diff changeset
    41
        if (args.length == 0) {
55013
8dae495a59e7 8221431: Support for Unicode 12.1
naoto
parents: 47216
diff changeset
    42
            fScripts = UCDFiles.SCRIPTS.toFile();
8dae495a59e7 8221431: Support for Unicode 12.1
naoto
parents: 47216
diff changeset
    43
            fAliases = UCDFiles.PROPERTY_VALUE_ALIASES.toFile();
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    44
        } else if (args.length == 2) {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    45
            fScripts = new File(args[0]);
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    46
            fAliases = new File(args[1]);
7247
20bd166a1ad6 6959267: Support Unicode 6.0.0
peytoia
parents: 5610
diff changeset
    47
        } else {
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    48
            System.out.println("java CharacterScript Scripts.txt PropertyValueAliases.txt");
7247
20bd166a1ad6 6959267: Support Unicode 6.0.0
peytoia
parents: 5610
diff changeset
    49
            throw new RuntimeException("Datafile name should be specified.");
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    50
        }
8543
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    51
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    52
        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    53
        String line = null;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    54
        HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
    55
        try (BufferedReader sbfr = new BufferedReader(new FileReader(fScripts))) {
8543
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    56
            while ((line = sbfr.readLine()) != null) {
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    57
                if (line.length() <= 1 || line.charAt(0) == '#') {
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    58
                    continue;
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    59
                }
8543
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    60
                m.reset(line);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    61
                if (m.matches()) {
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    62
                    int start = Integer.parseInt(m.group(1), 16);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    63
                    int end = (m.group(2)==null)?start
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    64
                                                :Integer.parseInt(m.group(2), 16);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    65
                    String name = m.group(3).toLowerCase(Locale.ENGLISH);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    66
                    ArrayList<Integer> ranges = scripts.get(name);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    67
                    if (ranges == null) {
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    68
                        ranges = new ArrayList<Integer>();
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    69
                        scripts.put(name, ranges);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    70
                    }
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    71
                    ranges.add(start);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    72
                    ranges.add(end);
e5ec12a932da 7021209: convert lang, math, util to use try-with-resources
smarks
parents: 7247
diff changeset
    73
                }
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    74
            }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    75
        }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    76
        // check all defined ranges
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    77
        Integer[] ZEROSIZEARRAY = new Integer[0];
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    78
        for (String name : scripts.keySet()) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    79
            System.out.println("Checking " + name + "...");
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    80
            Integer[] ranges = scripts.get(name).toArray(ZEROSIZEARRAY);
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    81
            Character.UnicodeScript expected =
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    82
                Character.UnicodeScript.forName(name);
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    83
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    84
            int off = 0;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    85
            while (off < ranges.length) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    86
                int start = ranges[off++];
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    87
                int end = ranges[off++];
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    88
                for (int cp = start; cp <= end; cp++) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    89
                    Character.UnicodeScript script =
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    90
                        Character.UnicodeScript.of(cp);
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    91
                    if (script != expected) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    92
                        throw new RuntimeException(
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    93
                            "UnicodeScript failed: cp=" +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    94
                            Integer.toHexString(cp) +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    95
                            ", of(cp)=<" + script + "> but <" +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    96
                            expected + "> is expected");
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    97
                   }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    98
                }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    99
            }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   100
        }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   101
        // check all codepoints
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   102
        for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   103
            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   104
            if (script == Character.UnicodeScript.UNKNOWN) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   105
                if (Character.getType(cp) != Character.UNASSIGNED &&
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   106
                    Character.getType(cp) != Character.SURROGATE &&
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   107
                    Character.getType(cp) != Character.PRIVATE_USE)
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   108
                    throw new RuntimeException(
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   109
                        "UnicodeScript failed: cp=" +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   110
                        Integer.toHexString(cp) +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   111
                        ", of(cp)=<" + script + "> but UNKNOWN is expected");
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   112
            } else {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   113
                Integer[] ranges =
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   114
                    scripts.get(script.name().toLowerCase(Locale.ENGLISH))
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   115
                           .toArray(ZEROSIZEARRAY);
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   116
                int off = 0;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   117
                boolean found = false;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   118
                while (off < ranges.length) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   119
                    int start = ranges[off++];
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   120
                    int end = ranges[off++];
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   121
                    if (cp >= start && cp <= end)
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   122
                        found = true;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   123
                }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   124
                if (!found) {
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   125
                    throw new RuntimeException(
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   126
                        "UnicodeScript failed: cp=" +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   127
                        Integer.toHexString(cp) +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   128
                        ", of(cp)=<" + script +
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   129
                        "> but NOT in ranges of this script");
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   130
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   131
                }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   132
            }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   133
        }
9250
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   134
        // check all aliases
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   135
        m = Pattern.compile("sc\\s*;\\s*(\\p{Alpha}{4})\\s*;\\s*([\\p{Alpha}|_]+)\\s*.*").matcher("");
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   136
        line = null;
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   137
        try (BufferedReader sbfr = new BufferedReader(new FileReader(fAliases))) {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   138
            while ((line = sbfr.readLine()) != null) {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   139
                if (line.length() <= 1 || line.charAt(0) == '#') {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   140
                    continue;
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   141
                }
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   142
                m.reset(line);
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   143
                if (m.matches()) {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   144
                    String alias = m.group(1);
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   145
                    String name = m.group(2);
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   146
                    // HRKT -> Katakana_Or_Hiragana not supported
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   147
                    if ("HRKT".equals(alias.toUpperCase(Locale.ENGLISH)))
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   148
                        continue;
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   149
                    if (Character.UnicodeScript.forName(alias) !=
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   150
                        Character.UnicodeScript.forName(name)) {
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   151
                        throw new RuntimeException(
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   152
                            "UnicodeScript failed: alias<" + alias +
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   153
                            "> does not map to <" + name + ">");
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   154
                    }
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   155
                }
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   156
            }
284446951deb 7033561: Missing Unicode Script aliases
sherman
parents: 8543
diff changeset
   157
        }
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   158
    }
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
   159
}