test/jdk/java/lang/Character/UnicodeSpec.java
author mikael
Mon, 29 Jul 2019 09:59:04 -0700
changeset 57584 9d82a35b6ff7
parent 50045 d9d55f64d136
permissions -rw-r--r--
Merge
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
50045
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     1
/*
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     2
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     4
 *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    10
 *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    15
 * accompanied this code).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    16
 *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    20
 *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    23
 * questions.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    24
 */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    25
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    26
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    27
import java.io.BufferedReader;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    28
import java.io.FileReader;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    29
import java.io.FileNotFoundException;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    30
import java.io.IOException;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    31
import java.io.File;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    32
import java.util.regex.Pattern;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    33
import java.util.ArrayList;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    34
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    35
/**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    36
 * The UnicodeSpec class provides a way to read in Unicode character
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    37
 * properties from a Unicode data file.  One instance of class UnicodeSpec
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    38
 * holds a decoded version of one line of the data file.  The file may
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    39
 * be obtained from www.unicode.org.  The method readSpecFile returns an array
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    40
 * of UnicodeSpec objects.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    41
 *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    42
 * @author      Guy Steele
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    43
 * @author  John O'Conner
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    44
 */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    45
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    46
public class UnicodeSpec {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    47
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    48
    /**
    * Creates a spec for code point 0xFFFF; all other properties receive the
    * defaults assigned by {@link #UnicodeSpec(int)}.
    */
    public UnicodeSpec() {
        this(0xffff);
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    51
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    52
    /**
    * Creates a spec for the given code point with default property values:
    * general category UNASSIGNED, bidi category DIRECTIONALITY_UNDEFINED,
    * not mirrored, all three case mappings 0xFFFF (the value toString()
    * treats as "no mapping"), decimal and digit values -1, an empty numeric
    * value, and null name, old name and comment.
    *
    * @param codePoint the code point this spec describes
    */
    public UnicodeSpec(int codePoint) {
        this.codePoint = codePoint;
        generalCategory = UNASSIGNED;
        bidiCategory = DIRECTIONALITY_UNDEFINED;
        mirrored = false;
        titleMap = 0xFFFF;
        upperMap = 0xFFFF;
        lowerMap = 0xFFFF;
        decimalValue = -1;
        digitValue = -1;
        numericValue = "";
        oldName = null;
        comment = null;
        name = null;
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    67
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    68
    public String toString() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    69
        StringBuffer result = new StringBuffer(hex6(codePoint));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    70
        if (getUpperMap() != 0xffff) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    71
            result.append(", upper=").append(hex6(upperMap));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    72
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    73
        if (getLowerMap() != 0xffff) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    74
            result.append(", lower=").append(hex6(lowerMap));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    75
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    76
        if (getTitleMap() != 0xffff) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    77
            result.append(", title=").append(hex6(titleMap));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    78
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    79
        return result.toString();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    80
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    81
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    82
    static String hex4(int n) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    83
        String q = Long.toHexString(n & 0xFFFF).toUpperCase();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    84
        return "0000".substring(Math.min(4, q.length())) + q;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    85
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    86
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    87
        static String hex6(int n) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    88
                String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    89
                return "000000".substring(Math.min(6, str.length())) + str;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    90
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    91
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    92
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    93
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    94
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    95
    * Given one line of a Unicode data file as a String, parse the line
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    96
    * and return a UnicodeSpec object that contains the same character information.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    97
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    98
    * @param s a line of the Unicode data file to be parsed
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
    99
    * @return a UnicodeSpec object, or null if the parsing process failed for some reason
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   100
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   101
    public static UnicodeSpec parse(String s) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   102
        UnicodeSpec spec = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   103
        String[] tokens = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   104
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   105
        try {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   106
                        tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   107
            spec = new UnicodeSpec();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   108
            spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   109
            spec.setName(parseName(tokens[FIELD_NAME]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   110
            spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   111
            spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   112
            spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   113
            spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   114
            spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   115
            spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   116
            spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   117
            spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   118
            spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   119
            spec.setComment(parseComment(tokens[FIELD_COMMENT]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   120
            spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   121
            spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   122
            spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   123
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   124
        catch(Exception e) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   125
            spec = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   126
            System.out.println("Error parsing spec line.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   127
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   128
        return spec;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   129
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   130
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   131
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   132
    * Parse the codePoint attribute for a Unicode character.  If the parse succeeds,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   133
    * the codePoint field of this UnicodeSpec object is updated and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   134
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   135
    * The codePoint attribute should be a four-digit hexadecimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   136
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   137
    * @param s   the codePoint attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   138
    * @return   code point if successful
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   139
    * @exception NumberFormatException if unable to parse argument
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   140
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   141
    public static int parseCodePoint(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   142
        return Integer.parseInt(s, 16);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   143
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   144
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   145
    public static String parseName(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   146
        if (s==null) throw new Exception("Cannot parse name.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   147
        return s;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   148
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   149
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   150
    public static byte parseGeneralCategory(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   151
        byte category = GENERAL_CATEGORY_COUNT;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   152
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   153
        for (byte x=0; x<generalCategoryList.length; x++) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   154
            if (s.equals(generalCategoryList[x][SHORT])) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   155
                category = x;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   156
                break;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   157
            }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   158
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   159
        if (category >= GENERAL_CATEGORY_COUNT) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   160
            throw new Exception("Could not parse general category.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   161
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   162
        return category;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   163
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   164
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   165
    public static byte parseBidiCategory(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   166
        byte category = DIRECTIONALITY_CATEGORY_COUNT;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   167
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   168
        for (byte x=0; x<bidiCategoryList.length; x++) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   169
            if (s.equals(bidiCategoryList[x][SHORT])) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   170
                category = x;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   171
                break;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   172
            }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   173
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   174
        if (category >= DIRECTIONALITY_CATEGORY_COUNT) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   175
            throw new Exception("Could not parse bidi category.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   176
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   177
        return category;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   178
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   179
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   180
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   181
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   182
    * Parse the combining attribute for a Unicode character.  If there is a combining
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   183
    * attribute and the parse succeeds, then the hasCombining field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   184
    * the combining field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   185
    * If the combining attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   186
    * hasCombining field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   187
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   188
    * The combining attribute, if any, should be a nonnegative decimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   189
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   190
    * @param s   the combining attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   191
    * @return   the combining class value if any, -1 if property not defined
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   192
    * @exception Exception if can't parse the combining class
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   193
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   194
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   195
    public static int parseCombiningClass(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   196
        int combining = -1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   197
        if (s.length()>0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   198
            combining = Integer.parseInt(s, 10);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   199
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   200
        return combining;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   201
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   202
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   203
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   204
    * Parse the decomposition attribute for a Unicode character.  If the parse succeeds,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   205
    * the decomposition field of this UnicodeSpec object is updated and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   206
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   207
    * The decomposition attribute is complicated; for now, it is treated as a string.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   208
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   209
    * @param s   the decomposition attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   210
    * @return   true if the parse failed; otherwise false
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   211
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   212
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   213
    public static String parseDecomposition(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   214
        if (s==null) throw new Exception("Cannot parse decomposition.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   215
        return s;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   216
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   217
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   218
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   219
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   220
    * Parse the decimal value attribute for a Unicode character.  If there is a decimal value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   221
    * attribute and the parse succeeds, then the hasDecimalValue field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   222
    * the decimalValue field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   223
    * If the decimal value attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   224
    * hasDecimalValue field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   225
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   226
    * The decimal value attribute, if any, should be a nonnegative decimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   227
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   228
    * @param s   the decimal value attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   229
    * @return   the decimal value as an int, -1 if no decimal value defined
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   230
    * @exception NumberFormatException if the parse fails
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   231
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   232
    public static int parseDecimalValue(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   233
        int value = -1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   234
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   235
        if (s.length() > 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   236
            value = Integer.parseInt(s, 10);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   237
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   238
        return value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   239
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   240
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   241
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   242
    * Parse the digit value attribute for a Unicode character.  If there is a digit value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   243
    * attribute and the parse succeeds, then the hasDigitValue field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   244
    * the digitValue field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   245
    * If the digit value attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   246
    * hasDigitValue field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   247
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   248
    * The digit value attribute, if any, should be a nonnegative decimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   249
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   250
    * @param s   the digit value attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   251
    * @return   the digit value as an non-negative int, or -1 if no digit property defined
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   252
    * @exception NumberFormatException if the parse fails
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   253
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   254
    public static int parseDigitValue(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   255
        int value = -1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   256
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   257
        if (s.length() > 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   258
            value = Integer.parseInt(s, 10);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   259
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   260
        return value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   261
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   262
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   263
    public static String parseNumericValue(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   264
        if (s == null) throw new Exception("Cannot parse numeric value.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   265
        return s;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   266
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   267
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   268
    public static String parseComment(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   269
        if (s == null) throw new Exception("Cannot parse comment.");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   270
        return s;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   271
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   272
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   273
    public static boolean parseMirrored(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   274
        boolean mirrored;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   275
        if (s.length() == 1) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   276
            if (s.charAt(0) == 'Y') {mirrored = true;}
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   277
            else if (s.charAt(0) == 'N') {mirrored = false;}
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   278
            else {throw new Exception("Cannot parse mirrored property.");}
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   279
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   280
        else { throw new Exception("Cannot parse mirrored property.");}
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   281
        return mirrored;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   282
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   283
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   284
    public static String parseOldName(String s) throws Exception {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   285
        if (s == null) throw new Exception("Cannot parse old name");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   286
        return s;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   287
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   288
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   289
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   290
    * Parse the uppercase mapping attribute for a Unicode character.  If there is a uppercase
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   291
    * mapping attribute and the parse succeeds, then the hasUpperMap field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   292
    * the upperMap field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   293
    * If the uppercase mapping attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   294
    * hasUpperMap field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   295
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   296
    * The uppercase mapping attribute should be a four-digit hexadecimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   297
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   298
    * @param s   the uppercase mapping attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   299
    * @return   uppercase char if defined, \uffff otherwise
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   300
    * @exception NumberFormatException if parse fails
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   301
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   302
    public static int parseUpperMap(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   303
        int upperCase = 0xFFFF;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   304
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   305
        if (s.length() >= 4) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   306
            upperCase = Integer.parseInt(s, 16);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   307
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   308
        else if (s.length() != 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   309
            throw new NumberFormatException();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   310
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   311
        return upperCase;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   312
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   313
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   314
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   315
    * Parse the lowercase mapping attribute for a Unicode character.  If there is a lowercase
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   316
    * mapping attribute and the parse succeeds, then the hasLowerMap field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   317
    * the lowerMap field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   318
    * If the lowercase mapping attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   319
     * hasLowerMap field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   320
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   321
    * The lowercase mapping attribute should be a four-digit hexadecimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   322
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   323
    * @param s   the lowercase mapping attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   324
    * @return   lowercase char mapping if defined, \uFFFF otherwise
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   325
    * @exception NumberFormatException if parse fails
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   326
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   327
    public static int parseLowerMap(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   328
        int lowerCase = 0xFFFF;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   329
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   330
        if (s.length() >= 4) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   331
            lowerCase = Integer.parseInt(s, 16);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   332
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   333
        else if (s.length() != 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   334
            throw new NumberFormatException();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   335
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   336
        return lowerCase;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   337
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   338
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   339
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   340
    * Parse the titlecase mapping attribute for a Unicode character.  If there is a titlecase
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   341
    * mapping attribute and the parse succeeds, then the hasTitleMap field is set to true,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   342
    * the titleMap field of this UnicodeSpec object is updated, and false is returned.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   343
    * If the titlecase mapping attribute is an empty string, the parse succeeds but the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   344
    * hasTitleMap field is set to false. (and false is returned).
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   345
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   346
    * The titlecase mapping attribute should be a four-digit hexadecimal integer.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   347
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   348
    * @param s   the titlecase mapping attribute extracted from a line of the Unicode data file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   349
    * @return   title case char mapping if defined, \uFFFF otherwise
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   350
    * @exception NumberFormatException if parse fails
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   351
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   352
    public static int parseTitleMap(String s) throws NumberFormatException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   353
        int titleCase = 0xFFFF;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   354
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   355
        if (s.length() >= 4) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   356
            titleCase = Integer.parseInt(s, 16);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   357
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   358
        else if (s.length() != 0) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   359
            throw new NumberFormatException();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   360
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   361
        return titleCase;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   362
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   363
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   364
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   365
    * Read and parse a Unicode data file.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   366
    *
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   367
    * @param file   a file specifying the Unicode data file to be read
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   368
    * @return   an array of UnicodeSpec objects, one for each line of the
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   369
    *           Unicode data file that could be successfully parsed as
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   370
    *           specifying Unicode character attributes
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   371
    */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   372
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   373
    public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   374
                ArrayList<UnicodeSpec> list = new ArrayList<>(3000);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   375
        UnicodeSpec[] result = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   376
        int count = 0;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   377
        BufferedReader f = new BufferedReader(new FileReader(file));
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   378
                String line = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   379
        loop:
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   380
        while(true) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   381
            try {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   382
                line = f.readLine();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   383
            }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   384
            catch (IOException e) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   385
                                break loop;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   386
                        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   387
            if (line == null) break loop;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   388
            UnicodeSpec item = parse(line.trim());
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   389
                        int specPlane = item.getCodePoint() >>> 16;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   390
                        if (specPlane < plane) continue;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   391
                        if (specPlane > plane) break;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   392
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   393
            if (item != null) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   394
                                list.add(item);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   395
            }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   396
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   397
                result = new UnicodeSpec[list.size()];
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   398
                list.toArray(result);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   399
        return result;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   400
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   401
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   402
    void setCodePoint(int value) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   403
        codePoint = value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   404
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   405
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   406
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   407
     * Return the code point in this Unicode specification
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   408
     * @return the char code point representing by the specification
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   409
     */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   410
    public int getCodePoint() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   411
        return codePoint;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   412
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   413
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   414
    void setName(String name) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   415
        this.name = name;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   416
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   417
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   418
    public String getName() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   419
        return name;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   420
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   421
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   422
    void setGeneralCategory(byte category) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   423
        generalCategory = category;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   424
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   425
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   426
    public byte getGeneralCategory() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   427
        return generalCategory;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   428
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   429
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   430
    void setBidiCategory(byte category) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   431
        bidiCategory = category;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   432
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   433
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   434
    public byte getBidiCategory() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   435
        return bidiCategory;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   436
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   437
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   438
    void setCombiningClass(int combiningClass) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   439
        this.combiningClass = combiningClass;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   440
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   441
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   442
    public int getCombiningClass() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   443
        return combiningClass;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   444
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   445
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   446
    void setDecomposition(String decomposition) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   447
        this.decomposition = decomposition;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   448
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   449
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   450
    public String getDecomposition() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   451
         return decomposition;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   452
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   453
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   454
    void setDecimalValue(int value) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   455
        decimalValue = value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   456
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   457
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   458
    public int getDecimalValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   459
        return decimalValue;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   460
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   461
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   462
    public boolean isDecimalValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   463
        return decimalValue != -1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   464
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   465
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   466
    void setDigitValue(int value) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   467
        digitValue = value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   468
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   469
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   470
    public int getDigitValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   471
        return digitValue;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   472
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   473
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   474
    public boolean isDigitValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   475
        return digitValue != -1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   476
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   477
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   478
    void setNumericValue(String value) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   479
        numericValue = value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   480
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   481
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   482
    public String getNumericValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   483
        return numericValue;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   484
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   485
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   486
    public boolean isNumericValue() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   487
        return numericValue.length() > 0;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   488
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   489
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   490
    void setMirrored(boolean value) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   491
        mirrored = value;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   492
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   493
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   494
    public boolean isMirrored() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   495
        return mirrored;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   496
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   497
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   498
    void setOldName(String name) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   499
        oldName = name;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   500
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   501
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   502
    public String getOldName() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   503
        return oldName;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   504
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   505
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   506
    void setComment(String comment) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   507
        this.comment = comment;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   508
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   509
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   510
    public String getComment() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   511
        return comment;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   512
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   513
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   514
    void setUpperMap(int ch) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   515
        upperMap = ch;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   516
    };
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   517
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   518
    public int getUpperMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   519
        return upperMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   520
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   521
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   522
    public boolean hasUpperMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   523
        return upperMap != 0xffff;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   524
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   525
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   526
    void setLowerMap(int ch) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   527
        lowerMap = ch;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   528
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   529
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   530
    public int getLowerMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   531
        return lowerMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   532
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   533
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   534
    public boolean hasLowerMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   535
        return lowerMap != 0xffff;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   536
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   537
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   538
    void setTitleMap(int ch) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   539
        titleMap = ch;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   540
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   541
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   542
    public int getTitleMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   543
        return titleMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   544
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   545
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   546
    public boolean hasTitleMap() {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   547
        return titleMap != 0xffff;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   548
    }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   549
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   550
    int codePoint;         // the characters UTF-32 code value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   551
    String name;            // the ASCII name
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   552
    byte generalCategory;   // general category, available via Characte.getType()
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   553
    byte bidiCategory;      // available via Character.getBidiType()
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   554
    int combiningClass;     // not used in Character
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   555
    String decomposition;   // not used in Character
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   556
    int decimalValue;       // decimal digit value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   557
    int digitValue;         // not all digits are decimal
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   558
    String numericValue;    // numeric value if digit or non-digit
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   559
    boolean mirrored;       //
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   560
    String oldName;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   561
    String comment;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   562
    int upperMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   563
    int lowerMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   564
    int titleMap;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   565
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   566
    // this is the number of fields in one line of the UnicodeData.txt file
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   567
    // each field is separated by a semicolon (a token)
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   568
    static final int REQUIRED_FIELDS = 15;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   569
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   570
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   571
     * General category types
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   572
     * To preserve compatibility, these values cannot be changed
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   573
     */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   574
    public static final byte
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   575
        UNASSIGNED                  =  0, // Cn normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   576
        UPPERCASE_LETTER            =  1, // Lu normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   577
        LOWERCASE_LETTER            =  2, // Ll normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   578
        TITLECASE_LETTER            =  3, // Lt normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   579
        MODIFIER_LETTER             =  4, // Lm normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   580
        OTHER_LETTER                =  5, // Lo normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   581
        NON_SPACING_MARK            =  6, // Mn informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   582
        ENCLOSING_MARK              =  7, // Me informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   583
        COMBINING_SPACING_MARK      =  8, // Mc normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   584
        DECIMAL_DIGIT_NUMBER        =  9, // Nd normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   585
        LETTER_NUMBER               = 10, // Nl normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   586
        OTHER_NUMBER                = 11, // No normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   587
        SPACE_SEPARATOR             = 12, // Zs normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   588
        LINE_SEPARATOR              = 13, // Zl normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   589
        PARAGRAPH_SEPARATOR         = 14, // Zp normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   590
        CONTROL                     = 15, // Cc normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   591
        FORMAT                      = 16, // Cf normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   592
        // 17 is unused for no apparent reason,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   593
        // but must preserve forward compatibility
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   594
        PRIVATE_USE                 = 18, // Co normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   595
        SURROGATE                   = 19, // Cs normative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   596
        DASH_PUNCTUATION            = 20, // Pd informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   597
        START_PUNCTUATION           = 21, // Ps informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   598
        END_PUNCTUATION             = 22, // Pe informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   599
        CONNECTOR_PUNCTUATION       = 23, // Pc informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   600
        OTHER_PUNCTUATION           = 24, // Po informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   601
        MATH_SYMBOL                 = 25, // Sm informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   602
        CURRENCY_SYMBOL             = 26, // Sc informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   603
        MODIFIER_SYMBOL             = 27, // Sk informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   604
        OTHER_SYMBOL                = 28, // So informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   605
        INITIAL_QUOTE_PUNCTUATION   = 29, // Pi informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   606
        FINAL_QUOTE_PUNCTUATION     = 30, // Pf informative
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   607
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   608
        // this value is only used in the character generation tool
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   609
        // it can change to accommodate the addition of new categories.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   610
        GENERAL_CATEGORY_COUNT      = 31; // sentinel value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   611
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   612
    /** Column index of the short name within a row of a category-name table. */
    static final byte SHORT = 0;

    /** Column index of the long name within a row of a category-name table. */
    static final byte LONG = 1;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   613
    // general category type strings
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   614
    // NOTE: The order of this category array is dependent on the assignment of
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   615
    // category constants above. We want to access this array using constants above.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   616
    // [][SHORT] is the SHORT name, [][LONG] is the LONG name
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   617
    static final String[][] generalCategoryList = {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   618
        {"Cn", "UNASSIGNED"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   619
        {"Lu", "UPPERCASE_LETTER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   620
        {"Ll", "LOWERCASE_LETTER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   621
        {"Lt", "TITLECASE_LETTER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   622
        {"Lm", "MODIFIER_LETTER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   623
        {"Lo", "OTHER_LETTER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   624
        {"Mn", "NON_SPACING_MARK"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   625
        {"Me", "ENCLOSING_MARK"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   626
        {"Mc", "COMBINING_SPACING_MARK"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   627
        {"Nd", "DECIMAL_DIGIT_NUMBER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   628
        {"Nl", "LETTER_NUMBER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   629
        {"No", "OTHER_NUMBER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   630
        {"Zs", "SPACE_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   631
        {"Zl", "LINE_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   632
        {"Zp", "PARAGRAPH_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   633
        {"Cc", "CONTROL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   634
        {"Cf", "FORMAT"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   635
        {"xx", "unused"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   636
        {"Co", "PRIVATE_USE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   637
        {"Cs", "SURROGATE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   638
        {"Pd", "DASH_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   639
        {"Ps", "START_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   640
        {"Pe", "END_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   641
        {"Pc", "CONNECTOR_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   642
        {"Po", "OTHER_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   643
        {"Sm", "MATH_SYMBOL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   644
        {"Sc", "CURRENCY_SYMBOL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   645
        {"Sk", "MODIFIER_SYMBOL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   646
        {"So", "OTHER_SYMBOL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   647
        {"Pi", "INITIAL_QUOTE_PUNCTUATION"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   648
        {"Pf", "FINAL_QUOTE_PUNCTUATION"}
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   649
    };
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   650
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   651
    /**
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   652
     * Bidirectional categories
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   653
     */
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   654
    public static final byte
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   655
                DIRECTIONALITY_UNDEFINED                  = -1,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   656
        // Strong category
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   657
        DIRECTIONALITY_LEFT_TO_RIGHT              =  0, // L
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   658
        DIRECTIONALITY_RIGHT_TO_LEFT              =  1, // R
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   659
        DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       =  2, // AL
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   660
        // Weak category
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   661
        DIRECTIONALITY_EUROPEAN_NUMBER            =  3, // EN
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   662
        DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  =  4, // ES
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   663
        DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR =  5, // ET
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   664
        DIRECTIONALITY_ARABIC_NUMBER              =  6, // AN
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   665
        DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    =  7, // CS
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   666
        DIRECTIONALITY_NONSPACING_MARK            =  8, // NSM
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   667
        DIRECTIONALITY_BOUNDARY_NEUTRAL           =  9, // BN
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   668
        // Neutral category
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   669
        DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10, // B
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   670
        DIRECTIONALITY_SEGMENT_SEPARATOR          = 11, // S
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   671
        DIRECTIONALITY_WHITESPACE                 = 12, // WS
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   672
        DIRECTIONALITY_OTHER_NEUTRALS              = 13, // ON
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   673
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   674
        DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14, // LRE
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   675
        DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15, // LRO
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   676
        DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16, // RLE
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   677
        DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17, // RLO
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   678
        DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18, // PDF
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   679
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   680
        DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE      = 19, // LRI
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   681
        DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE      = 20, // RLI
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   682
        DIRECTIONALITY_FIRST_STRONG_ISOLATE       = 21, // FSI
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   683
        DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE    = 22, // PDI
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   684
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   685
        DIRECTIONALITY_CATEGORY_COUNT             = 23; // sentinel value
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   686
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   687
    // If changes are made to the above bidi category assignments, this
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   688
    // list of bidi category names must be changed to keep their order in synch.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   689
    // Access this list using the bidi category constants above.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   690
    static final String[][] bidiCategoryList = {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   691
        {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   692
        {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   693
        {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   694
        {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   695
        {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   696
        {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   697
        {"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   698
        {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   699
        {"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   700
        {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   701
        {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   702
        {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   703
        {"WS", "DIRECTIONALITY_WHITESPACE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   704
        {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   705
        {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   706
        {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   707
        {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   708
        {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   709
        {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   710
        {"LRI", "DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   711
        {"RLI", "DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   712
        {"FSI", "DIRECTIONALITY_FIRST_STRONG_ISOLATE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   713
        {"PDI", "DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE"},
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   714
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   715
    };
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   716
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   717
    // Unicode specification lines have fields in this order.
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   718
    static final byte
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   719
        FIELD_VALUE         = 0,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   720
        FIELD_NAME          = 1,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   721
        FIELD_CATEGORY      = 2,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   722
        FIELD_CLASS         = 3,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   723
        FIELD_BIDI          = 4,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   724
        FIELD_DECOMPOSITION = 5,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   725
        FIELD_DECIMAL       = 6,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   726
        FIELD_DIGIT         = 7,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   727
        FIELD_NUMERIC       = 8,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   728
        FIELD_MIRRORED      = 9,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   729
        FIELD_OLDNAME       = 10,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   730
        FIELD_COMMENT       = 11,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   731
        FIELD_UPPERCASE     = 12,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   732
        FIELD_LOWERCASE     = 13,
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   733
        FIELD_TITLECASE     = 14;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   734
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   735
        // Pattern matching a single ';', compiled once and kept in a static field.
        // NOTE(review): judging by the name it is used to split a specification
        // line into its fields -- confirm against the code that uses it.
        static final Pattern tokenSeparator = Pattern.compile(";");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   736
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   737
        public static void main(String[] args) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   738
                UnicodeSpec[] spec = null;
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   739
                if (args.length == 2 ) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   740
                        try {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   741
                                File file = new File(args[0]);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   742
                                int plane = Integer.parseInt(args[1]);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   743
                                spec = UnicodeSpec.readSpecFile(file, plane);
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   744
                                System.out.println("UnicodeSpec[" + spec.length + "]:");
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   745
                                for (int x=0; x<spec.length; x++) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   746
                                        System.out.println(spec[x].toString());
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   747
                                }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   748
                        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   749
                        catch(Exception e) {
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   750
                                e.printStackTrace();
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   751
                        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   752
                }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   753
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   754
        }
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   755
d9d55f64d136 8191410: Unicode 10
rgoel
parents:
diff changeset
   756
}