7033561: Missing Unicode Script aliases
Summary: added 6.0 aliases
Reviewed-by: okutsu, peytoia, alanb
--- a/jdk/src/share/classes/java/lang/Character.java Mon Apr 11 10:22:39 2011 +0800
+++ b/jdk/src/share/classes/java/lang/Character.java Sun Apr 10 23:33:14 2011 -0700
@@ -4184,9 +4184,11 @@
aliases.put("AVST", AVESTAN);
aliases.put("BALI", BALINESE);
aliases.put("BAMU", BAMUM);
+ aliases.put("BATK", BATAK);
aliases.put("BENG", BENGALI);
aliases.put("BOPO", BOPOMOFO);
aliases.put("BRAI", BRAILLE);
+ aliases.put("BRAH", BRAHMI);
aliases.put("BUGI", BUGINESE);
aliases.put("BUHD", BUHID);
aliases.put("CANS", CANADIAN_ABORIGINAL);
@@ -4230,6 +4232,7 @@
aliases.put("LISU", LISU);
aliases.put("LYCI", LYCIAN);
aliases.put("LYDI", LYDIAN);
+ aliases.put("MAND", MANDAIC);
aliases.put("MLYM", MALAYALAM);
aliases.put("MONG", MONGOLIAN);
aliases.put("MTEI", MEETEI_MAYEK);
--- a/jdk/test/java/lang/Character/CheckScript.java Mon Apr 11 10:22:39 2011 +0800
+++ b/jdk/test/java/lang/Character/CheckScript.java Sun Apr 10 23:33:14 2011 -0700
@@ -1,34 +1,58 @@
+
+/*
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
/**
* @test
- * @bug 6945564 6959267
+ * @bug 6945564 6959267 7033561
* @summary Check that the j.l.Character.UnicodeScript
*/
import java.io.*;
-import java.lang.reflect.*;
import java.util.*;
import java.util.regex.*;
import java.lang.Character.UnicodeScript;
public class CheckScript {
- static BufferedReader open(String[] args) throws FileNotFoundException {
+ public static void main(String[] args) throws Exception {
+ File fScripts;
+ File fAliases;
if (args.length == 0) {
- return new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Scripts.txt")));
- } else if (args.length == 1) {
- return new BufferedReader(new FileReader(args[0]));
+ fScripts = new File(System.getProperty("test.src", "."), "Scripts.txt");
+ fAliases = new File(System.getProperty("test.src", "."), "PropertyValueAliases.txt");
+ } else if (args.length == 2) {
+ fScripts = new File(args[0]);
+ fAliases = new File(args[1]);
} else {
- System.out.println("java CharacterScript Scripts.txt");
+ System.out.println("java CharacterScript Scripts.txt PropertyValueAliases.txt");
throw new RuntimeException("Datafile name should be specified.");
}
- }
-
- public static void main(String[] args) throws Exception {
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
String line = null;
HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
- try (BufferedReader sbfr = open(args)) {
+ try (BufferedReader sbfr = new BufferedReader(new FileReader(fScripts))) {
while ((line = sbfr.readLine()) != null) {
if (line.length() <= 1 || line.charAt(0) == '#') {
continue;
@@ -107,5 +131,29 @@
}
}
}
+ // check all aliases
+ m = Pattern.compile("sc\\s*;\\s*(\\p{Alpha}{4})\\s*;\\s*([\\p{Alpha}|_]+)\\s*.*").matcher("");
+ line = null;
+ try (BufferedReader sbfr = new BufferedReader(new FileReader(fAliases))) {
+ while ((line = sbfr.readLine()) != null) {
+ if (line.length() <= 1 || line.charAt(0) == '#') {
+ continue;
+ }
+ m.reset(line);
+ if (m.matches()) {
+ String alias = m.group(1);
+ String name = m.group(2);
+ // HRKT -> Katakana_Or_Hiragana not supported
+ if ("HRKT".equals(alias.toUpperCase(Locale.ENGLISH)))
+ continue;
+ if (Character.UnicodeScript.forName(alias) !=
+ Character.UnicodeScript.forName(name)) {
+ throw new RuntimeException(
+ "UnicodeScript failed: alias<" + alias +
+ "> does not map to <" + name + ">");
+ }
+ }
+ }
+ }
}
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/lang/Character/PropertyValueAliases.txt Sun Apr 10 23:33:14 2011 -0700
@@ -0,0 +1,1178 @@
+# PropertyValueAliases-6.0.0.txt
+# Date: 2010-07-17, 22:44:06 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2010 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# This file contains aliases for property values used in the UCD.
+# These names can be used for XML formats of UCD data, for regular-expression
+# property tests, and other programmatic textual descriptions of Unicode data.
+# For information on which properties are normative, see UCD.html.
+#
+# The names may be translated in appropriate environments, and additional
+# aliases may be useful.
+#
+# FORMAT
+#
+# Each line describes a property value name.
+# This consists of three or more fields, separated by semicolons.
+#
+# First Field: The first field describes the property for which that
+# property value name is used.
+#
+# Second Field: The second field is an abbreviated name.
+# If there is no abbreviated name available, the field is marked with "n/a".
+#
+# Third Field: The third field is a long name.
+#
+# In the case of ccc, there are 4 fields. The second field is numeric, third
+# is abbreviated, and fourth is long.
+#
+# The above are the preferred aliases. Other aliases may be listed in additional fields.
+#
+# Loose matching should be applied to all property names and property values, with
+# the exception of String Property values. With loose matching of property names and
+# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
+# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
+#
+# NOTE: Property value names are NOT unique across properties. For example:
+#
+# AL means Arabic Letter for the Bidi_Class property, and
+# AL means Above_Left for the Combining_Class property, and
+# AL means Alphabetic for the Line_Break property.
+#
+# In addition, some property names may be the same as some property value names.
+# For example:
+#
+# sc means the Script property, and
+# Sc means the General_Category property value Currency_Symbol (Sc)
+#
+# The combination of property value and property name is, however, unique.
+#
+# For more information, see UTS #18: Unicode Regular Expressions
+# ================================================
+
+
+# ASCII_Hex_Digit (AHex)
+
+AHex; N ; No ; F ; False
+AHex; Y ; Yes ; T ; True
+
+# Age (age)
+
+age; n/a ; 1.1
+age; n/a ; 2.0
+age; n/a ; 2.1
+age; n/a ; 3.0
+age; n/a ; 3.1
+age; n/a ; 3.2
+age; n/a ; 4.0
+age; n/a ; 4.1
+age; n/a ; 5.0
+age; n/a ; 5.1
+age; n/a ; 5.2
+age; n/a ; 6.0
+age; n/a ; unassigned
+
+# Alphabetic (Alpha)
+
+Alpha; N ; No ; F ; False
+Alpha; Y ; Yes ; T ; True
+
+# Bidi_Class (bc)
+
+bc ; AL ; Arabic_Letter
+bc ; AN ; Arabic_Number
+bc ; B ; Paragraph_Separator
+bc ; BN ; Boundary_Neutral
+bc ; CS ; Common_Separator
+bc ; EN ; European_Number
+bc ; ES ; European_Separator
+bc ; ET ; European_Terminator
+bc ; L ; Left_To_Right
+bc ; LRE ; Left_To_Right_Embedding
+bc ; LRO ; Left_To_Right_Override
+bc ; NSM ; Nonspacing_Mark
+bc ; ON ; Other_Neutral
+bc ; PDF ; Pop_Directional_Format
+bc ; R ; Right_To_Left
+bc ; RLE ; Right_To_Left_Embedding
+bc ; RLO ; Right_To_Left_Override
+bc ; S ; Segment_Separator
+bc ; WS ; White_Space
+
+# Bidi_Control (Bidi_C)
+
+Bidi_C; N ; No ; F ; False
+Bidi_C; Y ; Yes ; T ; True
+
+# Bidi_Mirrored (Bidi_M)
+
+Bidi_M; N ; No ; F ; False
+Bidi_M; Y ; Yes ; T ; True
+
+# Bidi_Mirroring_Glyph (bmg)
+
+# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; <none>
+
+# Block (blk)
+
+blk; n/a ; Aegean_Numbers
+blk; n/a ; Alchemical_Symbols
+blk; n/a ; Alphabetic_Presentation_Forms
+blk; n/a ; Ancient_Greek_Musical_Notation
+blk; n/a ; Ancient_Greek_Numbers
+blk; n/a ; Ancient_Symbols
+blk; n/a ; Arabic
+blk; n/a ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A
+blk; n/a ; Arabic_Presentation_Forms_B
+blk; n/a ; Arabic_Supplement
+blk; n/a ; Armenian
+blk; n/a ; Arrows
+blk; n/a ; Avestan
+blk; n/a ; Balinese
+blk; n/a ; Bamum
+blk; n/a ; Bamum_Supplement
+blk; n/a ; Basic_Latin ; ASCII
+blk; n/a ; Batak
+blk; n/a ; Bengali
+blk; n/a ; Block_Elements
+blk; n/a ; Bopomofo
+blk; n/a ; Bopomofo_Extended
+blk; n/a ; Box_Drawing
+blk; n/a ; Brahmi
+blk; n/a ; Braille_Patterns
+blk; n/a ; Buginese
+blk; n/a ; Buhid
+blk; n/a ; Byzantine_Musical_Symbols
+blk; n/a ; Carian
+blk; n/a ; Cham
+blk; n/a ; Cherokee
+blk; n/a ; CJK_Compatibility
+blk; n/a ; CJK_Compatibility_Forms
+blk; n/a ; CJK_Compatibility_Ideographs
+blk; n/a ; CJK_Compatibility_Ideographs_Supplement
+blk; n/a ; CJK_Radicals_Supplement
+blk; n/a ; CJK_Strokes
+blk; n/a ; CJK_Symbols_And_Punctuation
+blk; n/a ; CJK_Unified_Ideographs
+blk; n/a ; CJK_Unified_Ideographs_Extension_A
+blk; n/a ; CJK_Unified_Ideographs_Extension_B
+blk; n/a ; CJK_Unified_Ideographs_Extension_C
+blk; n/a ; CJK_Unified_Ideographs_Extension_D
+blk; n/a ; Combining_Diacritical_Marks
+blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
+blk; n/a ; Combining_Diacritical_Marks_Supplement
+blk; n/a ; Combining_Half_Marks
+blk; n/a ; Common_Indic_Number_Forms
+blk; n/a ; Control_Pictures
+blk; n/a ; Coptic
+blk; n/a ; Counting_Rod_Numerals
+blk; n/a ; Cuneiform
+blk; n/a ; Cuneiform_Numbers_And_Punctuation
+blk; n/a ; Currency_Symbols
+blk; n/a ; Cypriot_Syllabary
+blk; n/a ; Cyrillic
+blk; n/a ; Cyrillic_Extended_A
+blk; n/a ; Cyrillic_Extended_B
+blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary
+blk; n/a ; Deseret
+blk; n/a ; Devanagari
+blk; n/a ; Devanagari_Extended
+blk; n/a ; Dingbats
+blk; n/a ; Domino_Tiles
+blk; n/a ; Egyptian_Hieroglyphs
+blk; n/a ; Emoticons
+blk; n/a ; Enclosed_Alphanumeric_Supplement
+blk; n/a ; Enclosed_Alphanumerics
+blk; n/a ; Enclosed_CJK_Letters_And_Months
+blk; n/a ; Enclosed_Ideographic_Supplement
+blk; n/a ; Ethiopic
+blk; n/a ; Ethiopic_Extended
+blk; n/a ; Ethiopic_Extended_A
+blk; n/a ; Ethiopic_Supplement
+blk; n/a ; General_Punctuation
+blk; n/a ; Geometric_Shapes
+blk; n/a ; Georgian
+blk; n/a ; Georgian_Supplement
+blk; n/a ; Glagolitic
+blk; n/a ; Gothic
+blk; n/a ; Greek_And_Coptic ; Greek
+blk; n/a ; Greek_Extended
+blk; n/a ; Gujarati
+blk; n/a ; Gurmukhi
+blk; n/a ; Halfwidth_And_Fullwidth_Forms
+blk; n/a ; Hangul_Compatibility_Jamo
+blk; n/a ; Hangul_Jamo
+blk; n/a ; Hangul_Jamo_Extended_A
+blk; n/a ; Hangul_Jamo_Extended_B
+blk; n/a ; Hangul_Syllables
+blk; n/a ; Hanunoo
+blk; n/a ; Hebrew
+blk; n/a ; High_Private_Use_Surrogates
+blk; n/a ; High_Surrogates
+blk; n/a ; Hiragana
+blk; n/a ; Ideographic_Description_Characters
+blk; n/a ; Imperial_Aramaic
+blk; n/a ; Inscriptional_Pahlavi
+blk; n/a ; Inscriptional_Parthian
+blk; n/a ; IPA_Extensions
+blk; n/a ; Javanese
+blk; n/a ; Kaithi
+blk; n/a ; Kana_Supplement
+blk; n/a ; Kanbun
+blk; n/a ; Kangxi_Radicals
+blk; n/a ; Kannada
+blk; n/a ; Katakana
+blk; n/a ; Katakana_Phonetic_Extensions
+blk; n/a ; Kayah_Li
+blk; n/a ; Kharoshthi
+blk; n/a ; Khmer
+blk; n/a ; Khmer_Symbols
+blk; n/a ; Lao
+blk; n/a ; Latin_1_Supplement ; Latin_1
+blk; n/a ; Latin_Extended_A
+blk; n/a ; Latin_Extended_Additional
+blk; n/a ; Latin_Extended_B
+blk; n/a ; Latin_Extended_C
+blk; n/a ; Latin_Extended_D
+blk; n/a ; Lepcha
+blk; n/a ; Letterlike_Symbols
+blk; n/a ; Limbu
+blk; n/a ; Linear_B_Ideograms
+blk; n/a ; Linear_B_Syllabary
+blk; n/a ; Lisu
+blk; n/a ; Low_Surrogates
+blk; n/a ; Lycian
+blk; n/a ; Lydian
+blk; n/a ; Mahjong_Tiles
+blk; n/a ; Malayalam
+blk; n/a ; Mandaic
+blk; n/a ; Mathematical_Alphanumeric_Symbols
+blk; n/a ; Mathematical_Operators
+blk; n/a ; Meetei_Mayek
+blk; n/a ; Miscellaneous_Mathematical_Symbols_A
+blk; n/a ; Miscellaneous_Mathematical_Symbols_B
+blk; n/a ; Miscellaneous_Symbols
+blk; n/a ; Miscellaneous_Symbols_And_Arrows
+blk; n/a ; Miscellaneous_Symbols_And_Pictographs
+blk; n/a ; Miscellaneous_Technical
+blk; n/a ; Modifier_Tone_Letters
+blk; n/a ; Mongolian
+blk; n/a ; Musical_Symbols
+blk; n/a ; Myanmar
+blk; n/a ; Myanmar_Extended_A
+blk; n/a ; New_Tai_Lue
+blk; n/a ; NKo
+blk; n/a ; No_Block
+blk; n/a ; Number_Forms
+blk; n/a ; Ogham
+blk; n/a ; Ol_Chiki
+blk; n/a ; Old_Italic
+blk; n/a ; Old_Persian
+blk; n/a ; Old_South_Arabian
+blk; n/a ; Old_Turkic
+blk; n/a ; Optical_Character_Recognition
+blk; n/a ; Oriya
+blk; n/a ; Osmanya
+blk; n/a ; Phags_Pa
+blk; n/a ; Phaistos_Disc
+blk; n/a ; Phoenician
+blk; n/a ; Phonetic_Extensions
+blk; n/a ; Phonetic_Extensions_Supplement
+blk; n/a ; Playing_Cards
+blk; n/a ; Private_Use_Area ; Private_Use
+blk; n/a ; Rejang
+blk; n/a ; Rumi_Numeral_Symbols
+blk; n/a ; Runic
+blk; n/a ; Samaritan
+blk; n/a ; Saurashtra
+blk; n/a ; Shavian
+blk; n/a ; Sinhala
+blk; n/a ; Small_Form_Variants
+blk; n/a ; Spacing_Modifier_Letters
+blk; n/a ; Specials
+blk; n/a ; Sundanese
+blk; n/a ; Superscripts_And_Subscripts
+blk; n/a ; Supplemental_Arrows_A
+blk; n/a ; Supplemental_Arrows_B
+blk; n/a ; Supplemental_Mathematical_Operators
+blk; n/a ; Supplemental_Punctuation
+blk; n/a ; Supplementary_Private_Use_Area_A
+blk; n/a ; Supplementary_Private_Use_Area_B
+blk; n/a ; Syloti_Nagri
+blk; n/a ; Syriac
+blk; n/a ; Tagalog
+blk; n/a ; Tagbanwa
+blk; n/a ; Tags
+blk; n/a ; Tai_Le
+blk; n/a ; Tai_Tham
+blk; n/a ; Tai_Viet
+blk; n/a ; Tai_Xuan_Jing_Symbols
+blk; n/a ; Tamil
+blk; n/a ; Telugu
+blk; n/a ; Thaana
+blk; n/a ; Thai
+blk; n/a ; Tibetan
+blk; n/a ; Tifinagh
+blk; n/a ; Transport_And_Map_Symbols
+blk; n/a ; Ugaritic
+blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
+blk; n/a ; Unified_Canadian_Aboriginal_Syllabics_Extended
+blk; n/a ; Vai
+blk; n/a ; Variation_Selectors
+blk; n/a ; Variation_Selectors_Supplement
+blk; n/a ; Vedic_Extensions
+blk; n/a ; Vertical_Forms
+blk; n/a ; Yi_Radicals
+blk; n/a ; Yi_Syllables
+blk; n/a ; Yijing_Hexagram_Symbols
+
+# Canonical_Combining_Class (ccc)
+
+ccc; 0; NR ; Not_Reordered
+ccc; 1; OV ; Overlay
+ccc; 7; NK ; Nukta
+ccc; 8; KV ; Kana_Voicing
+ccc; 9; VR ; Virama
+ccc; 200; ATBL ; Attached_Below_Left
+ccc; 202; ATB ; Attached_Below
+ccc; 214; ATA ; Attached_Above
+ccc; 216; ATAR ; Attached_Above_Right
+ccc; 218; BL ; Below_Left
+ccc; 220; B ; Below
+ccc; 222; BR ; Below_Right
+ccc; 224; L ; Left
+ccc; 226; R ; Right
+ccc; 228; AL ; Above_Left
+ccc; 230; A ; Above
+ccc; 232; AR ; Above_Right
+ccc; 233; DB ; Double_Below
+ccc; 234; DA ; Double_Above
+ccc; 240; IS ; Iota_Subscript
+
+# Case_Folding (cf)
+
+# @missing: 0000..10FFFF; Case_Folding; <code point>
+
+# Case_Ignorable (CI)
+
+CI ; N ; No ; F ; False
+CI ; Y ; Yes ; T ; True
+
+# Cased (Cased)
+
+Cased; N ; No ; F ; False
+Cased; Y ; Yes ; T ; True
+
+# Changes_When_Casefolded (CWCF)
+
+CWCF; N ; No ; F ; False
+CWCF; Y ; Yes ; T ; True
+
+# Changes_When_Casemapped (CWCM)
+
+CWCM; N ; No ; F ; False
+CWCM; Y ; Yes ; T ; True
+
+# Changes_When_Lowercased (CWL)
+
+CWL; N ; No ; F ; False
+CWL; Y ; Yes ; T ; True
+
+# Changes_When_NFKC_Casefolded (CWKCF)
+
+CWKCF; N ; No ; F ; False
+CWKCF; Y ; Yes ; T ; True
+
+# Changes_When_Titlecased (CWT)
+
+CWT; N ; No ; F ; False
+CWT; Y ; Yes ; T ; True
+
+# Changes_When_Uppercased (CWU)
+
+CWU; N ; No ; F ; False
+CWU; Y ; Yes ; T ; True
+
+# Composition_Exclusion (CE)
+
+CE ; N ; No ; F ; False
+CE ; Y ; Yes ; T ; True
+
+# Dash (Dash)
+
+Dash; N ; No ; F ; False
+Dash; Y ; Yes ; T ; True
+
+# Decomposition_Mapping (dm)
+
+# @missing: 0000..10FFFF; Decomposition_Mapping; <code point>
+
+# Decomposition_Type (dt)
+
+dt ; Can ; Canonical ; can
+dt ; Com ; Compat ; com
+dt ; Enc ; Circle ; enc
+dt ; Fin ; Final ; fin
+dt ; Font ; font
+dt ; Fra ; Fraction ; fra
+dt ; Init ; Initial ; init
+dt ; Iso ; Isolated ; iso
+dt ; Med ; Medial ; med
+dt ; Nar ; Narrow ; nar
+dt ; Nb ; Nobreak ; nb
+dt ; None ; none
+dt ; Sml ; Small ; sml
+dt ; Sqr ; Square ; sqr
+dt ; Sub ; sub
+dt ; Sup ; Super ; sup
+dt ; Vert ; Vertical ; vert
+dt ; Wide ; wide
+
+# Default_Ignorable_Code_Point (DI)
+
+DI ; N ; No ; F ; False
+DI ; Y ; Yes ; T ; True
+
+# Deprecated (Dep)
+
+Dep; N ; No ; F ; False
+Dep; Y ; Yes ; T ; True
+
+# Diacritic (Dia)
+
+Dia; N ; No ; F ; False
+Dia; Y ; Yes ; T ; True
+
+# East_Asian_Width (ea)
+
+ea ; A ; Ambiguous
+ea ; F ; Fullwidth
+ea ; H ; Halfwidth
+ea ; N ; Neutral
+ea ; Na ; Narrow
+ea ; W ; Wide
+
+# Expands_On_NFC (XO_NFC)
+
+XO_NFC; N ; No ; F ; False
+XO_NFC; Y ; Yes ; T ; True
+
+# Expands_On_NFD (XO_NFD)
+
+XO_NFD; N ; No ; F ; False
+XO_NFD; Y ; Yes ; T ; True
+
+# Expands_On_NFKC (XO_NFKC)
+
+XO_NFKC; N ; No ; F ; False
+XO_NFKC; Y ; Yes ; T ; True
+
+# Expands_On_NFKD (XO_NFKD)
+
+XO_NFKD; N ; No ; F ; False
+XO_NFKD; Y ; Yes ; T ; True
+
+# Extender (Ext)
+
+Ext; N ; No ; F ; False
+Ext; Y ; Yes ; T ; True
+
+# FC_NFKC_Closure (FC_NFKC)
+
+# @missing: 0000..10FFFF; FC_NFKC_Closure; <code point>
+
+# Full_Composition_Exclusion (Comp_Ex)
+
+Comp_Ex; N ; No ; F ; False
+Comp_Ex; Y ; Yes ; T ; True
+
+# General_Category (gc)
+
+gc ; C ; Other # Cc | Cf | Cn | Co | Cs
+gc ; Cc ; Control ; cntrl
+gc ; Cf ; Format
+gc ; Cn ; Unassigned
+gc ; Co ; Private_Use
+gc ; Cs ; Surrogate
+gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu
+gc ; LC ; Cased_Letter # Ll | Lt | Lu
+gc ; Ll ; Lowercase_Letter
+gc ; Lm ; Modifier_Letter
+gc ; Lo ; Other_Letter
+gc ; Lt ; Titlecase_Letter
+gc ; Lu ; Uppercase_Letter
+gc ; M ; Mark # Mc | Me | Mn
+gc ; Mc ; Spacing_Mark
+gc ; Me ; Enclosing_Mark
+gc ; Mn ; Nonspacing_Mark
+gc ; N ; Number # Nd | Nl | No
+gc ; Nd ; Decimal_Number ; digit
+gc ; Nl ; Letter_Number
+gc ; No ; Other_Number
+gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps
+gc ; Pc ; Connector_Punctuation
+gc ; Pd ; Dash_Punctuation
+gc ; Pe ; Close_Punctuation
+gc ; Pf ; Final_Punctuation
+gc ; Pi ; Initial_Punctuation
+gc ; Po ; Other_Punctuation
+gc ; Ps ; Open_Punctuation
+gc ; S ; Symbol # Sc | Sk | Sm | So
+gc ; Sc ; Currency_Symbol
+gc ; Sk ; Modifier_Symbol
+gc ; Sm ; Math_Symbol
+gc ; So ; Other_Symbol
+gc ; Z ; Separator # Zl | Zp | Zs
+gc ; Zl ; Line_Separator
+gc ; Zp ; Paragraph_Separator
+gc ; Zs ; Space_Separator
+
+# Grapheme_Base (Gr_Base)
+
+Gr_Base; N ; No ; F ; False
+Gr_Base; Y ; Yes ; T ; True
+
+# Grapheme_Cluster_Break (GCB)
+
+GCB; CN ; Control
+GCB; CR ; CR
+GCB; EX ; Extend
+GCB; L ; L
+GCB; LF ; LF
+GCB; LV ; LV
+GCB; LVT ; LVT
+GCB; PP ; Prepend
+GCB; SM ; SpacingMark
+GCB; T ; T
+GCB; V ; V
+GCB; XX ; Other
+
+# Grapheme_Extend (Gr_Ext)
+
+Gr_Ext; N ; No ; F ; False
+Gr_Ext; Y ; Yes ; T ; True
+
+# Grapheme_Link (Gr_Link)
+
+Gr_Link; N ; No ; F ; False
+Gr_Link; Y ; Yes ; T ; True
+
+# Hangul_Syllable_Type (hst)
+
+hst; L ; Leading_Jamo
+hst; LV ; LV_Syllable
+hst; LVT ; LVT_Syllable
+hst; NA ; Not_Applicable
+hst; T ; Trailing_Jamo
+hst; V ; Vowel_Jamo
+
+# Hex_Digit (Hex)
+
+Hex; N ; No ; F ; False
+Hex; Y ; Yes ; T ; True
+
+# Hyphen (Hyphen)
+
+Hyphen; N ; No ; F ; False
+Hyphen; Y ; Yes ; T ; True
+
+# IDS_Binary_Operator (IDSB)
+
+IDSB; N ; No ; F ; False
+IDSB; Y ; Yes ; T ; True
+
+# IDS_Trinary_Operator (IDST)
+
+IDST; N ; No ; F ; False
+IDST; Y ; Yes ; T ; True
+
+# ID_Continue (IDC)
+
+IDC; N ; No ; F ; False
+IDC; Y ; Yes ; T ; True
+
+# ID_Start (IDS)
+
+IDS; N ; No ; F ; False
+IDS; Y ; Yes ; T ; True
+
+# ISO_Comment (isc)
+
+# @missing: 0000..10FFFF; ISO_Comment; <none>
+
+# Ideographic (Ideo)
+
+Ideo; N ; No ; F ; False
+Ideo; Y ; Yes ; T ; True
+
+# Jamo_Short_Name (JSN)
+
+# @missing: 0000..10FFFF; Jamo_Short_Name; <none>
+JSN; A ; A
+JSN; AE ; AE
+JSN; B ; B
+JSN; BB ; BB
+JSN; BS ; BS
+JSN; C ; C
+JSN; D ; D
+JSN; DD ; DD
+JSN; E ; E
+JSN; EO ; EO
+JSN; EU ; EU
+JSN; G ; G
+JSN; GG ; GG
+JSN; GS ; GS
+JSN; H ; H
+JSN; I ; I
+JSN; J ; J
+JSN; JJ ; JJ
+JSN; K ; K
+JSN; L ; L
+JSN; LB ; LB
+JSN; LG ; LG
+JSN; LH ; LH
+JSN; LM ; LM
+JSN; LP ; LP
+JSN; LS ; LS
+JSN; LT ; LT
+JSN; M ; M
+JSN; N ; N
+JSN; NG ; NG
+JSN; NH ; NH
+JSN; NJ ; NJ
+JSN; O ; O
+JSN; OE ; OE
+JSN; P ; P
+JSN; R ; R
+JSN; S ; S
+JSN; SS ; SS
+JSN; T ; T
+JSN; U ; U
+JSN; WA ; WA
+JSN; WAE ; WAE
+JSN; WE ; WE
+JSN; WEO ; WEO
+JSN; WI ; WI
+JSN; YA ; YA
+JSN; YAE ; YAE
+JSN; YE ; YE
+JSN; YEO ; YEO
+JSN; YI ; YI
+JSN; YO ; YO
+JSN; YU ; YU
+
+# Join_Control (Join_C)
+
+Join_C; N ; No ; F ; False
+Join_C; Y ; Yes ; T ; True
+
+# Joining_Group (jg)
+
+jg ; n/a ; Ain
+jg ; n/a ; Alaph
+jg ; n/a ; Alef
+jg ; n/a ; Beh
+jg ; n/a ; Beth
+jg ; n/a ; Burushaski_Yeh_Barree
+jg ; n/a ; Dal
+jg ; n/a ; Dalath_Rish
+jg ; n/a ; E
+jg ; n/a ; Farsi_Yeh
+jg ; n/a ; Fe
+jg ; n/a ; Feh
+jg ; n/a ; Final_Semkath
+jg ; n/a ; Gaf
+jg ; n/a ; Gamal
+jg ; n/a ; Hah
+jg ; n/a ; He
+jg ; n/a ; Heh
+jg ; n/a ; Heh_Goal
+jg ; n/a ; Heth
+jg ; n/a ; Kaf
+jg ; n/a ; Kaph
+jg ; n/a ; Khaph
+jg ; n/a ; Knotted_Heh
+jg ; n/a ; Lam
+jg ; n/a ; Lamadh
+jg ; n/a ; Meem
+jg ; n/a ; Mim
+jg ; n/a ; No_Joining_Group
+jg ; n/a ; Noon
+jg ; n/a ; Nun
+jg ; n/a ; Nya
+jg ; n/a ; Pe
+jg ; n/a ; Qaf
+jg ; n/a ; Qaph
+jg ; n/a ; Reh
+jg ; n/a ; Reversed_Pe
+jg ; n/a ; Sad
+jg ; n/a ; Sadhe
+jg ; n/a ; Seen
+jg ; n/a ; Semkath
+jg ; n/a ; Shin
+jg ; n/a ; Swash_Kaf
+jg ; n/a ; Syriac_Waw
+jg ; n/a ; Tah
+jg ; n/a ; Taw
+jg ; n/a ; Teh_Marbuta
+jg ; n/a ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal
+jg ; n/a ; Teth
+jg ; n/a ; Waw
+jg ; n/a ; Yeh
+jg ; n/a ; Yeh_Barree
+jg ; n/a ; Yeh_With_Tail
+jg ; n/a ; Yudh
+jg ; n/a ; Yudh_He
+jg ; n/a ; Zain
+jg ; n/a ; Zhain
+
+# Joining_Type (jt)
+
+jt ; C ; Join_Causing
+jt ; D ; Dual_Joining
+jt ; L ; Left_Joining
+jt ; R ; Right_Joining
+jt ; T ; Transparent
+jt ; U ; Non_Joining
+
+# Line_Break (lb)
+
+lb ; AI ; Ambiguous
+lb ; AL ; Alphabetic
+lb ; B2 ; Break_Both
+lb ; BA ; Break_After
+lb ; BB ; Break_Before
+lb ; BK ; Mandatory_Break
+lb ; CB ; Contingent_Break
+lb ; CL ; Close_Punctuation
+lb ; CM ; Combining_Mark
+lb ; CP ; Close_Parenthesis
+lb ; CR ; Carriage_Return
+lb ; EX ; Exclamation
+lb ; GL ; Glue
+lb ; H2 ; H2
+lb ; H3 ; H3
+lb ; HY ; Hyphen
+lb ; ID ; Ideographic
+lb ; IN ; Inseparable ; Inseperable
+lb ; IS ; Infix_Numeric
+lb ; JL ; JL
+lb ; JT ; JT
+lb ; JV ; JV
+lb ; LF ; Line_Feed
+lb ; NL ; Next_Line
+lb ; NS ; Nonstarter
+lb ; NU ; Numeric
+lb ; OP ; Open_Punctuation
+lb ; PO ; Postfix_Numeric
+lb ; PR ; Prefix_Numeric
+lb ; QU ; Quotation
+lb ; SA ; Complex_Context
+lb ; SG ; Surrogate
+lb ; SP ; Space
+lb ; SY ; Break_Symbols
+lb ; WJ ; Word_Joiner
+lb ; XX ; Unknown
+lb ; ZW ; ZWSpace
+
+# Logical_Order_Exception (LOE)
+
+LOE; N ; No ; F ; False
+LOE; Y ; Yes ; T ; True
+
+# Lowercase (Lower)
+
+Lower; N ; No ; F ; False
+Lower; Y ; Yes ; T ; True
+
+# Lowercase_Mapping (lc)
+
+# @missing: 0000..10FFFF; Lowercase_Mapping; <code point>
+
+# Math (Math)
+
+Math; N ; No ; F ; False
+Math; Y ; Yes ; T ; True
+
+# NFC_Quick_Check (NFC_QC)
+
+NFC_QC; M ; Maybe
+NFC_QC; N ; No
+NFC_QC; Y ; Yes
+
+# NFD_Quick_Check (NFD_QC)
+
+NFD_QC; N ; No
+NFD_QC; Y ; Yes
+
+# NFKC_Casefold (NFKC_CF)
+
+# @missing: 0000..10FFFF; NFKC_Casefold; <code point>
+
+# NFKC_Quick_Check (NFKC_QC)
+
+NFKC_QC; M ; Maybe
+NFKC_QC; N ; No
+NFKC_QC; Y ; Yes
+
+# NFKD_Quick_Check (NFKD_QC)
+
+NFKD_QC; N ; No
+NFKD_QC; Y ; Yes
+
+# Name (na)
+
+# @missing: 0000..10FFFF; Name; <none>
+
+# Name_Alias (Name_Alias)
+
+# @missing: 0000..10FFFF; Name_Alias; <none>
+
+# Noncharacter_Code_Point (NChar)
+
+NChar; N ; No ; F ; False
+NChar; Y ; Yes ; T ; True
+
+# Numeric_Type (nt)
+
+nt ; De ; Decimal
+nt ; Di ; Digit
+nt ; None ; None
+nt ; Nu ; Numeric
+
+# Numeric_Value (nv)
+
+# @missing: 0000..10FFFF; Numeric_Value; NaN
+
+# Other_Alphabetic (OAlpha)
+
+OAlpha; N ; No ; F ; False
+OAlpha; Y ; Yes ; T ; True
+
+# Other_Default_Ignorable_Code_Point (ODI)
+
+ODI; N ; No ; F ; False
+ODI; Y ; Yes ; T ; True
+
+# Other_Grapheme_Extend (OGr_Ext)
+
+OGr_Ext; N ; No ; F ; False
+OGr_Ext; Y ; Yes ; T ; True
+
+# Other_ID_Continue (OIDC)
+
+OIDC; N ; No ; F ; False
+OIDC; Y ; Yes ; T ; True
+
+# Other_ID_Start (OIDS)
+
+OIDS; N ; No ; F ; False
+OIDS; Y ; Yes ; T ; True
+
+# Other_Lowercase (OLower)
+
+OLower; N ; No ; F ; False
+OLower; Y ; Yes ; T ; True
+
+# Other_Math (OMath)
+
+OMath; N ; No ; F ; False
+OMath; Y ; Yes ; T ; True
+
+# Other_Uppercase (OUpper)
+
+OUpper; N ; No ; F ; False
+OUpper; Y ; Yes ; T ; True
+
+# Pattern_Syntax (Pat_Syn)
+
+Pat_Syn; N ; No ; F ; False
+Pat_Syn; Y ; Yes ; T ; True
+
+# Pattern_White_Space (Pat_WS)
+
+Pat_WS; N ; No ; F ; False
+Pat_WS; Y ; Yes ; T ; True
+
+# Quotation_Mark (QMark)
+
+QMark; N ; No ; F ; False
+QMark; Y ; Yes ; T ; True
+
+# Radical (Radical)
+
+Radical; N ; No ; F ; False
+Radical; Y ; Yes ; T ; True
+
+# STerm (STerm)
+
+STerm; N ; No ; F ; False
+STerm; Y ; Yes ; T ; True
+
+# Script (sc)
+
+sc ; Arab ; Arabic
+sc ; Armi ; Imperial_Aramaic
+sc ; Armn ; Armenian
+sc ; Avst ; Avestan
+sc ; Bali ; Balinese
+sc ; Bamu ; Bamum
+sc ; Batk ; Batak
+sc ; Beng ; Bengali
+sc ; Bopo ; Bopomofo
+sc ; Brah ; Brahmi
+sc ; Brai ; Braille
+sc ; Bugi ; Buginese
+sc ; Buhd ; Buhid
+sc ; Cans ; Canadian_Aboriginal
+sc ; Cari ; Carian
+sc ; Cham ; Cham
+sc ; Cher ; Cherokee
+sc ; Copt ; Coptic ; Qaac
+sc ; Cprt ; Cypriot
+sc ; Cyrl ; Cyrillic
+sc ; Deva ; Devanagari
+sc ; Dsrt ; Deseret
+sc ; Egyp ; Egyptian_Hieroglyphs
+sc ; Ethi ; Ethiopic
+sc ; Geor ; Georgian
+sc ; Glag ; Glagolitic
+sc ; Goth ; Gothic
+sc ; Grek ; Greek
+sc ; Gujr ; Gujarati
+sc ; Guru ; Gurmukhi
+sc ; Hang ; Hangul
+sc ; Hani ; Han
+sc ; Hano ; Hanunoo
+sc ; Hebr ; Hebrew
+sc ; Hira ; Hiragana
+sc ; Hrkt ; Katakana_Or_Hiragana
+sc ; Ital ; Old_Italic
+sc ; Java ; Javanese
+sc ; Kali ; Kayah_Li
+sc ; Kana ; Katakana
+sc ; Khar ; Kharoshthi
+sc ; Khmr ; Khmer
+sc ; Knda ; Kannada
+sc ; Kthi ; Kaithi
+sc ; Lana ; Tai_Tham
+sc ; Laoo ; Lao
+sc ; Latn ; Latin
+sc ; Lepc ; Lepcha
+sc ; Limb ; Limbu
+sc ; Linb ; Linear_B
+sc ; Lisu ; Lisu
+sc ; Lyci ; Lycian
+sc ; Lydi ; Lydian
+sc ; Mand ; Mandaic
+sc ; Mlym ; Malayalam
+sc ; Mong ; Mongolian
+sc ; Mtei ; Meetei_Mayek
+sc ; Mymr ; Myanmar
+sc ; Nkoo ; Nko
+sc ; Ogam ; Ogham
+sc ; Olck ; Ol_Chiki
+sc ; Orkh ; Old_Turkic
+sc ; Orya ; Oriya
+sc ; Osma ; Osmanya
+sc ; Phag ; Phags_Pa
+sc ; Phli ; Inscriptional_Pahlavi
+sc ; Phnx ; Phoenician
+sc ; Prti ; Inscriptional_Parthian
+sc ; Rjng ; Rejang
+sc ; Runr ; Runic
+sc ; Samr ; Samaritan
+sc ; Sarb ; Old_South_Arabian
+sc ; Saur ; Saurashtra
+sc ; Shaw ; Shavian
+sc ; Sinh ; Sinhala
+sc ; Sund ; Sundanese
+sc ; Sylo ; Syloti_Nagri
+sc ; Syrc ; Syriac
+sc ; Tagb ; Tagbanwa
+sc ; Tale ; Tai_Le
+sc ; Talu ; New_Tai_Lue
+sc ; Taml ; Tamil
+sc ; Tavt ; Tai_Viet
+sc ; Telu ; Telugu
+sc ; Tfng ; Tifinagh
+sc ; Tglg ; Tagalog
+sc ; Thaa ; Thaana
+sc ; Thai ; Thai
+sc ; Tibt ; Tibetan
+sc ; Ugar ; Ugaritic
+sc ; Vaii ; Vai
+sc ; Xpeo ; Old_Persian
+sc ; Xsux ; Cuneiform
+sc ; Yiii ; Yi
+sc ; Zinh ; Inherited ; Qaai
+sc ; Zyyy ; Common
+sc ; Zzzz ; Unknown
+
+# Sentence_Break (SB)
+
+SB ; AT ; ATerm
+SB ; CL ; Close
+SB ; CR ; CR
+SB ; EX ; Extend
+SB ; FO ; Format
+SB ; LE ; OLetter
+SB ; LF ; LF
+SB ; LO ; Lower
+SB ; NU ; Numeric
+SB ; SC ; SContinue
+SB ; SE ; Sep
+SB ; SP ; Sp
+SB ; ST ; STerm
+SB ; UP ; Upper
+SB ; XX ; Other
+
+# Simple_Case_Folding (scf)
+
+# @missing: 0000..10FFFF; Simple_Case_Folding; <code point>
+
+# Simple_Lowercase_Mapping (slc)
+
+# @missing: 0000..10FFFF; Simple_Lowercase_Mapping; <code point>
+
+# Simple_Titlecase_Mapping (stc)
+
+# @missing: 0000..10FFFF; Simple_Titlecase_Mapping; <code point>
+
+# Simple_Uppercase_Mapping (suc)
+
+# @missing: 0000..10FFFF; Simple_Uppercase_Mapping; <code point>
+
+# Soft_Dotted (SD)
+
+SD ; N ; No ; F ; False
+SD ; Y ; Yes ; T ; True
+
+# Terminal_Punctuation (Term)
+
+Term; N ; No ; F ; False
+Term; Y ; Yes ; T ; True
+
+# Titlecase_Mapping (tc)
+
+# @missing: 0000..10FFFF; Titlecase_Mapping; <code point>
+
+# Unicode_1_Name (na1)
+
+# @missing: 0000..10FFFF; Unicode_1_Name; <none>
+
+# Unified_Ideograph (UIdeo)
+
+UIdeo; N ; No ; F ; False
+UIdeo; Y ; Yes ; T ; True
+
+# Uppercase (Upper)
+
+Upper; N ; No ; F ; False
+Upper; Y ; Yes ; T ; True
+
+# Uppercase_Mapping (uc)
+
+# @missing: 0000..10FFFF; Uppercase_Mapping; <code point>
+
+# Variation_Selector (VS)
+
+VS ; N ; No ; F ; False
+VS ; Y ; Yes ; T ; True
+
+# White_Space (WSpace)
+
+WSpace; N ; No ; F ; False
+WSpace; Y ; Yes ; T ; True
+
+# Word_Break (WB)
+
+WB ; CR ; CR
+WB ; EX ; ExtendNumLet
+WB ; Extend ; Extend
+WB ; FO ; Format
+WB ; KA ; Katakana
+WB ; LE ; ALetter
+WB ; LF ; LF
+WB ; MB ; MidNumLet
+WB ; ML ; MidLetter
+WB ; MN ; MidNum
+WB ; NL ; Newline
+WB ; NU ; Numeric
+WB ; XX ; Other
+
+# XID_Continue (XIDC)
+
+XIDC; N ; No ; F ; False
+XIDC; Y ; Yes ; T ; True
+
+# XID_Start (XIDS)
+
+XIDS; N ; No ; F ; False
+XIDS; Y ; Yes ; T ; True
+
+# cjkAccountingNumeric (cjkAccountingNumeric)
+
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
+
+# cjkCompatibilityVariant (cjkCompatibilityVariant)
+
+# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
+
+# cjkIICore (cjkIICore)
+
+# @missing: 0000..10FFFF; cjkIICore; <none>
+
+# cjkIRG_GSource (cjkIRG_GSource)
+
+# @missing: 0000..10FFFF; cjkIRG_GSource; <none>
+
+# cjkIRG_HSource (cjkIRG_HSource)
+
+# @missing: 0000..10FFFF; cjkIRG_HSource; <none>
+
+# cjkIRG_JSource (cjkIRG_JSource)
+
+# @missing: 0000..10FFFF; cjkIRG_JSource; <none>
+
+# cjkIRG_KPSource (cjkIRG_KPSource)
+
+# @missing: 0000..10FFFF; cjkIRG_KPSource; <none>
+
+# cjkIRG_KSource (cjkIRG_KSource)
+
+# @missing: 0000..10FFFF; cjkIRG_KSource; <none>
+
+# cjkIRG_MSource (cjkIRG_MSource)
+
+# @missing: 0000..10FFFF; cjkIRG_MSource; <none>
+
+# cjkIRG_TSource (cjkIRG_TSource)
+
+# @missing: 0000..10FFFF; cjkIRG_TSource; <none>
+
+# cjkIRG_USource (cjkIRG_USource)
+
+# @missing: 0000..10FFFF; cjkIRG_USource; <none>
+
+# cjkIRG_VSource (cjkIRG_VSource)
+
+# @missing: 0000..10FFFF; cjkIRG_VSource; <none>
+
+# cjkOtherNumeric (cjkOtherNumeric)
+
+# @missing: 0000..10FFFF; cjkOtherNumeric; NaN
+
+# cjkPrimaryNumeric (cjkPrimaryNumeric)
+
+# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
+
+# cjkRSUnicode (cjkRSUnicode)
+
+# @missing: 0000..10FFFF; cjkRSUnicode; <none>
+
+# EOF