8202771: Migrate Unicode character tests to JDK Repo
Reviewed-by: naoto
Contributed-by: dan.z.zhou@oracle.com
--- a/test/jdk/java/lang/Character/Blocks.txt Tue May 22 21:50:45 2018 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,316 +0,0 @@
-# Blocks-10.0.0.txt
-# Date: 2017-04-12, 17:30:00 GMT [KW]
-# Copyright (c) 2017 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-#
-# Unicode Character Database
-# For documentation, see http://www.unicode.org/reports/tr44/
-#
-# Format:
-# Start Code..End Code; Block Name
-
-# ================================================
-
-# Note: When comparing block names, casing, whitespace, hyphens,
-# and underbars are ignored.
-# For example, "Latin Extended-A" and "latin extended a" are equivalent.
-# For more information on the comparison of property values,
-# see UAX #44: http://www.unicode.org/reports/tr44/
-#
-# All block ranges start with a value where (cp MOD 16) = 0,
-# and end with a value where (cp MOD 16) = 15. In other words,
-# the last hexadecimal digit of the start of range is ...0
-# and the last hexadecimal digit of the end of range is ...F.
-# This constraint on block ranges guarantees that allocations
-# are done in terms of whole columns, and that code chart display
-# never involves splitting columns in the charts.
-#
-# All code points not explicitly listed for Block
-# have the value No_Block.
-
-# Property: Block
-#
-# @missing: 0000..10FFFF; No_Block
-
-0000..007F; Basic Latin
-0080..00FF; Latin-1 Supplement
-0100..017F; Latin Extended-A
-0180..024F; Latin Extended-B
-0250..02AF; IPA Extensions
-02B0..02FF; Spacing Modifier Letters
-0300..036F; Combining Diacritical Marks
-0370..03FF; Greek and Coptic
-0400..04FF; Cyrillic
-0500..052F; Cyrillic Supplement
-0530..058F; Armenian
-0590..05FF; Hebrew
-0600..06FF; Arabic
-0700..074F; Syriac
-0750..077F; Arabic Supplement
-0780..07BF; Thaana
-07C0..07FF; NKo
-0800..083F; Samaritan
-0840..085F; Mandaic
-0860..086F; Syriac Supplement
-08A0..08FF; Arabic Extended-A
-0900..097F; Devanagari
-0980..09FF; Bengali
-0A00..0A7F; Gurmukhi
-0A80..0AFF; Gujarati
-0B00..0B7F; Oriya
-0B80..0BFF; Tamil
-0C00..0C7F; Telugu
-0C80..0CFF; Kannada
-0D00..0D7F; Malayalam
-0D80..0DFF; Sinhala
-0E00..0E7F; Thai
-0E80..0EFF; Lao
-0F00..0FFF; Tibetan
-1000..109F; Myanmar
-10A0..10FF; Georgian
-1100..11FF; Hangul Jamo
-1200..137F; Ethiopic
-1380..139F; Ethiopic Supplement
-13A0..13FF; Cherokee
-1400..167F; Unified Canadian Aboriginal Syllabics
-1680..169F; Ogham
-16A0..16FF; Runic
-1700..171F; Tagalog
-1720..173F; Hanunoo
-1740..175F; Buhid
-1760..177F; Tagbanwa
-1780..17FF; Khmer
-1800..18AF; Mongolian
-18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
-1900..194F; Limbu
-1950..197F; Tai Le
-1980..19DF; New Tai Lue
-19E0..19FF; Khmer Symbols
-1A00..1A1F; Buginese
-1A20..1AAF; Tai Tham
-1AB0..1AFF; Combining Diacritical Marks Extended
-1B00..1B7F; Balinese
-1B80..1BBF; Sundanese
-1BC0..1BFF; Batak
-1C00..1C4F; Lepcha
-1C50..1C7F; Ol Chiki
-1C80..1C8F; Cyrillic Extended-C
-1CC0..1CCF; Sundanese Supplement
-1CD0..1CFF; Vedic Extensions
-1D00..1D7F; Phonetic Extensions
-1D80..1DBF; Phonetic Extensions Supplement
-1DC0..1DFF; Combining Diacritical Marks Supplement
-1E00..1EFF; Latin Extended Additional
-1F00..1FFF; Greek Extended
-2000..206F; General Punctuation
-2070..209F; Superscripts and Subscripts
-20A0..20CF; Currency Symbols
-20D0..20FF; Combining Diacritical Marks for Symbols
-2100..214F; Letterlike Symbols
-2150..218F; Number Forms
-2190..21FF; Arrows
-2200..22FF; Mathematical Operators
-2300..23FF; Miscellaneous Technical
-2400..243F; Control Pictures
-2440..245F; Optical Character Recognition
-2460..24FF; Enclosed Alphanumerics
-2500..257F; Box Drawing
-2580..259F; Block Elements
-25A0..25FF; Geometric Shapes
-2600..26FF; Miscellaneous Symbols
-2700..27BF; Dingbats
-27C0..27EF; Miscellaneous Mathematical Symbols-A
-27F0..27FF; Supplemental Arrows-A
-2800..28FF; Braille Patterns
-2900..297F; Supplemental Arrows-B
-2980..29FF; Miscellaneous Mathematical Symbols-B
-2A00..2AFF; Supplemental Mathematical Operators
-2B00..2BFF; Miscellaneous Symbols and Arrows
-2C00..2C5F; Glagolitic
-2C60..2C7F; Latin Extended-C
-2C80..2CFF; Coptic
-2D00..2D2F; Georgian Supplement
-2D30..2D7F; Tifinagh
-2D80..2DDF; Ethiopic Extended
-2DE0..2DFF; Cyrillic Extended-A
-2E00..2E7F; Supplemental Punctuation
-2E80..2EFF; CJK Radicals Supplement
-2F00..2FDF; Kangxi Radicals
-2FF0..2FFF; Ideographic Description Characters
-3000..303F; CJK Symbols and Punctuation
-3040..309F; Hiragana
-30A0..30FF; Katakana
-3100..312F; Bopomofo
-3130..318F; Hangul Compatibility Jamo
-3190..319F; Kanbun
-31A0..31BF; Bopomofo Extended
-31C0..31EF; CJK Strokes
-31F0..31FF; Katakana Phonetic Extensions
-3200..32FF; Enclosed CJK Letters and Months
-3300..33FF; CJK Compatibility
-3400..4DBF; CJK Unified Ideographs Extension A
-4DC0..4DFF; Yijing Hexagram Symbols
-4E00..9FFF; CJK Unified Ideographs
-A000..A48F; Yi Syllables
-A490..A4CF; Yi Radicals
-A4D0..A4FF; Lisu
-A500..A63F; Vai
-A640..A69F; Cyrillic Extended-B
-A6A0..A6FF; Bamum
-A700..A71F; Modifier Tone Letters
-A720..A7FF; Latin Extended-D
-A800..A82F; Syloti Nagri
-A830..A83F; Common Indic Number Forms
-A840..A87F; Phags-pa
-A880..A8DF; Saurashtra
-A8E0..A8FF; Devanagari Extended
-A900..A92F; Kayah Li
-A930..A95F; Rejang
-A960..A97F; Hangul Jamo Extended-A
-A980..A9DF; Javanese
-A9E0..A9FF; Myanmar Extended-B
-AA00..AA5F; Cham
-AA60..AA7F; Myanmar Extended-A
-AA80..AADF; Tai Viet
-AAE0..AAFF; Meetei Mayek Extensions
-AB00..AB2F; Ethiopic Extended-A
-AB30..AB6F; Latin Extended-E
-AB70..ABBF; Cherokee Supplement
-ABC0..ABFF; Meetei Mayek
-AC00..D7AF; Hangul Syllables
-D7B0..D7FF; Hangul Jamo Extended-B
-D800..DB7F; High Surrogates
-DB80..DBFF; High Private Use Surrogates
-DC00..DFFF; Low Surrogates
-E000..F8FF; Private Use Area
-F900..FAFF; CJK Compatibility Ideographs
-FB00..FB4F; Alphabetic Presentation Forms
-FB50..FDFF; Arabic Presentation Forms-A
-FE00..FE0F; Variation Selectors
-FE10..FE1F; Vertical Forms
-FE20..FE2F; Combining Half Marks
-FE30..FE4F; CJK Compatibility Forms
-FE50..FE6F; Small Form Variants
-FE70..FEFF; Arabic Presentation Forms-B
-FF00..FFEF; Halfwidth and Fullwidth Forms
-FFF0..FFFF; Specials
-10000..1007F; Linear B Syllabary
-10080..100FF; Linear B Ideograms
-10100..1013F; Aegean Numbers
-10140..1018F; Ancient Greek Numbers
-10190..101CF; Ancient Symbols
-101D0..101FF; Phaistos Disc
-10280..1029F; Lycian
-102A0..102DF; Carian
-102E0..102FF; Coptic Epact Numbers
-10300..1032F; Old Italic
-10330..1034F; Gothic
-10350..1037F; Old Permic
-10380..1039F; Ugaritic
-103A0..103DF; Old Persian
-10400..1044F; Deseret
-10450..1047F; Shavian
-10480..104AF; Osmanya
-104B0..104FF; Osage
-10500..1052F; Elbasan
-10530..1056F; Caucasian Albanian
-10600..1077F; Linear A
-10800..1083F; Cypriot Syllabary
-10840..1085F; Imperial Aramaic
-10860..1087F; Palmyrene
-10880..108AF; Nabataean
-108E0..108FF; Hatran
-10900..1091F; Phoenician
-10920..1093F; Lydian
-10980..1099F; Meroitic Hieroglyphs
-109A0..109FF; Meroitic Cursive
-10A00..10A5F; Kharoshthi
-10A60..10A7F; Old South Arabian
-10A80..10A9F; Old North Arabian
-10AC0..10AFF; Manichaean
-10B00..10B3F; Avestan
-10B40..10B5F; Inscriptional Parthian
-10B60..10B7F; Inscriptional Pahlavi
-10B80..10BAF; Psalter Pahlavi
-10C00..10C4F; Old Turkic
-10C80..10CFF; Old Hungarian
-10E60..10E7F; Rumi Numeral Symbols
-11000..1107F; Brahmi
-11080..110CF; Kaithi
-110D0..110FF; Sora Sompeng
-11100..1114F; Chakma
-11150..1117F; Mahajani
-11180..111DF; Sharada
-111E0..111FF; Sinhala Archaic Numbers
-11200..1124F; Khojki
-11280..112AF; Multani
-112B0..112FF; Khudawadi
-11300..1137F; Grantha
-11400..1147F; Newa
-11480..114DF; Tirhuta
-11580..115FF; Siddham
-11600..1165F; Modi
-11660..1167F; Mongolian Supplement
-11680..116CF; Takri
-11700..1173F; Ahom
-118A0..118FF; Warang Citi
-11A00..11A4F; Zanabazar Square
-11A50..11AAF; Soyombo
-11AC0..11AFF; Pau Cin Hau
-11C00..11C6F; Bhaiksuki
-11C70..11CBF; Marchen
-11D00..11D5F; Masaram Gondi
-12000..123FF; Cuneiform
-12400..1247F; Cuneiform Numbers and Punctuation
-12480..1254F; Early Dynastic Cuneiform
-13000..1342F; Egyptian Hieroglyphs
-14400..1467F; Anatolian Hieroglyphs
-16800..16A3F; Bamum Supplement
-16A40..16A6F; Mro
-16AD0..16AFF; Bassa Vah
-16B00..16B8F; Pahawh Hmong
-16F00..16F9F; Miao
-16FE0..16FFF; Ideographic Symbols and Punctuation
-17000..187FF; Tangut
-18800..18AFF; Tangut Components
-1B000..1B0FF; Kana Supplement
-1B100..1B12F; Kana Extended-A
-1B170..1B2FF; Nushu
-1BC00..1BC9F; Duployan
-1BCA0..1BCAF; Shorthand Format Controls
-1D000..1D0FF; Byzantine Musical Symbols
-1D100..1D1FF; Musical Symbols
-1D200..1D24F; Ancient Greek Musical Notation
-1D300..1D35F; Tai Xuan Jing Symbols
-1D360..1D37F; Counting Rod Numerals
-1D400..1D7FF; Mathematical Alphanumeric Symbols
-1D800..1DAAF; Sutton SignWriting
-1E000..1E02F; Glagolitic Supplement
-1E800..1E8DF; Mende Kikakui
-1E900..1E95F; Adlam
-1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
-1F000..1F02F; Mahjong Tiles
-1F030..1F09F; Domino Tiles
-1F0A0..1F0FF; Playing Cards
-1F100..1F1FF; Enclosed Alphanumeric Supplement
-1F200..1F2FF; Enclosed Ideographic Supplement
-1F300..1F5FF; Miscellaneous Symbols and Pictographs
-1F600..1F64F; Emoticons
-1F650..1F67F; Ornamental Dingbats
-1F680..1F6FF; Transport and Map Symbols
-1F700..1F77F; Alchemical Symbols
-1F780..1F7FF; Geometric Shapes Extended
-1F800..1F8FF; Supplemental Arrows-C
-1F900..1F9FF; Supplemental Symbols and Pictographs
-20000..2A6DF; CJK Unified Ideographs Extension B
-2A700..2B73F; CJK Unified Ideographs Extension C
-2B740..2B81F; CJK Unified Ideographs Extension D
-2B820..2CEAF; CJK Unified Ideographs Extension E
-2CEB0..2EBEF; CJK Unified Ideographs Extension F
-2F800..2FA1F; CJK Compatibility Ideographs Supplement
-E0000..E007F; Tags
-E0100..E01EF; Variation Selectors Supplement
-F0000..FFFFF; Supplementary Private Use Area-A
-100000..10FFFF; Supplementary Private Use Area-B
-
-# EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/CharPropTest.java Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8202771
+ * @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
+ * /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
+ * /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
+ * /isUnicodeIdentifierPart
+ * @run main CharPropTest
+ */
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.stream.Stream;
+
+public class CharPropTest {
+ private static int diffs = 0;
+ private static int rangeStart = 0x0000;
+ private static boolean isRange = false;
+
+ public static void main(String[] args) throws Exception { // Runs every check over UnicodeData.txt; throws if any mismatch was counted.
+ Path path = Paths.get(System.getProperty("test.src", "."), // directory from the test.src property, "." if unset
+ "UnicodeData.txt");
+ try (Stream<String> lines = Files.lines(path)) {
+ lines.map(String::trim)
+ .filter(line -> line.length() != 0 && line.charAt(0) != '#') // skip blank lines and lines starting with '#'
+ .forEach(line -> handleOneLine(line));
+
+ if (diffs != 0) { // mismatches were only printed so far; fail once, after all lines were handled
+ throw new RuntimeException("Total differences: " + diffs);
+ }
+ }
+ }
+
+ private static void handleOneLine(String line) { // Handles one ';'-separated record; range state lives in rangeStart/isRange.
+ String[] fields = line.split(";");
+ int currentCp = Integer.parseInt(fields[0], 16); // field 0 is parsed as a hexadecimal code point
+ String name = fields[1];
+ String category = fields[2];
+
+ // Besides single code points, also handle ranges given as a First/Last pair:
+ // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+ // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+ if (isRange) { // the previous record opened a range, so this one must close it
+ if (name.endsWith("Last>")) {
+ for (int cp = rangeStart; cp <= currentCp; cp++) { // both ends inclusive
+ testCodePoint(cp, category); // category is taken from the closing record
+ }
+ } else {
+ throw new RuntimeException("Not a valid range, first range <"
+ + Integer.toHexString(rangeStart) + "> without last.");
+ }
+ isRange = false;
+ } else {
+ if (name.endsWith("First>")) { // range start: remember it and test nothing until the closing record
+ rangeStart = currentCp;
+ isRange = true;
+ } else {
+ testCodePoint(currentCp, category);
+ }
+ }
+ }
+
+ private static void testCodePoint(int codePoint, String category) {
+ isDigitTest(codePoint, category);
+ isLetterTest(codePoint, category);
+ isLetterOrDigitTest(codePoint, category);
+
+ isSpaceCharTest(codePoint, category);
+ isWhitespaceTest(codePoint, category);
+
+ isTitleCaseTest(codePoint, category);
+
+ isISOControlTest(codePoint);
+
+ isIdentifierIgnorableTest(codePoint, category);
+ isJavaIdentifierStartTest(codePoint, category);
+ isJavaIdentifierPartTest(codePoint, category);
+ isUnicodeIdentifierStartTest(codePoint, category);
+ isUnicodeIdentifierPartTest(codePoint, category);
+ }
+
+ private static void isDigitTest(int codePoint, String category) {
+ boolean actual = Character.isDigit(codePoint);
+ boolean expected = category.equals("Nd");
+ if (actual != expected) {
+ printDiff(codePoint, "isDigit", actual, expected);
+ }
+ }
+
+ private static void isLetterTest(int codePoint, String category) {
+ boolean actual = Character.isLetter(codePoint);
+ boolean expected = isLetter(category);
+ if (actual != expected) {
+ printDiff(codePoint, "isLetter", actual, expected);
+ }
+ }
+
+ private static void isLetterOrDigitTest(int codePoint, String category) {
+ boolean actual = Character.isLetterOrDigit(codePoint);
+ boolean expected = isLetter(category) || category.equals("Nd");
+ if (actual != expected) {
+ printDiff(codePoint, "isLetterOrDigit", actual, expected);
+ }
+ }
+
+ private static void isSpaceCharTest(int codePoint, String category) {
+ boolean actual = Character.isSpaceChar(codePoint);
+ boolean expected = isSpaceChar(category);
+ if (actual != expected) {
+ printDiff(codePoint, "isSpaceChar", actual, expected);
+ }
+ }
+
+ private static void isWhitespaceTest(int codePoint, String category) {
+ boolean actual = Character.isWhitespace(codePoint);
+ boolean expected = isWhitespace(codePoint, category);
+ if (actual != expected) {
+ printDiff(codePoint, "isWhitespace", actual, expected);
+ }
+ }
+
+ private static void isTitleCaseTest(int codePoint, String category) {
+ boolean actual = Character.isTitleCase(codePoint);
+ boolean expected = category.equals("Lt");
+ if (actual != expected) {
+ printDiff(codePoint, "isTitleCase", actual, expected);
+ }
+ }
+
+ private static void isISOControlTest(int codePoint) {
+ boolean actual = Character.isISOControl(codePoint);
+ boolean expected = isISOControl(codePoint);
+ if (actual != expected) {
+ printDiff(codePoint, "isISOControl", actual, expected);
+ }
+ }
+
+ private static void isIdentifierIgnorableTest(int codePoint, String category) {
+ boolean actual = Character.isIdentifierIgnorable(codePoint);
+ boolean expected = isIdentifierIgnorable(codePoint, category);
+ if (actual != expected) {
+ printDiff(codePoint, "isIdentifierIgnorable", actual, expected);
+ }
+ }
+
+ private static void isJavaIdentifierStartTest(int codePoint, String category) {
+ boolean actual = Character.isJavaIdentifierStart(codePoint);
+ boolean expected = isJavaIdentifierStart(category);
+ if (actual != expected) {
+ printDiff(codePoint, "isJavaIdentifierStart", actual, expected);
+ }
+ }
+
+ private static void isJavaIdentifierPartTest(int codePoint, String category) {
+ boolean actual = Character.isJavaIdentifierPart(codePoint);
+ boolean expected = isJavaIdentifierPart(codePoint, category);
+ if (actual != expected) {
+ printDiff(codePoint, "isJavaIdentifierPart", actual, expected);
+ }
+ }
+
+ private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
+ boolean actual = Character.isUnicodeIdentifierStart(codePoint);
+ boolean expected = isUnicodeIdentifierStart(category);
+ if (actual != expected) {
+ printDiff(codePoint, "isUnicodeIdentifierStart", actual, expected);
+ }
+ }
+
+ private static void isUnicodeIdentifierPartTest(int codePoint, String category) {
+ boolean actual = Character.isUnicodeIdentifierPart(codePoint);
+ boolean expected = isUnicodeIdentifierPart(codePoint, category);
+ if (actual != expected) {
+ printDiff(codePoint, "isUnicodeIdentifierPart", actual, expected);
+ }
+ }
+
+ private static boolean isLetter(String category) {
+ return category.equals("Lu") || category.equals("Ll")
+ || category.equals("Lt") || category.equals("Lm")
+ || category.equals("Lo");
+ }
+
+ private static boolean isSpaceChar(String category) {
+ return category.equals("Zs") || category.equals("Zl")
+ || category.equals("Zp");
+ }
+
+ private static boolean isWhitespace(int codePoint, String category) { // Expected Character.isWhitespace value for a code point and its category.
+ if (isSpaceChar(category) && codePoint != Integer.parseInt("00A0", 16) // excluded: U+00A0 NO-BREAK SPACE
+ && codePoint != Integer.parseInt("2007", 16) // excluded: U+2007 FIGURE SPACE
+ && codePoint != Integer.parseInt("202F", 16)) { // excluded: U+202F NARROW NO-BREAK SPACE
+ return true;
+ } else {
+ if (codePoint == Integer.parseInt("0009", 16) // horizontal tabulation
+ || codePoint == Integer.parseInt("000A", 16) // line feed
+ || codePoint == Integer.parseInt("000B", 16) // vertical tabulation
+ || codePoint == Integer.parseInt("000C", 16) // form feed
+ || codePoint == Integer.parseInt("000D", 16) // carriage return
+ || codePoint == Integer.parseInt("001C", 16) // file separator
+ || codePoint == Integer.parseInt("001D", 16) // group separator
+ || codePoint == Integer.parseInt("001E", 16) // record separator
+ || codePoint == Integer.parseInt("001F", 16)) { // unit separator
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean isISOControl(int codePoint) { // Expected value: true exactly for 0x00..0x1f and 0x7f..0x9f (inclusive).
+ return (codePoint > 0x00 && codePoint < 0x1f) // interior of the first range
+ || (codePoint > 0x7f && codePoint < 0x9f) // interior of the second range
+ || (codePoint == 0x00 || codePoint == 0x1f || codePoint == 0x7f || codePoint == 0x9f); // the four endpoints, making both ranges inclusive
+ }
+
+ private static boolean isIdentifierIgnorable(int codePoint, String category) { // Expected value: category Cf, or a code point in one of three inclusive ranges.
+ if (category.equals("Cf")) {
+ return true;
+ } else {
+ int a1 = Integer.parseInt("0000", 16); // range a: 0x0000..0x0008
+ int a2 = Integer.parseInt("0008", 16);
+ int b1 = Integer.parseInt("000E", 16); // range b: 0x000e..0x001b
+ int b2 = Integer.parseInt("001B", 16);
+ int c1 = Integer.parseInt("007F", 16); // range c: 0x007f..0x009f
+ int c2 = Integer.parseInt("009F", 16);
+
+ if ((codePoint > a1 && codePoint < a2) || (codePoint > b1 && codePoint < b2) // interiors of ranges a and b
+ || (codePoint > c1 && codePoint < c2) || (codePoint == a1 || codePoint == a2 // interior of range c, then the endpoints
+ || codePoint == b1 || codePoint == b2 || codePoint == c1 || codePoint == c2)) { // endpoints make all three ranges inclusive
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean isJavaIdentifierStart(String category) {
+ return isLetter(category) || category.equals("Nl") || category.equals("Sc")
+ || category.equals("Pc");
+ }
+
+ private static boolean isJavaIdentifierPart(int codePoint, String category) {
+ return isLetter(category) || category.equals("Sc") || category.equals("Pc")
+ || category.equals("Nd") || category.equals("Nl")
+ || category.equals("Mc") || category.equals("Mn")
+ || isIdentifierIgnorable(codePoint, category);
+ }
+
+ private static boolean isUnicodeIdentifierStart(String category) {
+ return isLetter(category) || category.equals("Nl");
+ }
+
+ private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
+ return isLetter(category) || category.equals("Pc") || category.equals("Nd")
+ || category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
+ || isIdentifierIgnorable(codePoint, category);
+ }
+
+ private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
+ System.out.println("Not equal at codePoint <" + Integer.toHexString(codePoint)
+ + ">, method: " + method
+ + ", actual: " + actual + ", expected: " + expected);
+ diffs++;
+ }
+}
--- a/test/jdk/java/lang/Character/CheckBlocks.java Tue May 22 21:50:45 2018 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/**
- * @test
- * @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600
- * @summary Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
- * @run main CheckBlocks
- * @author John O'Conner
- */
-
-import java.io.*;
-import java.util.*;
-import java.lang.Character.UnicodeBlock;
-
-
-public class CheckBlocks {
-
- static boolean err = false;
- static Class<?> character;
-
- public static void main(String[] args) throws Exception {
- generateBlockList();
-
- try {
- character = Class.forName("java.lang.Character$UnicodeBlock");
- } catch (ClassNotFoundException e) {
- throw new RuntimeException("Class.forName(\"Character\") failed.");
- }
-
- for (Block blk : blocks) {
- test4830803_1(blk);
- test4830803_2();
- test4886934(blk);
- }
-
- if (err) {
- throw new RuntimeException("Failed");
- } else {
- System.out.println("Passed");
- }
- }
-
- /**
- * Check that the UnicodeBlock forName() method works as expected.
- */
- private static void test4830803_1(Block blk) throws Exception {
-
- /*
- * Try 3 forms of block name in the forName() method. Each form should
- * produce the same expected block.
- */
- String blkName = blk.getName();
-
- // For backward compatibility
- if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
- blkName = "COMBINING_MARKS_FOR_SYMBOLS";
- System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
- } else if (blkName.equals("GREEK_AND_COPTIC")) {
- blkName = "GREEK";
- System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
- } else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
- blkName = "CYRILLIC_SUPPLEMENTARY";
- System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
- }
-
- String expectedBlock = null;
- try {
- expectedBlock = character.getField(blkName).getName();
- } catch (NoSuchFieldException | SecurityException e) {
- System.err.println("Error: " + blkName + " was not found.");
- err = true;
- return;
- }
-
- String canonicalBlockName = blk.getOriginalName();
- String idBlockName = expectedBlock;
- String regexBlockName = toRegExString(canonicalBlockName);
-
- if (regexBlockName == null) {
- System.err.println("Error: Block name which was processed with regex was null.");
- err = true;
- return;
- }
-
- if (!expectedBlock.equals(UnicodeBlock.forName(canonicalBlockName).toString())) {
- System.err.println("Error #1: UnicodeBlock.forName(\"" +
- canonicalBlockName + "\") returned wrong value.\n\tGot: " +
- UnicodeBlock.forName(canonicalBlockName) +
- "\n\tExpected: " + expectedBlock);
- err = true;
- }
-
- if (!expectedBlock.equals(UnicodeBlock.forName(idBlockName).toString())) {
- System.err.println("Error #2: UnicodeBlock.forName(\"" +
- idBlockName + "\") returned wrong value.\n\tGot: " +
- UnicodeBlock.forName(idBlockName) +
- "\n\tExpected: " + expectedBlock);
- err = true;
- }
-
- if (!expectedBlock.equals(UnicodeBlock.forName(regexBlockName).toString())) {
- System.err.println("Error #3: UnicodeBlock.forName(\"" +
- regexBlockName + "\") returned wrong value.\n\tGot: " +
- UnicodeBlock.forName(regexBlockName) +
- "\n\tExpected: " + expectedBlock);
- err = true;
- }
- }
-
- /**
- * now try a bad block name. This should produce an IAE.
- */
- private static void test4830803_2() {
- boolean threwExpected = false;
-
- try {
- UnicodeBlock block = UnicodeBlock.forName("notdefined");
- }
- catch(IllegalArgumentException e) {
- threwExpected = true;
- }
-
- if (threwExpected == false) {
- System.err.println("Error: UnicodeBlock.forName(\"notdefined\") should throw IllegalArgumentException.");
- err = true;
- }
- }
-
- /**
- * Convert the argument to a block name form used by the regex package.
- * That is, remove all spaces.
- */
- private static String toRegExString(String str) {
- String[] tokens = null;
- StringBuilder retStr = new StringBuilder();
- try {
- tokens = str.split(" ");
- }
- catch(java.util.regex.PatternSyntaxException e) {
- return null;
- }
- for(int x=0; x < tokens.length; ++x) {
- retStr.append(tokens[x]);
- }
- return retStr.toString();
- }
-
- private static void test4886934(Block blk) {
- String blkName = blk.getName();
- String blkOrigName = blk.getOriginalName();
- int ch = blk.getBegin();
- UnicodeBlock block = UnicodeBlock.of(ch);
-
- if (block == null) {
- System.err.println("Error: The block for " + blkName +
- " is missing. Please check java.lang.Character.UnicodeBlock.");
- err = true;
- return;
- }
-
- // For backward compatibility
- if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
- blkName = "COMBINING_MARKS_FOR_SYMBOLS";
- System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
- } else if (blkName.equals("GREEK_AND_COPTIC")) {
- blkName = "GREEK";
- System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
- } else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
- blkName = "CYRILLIC_SUPPLEMENTARY";
- System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
- }
-
- String blockName = block.toString();
- if (!blockName.equals(blkName)) {
- System.err.println("Error: Begin-of-block character(0x" +
- Integer.toHexString(ch).toUpperCase() +
- ") should be in \"" + blkName + "\" block " +
- "(Block name is \"" + blkOrigName + "\")" +
- " but found in \"" + blockName + "\" block.");
- err = true;
- }
-
- block = UnicodeBlock.of(++ch);
- blockName = block.toString();
- if (!blockName.equals(blkName)) {
- System.err.println("Error: Character(0x" +
- Integer.toHexString(ch).toUpperCase() +
- ") should be in \"" + blkName + "\" block " +
- "(Block name is \"" + blkOrigName + "\")" +
- " but found in \"" + blockName + "\" block.");
- err = true;
- }
-
- ch = blk.getEnd();
- block = UnicodeBlock.of(ch);
- blockName = block.toString();
- if (!blockName.equals(blkName)) {
- System.err.println("Error: End-of-block Character(0x" +
- Integer.toHexString(ch).toUpperCase() +
- ") should be in \"" + blkName + "\" block " +
- "(Block name is \"" + blkOrigName + "\")" +
- " but found in \"" + blockName + "\" block.");
- err = true;
- }
- }
-
- // List of all Unicode blocks, their start, and end codepoints.
- public static HashSet<Block> blocks = new HashSet<>();
-
- private static void generateBlockList() throws Exception {
- BufferedReader f = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Blocks.txt")));
-
- String line;
- while ((line = f.readLine()) != null) {
- if (line.length() == 0 || line.charAt(0) == '#') {
- continue;
- }
-
- int index1 = line.indexOf('.');
- int begin = Integer.parseInt(line.substring(0, index1), 16);
- int index2 = line.indexOf(';');
- int end = Integer.parseInt(line.substring(index1+2, index2), 16);
- String name = line.substring(index2+1).trim();
-
- System.out.println(" Adding a Block(" +
- Integer.toHexString(begin) + ", " + Integer.toHexString(end) +
- ", " + name + ")");
- blocks.add(new Block(begin, end, name));
- }
- f.close();
- }
-}
-
-class Block {
-
- public Block() {
- blockBegin = 0;
- blockEnd = 0;
- blockName = null;
- }
-
- public Block(int begin, int end, String name) {
- blockBegin = begin;
- blockEnd = end;
- blockName = name.replaceAll("[ -]", "_").toUpperCase(Locale.ENGLISH);
- originalBlockName = name;
- }
-
- public int getBegin() {
- return blockBegin;
- }
-
- public int getEnd() {
- return blockEnd;
- }
-
- public String getName() {
- return blockName;
- }
-
- public String getOriginalName() {
- return originalBlockName;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == null) return false;
- if (!(obj instanceof Block)) return false;
-
- Block other = (Block)obj;
- return other.blockBegin == blockBegin &&
- other.blockEnd == blockEnd &&
- other.blockName.equals(blockName) &&
- other.originalBlockName.equals(originalBlockName);
- }
- int blockBegin, blockEnd;
- String blockName, originalBlockName;
-}
--- a/test/jdk/java/lang/Character/TestISOControls.java Tue May 22 21:50:45 2018 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @summary Check that ISO control ranges are valid.
- * @run main TestISOControls
- * @author John O'Conner
- */
-
-public class TestISOControls {
-
-
- public static void main(String[] args) {
-
- int[] test = { -1, 0, 0x0010, 0x001F, 0x0020, 0x007E, 0x007F, 0x0090,
- 0x009F, 0x00A0 };
- boolean[] expectedResult = { false, true, true, true, false, false, true,
- true, true, false };
-
- for (int x=0; x < test.length; ++x) {
- if (Character.isISOControl(test[x]) != expectedResult[x]) {
- System.out.println("Fail: " + test[x]);
- throw new RuntimeException();
- }
-
- }
- System.out.println("Passed");
-
- }
-
-}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/UnicodeBlock/Blocks.txt Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,316 @@
+# Blocks-10.0.0.txt
+# Date: 2017-04-12, 17:30:00 GMT [KW]
+# Copyright (c) 2017 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Format:
+# Start Code..End Code; Block Name
+
+# ================================================
+
+# Note: When comparing block names, casing, whitespace, hyphens,
+# and underbars are ignored.
+# For example, "Latin Extended-A" and "latin extended a" are equivalent.
+# For more information on the comparison of property values,
+# see UAX #44: http://www.unicode.org/reports/tr44/
+#
+# All block ranges start with a value where (cp MOD 16) = 0,
+# and end with a value where (cp MOD 16) = 15. In other words,
+# the last hexadecimal digit of the start of range is ...0
+# and the last hexadecimal digit of the end of range is ...F.
+# This constraint on block ranges guarantees that allocations
+# are done in terms of whole columns, and that code chart display
+# never involves splitting columns in the charts.
+#
+# All code points not explicitly listed for Block
+# have the value No_Block.
+
+# Property: Block
+#
+# @missing: 0000..10FFFF; No_Block
+
+0000..007F; Basic Latin
+0080..00FF; Latin-1 Supplement
+0100..017F; Latin Extended-A
+0180..024F; Latin Extended-B
+0250..02AF; IPA Extensions
+02B0..02FF; Spacing Modifier Letters
+0300..036F; Combining Diacritical Marks
+0370..03FF; Greek and Coptic
+0400..04FF; Cyrillic
+0500..052F; Cyrillic Supplement
+0530..058F; Armenian
+0590..05FF; Hebrew
+0600..06FF; Arabic
+0700..074F; Syriac
+0750..077F; Arabic Supplement
+0780..07BF; Thaana
+07C0..07FF; NKo
+0800..083F; Samaritan
+0840..085F; Mandaic
+0860..086F; Syriac Supplement
+08A0..08FF; Arabic Extended-A
+0900..097F; Devanagari
+0980..09FF; Bengali
+0A00..0A7F; Gurmukhi
+0A80..0AFF; Gujarati
+0B00..0B7F; Oriya
+0B80..0BFF; Tamil
+0C00..0C7F; Telugu
+0C80..0CFF; Kannada
+0D00..0D7F; Malayalam
+0D80..0DFF; Sinhala
+0E00..0E7F; Thai
+0E80..0EFF; Lao
+0F00..0FFF; Tibetan
+1000..109F; Myanmar
+10A0..10FF; Georgian
+1100..11FF; Hangul Jamo
+1200..137F; Ethiopic
+1380..139F; Ethiopic Supplement
+13A0..13FF; Cherokee
+1400..167F; Unified Canadian Aboriginal Syllabics
+1680..169F; Ogham
+16A0..16FF; Runic
+1700..171F; Tagalog
+1720..173F; Hanunoo
+1740..175F; Buhid
+1760..177F; Tagbanwa
+1780..17FF; Khmer
+1800..18AF; Mongolian
+18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
+1900..194F; Limbu
+1950..197F; Tai Le
+1980..19DF; New Tai Lue
+19E0..19FF; Khmer Symbols
+1A00..1A1F; Buginese
+1A20..1AAF; Tai Tham
+1AB0..1AFF; Combining Diacritical Marks Extended
+1B00..1B7F; Balinese
+1B80..1BBF; Sundanese
+1BC0..1BFF; Batak
+1C00..1C4F; Lepcha
+1C50..1C7F; Ol Chiki
+1C80..1C8F; Cyrillic Extended-C
+1CC0..1CCF; Sundanese Supplement
+1CD0..1CFF; Vedic Extensions
+1D00..1D7F; Phonetic Extensions
+1D80..1DBF; Phonetic Extensions Supplement
+1DC0..1DFF; Combining Diacritical Marks Supplement
+1E00..1EFF; Latin Extended Additional
+1F00..1FFF; Greek Extended
+2000..206F; General Punctuation
+2070..209F; Superscripts and Subscripts
+20A0..20CF; Currency Symbols
+20D0..20FF; Combining Diacritical Marks for Symbols
+2100..214F; Letterlike Symbols
+2150..218F; Number Forms
+2190..21FF; Arrows
+2200..22FF; Mathematical Operators
+2300..23FF; Miscellaneous Technical
+2400..243F; Control Pictures
+2440..245F; Optical Character Recognition
+2460..24FF; Enclosed Alphanumerics
+2500..257F; Box Drawing
+2580..259F; Block Elements
+25A0..25FF; Geometric Shapes
+2600..26FF; Miscellaneous Symbols
+2700..27BF; Dingbats
+27C0..27EF; Miscellaneous Mathematical Symbols-A
+27F0..27FF; Supplemental Arrows-A
+2800..28FF; Braille Patterns
+2900..297F; Supplemental Arrows-B
+2980..29FF; Miscellaneous Mathematical Symbols-B
+2A00..2AFF; Supplemental Mathematical Operators
+2B00..2BFF; Miscellaneous Symbols and Arrows
+2C00..2C5F; Glagolitic
+2C60..2C7F; Latin Extended-C
+2C80..2CFF; Coptic
+2D00..2D2F; Georgian Supplement
+2D30..2D7F; Tifinagh
+2D80..2DDF; Ethiopic Extended
+2DE0..2DFF; Cyrillic Extended-A
+2E00..2E7F; Supplemental Punctuation
+2E80..2EFF; CJK Radicals Supplement
+2F00..2FDF; Kangxi Radicals
+2FF0..2FFF; Ideographic Description Characters
+3000..303F; CJK Symbols and Punctuation
+3040..309F; Hiragana
+30A0..30FF; Katakana
+3100..312F; Bopomofo
+3130..318F; Hangul Compatibility Jamo
+3190..319F; Kanbun
+31A0..31BF; Bopomofo Extended
+31C0..31EF; CJK Strokes
+31F0..31FF; Katakana Phonetic Extensions
+3200..32FF; Enclosed CJK Letters and Months
+3300..33FF; CJK Compatibility
+3400..4DBF; CJK Unified Ideographs Extension A
+4DC0..4DFF; Yijing Hexagram Symbols
+4E00..9FFF; CJK Unified Ideographs
+A000..A48F; Yi Syllables
+A490..A4CF; Yi Radicals
+A4D0..A4FF; Lisu
+A500..A63F; Vai
+A640..A69F; Cyrillic Extended-B
+A6A0..A6FF; Bamum
+A700..A71F; Modifier Tone Letters
+A720..A7FF; Latin Extended-D
+A800..A82F; Syloti Nagri
+A830..A83F; Common Indic Number Forms
+A840..A87F; Phags-pa
+A880..A8DF; Saurashtra
+A8E0..A8FF; Devanagari Extended
+A900..A92F; Kayah Li
+A930..A95F; Rejang
+A960..A97F; Hangul Jamo Extended-A
+A980..A9DF; Javanese
+A9E0..A9FF; Myanmar Extended-B
+AA00..AA5F; Cham
+AA60..AA7F; Myanmar Extended-A
+AA80..AADF; Tai Viet
+AAE0..AAFF; Meetei Mayek Extensions
+AB00..AB2F; Ethiopic Extended-A
+AB30..AB6F; Latin Extended-E
+AB70..ABBF; Cherokee Supplement
+ABC0..ABFF; Meetei Mayek
+AC00..D7AF; Hangul Syllables
+D7B0..D7FF; Hangul Jamo Extended-B
+D800..DB7F; High Surrogates
+DB80..DBFF; High Private Use Surrogates
+DC00..DFFF; Low Surrogates
+E000..F8FF; Private Use Area
+F900..FAFF; CJK Compatibility Ideographs
+FB00..FB4F; Alphabetic Presentation Forms
+FB50..FDFF; Arabic Presentation Forms-A
+FE00..FE0F; Variation Selectors
+FE10..FE1F; Vertical Forms
+FE20..FE2F; Combining Half Marks
+FE30..FE4F; CJK Compatibility Forms
+FE50..FE6F; Small Form Variants
+FE70..FEFF; Arabic Presentation Forms-B
+FF00..FFEF; Halfwidth and Fullwidth Forms
+FFF0..FFFF; Specials
+10000..1007F; Linear B Syllabary
+10080..100FF; Linear B Ideograms
+10100..1013F; Aegean Numbers
+10140..1018F; Ancient Greek Numbers
+10190..101CF; Ancient Symbols
+101D0..101FF; Phaistos Disc
+10280..1029F; Lycian
+102A0..102DF; Carian
+102E0..102FF; Coptic Epact Numbers
+10300..1032F; Old Italic
+10330..1034F; Gothic
+10350..1037F; Old Permic
+10380..1039F; Ugaritic
+103A0..103DF; Old Persian
+10400..1044F; Deseret
+10450..1047F; Shavian
+10480..104AF; Osmanya
+104B0..104FF; Osage
+10500..1052F; Elbasan
+10530..1056F; Caucasian Albanian
+10600..1077F; Linear A
+10800..1083F; Cypriot Syllabary
+10840..1085F; Imperial Aramaic
+10860..1087F; Palmyrene
+10880..108AF; Nabataean
+108E0..108FF; Hatran
+10900..1091F; Phoenician
+10920..1093F; Lydian
+10980..1099F; Meroitic Hieroglyphs
+109A0..109FF; Meroitic Cursive
+10A00..10A5F; Kharoshthi
+10A60..10A7F; Old South Arabian
+10A80..10A9F; Old North Arabian
+10AC0..10AFF; Manichaean
+10B00..10B3F; Avestan
+10B40..10B5F; Inscriptional Parthian
+10B60..10B7F; Inscriptional Pahlavi
+10B80..10BAF; Psalter Pahlavi
+10C00..10C4F; Old Turkic
+10C80..10CFF; Old Hungarian
+10E60..10E7F; Rumi Numeral Symbols
+11000..1107F; Brahmi
+11080..110CF; Kaithi
+110D0..110FF; Sora Sompeng
+11100..1114F; Chakma
+11150..1117F; Mahajani
+11180..111DF; Sharada
+111E0..111FF; Sinhala Archaic Numbers
+11200..1124F; Khojki
+11280..112AF; Multani
+112B0..112FF; Khudawadi
+11300..1137F; Grantha
+11400..1147F; Newa
+11480..114DF; Tirhuta
+11580..115FF; Siddham
+11600..1165F; Modi
+11660..1167F; Mongolian Supplement
+11680..116CF; Takri
+11700..1173F; Ahom
+118A0..118FF; Warang Citi
+11A00..11A4F; Zanabazar Square
+11A50..11AAF; Soyombo
+11AC0..11AFF; Pau Cin Hau
+11C00..11C6F; Bhaiksuki
+11C70..11CBF; Marchen
+11D00..11D5F; Masaram Gondi
+12000..123FF; Cuneiform
+12400..1247F; Cuneiform Numbers and Punctuation
+12480..1254F; Early Dynastic Cuneiform
+13000..1342F; Egyptian Hieroglyphs
+14400..1467F; Anatolian Hieroglyphs
+16800..16A3F; Bamum Supplement
+16A40..16A6F; Mro
+16AD0..16AFF; Bassa Vah
+16B00..16B8F; Pahawh Hmong
+16F00..16F9F; Miao
+16FE0..16FFF; Ideographic Symbols and Punctuation
+17000..187FF; Tangut
+18800..18AFF; Tangut Components
+1B000..1B0FF; Kana Supplement
+1B100..1B12F; Kana Extended-A
+1B170..1B2FF; Nushu
+1BC00..1BC9F; Duployan
+1BCA0..1BCAF; Shorthand Format Controls
+1D000..1D0FF; Byzantine Musical Symbols
+1D100..1D1FF; Musical Symbols
+1D200..1D24F; Ancient Greek Musical Notation
+1D300..1D35F; Tai Xuan Jing Symbols
+1D360..1D37F; Counting Rod Numerals
+1D400..1D7FF; Mathematical Alphanumeric Symbols
+1D800..1DAAF; Sutton SignWriting
+1E000..1E02F; Glagolitic Supplement
+1E800..1E8DF; Mende Kikakui
+1E900..1E95F; Adlam
+1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
+1F000..1F02F; Mahjong Tiles
+1F030..1F09F; Domino Tiles
+1F0A0..1F0FF; Playing Cards
+1F100..1F1FF; Enclosed Alphanumeric Supplement
+1F200..1F2FF; Enclosed Ideographic Supplement
+1F300..1F5FF; Miscellaneous Symbols and Pictographs
+1F600..1F64F; Emoticons
+1F650..1F67F; Ornamental Dingbats
+1F680..1F6FF; Transport and Map Symbols
+1F700..1F77F; Alchemical Symbols
+1F780..1F7FF; Geometric Shapes Extended
+1F800..1F8FF; Supplemental Arrows-C
+1F900..1F9FF; Supplemental Symbols and Pictographs
+20000..2A6DF; CJK Unified Ideographs Extension B
+2A700..2B73F; CJK Unified Ideographs Extension C
+2B740..2B81F; CJK Unified Ideographs Extension D
+2B820..2CEAF; CJK Unified Ideographs Extension E
+2CEB0..2EBEF; CJK Unified Ideographs Extension F
+2F800..2FA1F; CJK Compatibility Ideographs Supplement
+E0000..E007F; Tags
+E0100..E01EF; Variation Selectors Supplement
+F0000..FFFFF; Supplementary Private Use Area-A
+100000..10FFFF; Supplementary Private Use Area-B
+
+# EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/UnicodeBlock/CheckBlocks.java Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600 8202771
+ * @summary Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
+ * @run main CheckBlocks
+ * @author John O'Conner
+ */
+
+import java.lang.Character.UnicodeBlock;
+import java.lang.reflect.Field;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashSet;
+import java.util.Locale;
+
+public class CheckBlocks {
+
+    // Set by any check that fails; main() turns it into the final verdict.
+    static boolean err = false;
+    // Character.UnicodeBlock as a Class object, for the reflective field lookups.
+    static Class<?> clazzUnicodeBlock;
+
+    /** Loads the block list, runs every check, and throws if any check failed. */
+    public static void main(String[] args) throws Exception {
+        generateBlockList();
+        try {
+            clazzUnicodeBlock = Class.forName("java.lang.Character$UnicodeBlock");
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException("Class.forName(\"java.lang.Character$UnicodeBlock\") failed.", e);
+        }
+        // The bad-name check does not depend on a block, so it runs once.
+        test4830803_2();
+        for (Block blk : blocks) {
+            test4830803_1(blk);
+            test4886934(blk);
+        }
+        test8202771();
+
+        if (err) {
+            throw new RuntimeException("Failed");
+        }
+        System.out.println("Passed");
+    }
+
+    /**
+     * Checks UnicodeBlock.forName() for one block taken from Blocks.txt.
+     *
+     * The block is looked up under three spellings: its name as written in
+     * Blocks.txt, the name of its UnicodeBlock field, and the Blocks.txt name
+     * with the spaces removed. Each lookup has to return a block whose
+     * toString() equals the field name; a mismatch is printed to System.err
+     * as "Error #1", "#2" or "#3" respectively and recorded in err.
+     * An exception thrown by forName() itself is not caught here and
+     * propagates to the caller.
+     *
+     * @param blk the block to look up
+     */
+    private static void test4830803_1(Block blk) throws Exception {
+        String fieldName = blk.getName();
+
+        // For backward compatibility three blocks keep an older field name.
+        if (fieldName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
+            fieldName = "COMBINING_MARKS_FOR_SYMBOLS";
+            System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS"
+                    + " is replaced with COMBINING_MARKS_FOR_SYMBOLS"
+                    + " for backward compatibility.");
+        } else if (fieldName.equals("GREEK_AND_COPTIC")) {
+            fieldName = "GREEK";
+            System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK"
+                    + " for backward compatibility.");
+        } else if (fieldName.equals("CYRILLIC_SUPPLEMENT")) {
+            fieldName = "CYRILLIC_SUPPLEMENTARY";
+            System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with"
+                    + " CYRILLIC_SUPPLEMENTARY for backward compatibility.");
+        }
+
+        // The field has to exist; its name is the expected toString() value.
+        String expected;
+        try {
+            expected = clazzUnicodeBlock.getField(fieldName).getName();
+        } catch (NoSuchFieldException | SecurityException e) {
+            System.err.println("Error: " + fieldName + " was not found.");
+            err = true;
+            return;
+        }
+
+        String canonical = blk.getOriginalName();
+        // A null result is treated as a failed conversion.
+        String squeezed = toRegExString(canonical);
+        if (squeezed == null) {
+            System.err.println("Error: Block name which was processed with regex was null.");
+            err = true;
+            return;
+        }
+
+        // #1: name from Blocks.txt, #2: field name, #3: name without spaces.
+        checkForName(1, canonical, expected);
+        checkForName(2, expected, expected);
+        checkForName(3, squeezed, expected);
+    }
+
+    /**
+     * Looks up one spelling with UnicodeBlock.forName() and reports
+     * "Error #n" when the result does not print as the expected field name.
+     *
+     * @param errorNumber number shown in the error message
+     * @param spelling    block name passed to forName()
+     * @param expected    field name the returned block must print as
+     */
+    private static void checkForName(int errorNumber, String spelling, String expected) {
+        if (expected.equals(UnicodeBlock.forName(spelling).toString())) {
+            return;
+        }
+        // forName() is called a second time only to show its result.
+        System.err.println("Error #" + errorNumber + ": UnicodeBlock.forName(\""
+                + spelling + "\") returned wrong value.\n\tGot: "
+                + UnicodeBlock.forName(spelling)
+                + "\n\tExpected: " + expected);
+        err = true;
+    }
+
+    /**
+     * Negative case for UnicodeBlock.forName(): "notdefined" is not a block
+     * name, so the call has to throw IllegalArgumentException. If it returns
+     * normally instead, the failure is printed to System.err and recorded
+     * in err.
+     */
+    private static void test4830803_2() {
+        try {
+            UnicodeBlock.forName("notdefined");
+        } catch (IllegalArgumentException expected) {
+            // This is the required outcome; nothing to report.
+            return;
+        }
+
+        // Reached only when forName() accepted the bogus name.
+        System.err.println("Error: UnicodeBlock.forName(\"notdefined\") should throw IllegalArgumentException.");
+        err = true;
+    }
+
+    /**
+     * Converts a block name to the form used by the regex package, which is
+     * the same name with every space removed: "Latin Extended-A" becomes
+     * "LatinExtended-A". All other characters are kept as they are.
+     *
+     * @param str block name to convert
+     * @return str without its space characters
+     */
+    private static String toRegExString(String str) {
+        StringBuilder squeezed = new StringBuilder(str.length());
+        for (char c : str.toCharArray()) {
+            // Only the space character U+0020 is dropped.
+            if (c != ' ') {
+                squeezed.append(c);
+            }
+        }
+        return squeezed.toString();
+    }
+
+    /**
+     * Checks UnicodeBlock.of() for every code point of one Blocks.txt block:
+     * each code point from getBegin() through getEnd() has to map to the
+     * block whose toString() is the expected UnicodeBlock field name.
+     * Failures are printed to System.err and recorded in err.
+     */
+    private static void test4886934(Block blk) {
+        String expected = blk.getName();
+        String listedName = blk.getOriginalName();
+
+        // For backward compatibility three blocks keep an older field name.
+        if (expected.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
+            expected = "COMBINING_MARKS_FOR_SYMBOLS";
+            System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS"
+                    + " is replaced with COMBINING_MARKS_FOR_SYMBOLS"
+                    + " for backward compatibility.");
+        } else if (expected.equals("GREEK_AND_COPTIC")) {
+            expected = "GREEK";
+            System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK"
+                    + " for backward compatibility.");
+        } else if (expected.equals("CYRILLIC_SUPPLEMENT")) {
+            expected = "CYRILLIC_SUPPLEMENTARY";
+            System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with"
+                    + " CYRILLIC_SUPPLEMENTARY for backward compatibility.");
+        }
+
+        for (int cp = blk.getBegin(); cp <= blk.getEnd(); cp++) {
+            UnicodeBlock actual = UnicodeBlock.of(cp);
+            if (actual == null) {
+                // No block at all: report once and stop scanning this range.
+                System.err.println("Error: The block for " + expected
+                        + " is missing. Please check java.lang.Character.UnicodeBlock.");
+                err = true;
+                return;
+            }
+            String actualName = actual.toString();
+            if (actualName.equals(expected)) {
+                continue;
+            }
+            System.err.println("Error: Character(0x"
+                    + Integer.toHexString(cp).toUpperCase()
+                    + ") should be in \"" + expected + "\" block "
+                    + "(Block name is \"" + listedName + "\")"
+                    + " but found in \"" + actualName + "\" block.");
+            err = true;
+        }
+    }
+
+    /**
+     * Reverse check: every public field of Character.UnicodeBlock that is not
+     * marked @Deprecated has to name a block listed in Blocks.txt. Fields
+     * that kept an older name for backward compatibility are translated to
+     * the current block name before the lookup.
+     */
+    private static void test8202771() {
+        for (Field field : clazzUnicodeBlock.getFields()) {
+            // Deprecated fields such as SURROGATES_AREA are not checked.
+            if (field.getAnnotation(Deprecated.class) != null) {
+                continue;
+            }
+
+            // Map the older field names to the names validateBlock() looks for.
+            String fieldName = field.getName();
+            String listedName;
+            if (fieldName.equals("COMBINING_MARKS_FOR_SYMBOLS")) {
+                listedName = "COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS";
+            } else if (fieldName.equals("GREEK")) {
+                listedName = "GREEK_AND_COPTIC";
+            } else if (fieldName.equals("CYRILLIC_SUPPLEMENTARY")) {
+                listedName = "CYRILLIC_SUPPLEMENT";
+            } else {
+                listedName = fieldName;
+            }
+
+            validateBlock(listedName);
+        }
+    }
+
+    /** Flags an error unless blkName is the name of one of the known blocks. */
+    private static void validateBlock(String blkName) {
+        boolean listed = false;
+        for (Block candidate : blocks) {
+            listed = listed || candidate.getName().equals(blkName);
+        }
+        if (listed) return;
+        err = true;
+        System.err.println(blkName + " is not a valid Unicode Block.");
+    }
+
+    // Every block read from Blocks.txt: code point range plus its names.
+    public static HashSet<Block> blocks = new HashSet<>();
+
+    /**
+     * Fills blocks from Blocks.txt in the test.src directory (default ".").
+     * Empty lines and '#' lines are skipped; others read "BEGIN..END; Name".
+     */
+    private static void generateBlockList() throws Exception {
+        File source = new File(System.getProperty("test.src", "."), "Blocks.txt");
+        try (BufferedReader in = new BufferedReader(new FileReader(source))) {
+            for (String row = in.readLine(); row != null; row = in.readLine()) {
+                if (row.isEmpty() || row.startsWith("#")) {
+                    continue;
+                }
+                int dots = row.indexOf('.');
+                int first = Integer.parseInt(row.substring(0, dots), 16);
+                int semicolon = row.indexOf(';');
+                int last = Integer.parseInt(row.substring(dots + 2, semicolon), 16);
+                String label = row.substring(semicolon + 1).trim();
+                System.out.println(" Adding a Block(" + Integer.toHexString(first)
+                        + ", " + Integer.toHexString(last) + ", " + label + ")");
+                blocks.add(new Block(first, last, label));
+            }
+        }
+    }
+}
+
+/** One Blocks.txt entry: a code point range plus the block name in two spellings. */
+class Block {
+
+    int blockBegin, blockEnd;
+    String blockName, originalBlockName;
+
+    /** Creates a block with range 0..0 and no names. */
+    public Block() {
+    }
+
+    /**
+     * Creates a block and derives the upper-case, underscore form of its name.
+     * @param begin first code point of the block
+     * @param end   last code point of the block
+     * @param name  block name as written in Blocks.txt, e.g. "Latin Extended-A"
+     */
+    public Block(int begin, int end, String name) {
+        blockBegin = begin;
+        blockEnd = end;
+        // Field-name spelling: spaces and hyphens become '_', then upper case.
+        blockName = name.replaceAll("[ -]", "_").toUpperCase(Locale.ENGLISH);
+        originalBlockName = name;
+    }
+
+    public int getBegin() { return blockBegin; }
+    public int getEnd() { return blockEnd; }
+    public String getName() { return blockName; }
+    public String getOriginalName() { return originalBlockName; }
+
+    /** Equal when the range and both names match; null names are allowed. */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof Block)) return false;    // also rejects null
+        Block other = (Block) obj;
+        return other.blockBegin == blockBegin && other.blockEnd == blockEnd
+                && java.util.Objects.equals(other.blockName, blockName)
+                && java.util.Objects.equals(other.originalBlockName, originalBlockName);
+    }
+
+    /** Hashes the fields equals() compares, as required for use in a HashSet. */
+    @Override
+    public int hashCode() {
+        return java.util.Objects.hash(blockBegin, blockEnd, blockName, originalBlockName);
+    }
+}