jdk-sandbox: changeset 50230:cae567ae015d

--- a/test/jdk/java/lang/Character/Blocks.txt	Tue May 22 21:50:45 2018 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,316 +0,0 @@
-# Blocks-10.0.0.txt
-# Date: 2017-04-12, 17:30:00 GMT [KW]
-# Copyright (c) 2017 Unicode, Inc.
-# For terms of use, see http://www.unicode.org/terms_of_use.html
-#
-# Unicode Character Database
-# For documentation, see http://www.unicode.org/reports/tr44/
-#
-# Format:
-# Start Code..End Code; Block Name
-
-# ================================================
-
-# Note:   When comparing block names, casing, whitespace, hyphens,
-#         and underbars are ignored.
-#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
-#         For more information on the comparison of property values,
-#            see UAX #44: http://www.unicode.org/reports/tr44/
-#
-#  All block ranges start with a value where (cp MOD 16) = 0,
-#  and end with a value where (cp MOD 16) = 15. In other words,
-#  the last hexadecimal digit of the start of range is ...0
-#  and the last hexadecimal digit of the end of range is ...F.
-#  This constraint on block ranges guarantees that allocations
-#  are done in terms of whole columns, and that code chart display
-#  never involves splitting columns in the charts.
-#
-#  All code points not explicitly listed for Block
-#  have the value No_Block.
-
-# Property:	Block
-#
-# @missing: 0000..10FFFF; No_Block
-
-0000..007F; Basic Latin
-0080..00FF; Latin-1 Supplement
-0100..017F; Latin Extended-A
-0180..024F; Latin Extended-B
-0250..02AF; IPA Extensions
-02B0..02FF; Spacing Modifier Letters
-0300..036F; Combining Diacritical Marks
-0370..03FF; Greek and Coptic
-0400..04FF; Cyrillic
-0500..052F; Cyrillic Supplement
-0530..058F; Armenian
-0590..05FF; Hebrew
-0600..06FF; Arabic
-0700..074F; Syriac
-0750..077F; Arabic Supplement
-0780..07BF; Thaana
-07C0..07FF; NKo
-0800..083F; Samaritan
-0840..085F; Mandaic
-0860..086F; Syriac Supplement
-08A0..08FF; Arabic Extended-A
-0900..097F; Devanagari
-0980..09FF; Bengali
-0A00..0A7F; Gurmukhi
-0A80..0AFF; Gujarati
-0B00..0B7F; Oriya
-0B80..0BFF; Tamil
-0C00..0C7F; Telugu
-0C80..0CFF; Kannada
-0D00..0D7F; Malayalam
-0D80..0DFF; Sinhala
-0E00..0E7F; Thai
-0E80..0EFF; Lao
-0F00..0FFF; Tibetan
-1000..109F; Myanmar
-10A0..10FF; Georgian
-1100..11FF; Hangul Jamo
-1200..137F; Ethiopic
-1380..139F; Ethiopic Supplement
-13A0..13FF; Cherokee
-1400..167F; Unified Canadian Aboriginal Syllabics
-1680..169F; Ogham
-16A0..16FF; Runic
-1700..171F; Tagalog
-1720..173F; Hanunoo
-1740..175F; Buhid
-1760..177F; Tagbanwa
-1780..17FF; Khmer
-1800..18AF; Mongolian
-18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
-1900..194F; Limbu
-1950..197F; Tai Le
-1980..19DF; New Tai Lue
-19E0..19FF; Khmer Symbols
-1A00..1A1F; Buginese
-1A20..1AAF; Tai Tham
-1AB0..1AFF; Combining Diacritical Marks Extended
-1B00..1B7F; Balinese
-1B80..1BBF; Sundanese
-1BC0..1BFF; Batak
-1C00..1C4F; Lepcha
-1C50..1C7F; Ol Chiki
-1C80..1C8F; Cyrillic Extended-C
-1CC0..1CCF; Sundanese Supplement
-1CD0..1CFF; Vedic Extensions
-1D00..1D7F; Phonetic Extensions
-1D80..1DBF; Phonetic Extensions Supplement
-1DC0..1DFF; Combining Diacritical Marks Supplement
-1E00..1EFF; Latin Extended Additional
-1F00..1FFF; Greek Extended
-2000..206F; General Punctuation
-2070..209F; Superscripts and Subscripts
-20A0..20CF; Currency Symbols
-20D0..20FF; Combining Diacritical Marks for Symbols
-2100..214F; Letterlike Symbols
-2150..218F; Number Forms
-2190..21FF; Arrows
-2200..22FF; Mathematical Operators
-2300..23FF; Miscellaneous Technical
-2400..243F; Control Pictures
-2440..245F; Optical Character Recognition
-2460..24FF; Enclosed Alphanumerics
-2500..257F; Box Drawing
-2580..259F; Block Elements
-25A0..25FF; Geometric Shapes
-2600..26FF; Miscellaneous Symbols
-2700..27BF; Dingbats
-27C0..27EF; Miscellaneous Mathematical Symbols-A
-27F0..27FF; Supplemental Arrows-A
-2800..28FF; Braille Patterns
-2900..297F; Supplemental Arrows-B
-2980..29FF; Miscellaneous Mathematical Symbols-B
-2A00..2AFF; Supplemental Mathematical Operators
-2B00..2BFF; Miscellaneous Symbols and Arrows
-2C00..2C5F; Glagolitic
-2C60..2C7F; Latin Extended-C
-2C80..2CFF; Coptic
-2D00..2D2F; Georgian Supplement
-2D30..2D7F; Tifinagh
-2D80..2DDF; Ethiopic Extended
-2DE0..2DFF; Cyrillic Extended-A
-2E00..2E7F; Supplemental Punctuation
-2E80..2EFF; CJK Radicals Supplement
-2F00..2FDF; Kangxi Radicals
-2FF0..2FFF; Ideographic Description Characters
-3000..303F; CJK Symbols and Punctuation
-3040..309F; Hiragana
-30A0..30FF; Katakana
-3100..312F; Bopomofo
-3130..318F; Hangul Compatibility Jamo
-3190..319F; Kanbun
-31A0..31BF; Bopomofo Extended
-31C0..31EF; CJK Strokes
-31F0..31FF; Katakana Phonetic Extensions
-3200..32FF; Enclosed CJK Letters and Months
-3300..33FF; CJK Compatibility
-3400..4DBF; CJK Unified Ideographs Extension A
-4DC0..4DFF; Yijing Hexagram Symbols
-4E00..9FFF; CJK Unified Ideographs
-A000..A48F; Yi Syllables
-A490..A4CF; Yi Radicals
-A4D0..A4FF; Lisu
-A500..A63F; Vai
-A640..A69F; Cyrillic Extended-B
-A6A0..A6FF; Bamum
-A700..A71F; Modifier Tone Letters
-A720..A7FF; Latin Extended-D
-A800..A82F; Syloti Nagri
-A830..A83F; Common Indic Number Forms
-A840..A87F; Phags-pa
-A880..A8DF; Saurashtra
-A8E0..A8FF; Devanagari Extended
-A900..A92F; Kayah Li
-A930..A95F; Rejang
-A960..A97F; Hangul Jamo Extended-A
-A980..A9DF; Javanese
-A9E0..A9FF; Myanmar Extended-B
-AA00..AA5F; Cham
-AA60..AA7F; Myanmar Extended-A
-AA80..AADF; Tai Viet
-AAE0..AAFF; Meetei Mayek Extensions
-AB00..AB2F; Ethiopic Extended-A
-AB30..AB6F; Latin Extended-E
-AB70..ABBF; Cherokee Supplement
-ABC0..ABFF; Meetei Mayek
-AC00..D7AF; Hangul Syllables
-D7B0..D7FF; Hangul Jamo Extended-B
-D800..DB7F; High Surrogates
-DB80..DBFF; High Private Use Surrogates
-DC00..DFFF; Low Surrogates
-E000..F8FF; Private Use Area
-F900..FAFF; CJK Compatibility Ideographs
-FB00..FB4F; Alphabetic Presentation Forms
-FB50..FDFF; Arabic Presentation Forms-A
-FE00..FE0F; Variation Selectors
-FE10..FE1F; Vertical Forms
-FE20..FE2F; Combining Half Marks
-FE30..FE4F; CJK Compatibility Forms
-FE50..FE6F; Small Form Variants
-FE70..FEFF; Arabic Presentation Forms-B
-FF00..FFEF; Halfwidth and Fullwidth Forms
-FFF0..FFFF; Specials
-10000..1007F; Linear B Syllabary
-10080..100FF; Linear B Ideograms
-10100..1013F; Aegean Numbers
-10140..1018F; Ancient Greek Numbers
-10190..101CF; Ancient Symbols
-101D0..101FF; Phaistos Disc
-10280..1029F; Lycian
-102A0..102DF; Carian
-102E0..102FF; Coptic Epact Numbers
-10300..1032F; Old Italic
-10330..1034F; Gothic
-10350..1037F; Old Permic
-10380..1039F; Ugaritic
-103A0..103DF; Old Persian
-10400..1044F; Deseret
-10450..1047F; Shavian
-10480..104AF; Osmanya
-104B0..104FF; Osage
-10500..1052F; Elbasan
-10530..1056F; Caucasian Albanian
-10600..1077F; Linear A
-10800..1083F; Cypriot Syllabary
-10840..1085F; Imperial Aramaic
-10860..1087F; Palmyrene
-10880..108AF; Nabataean
-108E0..108FF; Hatran
-10900..1091F; Phoenician
-10920..1093F; Lydian
-10980..1099F; Meroitic Hieroglyphs
-109A0..109FF; Meroitic Cursive
-10A00..10A5F; Kharoshthi
-10A60..10A7F; Old South Arabian
-10A80..10A9F; Old North Arabian
-10AC0..10AFF; Manichaean
-10B00..10B3F; Avestan
-10B40..10B5F; Inscriptional Parthian
-10B60..10B7F; Inscriptional Pahlavi
-10B80..10BAF; Psalter Pahlavi
-10C00..10C4F; Old Turkic
-10C80..10CFF; Old Hungarian
-10E60..10E7F; Rumi Numeral Symbols
-11000..1107F; Brahmi
-11080..110CF; Kaithi
-110D0..110FF; Sora Sompeng
-11100..1114F; Chakma
-11150..1117F; Mahajani
-11180..111DF; Sharada
-111E0..111FF; Sinhala Archaic Numbers
-11200..1124F; Khojki
-11280..112AF; Multani
-112B0..112FF; Khudawadi
-11300..1137F; Grantha
-11400..1147F; Newa
-11480..114DF; Tirhuta
-11580..115FF; Siddham
-11600..1165F; Modi
-11660..1167F; Mongolian Supplement
-11680..116CF; Takri
-11700..1173F; Ahom
-118A0..118FF; Warang Citi
-11A00..11A4F; Zanabazar Square
-11A50..11AAF; Soyombo
-11AC0..11AFF; Pau Cin Hau
-11C00..11C6F; Bhaiksuki
-11C70..11CBF; Marchen
-11D00..11D5F; Masaram Gondi
-12000..123FF; Cuneiform
-12400..1247F; Cuneiform Numbers and Punctuation
-12480..1254F; Early Dynastic Cuneiform
-13000..1342F; Egyptian Hieroglyphs
-14400..1467F; Anatolian Hieroglyphs
-16800..16A3F; Bamum Supplement
-16A40..16A6F; Mro
-16AD0..16AFF; Bassa Vah
-16B00..16B8F; Pahawh Hmong
-16F00..16F9F; Miao
-16FE0..16FFF; Ideographic Symbols and Punctuation
-17000..187FF; Tangut
-18800..18AFF; Tangut Components
-1B000..1B0FF; Kana Supplement
-1B100..1B12F; Kana Extended-A
-1B170..1B2FF; Nushu
-1BC00..1BC9F; Duployan
-1BCA0..1BCAF; Shorthand Format Controls
-1D000..1D0FF; Byzantine Musical Symbols
-1D100..1D1FF; Musical Symbols
-1D200..1D24F; Ancient Greek Musical Notation
-1D300..1D35F; Tai Xuan Jing Symbols
-1D360..1D37F; Counting Rod Numerals
-1D400..1D7FF; Mathematical Alphanumeric Symbols
-1D800..1DAAF; Sutton SignWriting
-1E000..1E02F; Glagolitic Supplement
-1E800..1E8DF; Mende Kikakui
-1E900..1E95F; Adlam
-1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
-1F000..1F02F; Mahjong Tiles
-1F030..1F09F; Domino Tiles
-1F0A0..1F0FF; Playing Cards
-1F100..1F1FF; Enclosed Alphanumeric Supplement
-1F200..1F2FF; Enclosed Ideographic Supplement
-1F300..1F5FF; Miscellaneous Symbols and Pictographs
-1F600..1F64F; Emoticons
-1F650..1F67F; Ornamental Dingbats
-1F680..1F6FF; Transport and Map Symbols
-1F700..1F77F; Alchemical Symbols
-1F780..1F7FF; Geometric Shapes Extended
-1F800..1F8FF; Supplemental Arrows-C
-1F900..1F9FF; Supplemental Symbols and Pictographs
-20000..2A6DF; CJK Unified Ideographs Extension B
-2A700..2B73F; CJK Unified Ideographs Extension C
-2B740..2B81F; CJK Unified Ideographs Extension D
-2B820..2CEAF; CJK Unified Ideographs Extension E
-2CEB0..2EBEF; CJK Unified Ideographs Extension F
-2F800..2FA1F; CJK Compatibility Ideographs Supplement
-E0000..E007F; Tags
-E0100..E01EF; Variation Selectors Supplement
-F0000..FFFFF; Supplementary Private Use Area-A
-100000..10FFFF; Supplementary Private Use Area-B
-
-# EOF

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/CharPropTest.java	Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8202771
+ * @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
+ * /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
+ * /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
+ * /isUnicodeIdentifierPart
+ * @run main CharPropTest
+ */
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.stream.Stream;
+
+/**
+ * Cross-checks the character-property predicates of java.lang.Character
+ * against the general categories recorded in UnicodeData.txt.
+ *
+ * Every code point listed in the data file, individually or inside a
+ * First/Last range, is run through each predicate. Mismatches are printed
+ * and counted, and the test fails if the count is non-zero.
+ */
+public class CharPropTest {
+    // Number of mismatches found so far.
+    private static int diffs = 0;
+    // First code point of the range currently being read.
+    private static int rangeStart = 0x0000;
+    // True between a "First>" record and its matching "Last>" record.
+    private static boolean isRange = false;
+
+    /**
+     * Reads UnicodeData.txt from the directory named by the "test.src"
+     * system property (current directory if unset) and checks every record.
+     *
+     * @param args ignored
+     * @throws RuntimeException if any predicate disagrees with the data
+     *         file, or if the file ends inside an unfinished range
+     * @throws Exception if the data file cannot be read
+     */
+    public static void main(String[] args) throws Exception {
+        // Start from a clean slate so that main can be invoked repeatedly.
+        diffs = 0;
+        isRange = false;
+
+        Path path = Paths.get(System.getProperty("test.src", "."),
+                "UnicodeData.txt");
+        try (Stream<String> lines = Files.lines(path)) {
+            lines.map(String::trim)
+                 .filter(line -> line.length() != 0 && line.charAt(0) != '#')
+                 .forEach(line -> handleOneLine(line));
+        }
+
+        // A "First>" record that never met its "Last>" would otherwise
+        // leave the whole range silently untested.
+        if (isRange) {
+            throw new RuntimeException("Not a valid range, first range <"
+                    + Integer.toHexString(rangeStart) + "> without last.");
+        }
+        if (diffs != 0) {
+            throw new RuntimeException("Total differences: " + diffs);
+        }
+    }
+
+    /**
+     * Processes one UnicodeData.txt record: tests a single code point,
+     * opens a range on a "First>" record, or tests the whole range once
+     * the matching "Last>" record arrives.
+     */
+    private static void handleOneLine(String line) {
+        String[] fields = line.split(";");
+        int currentCp = Integer.parseInt(fields[0], 16);
+        String name = fields[1];
+        String category = fields[2];
+
+        // Except single code point, also handle ranges like the following:
+        // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+        // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;
+        if (isRange) {
+            if (!name.endsWith("Last>")) {
+                throw new RuntimeException("Not a valid range, first range <"
+                        + Integer.toHexString(rangeStart) + "> without last.");
+            }
+            for (int cp = rangeStart; cp <= currentCp; cp++) {
+                testCodePoint(cp, category);
+            }
+            isRange = false;
+        } else if (name.endsWith("First>")) {
+            rangeStart = currentCp;
+            isRange = true;
+        } else {
+            testCodePoint(currentCp, category);
+        }
+    }
+
+    /** Runs every property check against a single code point. */
+    private static void testCodePoint(int codePoint, String category) {
+        isDigitTest(codePoint, category);
+        isLetterTest(codePoint, category);
+        isLetterOrDigitTest(codePoint, category);
+
+        isSpaceCharTest(codePoint, category);
+        isWhitespaceTest(codePoint, category);
+
+        isTitleCaseTest(codePoint, category);
+
+        isISOControlTest(codePoint);
+
+        isIdentifierIgnorableTest(codePoint, category);
+        isJavaIdentifierStartTest(codePoint, category);
+        isJavaIdentifierPartTest(codePoint, category);
+        isUnicodeIdentifierStartTest(codePoint, category);
+        isUnicodeIdentifierPartTest(codePoint, category);
+    }
+
+    private static void isDigitTest(int codePoint, String category) {
+        check(codePoint, "isDigit",
+                Character.isDigit(codePoint), category.equals("Nd"));
+    }
+
+    private static void isLetterTest(int codePoint, String category) {
+        check(codePoint, "isLetter",
+                Character.isLetter(codePoint), isLetter(category));
+    }
+
+    private static void isLetterOrDigitTest(int codePoint, String category) {
+        check(codePoint, "isLetterOrDigit",
+                Character.isLetterOrDigit(codePoint),
+                isLetter(category) || category.equals("Nd"));
+    }
+
+    private static void isSpaceCharTest(int codePoint, String category) {
+        check(codePoint, "isSpaceChar",
+                Character.isSpaceChar(codePoint), isSpaceChar(category));
+    }
+
+    private static void isWhitespaceTest(int codePoint, String category) {
+        check(codePoint, "isWhitespace",
+                Character.isWhitespace(codePoint),
+                isWhitespace(codePoint, category));
+    }
+
+    private static void isTitleCaseTest(int codePoint, String category) {
+        check(codePoint, "isTitleCase",
+                Character.isTitleCase(codePoint), category.equals("Lt"));
+    }
+
+    private static void isISOControlTest(int codePoint) {
+        check(codePoint, "isISOControl",
+                Character.isISOControl(codePoint), isISOControl(codePoint));
+    }
+
+    private static void isIdentifierIgnorableTest(int codePoint, String category) {
+        check(codePoint, "isIdentifierIgnorable",
+                Character.isIdentifierIgnorable(codePoint),
+                isIdentifierIgnorable(codePoint, category));
+    }
+
+    private static void isJavaIdentifierStartTest(int codePoint, String category) {
+        check(codePoint, "isJavaIdentifierStart",
+                Character.isJavaIdentifierStart(codePoint),
+                isJavaIdentifierStart(category));
+    }
+
+    private static void isJavaIdentifierPartTest(int codePoint, String category) {
+        check(codePoint, "isJavaIdentifierPart",
+                Character.isJavaIdentifierPart(codePoint),
+                isJavaIdentifierPart(codePoint, category));
+    }
+
+    private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
+        check(codePoint, "isUnicodeIdentifierStart",
+                Character.isUnicodeIdentifierStart(codePoint),
+                isUnicodeIdentifierStart(category));
+    }
+
+    private static void isUnicodeIdentifierPartTest(int codePoint, String category) {
+        check(codePoint, "isUnicodeIdentifierPart",
+                Character.isUnicodeIdentifierPart(codePoint),
+                isUnicodeIdentifierPart(codePoint, category));
+    }
+
+    /** Expected isLetter: general category Lu, Ll, Lt, Lm or Lo. */
+    private static boolean isLetter(String category) {
+        return category.equals("Lu") || category.equals("Ll")
+               || category.equals("Lt") || category.equals("Lm")
+               || category.equals("Lo");
+    }
+
+    /** Expected isSpaceChar: general category Zs, Zl or Zp. */
+    private static boolean isSpaceChar(String category) {
+        return category.equals("Zs") || category.equals("Zl")
+               || category.equals("Zp");
+    }
+
+    /**
+     * Expected isWhitespace: a space character other than U+00A0, U+2007
+     * and U+202F, or one of U+0009..U+000D / U+001C..U+001F.
+     */
+    private static boolean isWhitespace(int codePoint, String category) {
+        if (isSpaceChar(category)) {
+            return codePoint != 0x00A0 && codePoint != 0x2007
+                   && codePoint != 0x202F;
+        }
+        return (codePoint >= 0x0009 && codePoint <= 0x000D)
+               || (codePoint >= 0x001C && codePoint <= 0x001F);
+    }
+
+    /** Expected isISOControl: U+0000..U+001F or U+007F..U+009F. */
+    private static boolean isISOControl(int codePoint) {
+        return (codePoint >= 0x00 && codePoint <= 0x1F)
+               || (codePoint >= 0x7F && codePoint <= 0x9F);
+    }
+
+    /**
+     * Expected isIdentifierIgnorable: general category Cf, or one of
+     * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.
+     */
+    private static boolean isIdentifierIgnorable(int codePoint, String category) {
+        return category.equals("Cf")
+               || (codePoint >= 0x0000 && codePoint <= 0x0008)
+               || (codePoint >= 0x000E && codePoint <= 0x001B)
+               || (codePoint >= 0x007F && codePoint <= 0x009F);
+    }
+
+    /** Expected isJavaIdentifierStart: a letter, or category Nl, Sc or Pc. */
+    private static boolean isJavaIdentifierStart(String category) {
+        return isLetter(category) || category.equals("Nl") || category.equals("Sc")
+               || category.equals("Pc");
+    }
+
+    /**
+     * Expected isJavaIdentifierPart: a letter, category Sc, Pc, Nd, Nl, Mc
+     * or Mn, or an identifier-ignorable code point.
+     */
+    private static boolean isJavaIdentifierPart(int codePoint, String category) {
+        return isLetter(category) || category.equals("Sc") || category.equals("Pc")
+               || category.equals("Nd") || category.equals("Nl")
+               || category.equals("Mc") || category.equals("Mn")
+               || isIdentifierIgnorable(codePoint, category);
+    }
+
+    /** Expected isUnicodeIdentifierStart: a letter or category Nl. */
+    private static boolean isUnicodeIdentifierStart(String category) {
+        return isLetter(category) || category.equals("Nl");
+    }
+
+    /**
+     * Expected isUnicodeIdentifierPart: a letter, category Pc, Nd, Nl, Mc
+     * or Mn, or an identifier-ignorable code point.
+     */
+    private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
+        return isLetter(category) || category.equals("Pc") || category.equals("Nd")
+               || category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
+               || isIdentifierIgnorable(codePoint, category);
+    }
+
+    /** Reports a mismatch when actual and expected differ. */
+    private static void check(int codePoint, String method, boolean actual, boolean expected) {
+        if (actual != expected) {
+            printDiff(codePoint, method, actual, expected);
+        }
+    }
+
+    /** Prints one mismatch and increments the difference counter. */
+    private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
+        System.out.println("Not equal at codePoint <" + Integer.toHexString(codePoint)
+                + ">, method: " + method
+                + ", actual: " + actual + ", expected: " + expected);
+        diffs++;
+    }
+}

--- a/test/jdk/java/lang/Character/CheckBlocks.java	Tue May 22 21:50:45 2018 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/**
- * @test
- * @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600
- * @summary  Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
- * @run main CheckBlocks
- * @author John O'Conner
- */
-
-import java.io.*;
-import java.util.*;
-import java.lang.Character.UnicodeBlock;
-
-
-public class CheckBlocks {
-
-    static boolean err = false;
-    static Class<?> character;
-
-    public static void main(String[] args) throws Exception {
-        generateBlockList();
-
-        try {
-            character = Class.forName("java.lang.Character$UnicodeBlock");
-        } catch (ClassNotFoundException e) {
-            throw new RuntimeException("Class.forName(\"Character\") failed.");
-        }
-
-        for (Block blk : blocks) {
-            test4830803_1(blk);
-            test4830803_2();
-            test4886934(blk);
-        }
-
-        if (err) {
-            throw new RuntimeException("Failed");
-        } else {
-            System.out.println("Passed");
-        }
-    }
-
-    /**
-     * Check that the UnicodeBlock forName() method works as expected.
-     */
-    private static void test4830803_1(Block blk) throws Exception {
-
-        /*
-         * Try 3 forms of block name in the forName() method. Each form should
-         * produce the same expected block.
-         */
-        String blkName = blk.getName();
-
-        // For backward compatibility
-        if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
-            blkName = "COMBINING_MARKS_FOR_SYMBOLS";
-            System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
-        } else if (blkName.equals("GREEK_AND_COPTIC")) {
-            blkName = "GREEK";
-            System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
-        } else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
-            blkName = "CYRILLIC_SUPPLEMENTARY";
-            System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
-        }
-
-        String expectedBlock = null;
-        try {
-            expectedBlock = character.getField(blkName).getName();
-        } catch (NoSuchFieldException | SecurityException e) {
-            System.err.println("Error: " + blkName + " was not found.");
-            err = true;
-            return;
-        }
-
-        String canonicalBlockName = blk.getOriginalName();
-        String idBlockName = expectedBlock;
-        String regexBlockName = toRegExString(canonicalBlockName);
-
-        if (regexBlockName == null) {
-            System.err.println("Error: Block name which was processed with regex was null.");
-            err = true;
-            return;
-        }
-
-        if (!expectedBlock.equals(UnicodeBlock.forName(canonicalBlockName).toString())) {
-            System.err.println("Error #1: UnicodeBlock.forName(\"" +
-                canonicalBlockName + "\") returned wrong value.\n\tGot: " +
-                UnicodeBlock.forName(canonicalBlockName) +
-                "\n\tExpected: " + expectedBlock);
-            err = true;
-        }
-
-        if (!expectedBlock.equals(UnicodeBlock.forName(idBlockName).toString())) {
-            System.err.println("Error #2: UnicodeBlock.forName(\"" +
-                idBlockName + "\") returned wrong value.\n\tGot: " +
-                UnicodeBlock.forName(idBlockName) +
-                "\n\tExpected: " + expectedBlock);
-            err = true;
-        }
-
-        if (!expectedBlock.equals(UnicodeBlock.forName(regexBlockName).toString())) {
-            System.err.println("Error #3: UnicodeBlock.forName(\"" +
-                regexBlockName + "\") returned wrong value.\n\tGot: " +
-                UnicodeBlock.forName(regexBlockName) +
-                "\n\tExpected: " + expectedBlock);
-            err = true;
-        }
-    }
-
-    /**
-     * now try a bad block name. This should produce an IAE.
-     */
-    private static void test4830803_2() {
-        boolean threwExpected = false;
-
-        try {
-            UnicodeBlock block = UnicodeBlock.forName("notdefined");
-        }
-        catch(IllegalArgumentException e) {
-            threwExpected = true;
-        }
-
-        if (threwExpected == false) {
-            System.err.println("Error: UnicodeBlock.forName(\"notdefined\") should throw IllegalArgumentException.");
-            err = true;
-        }
-    }
-
-    /**
-     * Convert the argument to a block name form used by the regex package.
-     * That is, remove all spaces.
-     */
-    private static String toRegExString(String str) {
-        String[] tokens = null;
-        StringBuilder retStr = new StringBuilder();
-        try {
-                   tokens = str.split(" ");
-        }
-        catch(java.util.regex.PatternSyntaxException e) {
-                   return null;
-        }
-        for(int x=0; x < tokens.length; ++x) {
-            retStr.append(tokens[x]);
-        }
-        return retStr.toString();
-    }
-
-    private static void test4886934(Block blk) {
-        String blkName = blk.getName();
-        String blkOrigName = blk.getOriginalName();
-        int ch =  blk.getBegin();
-        UnicodeBlock block = UnicodeBlock.of(ch);
-
-        if (block == null) {
-            System.err.println("Error: The block for " + blkName +
-                " is missing. Please check java.lang.Character.UnicodeBlock.");
-            err = true;
-            return;
-        }
-
-        // For backward compatibility
-        if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
-            blkName = "COMBINING_MARKS_FOR_SYMBOLS";
-            System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
-        } else if (blkName.equals("GREEK_AND_COPTIC")) {
-            blkName = "GREEK";
-            System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
-        } else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
-            blkName = "CYRILLIC_SUPPLEMENTARY";
-            System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
-        }
-
-        String blockName = block.toString();
-        if (!blockName.equals(blkName)) {
-            System.err.println("Error: Begin-of-block character(0x" +
-                Integer.toHexString(ch).toUpperCase() +
-                ") should be in \"" + blkName + "\" block " +
-                "(Block name is \"" + blkOrigName + "\")" +
-                " but found in \"" + blockName + "\" block.");
-            err = true;
-        }
-
-        block = UnicodeBlock.of(++ch);
-        blockName = block.toString();
-        if (!blockName.equals(blkName)) {
-            System.err.println("Error: Character(0x" +
-                Integer.toHexString(ch).toUpperCase() +
-                ") should be in \"" + blkName + "\" block " +
-                "(Block name is \"" + blkOrigName + "\")" +
-                " but found in \"" + blockName + "\" block.");
-            err = true;
-        }
-
-        ch = blk.getEnd();
-        block = UnicodeBlock.of(ch);
-        blockName = block.toString();
-        if (!blockName.equals(blkName)) {
-            System.err.println("Error: End-of-block Character(0x" +
-                Integer.toHexString(ch).toUpperCase() +
-                ") should be in \"" + blkName + "\" block " +
-                "(Block name is \"" + blkOrigName + "\")" +
-                " but found in \"" + blockName + "\" block.");
-            err = true;
-        }
-    }
-
-    // List of all Unicode blocks, their start, and end codepoints.
-    public static HashSet<Block> blocks = new HashSet<>();
-
-    private static void generateBlockList() throws Exception {
-        BufferedReader f = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Blocks.txt")));
-
-        String line;
-        while ((line = f.readLine()) != null) {
-            if (line.length() == 0 || line.charAt(0) == '#') {
-                continue;
-            }
-
-            int index1 = line.indexOf('.');
-            int begin = Integer.parseInt(line.substring(0, index1), 16);
-            int index2 = line.indexOf(';');
-            int end = Integer.parseInt(line.substring(index1+2, index2), 16);
-            String name = line.substring(index2+1).trim();
-
-            System.out.println("  Adding a Block(" +
-                Integer.toHexString(begin) + ", " + Integer.toHexString(end) +
-                ", " + name + ")");
-            blocks.add(new Block(begin, end, name));
-        }
-        f.close();
-    }
-}
-
-class Block {
-
-    public Block() {
-        blockBegin = 0;
-        blockEnd = 0;
-        blockName = null;
-    }
-
-    public Block(int begin, int end, String name) {
-        blockBegin = begin;
-        blockEnd = end;
-        blockName = name.replaceAll("[ -]", "_").toUpperCase(Locale.ENGLISH);
-        originalBlockName = name;
-    }
-
-    public int getBegin() {
-        return blockBegin;
-    }
-
-    public int getEnd() {
-        return blockEnd;
-    }
-
-    public String getName() {
-        return blockName;
-    }
-
-    public String getOriginalName() {
-        return originalBlockName;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (obj == null) return false;
-        if (!(obj instanceof Block)) return false;
-
-        Block other = (Block)obj;
-        return other.blockBegin == blockBegin &&
-                other.blockEnd == blockEnd &&
-                other.blockName.equals(blockName) &&
-                other.originalBlockName.equals(originalBlockName);
-    }
-    int blockBegin, blockEnd;
-    String blockName, originalBlockName;
-}

--- a/test/jdk/java/lang/Character/TestISOControls.java	Tue May 22 21:50:45 2018 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @summary  Check that ISO control ranges are valid.
- * @run main TestISOControls
- * @author John O'Conner
- */
-
-public class TestISOControls {
-
-
-  public static void main(String[] args) {
-
-    int[] test = { -1, 0, 0x0010, 0x001F, 0x0020, 0x007E, 0x007F, 0x0090,
-                   0x009F, 0x00A0 };
-    boolean[] expectedResult = { false, true, true, true, false, false, true,
-                                 true, true, false };
-
-    for (int x=0; x < test.length; ++x) {
-      if (Character.isISOControl(test[x]) != expectedResult[x]) {
-          System.out.println("Fail: " + test[x]);
-          throw new RuntimeException();
-      }
-
-    }
-    System.out.println("Passed");
-
-  }
-
-}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/UnicodeBlock/Blocks.txt	Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,316 @@
+# Blocks-10.0.0.txt
+# Date: 2017-04-12, 17:30:00 GMT [KW]
+# Copyright (c) 2017 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Format:
+# Start Code..End Code; Block Name
+
+# ================================================
+
+# Note:   When comparing block names, casing, whitespace, hyphens,
+#         and underbars are ignored.
+#         For example, "Latin Extended-A" and "latin extended a" are equivalent.
+#         For more information on the comparison of property values,
+#            see UAX #44: http://www.unicode.org/reports/tr44/
+#
+#  All block ranges start with a value where (cp MOD 16) = 0,
+#  and end with a value where (cp MOD 16) = 15. In other words,
+#  the last hexadecimal digit of the start of range is ...0
+#  and the last hexadecimal digit of the end of range is ...F.
+#  This constraint on block ranges guarantees that allocations
+#  are done in terms of whole columns, and that code chart display
+#  never involves splitting columns in the charts.
+#
+#  All code points not explicitly listed for Block
+#  have the value No_Block.
+
+# Property:	Block
+#
+# @missing: 0000..10FFFF; No_Block
+
+0000..007F; Basic Latin
+0080..00FF; Latin-1 Supplement
+0100..017F; Latin Extended-A
+0180..024F; Latin Extended-B
+0250..02AF; IPA Extensions
+02B0..02FF; Spacing Modifier Letters
+0300..036F; Combining Diacritical Marks
+0370..03FF; Greek and Coptic
+0400..04FF; Cyrillic
+0500..052F; Cyrillic Supplement
+0530..058F; Armenian
+0590..05FF; Hebrew
+0600..06FF; Arabic
+0700..074F; Syriac
+0750..077F; Arabic Supplement
+0780..07BF; Thaana
+07C0..07FF; NKo
+0800..083F; Samaritan
+0840..085F; Mandaic
+0860..086F; Syriac Supplement
+08A0..08FF; Arabic Extended-A
+0900..097F; Devanagari
+0980..09FF; Bengali
+0A00..0A7F; Gurmukhi
+0A80..0AFF; Gujarati
+0B00..0B7F; Oriya
+0B80..0BFF; Tamil
+0C00..0C7F; Telugu
+0C80..0CFF; Kannada
+0D00..0D7F; Malayalam
+0D80..0DFF; Sinhala
+0E00..0E7F; Thai
+0E80..0EFF; Lao
+0F00..0FFF; Tibetan
+1000..109F; Myanmar
+10A0..10FF; Georgian
+1100..11FF; Hangul Jamo
+1200..137F; Ethiopic
+1380..139F; Ethiopic Supplement
+13A0..13FF; Cherokee
+1400..167F; Unified Canadian Aboriginal Syllabics
+1680..169F; Ogham
+16A0..16FF; Runic
+1700..171F; Tagalog
+1720..173F; Hanunoo
+1740..175F; Buhid
+1760..177F; Tagbanwa
+1780..17FF; Khmer
+1800..18AF; Mongolian
+18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
+1900..194F; Limbu
+1950..197F; Tai Le
+1980..19DF; New Tai Lue
+19E0..19FF; Khmer Symbols
+1A00..1A1F; Buginese
+1A20..1AAF; Tai Tham
+1AB0..1AFF; Combining Diacritical Marks Extended
+1B00..1B7F; Balinese
+1B80..1BBF; Sundanese
+1BC0..1BFF; Batak
+1C00..1C4F; Lepcha
+1C50..1C7F; Ol Chiki
+1C80..1C8F; Cyrillic Extended-C
+1CC0..1CCF; Sundanese Supplement
+1CD0..1CFF; Vedic Extensions
+1D00..1D7F; Phonetic Extensions
+1D80..1DBF; Phonetic Extensions Supplement
+1DC0..1DFF; Combining Diacritical Marks Supplement
+1E00..1EFF; Latin Extended Additional
+1F00..1FFF; Greek Extended
+2000..206F; General Punctuation
+2070..209F; Superscripts and Subscripts
+20A0..20CF; Currency Symbols
+20D0..20FF; Combining Diacritical Marks for Symbols
+2100..214F; Letterlike Symbols
+2150..218F; Number Forms
+2190..21FF; Arrows
+2200..22FF; Mathematical Operators
+2300..23FF; Miscellaneous Technical
+2400..243F; Control Pictures
+2440..245F; Optical Character Recognition
+2460..24FF; Enclosed Alphanumerics
+2500..257F; Box Drawing
+2580..259F; Block Elements
+25A0..25FF; Geometric Shapes
+2600..26FF; Miscellaneous Symbols
+2700..27BF; Dingbats
+27C0..27EF; Miscellaneous Mathematical Symbols-A
+27F0..27FF; Supplemental Arrows-A
+2800..28FF; Braille Patterns
+2900..297F; Supplemental Arrows-B
+2980..29FF; Miscellaneous Mathematical Symbols-B
+2A00..2AFF; Supplemental Mathematical Operators
+2B00..2BFF; Miscellaneous Symbols and Arrows
+2C00..2C5F; Glagolitic
+2C60..2C7F; Latin Extended-C
+2C80..2CFF; Coptic
+2D00..2D2F; Georgian Supplement
+2D30..2D7F; Tifinagh
+2D80..2DDF; Ethiopic Extended
+2DE0..2DFF; Cyrillic Extended-A
+2E00..2E7F; Supplemental Punctuation
+2E80..2EFF; CJK Radicals Supplement
+2F00..2FDF; Kangxi Radicals
+2FF0..2FFF; Ideographic Description Characters
+3000..303F; CJK Symbols and Punctuation
+3040..309F; Hiragana
+30A0..30FF; Katakana
+3100..312F; Bopomofo
+3130..318F; Hangul Compatibility Jamo
+3190..319F; Kanbun
+31A0..31BF; Bopomofo Extended
+31C0..31EF; CJK Strokes
+31F0..31FF; Katakana Phonetic Extensions
+3200..32FF; Enclosed CJK Letters and Months
+3300..33FF; CJK Compatibility
+3400..4DBF; CJK Unified Ideographs Extension A
+4DC0..4DFF; Yijing Hexagram Symbols
+4E00..9FFF; CJK Unified Ideographs
+A000..A48F; Yi Syllables
+A490..A4CF; Yi Radicals
+A4D0..A4FF; Lisu
+A500..A63F; Vai
+A640..A69F; Cyrillic Extended-B
+A6A0..A6FF; Bamum
+A700..A71F; Modifier Tone Letters
+A720..A7FF; Latin Extended-D
+A800..A82F; Syloti Nagri
+A830..A83F; Common Indic Number Forms
+A840..A87F; Phags-pa
+A880..A8DF; Saurashtra
+A8E0..A8FF; Devanagari Extended
+A900..A92F; Kayah Li
+A930..A95F; Rejang
+A960..A97F; Hangul Jamo Extended-A
+A980..A9DF; Javanese
+A9E0..A9FF; Myanmar Extended-B
+AA00..AA5F; Cham
+AA60..AA7F; Myanmar Extended-A
+AA80..AADF; Tai Viet
+AAE0..AAFF; Meetei Mayek Extensions
+AB00..AB2F; Ethiopic Extended-A
+AB30..AB6F; Latin Extended-E
+AB70..ABBF; Cherokee Supplement
+ABC0..ABFF; Meetei Mayek
+AC00..D7AF; Hangul Syllables
+D7B0..D7FF; Hangul Jamo Extended-B
+D800..DB7F; High Surrogates
+DB80..DBFF; High Private Use Surrogates
+DC00..DFFF; Low Surrogates
+E000..F8FF; Private Use Area
+F900..FAFF; CJK Compatibility Ideographs
+FB00..FB4F; Alphabetic Presentation Forms
+FB50..FDFF; Arabic Presentation Forms-A
+FE00..FE0F; Variation Selectors
+FE10..FE1F; Vertical Forms
+FE20..FE2F; Combining Half Marks
+FE30..FE4F; CJK Compatibility Forms
+FE50..FE6F; Small Form Variants
+FE70..FEFF; Arabic Presentation Forms-B
+FF00..FFEF; Halfwidth and Fullwidth Forms
+FFF0..FFFF; Specials
+10000..1007F; Linear B Syllabary
+10080..100FF; Linear B Ideograms
+10100..1013F; Aegean Numbers
+10140..1018F; Ancient Greek Numbers
+10190..101CF; Ancient Symbols
+101D0..101FF; Phaistos Disc
+10280..1029F; Lycian
+102A0..102DF; Carian
+102E0..102FF; Coptic Epact Numbers
+10300..1032F; Old Italic
+10330..1034F; Gothic
+10350..1037F; Old Permic
+10380..1039F; Ugaritic
+103A0..103DF; Old Persian
+10400..1044F; Deseret
+10450..1047F; Shavian
+10480..104AF; Osmanya
+104B0..104FF; Osage
+10500..1052F; Elbasan
+10530..1056F; Caucasian Albanian
+10600..1077F; Linear A
+10800..1083F; Cypriot Syllabary
+10840..1085F; Imperial Aramaic
+10860..1087F; Palmyrene
+10880..108AF; Nabataean
+108E0..108FF; Hatran
+10900..1091F; Phoenician
+10920..1093F; Lydian
+10980..1099F; Meroitic Hieroglyphs
+109A0..109FF; Meroitic Cursive
+10A00..10A5F; Kharoshthi
+10A60..10A7F; Old South Arabian
+10A80..10A9F; Old North Arabian
+10AC0..10AFF; Manichaean
+10B00..10B3F; Avestan
+10B40..10B5F; Inscriptional Parthian
+10B60..10B7F; Inscriptional Pahlavi
+10B80..10BAF; Psalter Pahlavi
+10C00..10C4F; Old Turkic
+10C80..10CFF; Old Hungarian
+10E60..10E7F; Rumi Numeral Symbols
+11000..1107F; Brahmi
+11080..110CF; Kaithi
+110D0..110FF; Sora Sompeng
+11100..1114F; Chakma
+11150..1117F; Mahajani
+11180..111DF; Sharada
+111E0..111FF; Sinhala Archaic Numbers
+11200..1124F; Khojki
+11280..112AF; Multani
+112B0..112FF; Khudawadi
+11300..1137F; Grantha
+11400..1147F; Newa
+11480..114DF; Tirhuta
+11580..115FF; Siddham
+11600..1165F; Modi
+11660..1167F; Mongolian Supplement
+11680..116CF; Takri
+11700..1173F; Ahom
+118A0..118FF; Warang Citi
+11A00..11A4F; Zanabazar Square
+11A50..11AAF; Soyombo
+11AC0..11AFF; Pau Cin Hau
+11C00..11C6F; Bhaiksuki
+11C70..11CBF; Marchen
+11D00..11D5F; Masaram Gondi
+12000..123FF; Cuneiform
+12400..1247F; Cuneiform Numbers and Punctuation
+12480..1254F; Early Dynastic Cuneiform
+13000..1342F; Egyptian Hieroglyphs
+14400..1467F; Anatolian Hieroglyphs
+16800..16A3F; Bamum Supplement
+16A40..16A6F; Mro
+16AD0..16AFF; Bassa Vah
+16B00..16B8F; Pahawh Hmong
+16F00..16F9F; Miao
+16FE0..16FFF; Ideographic Symbols and Punctuation
+17000..187FF; Tangut
+18800..18AFF; Tangut Components
+1B000..1B0FF; Kana Supplement
+1B100..1B12F; Kana Extended-A
+1B170..1B2FF; Nushu
+1BC00..1BC9F; Duployan
+1BCA0..1BCAF; Shorthand Format Controls
+1D000..1D0FF; Byzantine Musical Symbols
+1D100..1D1FF; Musical Symbols
+1D200..1D24F; Ancient Greek Musical Notation
+1D300..1D35F; Tai Xuan Jing Symbols
+1D360..1D37F; Counting Rod Numerals
+1D400..1D7FF; Mathematical Alphanumeric Symbols
+1D800..1DAAF; Sutton SignWriting
+1E000..1E02F; Glagolitic Supplement
+1E800..1E8DF; Mende Kikakui
+1E900..1E95F; Adlam
+1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
+1F000..1F02F; Mahjong Tiles
+1F030..1F09F; Domino Tiles
+1F0A0..1F0FF; Playing Cards
+1F100..1F1FF; Enclosed Alphanumeric Supplement
+1F200..1F2FF; Enclosed Ideographic Supplement
+1F300..1F5FF; Miscellaneous Symbols and Pictographs
+1F600..1F64F; Emoticons
+1F650..1F67F; Ornamental Dingbats
+1F680..1F6FF; Transport and Map Symbols
+1F700..1F77F; Alchemical Symbols
+1F780..1F7FF; Geometric Shapes Extended
+1F800..1F8FF; Supplemental Arrows-C
+1F900..1F9FF; Supplemental Symbols and Pictographs
+20000..2A6DF; CJK Unified Ideographs Extension B
+2A700..2B73F; CJK Unified Ideographs Extension C
+2B740..2B81F; CJK Unified Ideographs Extension D
+2B820..2CEAF; CJK Unified Ideographs Extension E
+2CEB0..2EBEF; CJK Unified Ideographs Extension F
+2F800..2FA1F; CJK Compatibility Ideographs Supplement
+E0000..E007F; Tags
+E0100..E01EF; Variation Selectors Supplement
+F0000..FFFFF; Supplementary Private Use Area-A
+100000..10FFFF; Supplementary Private Use Area-B
+
+# EOF

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/lang/Character/UnicodeBlock/CheckBlocks.java	Wed May 23 14:21:14 2018 +0800
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600 8202771
+ * @summary  Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
+ * @run main CheckBlocks
+ * @author John O'Conner
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.lang.Character.UnicodeBlock;
+import java.lang.reflect.Field;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Objects;
+
+public class CheckBlocks {
+
+    static boolean err = false;
+    static Class<?> clazzUnicodeBlock;
+
+    public static void main(String[] args) throws Exception {
+        generateBlockList();
+
+        try {
+            clazzUnicodeBlock = Class.forName("java.lang.Character$UnicodeBlock");
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException("Class.forName(\"java.lang.Character$UnicodeBlock\") failed.");
+        }
+
+        for (Block blk : blocks) {
+            test4830803_1(blk);
+            test4830803_2();
+            test4886934(blk);
+        }
+
+        test8202771();
+
+        if (err) {
+            throw new RuntimeException("Failed");
+        } else {
+            System.out.println("Passed");
+        }
+    }
+
+    /**
+     * Check that the UnicodeBlock forName() method works as expected.
+     */
+    private static void test4830803_1(Block blk) throws Exception {
+
+        /*
+         * Try 3 forms of block name in the forName() method. Each form should
+         * produce the same expected block.
+         */
+        String blkName = blk.getName();
+
+        // For backward compatibility
+        switch (blkName) {
+            case "COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS":
+                blkName = "COMBINING_MARKS_FOR_SYMBOLS";
+                System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS"
+                        + " is replaced with COMBINING_MARKS_FOR_SYMBOLS"
+                        + " for backward compatibility.");
+                break;
+            case "GREEK_AND_COPTIC":
+                blkName = "GREEK";
+                System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK"
+                        + " for backward compatibility.");
+                break;
+            case "CYRILLIC_SUPPLEMENT":
+                blkName = "CYRILLIC_SUPPLEMENTARY";
+                System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with"
+                        + " CYRILLIC_SUPPLEMENTARY for backward compatibility.");
+                break;
+            default:
+                break;
+        }
+
+        String expectedBlock = null;
+        try {
+            expectedBlock = clazzUnicodeBlock.getField(blkName).getName();
+        } catch (NoSuchFieldException | SecurityException e) {
+            System.err.println("Error: " + blkName + " was not found.");
+            err = true;
+            return;
+        }
+
+        String canonicalBlockName = blk.getOriginalName();
+        String idBlockName = expectedBlock;
+        String regexBlockName = toRegExString(canonicalBlockName);
+
+        if (regexBlockName == null) {
+            System.err.println("Error: Block name which was processed with regex was null.");
+            err = true;
+            return;
+        }
+
+        if (!expectedBlock.equals(UnicodeBlock.forName(canonicalBlockName).toString())) {
+            System.err.println("Error #1: UnicodeBlock.forName(\"" +
+                    canonicalBlockName + "\") returned wrong value.\n\tGot: " +
+                    UnicodeBlock.forName(canonicalBlockName) +
+                    "\n\tExpected: " + expectedBlock);
+            err = true;
+        }
+
+        if (!expectedBlock.equals(UnicodeBlock.forName(idBlockName).toString())) {
+            System.err.println("Error #2: UnicodeBlock.forName(\"" +
+                    idBlockName + "\") returned wrong value.\n\tGot: " +
+                    UnicodeBlock.forName(idBlockName) +
+                    "\n\tExpected: " + expectedBlock);
+            err = true;
+        }
+
+        if (!expectedBlock.equals(UnicodeBlock.forName(regexBlockName).toString())) {
+            System.err.println("Error #3: UnicodeBlock.forName(\"" +
+                    regexBlockName + "\") returned wrong value.\n\tGot: " +
+                    UnicodeBlock.forName(regexBlockName) +
+                    "\n\tExpected: " + expectedBlock);
+            err = true;
+        }
+    }
+
+    /**
+     * now try a bad block name. This should produce an IAE.
+     */
+    private static void test4830803_2() {
+        boolean threwExpected = false;
+
+        try {
+            UnicodeBlock block = UnicodeBlock.forName("notdefined");
+        }
+        catch(IllegalArgumentException e) {
+            threwExpected = true;
+        }
+
+        if (threwExpected == false) {
+            System.err.println("Error: UnicodeBlock.forName(\"notdefined\") should throw IllegalArgumentException.");
+            err = true;
+        }
+    }
+
+    /**
+     * Convert the argument to a block name form used by the regex package.
+     * That is, remove all spaces.
+     */
+    private static String toRegExString(String str) {
+        String[] tokens = null;
+        StringBuilder retStr = new StringBuilder();
+        try {
+            tokens = str.split(" ");
+        }
+        catch(java.util.regex.PatternSyntaxException e) {
+            return null;
+        }
+        for(int x=0; x < tokens.length; ++x) {
+            retStr.append(tokens[x]);
+        }
+        return retStr.toString();
+    }
+
+    private static void test4886934(Block blk) {
+        String blkName = blk.getName();
+        String blkOrigName = blk.getOriginalName();
+        UnicodeBlock block;
+        String blockName;
+
+        // For backward compatibility
+        switch (blkName) {
+            case "COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS":
+                blkName = "COMBINING_MARKS_FOR_SYMBOLS";
+                System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS"
+                        + " is replaced with COMBINING_MARKS_FOR_SYMBOLS"
+                        + " for backward compatibility.");
+                break;
+            case "GREEK_AND_COPTIC":
+                blkName = "GREEK";
+                System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK"
+                        + " for backward compatibility.");
+                break;
+            case "CYRILLIC_SUPPLEMENT":
+                blkName = "CYRILLIC_SUPPLEMENTARY";
+                System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with"
+                        + " CYRILLIC_SUPPLEMENTARY for backward compatibility.");
+                break;
+            default:
+                break;
+        }
+
+        for (int ch = blk.getBegin(); ch <= blk.getEnd(); ch++) {
+            block = UnicodeBlock.of(ch);
+            if (block == null) {
+                System.err.println("Error: The block for " + blkName
+                        + " is missing. Please check java.lang.Character.UnicodeBlock.");
+                err = true;
+                break;
+            }
+            blockName = block.toString();
+            if (!blockName.equals(blkName)) {
+                System.err.println("Error: Character(0x"
+                        + Integer.toHexString(ch).toUpperCase()
+                        + ") should be in \"" + blkName + "\" block "
+                        + "(Block name is \"" + blkOrigName + "\")"
+                        + " but found in \"" + blockName + "\" block.");
+                err = true;
+            }
+        }
+    }
+
+    /**
+     * Check if every Field of Character.UnicodeBlock is a valid Unicode Block.
+     */
+    private static void test8202771() {
+        Field[] fields = clazzUnicodeBlock.getFields();
+
+        for (Field f : fields) {
+            // Handle Deprecated field "SURROGATES_AREA".
+            if (f.getAnnotation(Deprecated.class) != null) {
+                continue;
+            }
+
+            String blkName = f.getName();
+            switch (blkName) {
+                case "COMBINING_MARKS_FOR_SYMBOLS":
+                    validateBlock("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS");
+                    break;
+                case "GREEK":
+                    validateBlock("GREEK_AND_COPTIC");
+                    break;
+                case "CYRILLIC_SUPPLEMENTARY":
+                    validateBlock("CYRILLIC_SUPPLEMENT");
+                    break;
+                default:
+                    validateBlock(blkName);
+                    break;
+            }
+        }
+    }
+
+    private static void validateBlock(String blkName) {
+        for (Block block : blocks) {
+            String blockName = block.getName();
+            if (blockName.equals(blkName)) {
+                return;
+            }
+        }
+        err = true;
+        System.err.println(blkName + " is not a valid Unicode Block.");
+    }
+
+    // List of all Unicode blocks, their start, and end codepoints.
+    public static HashSet<Block> blocks = new HashSet<>();
+
+    private static void generateBlockList() throws Exception {
+        File blockData = new File(System.getProperty("test.src", "."),
+                "Blocks.txt");
+        try (BufferedReader f = new BufferedReader(new FileReader(blockData))) {
+            String line;
+            while ((line = f.readLine()) != null) {
+                if (line.length() == 0 || line.charAt(0) == '#') {
+                    continue;
+                }
+
+                int index1 = line.indexOf('.');
+                int begin = Integer.parseInt(line.substring(0, index1), 16);
+                int index2 = line.indexOf(';');
+                int end = Integer.parseInt(line.substring(index1 + 2, index2), 16);
+                String name = line.substring(index2 + 1).trim();
+
+                System.out.println("  Adding a Block(" + Integer.toHexString(begin) + ", " + Integer.toHexString(end)
+                        + ", " + name + ")");
+                blocks.add(new Block(begin, end, name));
+            }
+        }
+    }
+}
+
+/**
+ * One block entry: the first and last code point of the range, the block
+ * name as given to the constructor, and an identifier form of that name
+ * (spaces and hyphens replaced by underscores, upper-cased).
+ *
+ * Instances are kept in a HashSet, so equals() and hashCode() are defined
+ * together over the same four fields. The fields must not be changed while
+ * an instance is stored in a hash-based collection.
+ */
+class Block {
+
+    int blockBegin, blockEnd;
+    String blockName, originalBlockName;
+
+    /** Creates an empty block: range 0..0 with both names null. */
+    public Block() {
+        blockBegin = 0;
+        blockEnd = 0;
+        blockName = null;
+        originalBlockName = null;
+    }
+
+    /**
+     * @param begin first code point of the block
+     * @param end   last code point of the block
+     * @param name  block name; kept unchanged as the original name and
+     *              converted to the identifier form returned by getName()
+     */
+    public Block(int begin, int end, String name) {
+        blockBegin = begin;
+        blockEnd = end;
+        blockName = name.replaceAll("[ -]", "_").toUpperCase(Locale.ENGLISH);
+        originalBlockName = name;
+    }
+
+    public int getBegin() {
+        return blockBegin;
+    }
+
+    public int getEnd() {
+        return blockEnd;
+    }
+
+    /** Returns the identifier form of the name, e.g. "LATIN_EXTENDED_A". */
+    public String getName() {
+        return blockName;
+    }
+
+    /** Returns the name exactly as passed to the constructor. */
+    public String getOriginalName() {
+        return originalBlockName;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) return true;
+        if (!(obj instanceof Block)) return false;
+
+        Block other = (Block)obj;
+        // Objects.equals tolerates the null names left by the no-arg constructor.
+        return other.blockBegin == blockBegin &&
+                other.blockEnd == blockEnd &&
+                Objects.equals(other.blockName, blockName) &&
+                Objects.equals(other.originalBlockName, originalBlockName);
+    }
+
+    @Override
+    public int hashCode() {
+        // Must agree with equals(): equal blocks hash to the same bucket.
+        return Objects.hash(blockBegin, blockEnd, blockName, originalBlockName);
+    }
+}

author	mli
	Wed, 23 May 2018 14:21:14 +0800
changeset 50230	cae567ae015d
parent 50229	6b29ef846c5c
child 50231	10b14c9ee78d

test/jdk/java/lang/Character/Blocks.txt		file \| annotate \| diff \| comparison \| revisions
test/jdk/java/lang/Character/CharPropTest.java		file \| annotate \| diff \| comparison \| revisions
test/jdk/java/lang/Character/CheckBlocks.java		file \| annotate \| diff \| comparison \| revisions
test/jdk/java/lang/Character/TestISOControls.java		file \| annotate \| diff \| comparison \| revisions
test/jdk/java/lang/Character/UnicodeBlock/Blocks.txt		file \| annotate \| diff \| comparison \| revisions
test/jdk/java/lang/Character/UnicodeBlock/CheckBlocks.java		file \| annotate \| diff \| comparison \| revisions