6945564: Unicode script support in Character class
authorsherman
Tue, 18 May 2010 15:36:47 -0700
changeset 5610 fd2427610c7f
parent 5609 cc144006eb2a
child 5611 b4ef86f105ec
6945564: Unicode script support in Character class 6948903: Make Unicode scripts available for use in regular expressions Summary: added Unicode script support Reviewed-by: martin
jdk/make/java/java/FILES_java.gmk
jdk/make/java/java/Makefile
jdk/make/tools/UnicodeData/Scripts.txt
jdk/make/tools/src/build/tools/generatecharacter/CharacterName.java
jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java
jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java
jdk/src/share/classes/java/lang/Character.java
jdk/src/share/classes/java/lang/CharacterName.java
jdk/src/share/classes/java/util/regex/Pattern.java
jdk/test/java/lang/Character/CheckScript.java
jdk/test/java/lang/Character/Scripts.txt
jdk/test/java/util/regex/RegExTest.java
--- a/jdk/make/java/java/FILES_java.gmk	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/make/java/java/FILES_java.gmk	Tue May 18 15:36:47 2010 -0700
@@ -34,6 +34,7 @@
     java/lang/Thread.java \
     java/lang/Character.java \
     java/lang/CharacterData.java \
+    java/lang/CharacterName.java \
     sun/misc/ASCIICaseInsensitiveComparator.java \
     sun/misc/VM.java \
     sun/misc/Signal.java \
--- a/jdk/make/java/java/Makefile	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/make/java/java/Makefile	Tue May 18 15:36:47 2010 -0700
@@ -385,6 +385,27 @@
 	$(RM) $(GENSRCDIR)/java/lang/CharacterDataPrivateUse.java
 
 #
+# Rules to generate classes/java/lang/uniName.dat
+#
+
+
+
+UNINAME = $(CLASSBINDIR)/java/lang/uniName.dat
+GENERATEUNINAME_JARFILE = $(BUILDTOOLJARDIR)/generatecharacter.jar
+
+build: $(UNINAME)
+
+$(UNINAME): $(UNICODEDATA)/UnicodeData.txt \
+	$(GENERATECHARACTER_JARFILE)
+	@$(prep-target)
+	$(BOOT_JAVA_CMD) -classpath $(GENERATECHARACTER_JARFILE) \
+	build.tools.generatecharacter.CharacterName \
+		$(UNICODEDATA)/UnicodeData.txt $(UNINAME)
+
+clean:: 
+	$(RM) $(UNINAME)
+
+#
 # End of rules to create $(GENSRCDIR)/java/lang/CharacterDataXX.java
 #
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/make/tools/UnicodeData/Scripts.txt	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,1972 @@
+# Scripts-5.2.0.txt
+# Date: 2009-08-22, 04:58:43 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+
+# ================================================
+
+# Property:	Script
+
+#  All code points not explicitly listed for Script
+#  have the value Unknown (Zzzz).
+
+# @missing: 0000..10FFFF; Unknown
+
+# ================================================
+
+0000..001F    ; Common # Cc  [32] <control-0000>..<control-001F>
+0020          ; Common # Zs       SPACE
+0021..0023    ; Common # Po   [3] EXCLAMATION MARK..NUMBER SIGN
+0024          ; Common # Sc       DOLLAR SIGN
+0025..0027    ; Common # Po   [3] PERCENT SIGN..APOSTROPHE
+0028          ; Common # Ps       LEFT PARENTHESIS
+0029          ; Common # Pe       RIGHT PARENTHESIS
+002A          ; Common # Po       ASTERISK
+002B          ; Common # Sm       PLUS SIGN
+002C          ; Common # Po       COMMA
+002D          ; Common # Pd       HYPHEN-MINUS
+002E..002F    ; Common # Po   [2] FULL STOP..SOLIDUS
+0030..0039    ; Common # Nd  [10] DIGIT ZERO..DIGIT NINE
+003A..003B    ; Common # Po   [2] COLON..SEMICOLON
+003C..003E    ; Common # Sm   [3] LESS-THAN SIGN..GREATER-THAN SIGN
+003F..0040    ; Common # Po   [2] QUESTION MARK..COMMERCIAL AT
+005B          ; Common # Ps       LEFT SQUARE BRACKET
+005C          ; Common # Po       REVERSE SOLIDUS
+005D          ; Common # Pe       RIGHT SQUARE BRACKET
+005E          ; Common # Sk       CIRCUMFLEX ACCENT
+005F          ; Common # Pc       LOW LINE
+0060          ; Common # Sk       GRAVE ACCENT
+007B          ; Common # Ps       LEFT CURLY BRACKET
+007C          ; Common # Sm       VERTICAL LINE
+007D          ; Common # Pe       RIGHT CURLY BRACKET
+007E          ; Common # Sm       TILDE
+007F..009F    ; Common # Cc  [33] <control-007F>..<control-009F>
+00A0          ; Common # Zs       NO-BREAK SPACE
+00A1          ; Common # Po       INVERTED EXCLAMATION MARK
+00A2..00A5    ; Common # Sc   [4] CENT SIGN..YEN SIGN
+00A6..00A7    ; Common # So   [2] BROKEN BAR..SECTION SIGN
+00A8          ; Common # Sk       DIAERESIS
+00A9          ; Common # So       COPYRIGHT SIGN
+00AB          ; Common # Pi       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00AC          ; Common # Sm       NOT SIGN
+00AD          ; Common # Cf       SOFT HYPHEN
+00AE          ; Common # So       REGISTERED SIGN
+00AF          ; Common # Sk       MACRON
+00B0          ; Common # So       DEGREE SIGN
+00B1          ; Common # Sm       PLUS-MINUS SIGN
+00B2..00B3    ; Common # No   [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
+00B4          ; Common # Sk       ACUTE ACCENT
+00B5          ; Common # L&       MICRO SIGN
+00B6          ; Common # So       PILCROW SIGN
+00B7          ; Common # Po       MIDDLE DOT
+00B8          ; Common # Sk       CEDILLA
+00B9          ; Common # No       SUPERSCRIPT ONE
+00BB          ; Common # Pf       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BC..00BE    ; Common # No   [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
+00BF          ; Common # Po       INVERTED QUESTION MARK
+00D7          ; Common # Sm       MULTIPLICATION SIGN
+00F7          ; Common # Sm       DIVISION SIGN
+02B9..02C1    ; Common # Lm   [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2..02C5    ; Common # Sk   [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
+02C6..02D1    ; Common # Lm  [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02D2..02DF    ; Common # Sk  [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
+02E5..02EB    ; Common # Sk   [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+02EC          ; Common # Lm       MODIFIER LETTER VOICING
+02ED          ; Common # Sk       MODIFIER LETTER UNASPIRATED
+02EE          ; Common # Lm       MODIFIER LETTER DOUBLE APOSTROPHE
+02EF..02FF    ; Common # Sk  [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0374          ; Common # Lm       GREEK NUMERAL SIGN
+037E          ; Common # Po       GREEK QUESTION MARK
+0385          ; Common # Sk       GREEK DIALYTIKA TONOS
+0387          ; Common # Po       GREEK ANO TELEIA
+0589          ; Common # Po       ARMENIAN FULL STOP
+0600..0603    ; Common # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
+060C          ; Common # Po       ARABIC COMMA
+061B          ; Common # Po       ARABIC SEMICOLON
+061F          ; Common # Po       ARABIC QUESTION MARK
+0640          ; Common # Lm       ARABIC TATWEEL
+0660..0669    ; Common # Nd  [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+06DD          ; Common # Cf       ARABIC END OF AYAH
+0964..0965    ; Common # Po   [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0970          ; Common # Po       DEVANAGARI ABBREVIATION SIGN
+0CF1..0CF2    ; Common # So   [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+0E3F          ; Common # Sc       THAI CURRENCY SYMBOL BAHT
+0FD5..0FD8    ; Common # So   [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
+10FB          ; Common # Po       GEORGIAN PARAGRAPH SEPARATOR
+16EB..16ED    ; Common # Po   [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+1735..1736    ; Common # Po   [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+1802..1803    ; Common # Po   [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
+1805          ; Common # Po       MONGOLIAN FOUR DOTS
+1CD3          ; Common # Po       VEDIC SIGN NIHSHVASA
+1CE1          ; Common # Mc       VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CE9..1CEC    ; Common # Lo   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CEE..1CF1    ; Common # Lo   [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
+1CF2          ; Common # Mc       VEDIC SIGN ARDHAVISARGA
+2000..200A    ; Common # Zs  [11] EN QUAD..HAIR SPACE
+200B          ; Common # Cf       ZERO WIDTH SPACE
+200E..200F    ; Common # Cf   [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+2010..2015    ; Common # Pd   [6] HYPHEN..HORIZONTAL BAR
+2016..2017    ; Common # Po   [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
+2018          ; Common # Pi       LEFT SINGLE QUOTATION MARK
+2019          ; Common # Pf       RIGHT SINGLE QUOTATION MARK
+201A          ; Common # Ps       SINGLE LOW-9 QUOTATION MARK
+201B..201C    ; Common # Pi   [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
+201D          ; Common # Pf       RIGHT DOUBLE QUOTATION MARK
+201E          ; Common # Ps       DOUBLE LOW-9 QUOTATION MARK
+201F          ; Common # Pi       DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2020..2027    ; Common # Po   [8] DAGGER..HYPHENATION POINT
+2028          ; Common # Zl       LINE SEPARATOR
+2029          ; Common # Zp       PARAGRAPH SEPARATOR
+202A..202E    ; Common # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+202F          ; Common # Zs       NARROW NO-BREAK SPACE
+2030..2038    ; Common # Po   [9] PER MILLE SIGN..CARET
+2039          ; Common # Pi       SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A          ; Common # Pf       SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+203B..203E    ; Common # Po   [4] REFERENCE MARK..OVERLINE
+203F..2040    ; Common # Pc   [2] UNDERTIE..CHARACTER TIE
+2041..2043    ; Common # Po   [3] CARET INSERTION POINT..HYPHEN BULLET
+2044          ; Common # Sm       FRACTION SLASH
+2045          ; Common # Ps       LEFT SQUARE BRACKET WITH QUILL
+2046          ; Common # Pe       RIGHT SQUARE BRACKET WITH QUILL
+2047..2051    ; Common # Po  [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+2052          ; Common # Sm       COMMERCIAL MINUS SIGN
+2053          ; Common # Po       SWUNG DASH
+2054          ; Common # Pc       INVERTED UNDERTIE
+2055..205E    ; Common # Po  [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+205F          ; Common # Zs       MEDIUM MATHEMATICAL SPACE
+2060..2064    ; Common # Cf   [5] WORD JOINER..INVISIBLE PLUS
+206A..206F    ; Common # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2070          ; Common # No       SUPERSCRIPT ZERO
+2074..2079    ; Common # No   [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
+207A..207C    ; Common # Sm   [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+207D          ; Common # Ps       SUPERSCRIPT LEFT PARENTHESIS
+207E          ; Common # Pe       SUPERSCRIPT RIGHT PARENTHESIS
+2080..2089    ; Common # No  [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
+208A..208C    ; Common # Sm   [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+208D          ; Common # Ps       SUBSCRIPT LEFT PARENTHESIS
+208E          ; Common # Pe       SUBSCRIPT RIGHT PARENTHESIS
+20A0..20B8    ; Common # Sc  [25] EURO-CURRENCY SIGN..TENGE SIGN
+2100..2101    ; Common # So   [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
+2102          ; Common # L&       DOUBLE-STRUCK CAPITAL C
+2103..2106    ; Common # So   [4] DEGREE CELSIUS..CADA UNA
+2107          ; Common # L&       EULER CONSTANT
+2108..2109    ; Common # So   [2] SCRUPLE..DEGREE FAHRENHEIT
+210A..2113    ; Common # L&  [10] SCRIPT SMALL G..SCRIPT SMALL L
+2114          ; Common # So       L B BAR SYMBOL
+2115          ; Common # L&       DOUBLE-STRUCK CAPITAL N
+2116..2118    ; Common # So   [3] NUMERO SIGN..SCRIPT CAPITAL P
+2119..211D    ; Common # L&   [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+211E..2123    ; Common # So   [6] PRESCRIPTION TAKE..VERSICLE
+2124          ; Common # L&       DOUBLE-STRUCK CAPITAL Z
+2125          ; Common # So       OUNCE SIGN
+2127          ; Common # So       INVERTED OHM SIGN
+2128          ; Common # L&       BLACK-LETTER CAPITAL Z
+2129          ; Common # So       TURNED GREEK SMALL LETTER IOTA
+212C..212D    ; Common # L&   [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
+212E          ; Common # So       ESTIMATED SYMBOL
+212F..2131    ; Common # L&   [3] SCRIPT SMALL E..SCRIPT CAPITAL F
+2133..2134    ; Common # L&   [2] SCRIPT CAPITAL M..SCRIPT SMALL O
+2135..2138    ; Common # Lo   [4] ALEF SYMBOL..DALET SYMBOL
+2139          ; Common # L&       INFORMATION SOURCE
+213A..213B    ; Common # So   [2] ROTATED CAPITAL Q..FACSIMILE SIGN
+213C..213F    ; Common # L&   [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+2140..2144    ; Common # Sm   [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
+2145..2149    ; Common # L&   [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+214A          ; Common # So       PROPERTY LINE
+214B          ; Common # Sm       TURNED AMPERSAND
+214C..214D    ; Common # So   [2] PER SIGN..AKTIESELSKAB
+214F          ; Common # So       SYMBOL FOR SAMARITAN SOURCE
+2150..215F    ; Common # No  [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
+2189          ; Common # No       VULGAR FRACTION ZERO THIRDS
+2190..2194    ; Common # Sm   [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+2195..2199    ; Common # So   [5] UP DOWN ARROW..SOUTH WEST ARROW
+219A..219B    ; Common # Sm   [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+219C..219F    ; Common # So   [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A0          ; Common # Sm       RIGHTWARDS TWO HEADED ARROW
+21A1..21A2    ; Common # So   [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A3          ; Common # Sm       RIGHTWARDS ARROW WITH TAIL
+21A4..21A5    ; Common # So   [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A6          ; Common # Sm       RIGHTWARDS ARROW FROM BAR
+21A7..21AD    ; Common # So   [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+21AE          ; Common # Sm       LEFT RIGHT ARROW WITH STROKE
+21AF..21CD    ; Common # So  [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+21CE..21CF    ; Common # Sm   [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1    ; Common # So   [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D2          ; Common # Sm       RIGHTWARDS DOUBLE ARROW
+21D3          ; Common # So       DOWNWARDS DOUBLE ARROW
+21D4          ; Common # Sm       LEFT RIGHT DOUBLE ARROW
+21D5..21F3    ; Common # So  [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
+21F4..22FF    ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
+2300..2307    ; Common # So   [8] DIAMETER SIGN..WAVY LINE
+2308..230B    ; Common # Sm   [4] LEFT CEILING..RIGHT FLOOR
+230C..231F    ; Common # So  [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
+2320..2321    ; Common # Sm   [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+2322..2328    ; Common # So   [7] FROWN..KEYBOARD
+2329          ; Common # Ps       LEFT-POINTING ANGLE BRACKET
+232A          ; Common # Pe       RIGHT-POINTING ANGLE BRACKET
+232B..237B    ; Common # So  [81] ERASE TO THE LEFT..NOT CHECK MARK
+237C          ; Common # Sm       RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+237D..239A    ; Common # So  [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+239B..23B3    ; Common # Sm  [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+23B4..23DB    ; Common # So  [40] TOP SQUARE BRACKET..FUSE
+23DC..23E1    ; Common # Sm   [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+23E2..23E8    ; Common # So   [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
+2400..2426    ; Common # So  [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2440..244A    ; Common # So  [11] OCR HOOK..OCR DOUBLE BACKSLASH
+2460..249B    ; Common # No  [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+249C..24E9    ; Common # So  [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+24EA..24FF    ; Common # No  [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
+2500..25B6    ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
+25B7          ; Common # Sm       WHITE RIGHT-POINTING TRIANGLE
+25B8..25C0    ; Common # So   [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
+25C1          ; Common # Sm       WHITE LEFT-POINTING TRIANGLE
+25C2..25F7    ; Common # So  [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+25F8..25FF    ; Common # Sm   [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
+2600..266E    ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
+266F          ; Common # Sm       MUSIC SHARP SIGN
+2670..26CD    ; Common # So  [94] WEST SYRIAC CROSS..DISABLED CAR
+26CF..26E1    ; Common # So  [19] PICK..RESTRICTED LEFT ENTRY-2
+26E3          ; Common # So       HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
+26E8..26FF    ; Common # So  [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
+2701..2704    ; Common # So   [4] UPPER BLADE SCISSORS..WHITE SCISSORS
+2706..2709    ; Common # So   [4] TELEPHONE LOCATION SIGN..ENVELOPE
+270C..2727    ; Common # So  [28] VICTORY HAND..WHITE FOUR POINTED STAR
+2729..274B    ; Common # So  [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+274D          ; Common # So       SHADOWED WHITE CIRCLE
+274F..2752    ; Common # So   [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
+2756..275E    ; Common # So   [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
+2761..2767    ; Common # So   [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
+2768          ; Common # Ps       MEDIUM LEFT PARENTHESIS ORNAMENT
+2769          ; Common # Pe       MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A          ; Common # Ps       MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B          ; Common # Pe       MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C          ; Common # Ps       MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D          ; Common # Pe       MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E          ; Common # Ps       HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F          ; Common # Pe       HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770          ; Common # Ps       HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771          ; Common # Pe       HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772          ; Common # Ps       LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773          ; Common # Pe       LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774          ; Common # Ps       MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775          ; Common # Pe       MEDIUM RIGHT CURLY BRACKET ORNAMENT
+2776..2793    ; Common # No  [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
+2794          ; Common # So       HEAVY WIDE-HEADED RIGHTWARDS ARROW
+2798..27AF    ; Common # So  [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
+27B1..27BE    ; Common # So  [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
+27C0..27C4    ; Common # Sm   [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
+27C5          ; Common # Ps       LEFT S-SHAPED BAG DELIMITER
+27C6          ; Common # Pe       RIGHT S-SHAPED BAG DELIMITER
+27C7..27CA    ; Common # Sm   [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
+27CC          ; Common # Sm       LONG DIVISION
+27D0..27E5    ; Common # Sm  [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
+27E6          ; Common # Ps       MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7          ; Common # Pe       MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8          ; Common # Ps       MATHEMATICAL LEFT ANGLE BRACKET
+27E9          ; Common # Pe       MATHEMATICAL RIGHT ANGLE BRACKET
+27EA          ; Common # Ps       MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB          ; Common # Pe       MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC          ; Common # Ps       MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED          ; Common # Pe       MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE          ; Common # Ps       MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF          ; Common # Pe       MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+27F0..27FF    ; Common # Sm  [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
+2900..2982    ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
+2983          ; Common # Ps       LEFT WHITE CURLY BRACKET
+2984          ; Common # Pe       RIGHT WHITE CURLY BRACKET
+2985          ; Common # Ps       LEFT WHITE PARENTHESIS
+2986          ; Common # Pe       RIGHT WHITE PARENTHESIS
+2987          ; Common # Ps       Z NOTATION LEFT IMAGE BRACKET
+2988          ; Common # Pe       Z NOTATION RIGHT IMAGE BRACKET
+2989          ; Common # Ps       Z NOTATION LEFT BINDING BRACKET
+298A          ; Common # Pe       Z NOTATION RIGHT BINDING BRACKET
+298B          ; Common # Ps       LEFT SQUARE BRACKET WITH UNDERBAR
+298C          ; Common # Pe       RIGHT SQUARE BRACKET WITH UNDERBAR
+298D          ; Common # Ps       LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E          ; Common # Pe       RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F          ; Common # Ps       LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990          ; Common # Pe       RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991          ; Common # Ps       LEFT ANGLE BRACKET WITH DOT
+2992          ; Common # Pe       RIGHT ANGLE BRACKET WITH DOT
+2993          ; Common # Ps       LEFT ARC LESS-THAN BRACKET
+2994          ; Common # Pe       RIGHT ARC GREATER-THAN BRACKET
+2995          ; Common # Ps       DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996          ; Common # Pe       DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997          ; Common # Ps       LEFT BLACK TORTOISE SHELL BRACKET
+2998          ; Common # Pe       RIGHT BLACK TORTOISE SHELL BRACKET
+2999..29D7    ; Common # Sm  [63] DOTTED FENCE..BLACK HOURGLASS
+29D8          ; Common # Ps       LEFT WIGGLY FENCE
+29D9          ; Common # Pe       RIGHT WIGGLY FENCE
+29DA          ; Common # Ps       LEFT DOUBLE WIGGLY FENCE
+29DB          ; Common # Pe       RIGHT DOUBLE WIGGLY FENCE
+29DC..29FB    ; Common # Sm  [32] INCOMPLETE INFINITY..TRIPLE PLUS
+29FC          ; Common # Ps       LEFT-POINTING CURVED ANGLE BRACKET
+29FD          ; Common # Pe       RIGHT-POINTING CURVED ANGLE BRACKET
+29FE..2AFF    ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
+2B00..2B2F    ; Common # So  [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
+2B30..2B44    ; Common # Sm  [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
+2B45..2B46    ; Common # So   [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
+2B47..2B4C    ; Common # Sm   [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+2B50..2B59    ; Common # So  [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
+2E00..2E01    ; Common # Po   [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
+2E02          ; Common # Pi       LEFT SUBSTITUTION BRACKET
+2E03          ; Common # Pf       RIGHT SUBSTITUTION BRACKET
+2E04          ; Common # Pi       LEFT DOTTED SUBSTITUTION BRACKET
+2E05          ; Common # Pf       RIGHT DOTTED SUBSTITUTION BRACKET
+2E06..2E08    ; Common # Po   [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
+2E09          ; Common # Pi       LEFT TRANSPOSITION BRACKET
+2E0A          ; Common # Pf       RIGHT TRANSPOSITION BRACKET
+2E0B          ; Common # Po       RAISED SQUARE
+2E0C          ; Common # Pi       LEFT RAISED OMISSION BRACKET
+2E0D          ; Common # Pf       RIGHT RAISED OMISSION BRACKET
+2E0E..2E16    ; Common # Po   [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
+2E17          ; Common # Pd       DOUBLE OBLIQUE HYPHEN
+2E18..2E19    ; Common # Po   [2] INVERTED INTERROBANG..PALM BRANCH
+2E1A          ; Common # Pd       HYPHEN WITH DIAERESIS
+2E1B          ; Common # Po       TILDE WITH RING ABOVE
+2E1C          ; Common # Pi       LEFT LOW PARAPHRASE BRACKET
+2E1D          ; Common # Pf       RIGHT LOW PARAPHRASE BRACKET
+2E1E..2E1F    ; Common # Po   [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
+2E20          ; Common # Pi       LEFT VERTICAL BAR WITH QUILL
+2E21          ; Common # Pf       RIGHT VERTICAL BAR WITH QUILL
+2E22          ; Common # Ps       TOP LEFT HALF BRACKET
+2E23          ; Common # Pe       TOP RIGHT HALF BRACKET
+2E24          ; Common # Ps       BOTTOM LEFT HALF BRACKET
+2E25          ; Common # Pe       BOTTOM RIGHT HALF BRACKET
+2E26          ; Common # Ps       LEFT SIDEWAYS U BRACKET
+2E27          ; Common # Pe       RIGHT SIDEWAYS U BRACKET
+2E28          ; Common # Ps       LEFT DOUBLE PARENTHESIS
+2E29          ; Common # Pe       RIGHT DOUBLE PARENTHESIS
+2E2A..2E2E    ; Common # Po   [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
+2E2F          ; Common # Lm       VERTICAL TILDE
+2E30..2E31    ; Common # Po   [2] RING POINT..WORD SEPARATOR MIDDLE DOT
+2FF0..2FFB    ; Common # So  [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+3000          ; Common # Zs       IDEOGRAPHIC SPACE
+3001..3003    ; Common # Po   [3] IDEOGRAPHIC COMMA..DITTO MARK
+3004          ; Common # So       JAPANESE INDUSTRIAL STANDARD SYMBOL
+3006          ; Common # Lo       IDEOGRAPHIC CLOSING MARK
+3008          ; Common # Ps       LEFT ANGLE BRACKET
+3009          ; Common # Pe       RIGHT ANGLE BRACKET
+300A          ; Common # Ps       LEFT DOUBLE ANGLE BRACKET
+300B          ; Common # Pe       RIGHT DOUBLE ANGLE BRACKET
+300C          ; Common # Ps       LEFT CORNER BRACKET
+300D          ; Common # Pe       RIGHT CORNER BRACKET
+300E          ; Common # Ps       LEFT WHITE CORNER BRACKET
+300F          ; Common # Pe       RIGHT WHITE CORNER BRACKET
+3010          ; Common # Ps       LEFT BLACK LENTICULAR BRACKET
+3011          ; Common # Pe       RIGHT BLACK LENTICULAR BRACKET
+3012..3013    ; Common # So   [2] POSTAL MARK..GETA MARK
+3014          ; Common # Ps       LEFT TORTOISE SHELL BRACKET
+3015          ; Common # Pe       RIGHT TORTOISE SHELL BRACKET
+3016          ; Common # Ps       LEFT WHITE LENTICULAR BRACKET
+3017          ; Common # Pe       RIGHT WHITE LENTICULAR BRACKET
+3018          ; Common # Ps       LEFT WHITE TORTOISE SHELL BRACKET
+3019          ; Common # Pe       RIGHT WHITE TORTOISE SHELL BRACKET
+301A          ; Common # Ps       LEFT WHITE SQUARE BRACKET
+301B          ; Common # Pe       RIGHT WHITE SQUARE BRACKET
+301C          ; Common # Pd       WAVE DASH
+301D          ; Common # Ps       REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F    ; Common # Pe   [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+3020          ; Common # So       POSTAL MARK FACE
+3030          ; Common # Pd       WAVY DASH
+3031..3035    ; Common # Lm   [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+3036..3037    ; Common # So   [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+303C          ; Common # Lo       MASU MARK
+303D          ; Common # Po       PART ALTERNATION MARK
+303E..303F    ; Common # So   [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
+309B..309C    ; Common # Sk   [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+30A0          ; Common # Pd       KATAKANA-HIRAGANA DOUBLE HYPHEN
+30FB          ; Common # Po       KATAKANA MIDDLE DOT
+30FC          ; Common # Lm       KATAKANA-HIRAGANA PROLONGED SOUND MARK
+3190..3191    ; Common # So   [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+3192..3195    ; Common # No   [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+3196..319F    ; Common # So  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31C0..31E3    ; Common # So  [36] CJK STROKE T..CJK STROKE Q
+3220..3229    ; Common # No  [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+322A..3250    ; Common # So  [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN
+3251..325F    ; Common # No  [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
+327F          ; Common # So       KOREAN STANDARD SYMBOL
+3280..3289    ; Common # No  [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+328A..32B0    ; Common # So  [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+32B1..32BF    ; Common # No  [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
+32C0..32CF    ; Common # So  [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
+3358..33FF    ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
+4DC0..4DFF    ; Common # So  [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+A700..A716    ; Common # Sk  [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
+A717..A71F    ; Common # Lm   [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A720..A721    ; Common # Sk   [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
+A788          ; Common # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A789..A78A    ; Common # Sk   [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
+A830..A835    ; Common # No   [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
+A836..A837    ; Common # So   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+A838          ; Common # Sc       NORTH INDIC RUPEE MARK
+A839          ; Common # So       NORTH INDIC QUANTITY MARK
+FD3E          ; Common # Ps       ORNATE LEFT PARENTHESIS
+FD3F          ; Common # Pe       ORNATE RIGHT PARENTHESIS
+FDFD          ; Common # So       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FE10..FE16    ; Common # Po   [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
+FE17          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
+FE18          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
+FE19          ; Common # Po       PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+FE30          ; Common # Po       PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+FE31..FE32    ; Common # Pd   [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+FE33..FE34    ; Common # Pc   [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+FE35          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+FE36          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+FE37          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+FE38          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+FE39          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
+FE3A          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
+FE3B          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
+FE3C          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
+FE3D          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
+FE3E          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
+FE3F          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
+FE40          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
+FE41          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+FE42          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+FE43          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+FE44          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+FE45..FE46    ; Common # Po   [2] SESAME DOT..WHITE SESAME DOT
+FE47          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
+FE48          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
+FE49..FE4C    ; Common # Po   [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
+FE4D..FE4F    ; Common # Pc   [3] DASHED LOW LINE..WAVY LOW LINE
+FE50..FE52    ; Common # Po   [3] SMALL COMMA..SMALL FULL STOP
+FE54..FE57    ; Common # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
+FE58          ; Common # Pd       SMALL EM DASH
+FE59          ; Common # Ps       SMALL LEFT PARENTHESIS
+FE5A          ; Common # Pe       SMALL RIGHT PARENTHESIS
+FE5B          ; Common # Ps       SMALL LEFT CURLY BRACKET
+FE5C          ; Common # Pe       SMALL RIGHT CURLY BRACKET
+FE5D          ; Common # Ps       SMALL LEFT TORTOISE SHELL BRACKET
+FE5E          ; Common # Pe       SMALL RIGHT TORTOISE SHELL BRACKET
+FE5F..FE61    ; Common # Po   [3] SMALL NUMBER SIGN..SMALL ASTERISK
+FE62          ; Common # Sm       SMALL PLUS SIGN
+FE63          ; Common # Pd       SMALL HYPHEN-MINUS
+FE64..FE66    ; Common # Sm   [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
+FE68          ; Common # Po       SMALL REVERSE SOLIDUS
+FE69          ; Common # Sc       SMALL DOLLAR SIGN
+FE6A..FE6B    ; Common # Po   [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
+FEFF          ; Common # Cf       ZERO WIDTH NO-BREAK SPACE
+FF01..FF03    ; Common # Po   [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
+FF04          ; Common # Sc       FULLWIDTH DOLLAR SIGN
+FF05..FF07    ; Common # Po   [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
+FF08          ; Common # Ps       FULLWIDTH LEFT PARENTHESIS
+FF09          ; Common # Pe       FULLWIDTH RIGHT PARENTHESIS
+FF0A          ; Common # Po       FULLWIDTH ASTERISK
+FF0B          ; Common # Sm       FULLWIDTH PLUS SIGN
+FF0C          ; Common # Po       FULLWIDTH COMMA
+FF0D          ; Common # Pd       FULLWIDTH HYPHEN-MINUS
+FF0E..FF0F    ; Common # Po   [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
+FF10..FF19    ; Common # Nd  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+FF1A..FF1B    ; Common # Po   [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
+FF1C..FF1E    ; Common # Sm   [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
+FF1F..FF20    ; Common # Po   [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
+FF3B          ; Common # Ps       FULLWIDTH LEFT SQUARE BRACKET
+FF3C          ; Common # Po       FULLWIDTH REVERSE SOLIDUS
+FF3D          ; Common # Pe       FULLWIDTH RIGHT SQUARE BRACKET
+FF3E          ; Common # Sk       FULLWIDTH CIRCUMFLEX ACCENT
+FF3F          ; Common # Pc       FULLWIDTH LOW LINE
+FF40          ; Common # Sk       FULLWIDTH GRAVE ACCENT
+FF5B          ; Common # Ps       FULLWIDTH LEFT CURLY BRACKET
+FF5C          ; Common # Sm       FULLWIDTH VERTICAL LINE
+FF5D          ; Common # Pe       FULLWIDTH RIGHT CURLY BRACKET
+FF5E          ; Common # Sm       FULLWIDTH TILDE
+FF5F          ; Common # Ps       FULLWIDTH LEFT WHITE PARENTHESIS
+FF60          ; Common # Pe       FULLWIDTH RIGHT WHITE PARENTHESIS
+FF61          ; Common # Po       HALFWIDTH IDEOGRAPHIC FULL STOP
+FF62          ; Common # Ps       HALFWIDTH LEFT CORNER BRACKET
+FF63          ; Common # Pe       HALFWIDTH RIGHT CORNER BRACKET
+FF64..FF65    ; Common # Po   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+FF70          ; Common # Lm       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F    ; Common # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+FFE0..FFE1    ; Common # Sc   [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
+FFE2          ; Common # Sm       FULLWIDTH NOT SIGN
+FFE3          ; Common # Sk       FULLWIDTH MACRON
+FFE4          ; Common # So       FULLWIDTH BROKEN BAR
+FFE5..FFE6    ; Common # Sc   [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
+FFE8          ; Common # So       HALFWIDTH FORMS LIGHT VERTICAL
+FFE9..FFEC    ; Common # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+FFED..FFEE    ; Common # So   [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
+FFF9..FFFB    ; Common # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
+FFFC..FFFD    ; Common # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
+10100..10101  ; Common # Po   [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
+10102         ; Common # So       AEGEAN CHECK MARK
+10107..10133  ; Common # No  [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+10137..1013F  ; Common # So   [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+10190..1019B  ; Common # So  [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
+101D0..101FC  ; Common # So  [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
+1D000..1D0F5  ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
+1D100..1D126  ; Common # So  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
+1D129..1D164  ; Common # So  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D165..1D166  ; Common # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
+1D16A..1D16C  ; Common # So   [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
+1D16D..1D172  ; Common # Mc   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
+1D173..1D17A  ; Common # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+1D183..1D184  ; Common # So   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
+1D18C..1D1A9  ; Common # So  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
+1D1AE..1D1DD  ; Common # So  [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS
+1D300..1D356  ; Common # So  [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+1D360..1D371  ; Common # No  [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
+1D400..1D454  ; Common # L&  [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+1D456..1D49C  ; Common # L&  [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+1D49E..1D49F  ; Common # L&   [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+1D4A2         ; Common # L&       MATHEMATICAL SCRIPT CAPITAL G
+1D4A5..1D4A6  ; Common # L&   [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+1D4A9..1D4AC  ; Common # L&   [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+1D4AE..1D4B9  ; Common # L&  [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+1D4BB         ; Common # L&       MATHEMATICAL SCRIPT SMALL F
+1D4BD..1D4C3  ; Common # L&   [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+1D4C5..1D505  ; Common # L&  [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+1D507..1D50A  ; Common # L&   [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+1D50D..1D514  ; Common # L&   [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+1D516..1D51C  ; Common # L&   [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+1D51E..1D539  ; Common # L&  [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+1D53B..1D53E  ; Common # L&   [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+1D540..1D544  ; Common # L&   [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+1D546         ; Common # L&       MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+1D54A..1D550  ; Common # L&   [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+1D552..1D6A5  ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+1D6A8..1D6C0  ; Common # L&  [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+1D6C1         ; Common # Sm       MATHEMATICAL BOLD NABLA
+1D6C2..1D6DA  ; Common # L&  [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+1D6DB         ; Common # Sm       MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+1D6DC..1D6FA  ; Common # L&  [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
+1D6FB         ; Common # Sm       MATHEMATICAL ITALIC NABLA
+1D6FC..1D714  ; Common # L&  [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+1D715         ; Common # Sm       MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+1D716..1D734  ; Common # L&  [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+1D735         ; Common # Sm       MATHEMATICAL BOLD ITALIC NABLA
+1D736..1D74E  ; Common # L&  [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+1D74F         ; Common # Sm       MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+1D750..1D76E  ; Common # L&  [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+1D76F         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD NABLA
+1D770..1D788  ; Common # L&  [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+1D789         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+1D78A..1D7A8  ; Common # L&  [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+1D7A9         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
+1D7AA..1D7C2  ; Common # L&  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+1D7C3         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+1D7C4..1D7CB  ; Common # L&   [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
+1D7CE..1D7FF  ; Common # Nd  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1F000..1F02B  ; Common # So  [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
+1F030..1F093  ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+1F100..1F10A  ; Common # No  [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
+1F110..1F12E  ; Common # So  [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
+1F131         ; Common # So       SQUARED LATIN CAPITAL LETTER B
+1F13D         ; Common # So       SQUARED LATIN CAPITAL LETTER N
+1F13F         ; Common # So       SQUARED LATIN CAPITAL LETTER P
+1F142         ; Common # So       SQUARED LATIN CAPITAL LETTER S
+1F146         ; Common # So       SQUARED LATIN CAPITAL LETTER W
+1F14A..1F14E  ; Common # So   [5] SQUARED HV..SQUARED PPV
+1F157         ; Common # So       NEGATIVE CIRCLED LATIN CAPITAL LETTER H
+1F15F         ; Common # So       NEGATIVE CIRCLED LATIN CAPITAL LETTER P
+1F179         ; Common # So       NEGATIVE SQUARED LATIN CAPITAL LETTER J
+1F17B..1F17C  ; Common # So   [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
+1F17F         ; Common # So       NEGATIVE SQUARED LATIN CAPITAL LETTER P
+1F18A..1F18D  ; Common # So   [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
+1F190         ; Common # So       SQUARE DJ
+1F210..1F231  ; Common # So  [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
+1F240..1F248  ; Common # So   [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+E0001         ; Common # Cf       LANGUAGE TAG
+E0020..E007F  ; Common # Cf  [96] TAG SPACE..CANCEL TAG
+
+# Total code points: 5395
+
+# ================================================
+
+0041..005A    ; Latin # L&  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+0061..007A    ; Latin # L&  [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+00AA          ; Latin # L&       FEMININE ORDINAL INDICATOR
+00BA          ; Latin # L&       MASCULINE ORDINAL INDICATOR
+00C0..00D6    ; Latin # L&  [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8..00F6    ; Latin # L&  [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
+00F8..01BA    ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
+01BB          ; Latin # Lo       LATIN LETTER TWO WITH STROKE
+01BC..01BF    ; Latin # L&   [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
+01C0..01C3    ; Latin # Lo   [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
+01C4..0293    ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
+0294          ; Latin # Lo       LATIN LETTER GLOTTAL STOP
+0295..02AF    ; Latin # L&  [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+02B0..02B8    ; Latin # Lm   [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
+02E0..02E4    ; Latin # Lm   [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+1D00..1D25    ; Latin # L&  [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN
+1D2C..1D5C    ; Latin # Lm  [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN
+1D62..1D65    ; Latin # L&   [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V
+1D6B..1D77    ; Latin # L&  [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+1D79..1D9A    ; Latin # L&  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+1D9B..1DBE    ; Latin # Lm  [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH
+1E00..1EFF    ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
+2071          ; Latin # Lm       SUPERSCRIPT LATIN SMALL LETTER I
+207F          ; Latin # Lm       SUPERSCRIPT LATIN SMALL LETTER N
+2090..2094    ; Latin # Lm   [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
+212A..212B    ; Latin # L&   [2] KELVIN SIGN..ANGSTROM SIGN
+2132          ; Latin # L&       TURNED CAPITAL F
+214E          ; Latin # L&       TURNED SMALL F
+2160..2182    ; Latin # Nl  [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
+2183..2184    ; Latin # L&   [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
+2185..2188    ; Latin # Nl   [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
+2C60..2C7C    ; Latin # L&  [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
+2C7D          ; Latin # Lm       MODIFIER LETTER CAPITAL V
+2C7E..2C7F    ; Latin # L&   [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
+A722..A76F    ; Latin # L&  [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
+A770          ; Latin # Lm       MODIFIER LETTER US
+A771..A787    ; Latin # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
+A78B..A78C    ; Latin # L&   [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
+A7FB..A7FF    ; Latin # Lo   [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
+FB00..FB06    ; Latin # L&   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+FF21..FF3A    ; Latin # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
+FF41..FF5A    ; Latin # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+
+# Total code points: 1244
+
+# ================================================
+
+0370..0373    ; Greek # L&   [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
+0375          ; Greek # Sk       GREEK LOWER NUMERAL SIGN
+0376..0377    ; Greek # L&   [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+037A          ; Greek # Lm       GREEK YPOGEGRAMMENI
+037B..037D    ; Greek # L&   [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0384          ; Greek # Sk       GREEK TONOS
+0386          ; Greek # L&       GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388..038A    ; Greek # L&   [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+038C          ; Greek # L&       GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E..03A1    ; Greek # L&  [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
+03A3..03E1    ; Greek # L&  [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI
+03F0..03F5    ; Greek # L&   [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL
+03F6          ; Greek # Sm       GREEK REVERSED LUNATE EPSILON SYMBOL
+03F7..03FF    ; Greek # L&   [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+1D26..1D2A    ; Greek # L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
+1D5D..1D61    ; Greek # Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
+1D66..1D6A    ; Greek # L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
+1DBF          ; Greek # Lm       MODIFIER LETTER SMALL THETA
+1F00..1F15    ; Greek # L&  [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+1F18..1F1D    ; Greek # L&   [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F20..1F45    ; Greek # L&  [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+1F48..1F4D    ; Greek # L&   [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50..1F57    ; Greek # L&   [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F59          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F..1F7D    ; Greek # L&  [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
+1F80..1FB4    ; Greek # L&  [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6..1FBC    ; Greek # L&   [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBD          ; Greek # Sk       GREEK KORONIS
+1FBE          ; Greek # L&       GREEK PROSGEGRAMMENI
+1FBF..1FC1    ; Greek # Sk   [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+1FC2..1FC4    ; Greek # L&   [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6..1FCC    ; Greek # L&   [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCD..1FCF    ; Greek # Sk   [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+1FD0..1FD3    ; Greek # L&   [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6..1FDB    ; Greek # L&   [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
+1FDD..1FDF    ; Greek # Sk   [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FE0..1FEC    ; Greek # L&  [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
+1FED..1FEF    ; Greek # Sk   [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+1FF2..1FF4    ; Greek # L&   [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6..1FFC    ; Greek # L&   [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFD..1FFE    ; Greek # Sk   [2] GREEK OXIA..GREEK DASIA
+2126          ; Greek # L&       OHM SIGN
+10140..10174  ; Greek # Nl  [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
+10175..10178  ; Greek # No   [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
+10179..10189  ; Greek # So  [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
+1018A         ; Greek # No       GREEK ZERO SIGN
+1D200..1D241  ; Greek # So  [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
+1D242..1D244  ; Greek # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+1D245         ; Greek # So       GREEK MUSICAL LEIMMA
+
+# Total code points: 511
+
+# ================================================
+
+0400..0481    ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA
+0482          ; Cyrillic # So       CYRILLIC THOUSANDS SIGN
+0483..0484    ; Cyrillic # Mn   [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
+0487          ; Cyrillic # Mn       COMBINING CYRILLIC POKRYTIE
+0488..0489    ; Cyrillic # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+048A..0525    ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
+1D2B          ; Cyrillic # L&       CYRILLIC LETTER SMALL CAPITAL EL
+1D78          ; Cyrillic # Lm       MODIFIER LETTER CYRILLIC EN
+2DE0..2DFF    ; Cyrillic # Mn  [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+A640..A65F    ; Cyrillic # L&  [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
+A662..A66D    ; Cyrillic # L&  [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
+A66E          ; Cyrillic # Lo       CYRILLIC LETTER MULTIOCULAR O
+A66F          ; Cyrillic # Mn       COMBINING CYRILLIC VZMET
+A670..A672    ; Cyrillic # Me   [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+A673          ; Cyrillic # Po       SLAVONIC ASTERISK
+A67C..A67D    ; Cyrillic # Mn   [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
+A67E          ; Cyrillic # Po       CYRILLIC KAVYKA
+A67F          ; Cyrillic # Lm       CYRILLIC PAYEROK
+A680..A697    ; Cyrillic # L&  [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
+
+# Total code points: 404
+
+# ================================================
+
+0531..0556    ; Armenian # L&  [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+0559          ; Armenian # Lm       ARMENIAN MODIFIER LETTER LEFT HALF RING
+055A..055F    ; Armenian # Po   [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+0561..0587    ; Armenian # L&  [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
+058A          ; Armenian # Pd       ARMENIAN HYPHEN
+FB13..FB17    ; Armenian # L&   [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+
+# Total code points: 90
+
+# ================================================
+
+0591..05BD    ; Hebrew # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BE          ; Hebrew # Pd       HEBREW PUNCTUATION MAQAF
+05BF          ; Hebrew # Mn       HEBREW POINT RAFE
+05C0          ; Hebrew # Po       HEBREW PUNCTUATION PASEQ
+05C1..05C2    ; Hebrew # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C3          ; Hebrew # Po       HEBREW PUNCTUATION SOF PASUQ
+05C4..05C5    ; Hebrew # Mn   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C6          ; Hebrew # Po       HEBREW PUNCTUATION NUN HAFUKHA
+05C7          ; Hebrew # Mn       HEBREW POINT QAMATS QATAN
+05D0..05EA    ; Hebrew # Lo  [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+05F0..05F2    ; Hebrew # Lo   [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
+05F3..05F4    ; Hebrew # Po   [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+FB1D          ; Hebrew # Lo       HEBREW LETTER YOD WITH HIRIQ
+FB1E          ; Hebrew # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
+FB1F..FB28    ; Hebrew # Lo  [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+FB29          ; Hebrew # Sm       HEBREW LETTER ALTERNATIVE PLUS SIGN
+FB2A..FB36    ; Hebrew # Lo  [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+FB38..FB3C    ; Hebrew # Lo   [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+FB3E          ; Hebrew # Lo       HEBREW LETTER MEM WITH DAGESH
+FB40..FB41    ; Hebrew # Lo   [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+FB43..FB44    ; Hebrew # Lo   [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+FB46..FB4F    ; Hebrew # Lo  [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
+
+# Total code points: 133
+
+# ================================================
+
+0606..0608    ; Arabic # Sm   [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
+0609..060A    ; Arabic # Po   [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
+060B          ; Arabic # Sc       AFGHANI SIGN
+060D          ; Arabic # Po       ARABIC DATE SEPARATOR
+060E..060F    ; Arabic # So   [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
+0610..061A    ; Arabic # Mn  [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+061E          ; Arabic # Po       ARABIC TRIPLE DOT PUNCTUATION MARK
+0621..063F    ; Arabic # Lo  [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0641..064A    ; Arabic # Lo  [10] ARABIC LETTER FEH..ARABIC LETTER YEH
+0656..065E    ; Arabic # Mn   [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
+066A..066D    ; Arabic # Po   [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+066E..066F    ; Arabic # Lo   [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+0671..06D3    ; Arabic # Lo  [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D4          ; Arabic # Po       ARABIC FULL STOP
+06D5          ; Arabic # Lo       ARABIC LETTER AE
+06D6..06DC    ; Arabic # Mn   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DE          ; Arabic # Me       ARABIC START OF RUB EL HIZB
+06DF..06E4    ; Arabic # Mn   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E5..06E6    ; Arabic # Lm   [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+06E7..06E8    ; Arabic # Mn   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06E9          ; Arabic # So       ARABIC PLACE OF SAJDAH
+06EA..06ED    ; Arabic # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+06EE..06EF    ; Arabic # Lo   [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+06F0..06F9    ; Arabic # Nd  [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+06FA..06FC    ; Arabic # Lo   [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+06FD..06FE    ; Arabic # So   [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
+06FF          ; Arabic # Lo       ARABIC LETTER HEH WITH INVERTED V
+0750..077F    ; Arabic # Lo  [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
+FB50..FBB1    ; Arabic # Lo  [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+FBD3..FD3D    ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+FD50..FD8F    ; Arabic # Lo  [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+FD92..FDC7    ; Arabic # Lo  [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+FDF0..FDFB    ; Arabic # Lo  [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
+FDFC          ; Arabic # Sc       RIAL SIGN
+FE70..FE74    ; Arabic # Lo   [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
+FE76..FEFC    ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+10E60..10E7E  ; Arabic # No  [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
+
+# Total code points: 1030
+
+# ================================================
+
+0700..070D    ; Syriac # Po  [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+070F          ; Syriac # Cf       SYRIAC ABBREVIATION MARK
+0710          ; Syriac # Lo       SYRIAC LETTER ALAPH
+0711          ; Syriac # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
+0712..072F    ; Syriac # Lo  [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
+0730..074A    ; Syriac # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+074D..074F    ; Syriac # Lo   [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
+
+# Total code points: 77
+
+# ================================================
+
+0780..07A5    ; Thaana # Lo  [38] THAANA LETTER HAA..THAANA LETTER WAAVU
+07A6..07B0    ; Thaana # Mn  [11] THAANA ABAFILI..THAANA SUKUN
+07B1          ; Thaana # Lo       THAANA LETTER NAA
+
+# Total code points: 50
+
+# ================================================
+
+0900..0902    ; Devanagari # Mn   [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+0903          ; Devanagari # Mc       DEVANAGARI SIGN VISARGA
+0904..0939    ; Devanagari # Lo  [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
+093C          ; Devanagari # Mn       DEVANAGARI SIGN NUKTA
+093D          ; Devanagari # Lo       DEVANAGARI SIGN AVAGRAHA
+093E..0940    ; Devanagari # Mc   [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+0941..0948    ; Devanagari # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+0949..094C    ; Devanagari # Mc   [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+094D          ; Devanagari # Mn       DEVANAGARI SIGN VIRAMA
+094E          ; Devanagari # Mc       DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
+0950          ; Devanagari # Lo       DEVANAGARI OM
+0953..0955    ; Devanagari # Mn   [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
+0958..0961    ; Devanagari # Lo  [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
+0962..0963    ; Devanagari # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0966..096F    ; Devanagari # Nd  [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+0971          ; Devanagari # Lm       DEVANAGARI SIGN HIGH SPACING DOT
+0972          ; Devanagari # Lo       DEVANAGARI LETTER CANDRA A
+0979..097F    ; Devanagari # Lo   [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
+A8E0..A8F1    ; Devanagari # Mn  [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A8F2..A8F7    ; Devanagari # Lo   [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
+A8F8..A8FA    ; Devanagari # Po   [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
+A8FB          ; Devanagari # Lo       DEVANAGARI HEADSTROKE
+
+# Total code points: 140
+
+# ================================================
+
+0981          ; Bengali # Mn       BENGALI SIGN CANDRABINDU
+0982..0983    ; Bengali # Mc   [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+0985..098C    ; Bengali # Lo   [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
+098F..0990    ; Bengali # Lo   [2] BENGALI LETTER E..BENGALI LETTER AI
+0993..09A8    ; Bengali # Lo  [22] BENGALI LETTER O..BENGALI LETTER NA
+09AA..09B0    ; Bengali # Lo   [7] BENGALI LETTER PA..BENGALI LETTER RA
+09B2          ; Bengali # Lo       BENGALI LETTER LA
+09B6..09B9    ; Bengali # Lo   [4] BENGALI LETTER SHA..BENGALI LETTER HA
+09BC          ; Bengali # Mn       BENGALI SIGN NUKTA
+09BD          ; Bengali # Lo       BENGALI SIGN AVAGRAHA
+09BE..09C0    ; Bengali # Mc   [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+09C1..09C4    ; Bengali # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09C7..09C8    ; Bengali # Mc   [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09CB..09CC    ; Bengali # Mc   [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+09CD          ; Bengali # Mn       BENGALI SIGN VIRAMA
+09CE          ; Bengali # Lo       BENGALI LETTER KHANDA TA
+09D7          ; Bengali # Mc       BENGALI AU LENGTH MARK
+09DC..09DD    ; Bengali # Lo   [2] BENGALI LETTER RRA..BENGALI LETTER RHA
+09DF..09E1    ; Bengali # Lo   [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
+09E2..09E3    ; Bengali # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+09E6..09EF    ; Bengali # Nd  [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+09F0..09F1    ; Bengali # Lo   [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09F2..09F3    ; Bengali # Sc   [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
+09F4..09F9    ; Bengali # No   [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
+09FA          ; Bengali # So       BENGALI ISSHAR
+09FB          ; Bengali # Sc       BENGALI GANDA MARK
+
+# Total code points: 92
+
+# ================================================
+
+0A01..0A02    ; Gurmukhi # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A03          ; Gurmukhi # Mc       GURMUKHI SIGN VISARGA
+0A05..0A0A    ; Gurmukhi # Lo   [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
+0A0F..0A10    ; Gurmukhi # Lo   [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
+0A13..0A28    ; Gurmukhi # Lo  [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
+0A2A..0A30    ; Gurmukhi # Lo   [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
+0A32..0A33    ; Gurmukhi # Lo   [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
+0A35..0A36    ; Gurmukhi # Lo   [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
+0A38..0A39    ; Gurmukhi # Lo   [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
+0A3C          ; Gurmukhi # Mn       GURMUKHI SIGN NUKTA
+0A3E..0A40    ; Gurmukhi # Mc   [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+0A41..0A42    ; Gurmukhi # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48    ; Gurmukhi # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D    ; Gurmukhi # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51          ; Gurmukhi # Mn       GURMUKHI SIGN UDAAT
+0A59..0A5C    ; Gurmukhi # Lo   [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
+0A5E          ; Gurmukhi # Lo       GURMUKHI LETTER FA
+0A66..0A6F    ; Gurmukhi # Nd  [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+0A70..0A71    ; Gurmukhi # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A72..0A74    ; Gurmukhi # Lo   [3] GURMUKHI IRI..GURMUKHI EK ONKAR
+0A75          ; Gurmukhi # Mn       GURMUKHI SIGN YAKASH
+
+# Total code points: 79
+
+# ================================================
+
+0A81..0A82    ; Gujarati # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0A83          ; Gujarati # Mc       GUJARATI SIGN VISARGA
+0A85..0A8D    ; Gujarati # Lo   [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+0A8F..0A91    ; Gujarati # Lo   [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+0A93..0AA8    ; Gujarati # Lo  [22] GUJARATI LETTER O..GUJARATI LETTER NA
+0AAA..0AB0    ; Gujarati # Lo   [7] GUJARATI LETTER PA..GUJARATI LETTER RA
+0AB2..0AB3    ; Gujarati # Lo   [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
+0AB5..0AB9    ; Gujarati # Lo   [5] GUJARATI LETTER VA..GUJARATI LETTER HA
+0ABC          ; Gujarati # Mn       GUJARATI SIGN NUKTA
+0ABD          ; Gujarati # Lo       GUJARATI SIGN AVAGRAHA
+0ABE..0AC0    ; Gujarati # Mc   [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+0AC1..0AC5    ; Gujarati # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8    ; Gujarati # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0AC9          ; Gujarati # Mc       GUJARATI VOWEL SIGN CANDRA O
+0ACB..0ACC    ; Gujarati # Mc   [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+0ACD          ; Gujarati # Mn       GUJARATI SIGN VIRAMA
+0AD0          ; Gujarati # Lo       GUJARATI OM
+0AE0..0AE1    ; Gujarati # Lo   [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
+0AE2..0AE3    ; Gujarati # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AE6..0AEF    ; Gujarati # Nd  [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+0AF1          ; Gujarati # Sc       GUJARATI RUPEE SIGN
+
+# Total code points: 83
+
+# ================================================
+
+0B01          ; Oriya # Mn       ORIYA SIGN CANDRABINDU
+0B02..0B03    ; Oriya # Mc   [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+0B05..0B0C    ; Oriya # Lo   [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
+0B0F..0B10    ; Oriya # Lo   [2] ORIYA LETTER E..ORIYA LETTER AI
+0B13..0B28    ; Oriya # Lo  [22] ORIYA LETTER O..ORIYA LETTER NA
+0B2A..0B30    ; Oriya # Lo   [7] ORIYA LETTER PA..ORIYA LETTER RA
+0B32..0B33    ; Oriya # Lo   [2] ORIYA LETTER LA..ORIYA LETTER LLA
+0B35..0B39    ; Oriya # Lo   [5] ORIYA LETTER VA..ORIYA LETTER HA
+0B3C          ; Oriya # Mn       ORIYA SIGN NUKTA
+0B3D          ; Oriya # Lo       ORIYA SIGN AVAGRAHA
+0B3E          ; Oriya # Mc       ORIYA VOWEL SIGN AA
+0B3F          ; Oriya # Mn       ORIYA VOWEL SIGN I
+0B40          ; Oriya # Mc       ORIYA VOWEL SIGN II
+0B41..0B44    ; Oriya # Mn   [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B47..0B48    ; Oriya # Mc   [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B4B..0B4C    ; Oriya # Mc   [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+0B4D          ; Oriya # Mn       ORIYA SIGN VIRAMA
+0B56          ; Oriya # Mn       ORIYA AI LENGTH MARK
+0B57          ; Oriya # Mc       ORIYA AU LENGTH MARK
+0B5C..0B5D    ; Oriya # Lo   [2] ORIYA LETTER RRA..ORIYA LETTER RHA
+0B5F..0B61    ; Oriya # Lo   [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
+0B62..0B63    ; Oriya # Mn   [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B66..0B6F    ; Oriya # Nd  [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+0B70          ; Oriya # So       ORIYA ISSHAR
+0B71          ; Oriya # Lo       ORIYA LETTER WA
+
+# Total code points: 84
+
+# ================================================
+
+0B82          ; Tamil # Mn       TAMIL SIGN ANUSVARA
+0B83          ; Tamil # Lo       TAMIL SIGN VISARGA
+0B85..0B8A    ; Tamil # Lo   [6] TAMIL LETTER A..TAMIL LETTER UU
+0B8E..0B90    ; Tamil # Lo   [3] TAMIL LETTER E..TAMIL LETTER AI
+0B92..0B95    ; Tamil # Lo   [4] TAMIL LETTER O..TAMIL LETTER KA
+0B99..0B9A    ; Tamil # Lo   [2] TAMIL LETTER NGA..TAMIL LETTER CA
+0B9C          ; Tamil # Lo       TAMIL LETTER JA
+0B9E..0B9F    ; Tamil # Lo   [2] TAMIL LETTER NYA..TAMIL LETTER TTA
+0BA3..0BA4    ; Tamil # Lo   [2] TAMIL LETTER NNA..TAMIL LETTER TA
+0BA8..0BAA    ; Tamil # Lo   [3] TAMIL LETTER NA..TAMIL LETTER PA
+0BAE..0BB9    ; Tamil # Lo  [12] TAMIL LETTER MA..TAMIL LETTER HA
+0BBE..0BBF    ; Tamil # Mc   [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+0BC0          ; Tamil # Mn       TAMIL VOWEL SIGN II
+0BC1..0BC2    ; Tamil # Mc   [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+0BC6..0BC8    ; Tamil # Mc   [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BCA..0BCC    ; Tamil # Mc   [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+0BCD          ; Tamil # Mn       TAMIL SIGN VIRAMA
+0BD0          ; Tamil # Lo       TAMIL OM
+0BD7          ; Tamil # Mc       TAMIL AU LENGTH MARK
+0BE6..0BEF    ; Tamil # Nd  [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0..0BF2    ; Tamil # No   [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0BF3..0BF8    ; Tamil # So   [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
+0BF9          ; Tamil # Sc       TAMIL RUPEE SIGN
+0BFA          ; Tamil # So       TAMIL NUMBER SIGN
+
+# Total code points: 72
+
+# ================================================
+
+0C01..0C03    ; Telugu # Mc   [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C05..0C0C    ; Telugu # Lo   [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
+0C0E..0C10    ; Telugu # Lo   [3] TELUGU LETTER E..TELUGU LETTER AI
+0C12..0C28    ; Telugu # Lo  [23] TELUGU LETTER O..TELUGU LETTER NA
+0C2A..0C33    ; Telugu # Lo  [10] TELUGU LETTER PA..TELUGU LETTER LLA
+0C35..0C39    ; Telugu # Lo   [5] TELUGU LETTER VA..TELUGU LETTER HA
+0C3D          ; Telugu # Lo       TELUGU SIGN AVAGRAHA
+0C3E..0C40    ; Telugu # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C41..0C44    ; Telugu # Mc   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+0C46..0C48    ; Telugu # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D    ; Telugu # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56    ; Telugu # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C58..0C59    ; Telugu # Lo   [2] TELUGU LETTER TSA..TELUGU LETTER DZA
+0C60..0C61    ; Telugu # Lo   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C62..0C63    ; Telugu # Mn   [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C66..0C6F    ; Telugu # Nd  [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+0C78..0C7E    ; Telugu # No   [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
+0C7F          ; Telugu # So       TELUGU SIGN TUUMU
+
+# Total code points: 93
+
+# ================================================
+
+0C82..0C83    ; Kannada # Mc   [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0C85..0C8C    ; Kannada # Lo   [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
+0C8E..0C90    ; Kannada # Lo   [3] KANNADA LETTER E..KANNADA LETTER AI
+0C92..0CA8    ; Kannada # Lo  [23] KANNADA LETTER O..KANNADA LETTER NA
+0CAA..0CB3    ; Kannada # Lo  [10] KANNADA LETTER PA..KANNADA LETTER LLA
+0CB5..0CB9    ; Kannada # Lo   [5] KANNADA LETTER VA..KANNADA LETTER HA
+0CBC          ; Kannada # Mn       KANNADA SIGN NUKTA
+0CBD          ; Kannada # Lo       KANNADA SIGN AVAGRAHA
+0CBE          ; Kannada # Mc       KANNADA VOWEL SIGN AA
+0CBF          ; Kannada # Mn       KANNADA VOWEL SIGN I
+0CC0..0CC4    ; Kannada # Mc   [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+0CC6          ; Kannada # Mn       KANNADA VOWEL SIGN E
+0CC7..0CC8    ; Kannada # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; Kannada # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CCC..0CCD    ; Kannada # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CD5..0CD6    ; Kannada # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CDE          ; Kannada # Lo       KANNADA LETTER FA
+0CE0..0CE1    ; Kannada # Lo   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
+0CE2..0CE3    ; Kannada # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0CE6..0CEF    ; Kannada # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+
+# Total code points: 84
+
+# ================================================
+
+0D02..0D03    ; Malayalam # Mc   [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D05..0D0C    ; Malayalam # Lo   [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
+0D0E..0D10    ; Malayalam # Lo   [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
+0D12..0D28    ; Malayalam # Lo  [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
+0D2A..0D39    ; Malayalam # Lo  [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
+0D3D          ; Malayalam # Lo       MALAYALAM SIGN AVAGRAHA
+0D3E..0D40    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+0D41..0D44    ; Malayalam # Mn   [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D46..0D48    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D4A..0D4C    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+0D4D          ; Malayalam # Mn       MALAYALAM SIGN VIRAMA
+0D57          ; Malayalam # Mc       MALAYALAM AU LENGTH MARK
+0D60..0D61    ; Malayalam # Lo   [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
+0D62..0D63    ; Malayalam # Mn   [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D66..0D6F    ; Malayalam # Nd  [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+0D70..0D75    ; Malayalam # No   [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
+0D79          ; Malayalam # So       MALAYALAM DATE MARK
+0D7A..0D7F    ; Malayalam # Lo   [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+
+# Total code points: 95
+
+# ================================================
+
+0D82..0D83    ; Sinhala # Mc   [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0D85..0D96    ; Sinhala # Lo  [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+0D9A..0DB1    ; Sinhala # Lo  [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+0DB3..0DBB    ; Sinhala # Lo   [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+0DBD          ; Sinhala # Lo       SINHALA LETTER DANTAJA LAYANNA
+0DC0..0DC6    ; Sinhala # Lo   [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+0DCA          ; Sinhala # Mn       SINHALA SIGN AL-LAKUNA
+0DCF..0DD1    ; Sinhala # Mc   [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+0DD2..0DD4    ; Sinhala # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6          ; Sinhala # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DD8..0DDF    ; Sinhala # Mc   [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DF2..0DF3    ; Sinhala # Mc   [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0DF4          ; Sinhala # Po       SINHALA PUNCTUATION KUNDDALIYA
+
+# Total code points: 80
+
+# ================================================
+
+0E01..0E30    ; Thai # Lo  [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
+0E31          ; Thai # Mn       THAI CHARACTER MAI HAN-AKAT
+0E32..0E33    ; Thai # Lo   [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
+0E34..0E3A    ; Thai # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E40..0E45    ; Thai # Lo   [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
+0E46          ; Thai # Lm       THAI CHARACTER MAIYAMOK
+0E47..0E4E    ; Thai # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0E4F          ; Thai # Po       THAI CHARACTER FONGMAN
+0E50..0E59    ; Thai # Nd  [10] THAI DIGIT ZERO..THAI DIGIT NINE
+0E5A..0E5B    ; Thai # Po   [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+
+# Total code points: 86
+
+# ================================================
+
+0E81..0E82    ; Lao # Lo   [2] LAO LETTER KO..LAO LETTER KHO SUNG
+0E84          ; Lao # Lo       LAO LETTER KHO TAM
+0E87..0E88    ; Lao # Lo   [2] LAO LETTER NGO..LAO LETTER CO
+0E8A          ; Lao # Lo       LAO LETTER SO TAM
+0E8D          ; Lao # Lo       LAO LETTER NYO
+0E94..0E97    ; Lao # Lo   [4] LAO LETTER DO..LAO LETTER THO TAM
+0E99..0E9F    ; Lao # Lo   [7] LAO LETTER NO..LAO LETTER FO SUNG
+0EA1..0EA3    ; Lao # Lo   [3] LAO LETTER MO..LAO LETTER LO LING
+0EA5          ; Lao # Lo       LAO LETTER LO LOOT
+0EA7          ; Lao # Lo       LAO LETTER WO
+0EAA..0EAB    ; Lao # Lo   [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG
+0EAD..0EB0    ; Lao # Lo   [4] LAO LETTER O..LAO VOWEL SIGN A
+0EB1          ; Lao # Mn       LAO VOWEL SIGN MAI KAN
+0EB2..0EB3    ; Lao # Lo   [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
+0EB4..0EB9    ; Lao # Mn   [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBB..0EBC    ; Lao # Mn   [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
+0EBD          ; Lao # Lo       LAO SEMIVOWEL SIGN NYO
+0EC0..0EC4    ; Lao # Lo   [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+0EC6          ; Lao # Lm       LAO KO LA
+0EC8..0ECD    ; Lao # Mn   [6] LAO TONE MAI EK..LAO NIGGAHITA
+0ED0..0ED9    ; Lao # Nd  [10] LAO DIGIT ZERO..LAO DIGIT NINE
+0EDC..0EDD    ; Lao # Lo   [2] LAO HO NO..LAO HO MO
+
+# Total code points: 65
+
+# ================================================
+
+0F00          ; Tibetan # Lo       TIBETAN SYLLABLE OM
+0F01..0F03    ; Tibetan # So   [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
+0F04..0F12    ; Tibetan # Po  [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
+0F13..0F17    ; Tibetan # So   [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+0F18..0F19    ; Tibetan # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F1A..0F1F    ; Tibetan # So   [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+0F20..0F29    ; Tibetan # Nd  [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+0F2A..0F33    ; Tibetan # No  [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
+0F34          ; Tibetan # So       TIBETAN MARK BSDUS RTAGS
+0F35          ; Tibetan # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
+0F36          ; Tibetan # So       TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+0F37          ; Tibetan # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F38          ; Tibetan # So       TIBETAN MARK CHE MGO
+0F39          ; Tibetan # Mn       TIBETAN MARK TSA -PHRU
+0F3A          ; Tibetan # Ps       TIBETAN MARK GUG RTAGS GYON
+0F3B          ; Tibetan # Pe       TIBETAN MARK GUG RTAGS GYAS
+0F3C          ; Tibetan # Ps       TIBETAN MARK ANG KHANG GYON
+0F3D          ; Tibetan # Pe       TIBETAN MARK ANG KHANG GYAS
+0F3E..0F3F    ; Tibetan # Mc   [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+0F40..0F47    ; Tibetan # Lo   [8] TIBETAN LETTER KA..TIBETAN LETTER JA
+0F49..0F6C    ; Tibetan # Lo  [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
+0F71..0F7E    ; Tibetan # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F7F          ; Tibetan # Mc       TIBETAN SIGN RNAM BCAD
+0F80..0F84    ; Tibetan # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F85          ; Tibetan # Po       TIBETAN MARK PALUTA
+0F86..0F87    ; Tibetan # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F88..0F8B    ; Tibetan # Lo   [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
+0F90..0F97    ; Tibetan # Mn   [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC    ; Tibetan # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FBE..0FC5    ; Tibetan # So   [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+0FC6          ; Tibetan # Mn       TIBETAN SYMBOL PADMA GDAN
+0FC7..0FCC    ; Tibetan # So   [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+0FCE..0FCF    ; Tibetan # So   [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
+0FD0..0FD4    ; Tibetan # Po   [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
+
+# Total code points: 201
+
+# ================================================
+
+1000..102A    ; Myanmar # Lo  [43] MYANMAR LETTER KA..MYANMAR LETTER AU
+102B..102C    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+102D..1030    ; Myanmar # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1031          ; Myanmar # Mc       MYANMAR VOWEL SIGN E
+1032..1037    ; Myanmar # Mn   [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+1038          ; Myanmar # Mc       MYANMAR SIGN VISARGA
+1039..103A    ; Myanmar # Mn   [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103B..103C    ; Myanmar # Mc   [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+103D..103E    ; Myanmar # Mn   [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+103F          ; Myanmar # Lo       MYANMAR LETTER GREAT SA
+1040..1049    ; Myanmar # Nd  [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+104A..104F    ; Myanmar # Po   [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+1050..1055    ; Myanmar # Lo   [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
+1056..1057    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+1058..1059    ; Myanmar # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105A..105D    ; Myanmar # Lo   [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
+105E..1060    ; Myanmar # Mn   [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1061          ; Myanmar # Lo       MYANMAR LETTER SGAW KAREN SHA
+1062..1064    ; Myanmar # Mc   [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
+1065..1066    ; Myanmar # Lo   [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
+1067..106D    ; Myanmar # Mc   [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
+106E..1070    ; Myanmar # Lo   [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
+1071..1074    ; Myanmar # Mn   [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1075..1081    ; Myanmar # Lo  [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
+1082          ; Myanmar # Mn       MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1083..1084    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+1085..1086    ; Myanmar # Mn   [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+1087..108C    ; Myanmar # Mc   [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+108D          ; Myanmar # Mn       MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+108E          ; Myanmar # Lo       MYANMAR LETTER RUMAI PALAUNG FA
+108F          ; Myanmar # Mc       MYANMAR SIGN RUMAI PALAUNG TONE-5
+1090..1099    ; Myanmar # Nd  [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
+109A..109C    ; Myanmar # Mc   [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+109D          ; Myanmar # Mn       MYANMAR VOWEL SIGN AITON AI
+109E..109F    ; Myanmar # So   [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
+AA60..AA6F    ; Myanmar # Lo  [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
+AA70          ; Myanmar # Lm       MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
+AA71..AA76    ; Myanmar # Lo   [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
+AA77..AA79    ; Myanmar # So   [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
+AA7A          ; Myanmar # Lo       MYANMAR LETTER AITON RA
+AA7B          ; Myanmar # Mc       MYANMAR SIGN PAO KAREN TONE
+
+# Total code points: 188
+
+# ================================================
+
+10A0..10C5    ; Georgian # L&  [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
+10D0..10FA    ; Georgian # Lo  [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+10FC          ; Georgian # Lm       MODIFIER LETTER GEORGIAN NAR
+2D00..2D25    ; Georgian # L&  [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
+
+# Total code points: 120
+
+# ================================================
+
+1100..11FF    ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
+3131..318E    ; Hangul # Lo  [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+3200..321E    ; Hangul # So  [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+3260..327E    ; Hangul # So  [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
+A960..A97C    ; Hangul # Lo  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
+AC00..D7A3    ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
+D7B0..D7C6    ; Hangul # Lo  [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+D7CB..D7FB    ; Hangul # Lo  [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+FFA0..FFBE    ; Hangul # Lo  [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+FFC2..FFC7    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+FFCA..FFCF    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+FFD2..FFD7    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+FFDA..FFDC    ; Hangul # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+
+# Total code points: 11737
+
+# ================================================
+
+1200..1248    ; Ethiopic # Lo  [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
+124A..124D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+1250..1256    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+1258          ; Ethiopic # Lo       ETHIOPIC SYLLABLE QHWA
+125A..125D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+1260..1288    ; Ethiopic # Lo  [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+128A..128D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+1290..12B0    ; Ethiopic # Lo  [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+12B2..12B5    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+12B8..12BE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+12C0          ; Ethiopic # Lo       ETHIOPIC SYLLABLE KXWA
+12C2..12C5    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+12C8..12D6    ; Ethiopic # Lo  [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+12D8..1310    ; Ethiopic # Lo  [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+1312..1315    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+1318..135A    ; Ethiopic # Lo  [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+135F          ; Ethiopic # Mn       ETHIOPIC COMBINING GEMINATION MARK
+1360          ; Ethiopic # So       ETHIOPIC SECTION MARK
+1361..1368    ; Ethiopic # Po   [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
+1369..137C    ; Ethiopic # No  [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
+1380..138F    ; Ethiopic # Lo  [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+1390..1399    ; Ethiopic # So  [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+2D80..2D96    ; Ethiopic # Lo  [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
+2DA0..2DA6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
+2DA8..2DAE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
+2DB0..2DB6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
+2DB8..2DBE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
+2DC0..2DC6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
+2DC8..2DCE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
+2DD0..2DD6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
+2DD8..2DDE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
+
+# Total code points: 461
+
+# ================================================
+
+13A0..13F4    ; Cherokee # Lo  [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
+
+# Total code points: 85
+
+# ================================================
+
+1400          ; Canadian_Aboriginal # Pd       CANADIAN SYLLABICS HYPHEN
+1401..166C    ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+166D..166E    ; Canadian_Aboriginal # Po   [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
+166F..167F    ; Canadian_Aboriginal # Lo  [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+18B0..18F5    ; Canadian_Aboriginal # Lo  [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+
+# Total code points: 710
+
+# ================================================
+
+1680          ; Ogham # Zs       OGHAM SPACE MARK
+1681..169A    ; Ogham # Lo  [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
+169B          ; Ogham # Ps       OGHAM FEATHER MARK
+169C          ; Ogham # Pe       OGHAM REVERSED FEATHER MARK
+
+# Total code points: 29
+
+# ================================================
+
+16A0..16EA    ; Runic # Lo  [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+16EE..16F0    ; Runic # Nl   [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
+
+# Total code points: 78
+
+# ================================================
+
+1780..17B3    ; Khmer # Lo  [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+17B4..17B5    ; Khmer # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B6          ; Khmer # Mc       KHMER VOWEL SIGN AA
+17B7..17BD    ; Khmer # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17BE..17C5    ; Khmer # Mc   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+17C6          ; Khmer # Mn       KHMER SIGN NIKAHIT
+17C7..17C8    ; Khmer # Mc   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+17C9..17D3    ; Khmer # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17D4..17D6    ; Khmer # Po   [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17D7          ; Khmer # Lm       KHMER SIGN LEK TOO
+17D8..17DA    ; Khmer # Po   [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
+17DB          ; Khmer # Sc       KHMER CURRENCY SYMBOL RIEL
+17DC          ; Khmer # Lo       KHMER SIGN AVAKRAHASANYA
+17DD          ; Khmer # Mn       KHMER SIGN ATTHACAN
+17E0..17E9    ; Khmer # Nd  [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
+17F0..17F9    ; Khmer # No  [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+19E0..19FF    ; Khmer # So  [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
+
+# Total code points: 146
+
+# ================================================
+
+1800..1801    ; Mongolian # Po   [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS
+1804          ; Mongolian # Po       MONGOLIAN COLON
+1806          ; Mongolian # Pd       MONGOLIAN TODO SOFT HYPHEN
+1807..180A    ; Mongolian # Po   [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
+180B..180D    ; Mongolian # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E          ; Mongolian # Zs       MONGOLIAN VOWEL SEPARATOR
+1810..1819    ; Mongolian # Nd  [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+1820..1842    ; Mongolian # Lo  [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
+1843          ; Mongolian # Lm       MONGOLIAN LETTER TODO LONG VOWEL SIGN
+1844..1877    ; Mongolian # Lo  [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
+1880..18A8    ; Mongolian # Lo  [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
+18A9          ; Mongolian # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
+18AA          ; Mongolian # Lo       MONGOLIAN LETTER MANCHU ALI GALI LHA
+
+# Total code points: 153
+
+# ================================================
+
+3041..3096    ; Hiragana # Lo  [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
+309D..309E    ; Hiragana # Lm   [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+309F          ; Hiragana # Lo       HIRAGANA DIGRAPH YORI
+1F200         ; Hiragana # So       SQUARE HIRAGANA HOKA
+
+# Total code points: 90
+
+# ================================================
+
+30A1..30FA    ; Katakana # Lo  [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FD..30FE    ; Katakana # Lm   [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK
+30FF          ; Katakana # Lo       KATAKANA DIGRAPH KOTO
+31F0..31FF    ; Katakana # Lo  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
+32D0..32FE    ; Katakana # So  [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
+3300..3357    ; Katakana # So  [88] SQUARE APAATO..SQUARE WATTO
+FF66..FF6F    ; Katakana # Lo  [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
+FF71..FF9D    ; Katakana # Lo  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+
+# Total code points: 299
+
+# ================================================
+
+3105..312D    ; Bopomofo # Lo  [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+31A0..31B7    ; Bopomofo # Lo  [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
+
+# Total code points: 65
+
+# ================================================
+
+2E80..2E99    ; Han # So  [26] CJK RADICAL REPEAT..CJK RADICAL RAP
+2E9B..2EF3    ; Han # So  [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+2F00..2FD5    ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+3005          ; Han # Lm       IDEOGRAPHIC ITERATION MARK
+3007          ; Han # Nl       IDEOGRAPHIC NUMBER ZERO
+3021..3029    ; Han # Nl   [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
+3038..303A    ; Han # Nl   [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+303B          ; Han # Lm       VERTICAL IDEOGRAPHIC ITERATION MARK
+3400..4DB5    ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
+4E00..9FCB    ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
+F900..FA2D    ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
+FA30..FA6D    ; Han # Lo  [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
+FA70..FAD9    ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+20000..2A6D6  ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
+2A700..2B734  ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+2F800..2FA1D  ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+# Total code points: 75738
+
+# ================================================
+
+A000..A014    ; Yi # Lo  [21] YI SYLLABLE IT..YI SYLLABLE E
+A015          ; Yi # Lm       YI SYLLABLE WU
+A016..A48C    ; Yi # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
+A490..A4C6    ; Yi # So  [55] YI RADICAL QOT..YI RADICAL KE
+
+# Total code points: 1220
+
+# ================================================
+
+10300..1031E  ; Old_Italic # Lo  [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
+10320..10323  ; Old_Italic # No   [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+
+# Total code points: 35
+
+# ================================================
+
+10330..10340  ; Gothic # Lo  [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+10341         ; Gothic # Nl       GOTHIC LETTER NINETY
+10342..10349  ; Gothic # Lo   [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
+1034A         ; Gothic # Nl       GOTHIC LETTER NINE HUNDRED
+
+# Total code points: 27
+
+# ================================================
+
+10400..1044F  ; Deseret # L&  [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
+
+# Total code points: 80
+
+# ================================================
+
+0300..036F    ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0485..0486    ; Inherited # Mn   [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
+064B..0655    ; Inherited # Mn  [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
+0670          ; Inherited # Mn       ARABIC LETTER SUPERSCRIPT ALEF
+0951..0952    ; Inherited # Mn   [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
+1CD0..1CD2    ; Inherited # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD4..1CE0    ; Inherited # Mn  [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE2..1CE8    ; Inherited # Mn   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED          ; Inherited # Mn       VEDIC SIGN TIRYAK
+1DC0..1DE6    ; Inherited # Mn  [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
+1DFD..1DFF    ; Inherited # Mn   [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200C..200D    ; Inherited # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+20D0..20DC    ; Inherited # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0    ; Inherited # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E1          ; Inherited # Mn       COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4    ; Inherited # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+20E5..20F0    ; Inherited # Mn  [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+302A..302F    ; Inherited # Mn   [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A    ; Inherited # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+FE00..FE0F    ; Inherited # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FE20..FE26    ; Inherited # Mn   [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
+101FD         ; Inherited # Mn       PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+1D167..1D169  ; Inherited # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D17B..1D182  ; Inherited # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B  ; Inherited # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD  ; Inherited # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+E0100..E01EF  ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+
+# Total code points: 523
+
+# ================================================
+
+1700..170C    ; Tagalog # Lo  [13] TAGALOG LETTER A..TAGALOG LETTER YA
+170E..1711    ; Tagalog # Lo   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
+1712..1714    ; Tagalog # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+
+# Total code points: 20
+
+# ================================================
+
+1720..1731    ; Hanunoo # Lo  [18] HANUNOO LETTER A..HANUNOO LETTER HA
+1732..1734    ; Hanunoo # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+
+# Total code points: 21
+
+# ================================================
+
+1740..1751    ; Buhid # Lo  [18] BUHID LETTER A..BUHID LETTER HA
+1752..1753    ; Buhid # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+
+# Total code points: 20
+
+# ================================================
+
+1760..176C    ; Tagbanwa # Lo  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
+176E..1770    ; Tagbanwa # Lo   [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
+1772..1773    ; Tagbanwa # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+
+# Total code points: 18
+
+# ================================================
+
+1900..191C    ; Limbu # Lo  [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
+1920..1922    ; Limbu # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1923..1926    ; Limbu # Mc   [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+1927..1928    ; Limbu # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1929..192B    ; Limbu # Mc   [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+1930..1931    ; Limbu # Mc   [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+1932          ; Limbu # Mn       LIMBU SMALL LETTER ANUSVARA
+1933..1938    ; Limbu # Mc   [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+1939..193B    ; Limbu # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1940          ; Limbu # So       LIMBU SIGN LOO
+1944..1945    ; Limbu # Po   [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1946..194F    ; Limbu # Nd  [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
+
+# Total code points: 66
+
+# ================================================
+
+1950..196D    ; Tai_Le # Lo  [30] TAI LE LETTER KA..TAI LE LETTER AI
+1970..1974    ; Tai_Le # Lo   [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+
+# Total code points: 35
+
+# ================================================
+
+10000..1000B  ; Linear_B # Lo  [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
+1000D..10026  ; Linear_B # Lo  [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
+10028..1003A  ; Linear_B # Lo  [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
+1003C..1003D  ; Linear_B # Lo   [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
+1003F..1004D  ; Linear_B # Lo  [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
+10050..1005D  ; Linear_B # Lo  [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
+10080..100FA  ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
+
+# Total code points: 211
+
+# ================================================
+
+10380..1039D  ; Ugaritic # Lo  [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
+1039F         ; Ugaritic # Po       UGARITIC WORD DIVIDER
+
+# Total code points: 31
+
+# ================================================
+
+10450..1047F  ; Shavian # Lo  [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
+
+# Total code points: 48
+
+# ================================================
+
+10480..1049D  ; Osmanya # Lo  [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
+104A0..104A9  ; Osmanya # Nd  [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+
+# Total code points: 40
+
+# ================================================
+
+10800..10805  ; Cypriot # Lo   [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
+10808         ; Cypriot # Lo       CYPRIOT SYLLABLE JO
+1080A..10835  ; Cypriot # Lo  [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
+10837..10838  ; Cypriot # Lo   [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
+1083C         ; Cypriot # Lo       CYPRIOT SYLLABLE ZA
+1083F         ; Cypriot # Lo       CYPRIOT SYLLABLE ZO
+
+# Total code points: 55
+
+# ================================================
+
+2800..28FF    ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
+
+# Total code points: 256
+
+# ================================================
+
+1A00..1A16    ; Buginese # Lo  [23] BUGINESE LETTER KA..BUGINESE LETTER HA
+1A17..1A18    ; Buginese # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A19..1A1B    ; Buginese # Mc   [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A1E..1A1F    ; Buginese # Po   [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
+
+# Total code points: 30
+
+# ================================================
+
+03E2..03EF    ; Coptic # L&  [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI
+2C80..2CE4    ; Coptic # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
+2CE5..2CEA    ; Coptic # So   [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
+2CEB..2CEE    ; Coptic # L&   [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
+2CEF..2CF1    ; Coptic # Mn   [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2CF9..2CFC    ; Coptic # Po   [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
+2CFD          ; Coptic # No       COPTIC FRACTION ONE HALF
+2CFE..2CFF    ; Coptic # Po   [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
+
+# Total code points: 135
+
+# ================================================
+
+1980..19AB    ; New_Tai_Lue # Lo  [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+19B0..19C0    ; New_Tai_Lue # Mc  [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
+19C1..19C7    ; New_Tai_Lue # Lo   [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
+19C8..19C9    ; New_Tai_Lue # Mc   [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
+19D0..19DA    ; New_Tai_Lue # Nd  [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
+19DE..19DF    ; New_Tai_Lue # Po   [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
+
+# Total code points: 83
+
+# ================================================
+
+2C00..2C2E    ; Glagolitic # L&  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C30..2C5E    ; Glagolitic # L&  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+
+# Total code points: 94
+
+# ================================================
+
+2D30..2D65    ; Tifinagh # Lo  [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
+2D6F          ; Tifinagh # Lm       TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+
+# Total code points: 55
+
+# ================================================
+
+A800..A801    ; Syloti_Nagri # Lo   [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
+A802          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN DVISVARA
+A803..A805    ; Syloti_Nagri # Lo   [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
+A806          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN HASANTA
+A807..A80A    ; Syloti_Nagri # Lo   [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
+A80B          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN ANUSVARA
+A80C..A822    ; Syloti_Nagri # Lo  [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
+A823..A824    ; Syloti_Nagri # Mc   [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+A825..A826    ; Syloti_Nagri # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+A827          ; Syloti_Nagri # Mc       SYLOTI NAGRI VOWEL SIGN OO
+A828..A82B    ; Syloti_Nagri # So   [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
+
+# Total code points: 44
+
+# ================================================
+
+103A0..103C3  ; Old_Persian # Lo  [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
+103C8..103CF  ; Old_Persian # Lo   [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
+103D0         ; Old_Persian # Po       OLD PERSIAN WORD DIVIDER
+103D1..103D5  ; Old_Persian # Nl   [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
+
+# Total code points: 50
+
+# ================================================
+
+10A00         ; Kharoshthi # Lo       KHAROSHTHI LETTER A
+10A01..10A03  ; Kharoshthi # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06  ; Kharoshthi # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F  ; Kharoshthi # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+10A10..10A13  ; Kharoshthi # Lo   [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
+10A15..10A17  ; Kharoshthi # Lo   [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
+10A19..10A33  ; Kharoshthi # Lo  [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
+10A38..10A3A  ; Kharoshthi # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F         ; Kharoshthi # Mn       KHAROSHTHI VIRAMA
+10A40..10A47  ; Kharoshthi # No   [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
+10A50..10A58  ; Kharoshthi # Po   [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
+
+# Total code points: 65
+
+# ================================================
+
+1B00..1B03    ; Balinese # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B04          ; Balinese # Mc       BALINESE SIGN BISAH
+1B05..1B33    ; Balinese # Lo  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
+1B34          ; Balinese # Mn       BALINESE SIGN REREKAN
+1B35          ; Balinese # Mc       BALINESE VOWEL SIGN TEDUNG
+1B36..1B3A    ; Balinese # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B          ; Balinese # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3C          ; Balinese # Mn       BALINESE VOWEL SIGN LA LENGA
+1B3D..1B41    ; Balinese # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+1B42          ; Balinese # Mn       BALINESE VOWEL SIGN PEPET
+1B43..1B44    ; Balinese # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B45..1B4B    ; Balinese # Lo   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
+1B50..1B59    ; Balinese # Nd  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+1B5A..1B60    ; Balinese # Po   [7] BALINESE PANTI..BALINESE PAMENENG
+1B61..1B6A    ; Balinese # So  [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
+1B6B..1B73    ; Balinese # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B74..1B7C    ; Balinese # So   [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+
+# Total code points: 121
+
+# ================================================
+
+12000..1236E  ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
+12400..12462  ; Cuneiform # Nl  [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
+12470..12473  ; Cuneiform # Po   [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
+
+# Total code points: 982
+
+# ================================================
+
+10900..10915  ; Phoenician # Lo  [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
+10916..1091B  ; Phoenician # No   [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
+1091F         ; Phoenician # Po       PHOENICIAN WORD SEPARATOR
+
+# Total code points: 29
+
+# ================================================
+
+A840..A873    ; Phags_Pa # Lo  [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
+A874..A877    ; Phags_Pa # Po   [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
+
+# Total code points: 56
+
+# ================================================
+
+07C0..07C9    ; Nko # Nd  [10] NKO DIGIT ZERO..NKO DIGIT NINE
+07CA..07EA    ; Nko # Lo  [33] NKO LETTER A..NKO LETTER JONA RA
+07EB..07F3    ; Nko # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+07F4..07F5    ; Nko # Lm   [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+07F6          ; Nko # So       NKO SYMBOL OO DENNEN
+07F7..07F9    ; Nko # Po   [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
+07FA          ; Nko # Lm       NKO LAJANYALAN
+
+# Total code points: 59
+
+# ================================================
+
+1B80..1B81    ; Sundanese # Mn   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1B82          ; Sundanese # Mc       SUNDANESE SIGN PANGWISAD
+1B83..1BA0    ; Sundanese # Lo  [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
+1BA1          ; Sundanese # Mc       SUNDANESE CONSONANT SIGN PAMINGKAL
+1BA2..1BA5    ; Sundanese # Mn   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA6..1BA7    ; Sundanese # Mc   [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+1BA8..1BA9    ; Sundanese # Mn   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA          ; Sundanese # Mc       SUNDANESE SIGN PAMAAEH
+1BAE..1BAF    ; Sundanese # Lo   [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
+1BB0..1BB9    ; Sundanese # Nd  [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
+
+# Total code points: 55
+
+# ================================================
+
+1C00..1C23    ; Lepcha # Lo  [36] LEPCHA LETTER KA..LEPCHA LETTER A
+1C24..1C2B    ; Lepcha # Mc   [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+1C2C..1C33    ; Lepcha # Mn   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C34..1C35    ; Lepcha # Mc   [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+1C36..1C37    ; Lepcha # Mn   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1C3B..1C3F    ; Lepcha # Po   [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C40..1C49    ; Lepcha # Nd  [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+1C4D..1C4F    ; Lepcha # Lo   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
+
+# Total code points: 74
+
+# ================================================
+
+1C50..1C59    ; Ol_Chiki # Nd  [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
+1C5A..1C77    ; Ol_Chiki # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
+1C78..1C7D    ; Ol_Chiki # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1C7E..1C7F    ; Ol_Chiki # Po   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+
+# Total code points: 48
+
+# ================================================
+
+A500..A60B    ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
+A60C          ; Vai # Lm       VAI SYLLABLE LENGTHENER
+A60D..A60F    ; Vai # Po   [3] VAI COMMA..VAI QUESTION MARK
+A610..A61F    ; Vai # Lo  [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
+A620..A629    ; Vai # Nd  [10] VAI DIGIT ZERO..VAI DIGIT NINE
+A62A..A62B    ; Vai # Lo   [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
+
+# Total code points: 300
+
+# ================================================
+
+A880..A881    ; Saurashtra # Mc   [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+A882..A8B3    ; Saurashtra # Lo  [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
+A8B4..A8C3    ; Saurashtra # Mc  [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+A8C4          ; Saurashtra # Mn       SAURASHTRA SIGN VIRAMA
+A8CE..A8CF    ; Saurashtra # Po   [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A8D0..A8D9    ; Saurashtra # Nd  [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
+
+# Total code points: 81
+
+# ================================================
+
+A900..A909    ; Kayah_Li # Nd  [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
+A90A..A925    ; Kayah_Li # Lo  [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
+A926..A92D    ; Kayah_Li # Mn   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+A92E..A92F    ; Kayah_Li # Po   [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
+
+# Total code points: 48
+
+# ================================================
+
+A930..A946    ; Rejang # Lo  [23] REJANG LETTER KA..REJANG LETTER A
+A947..A951    ; Rejang # Mn  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A952..A953    ; Rejang # Mc   [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+A95F          ; Rejang # Po       REJANG SECTION MARK
+
+# Total code points: 37
+
+# ================================================
+
+10280..1029C  ; Lycian # Lo  [29] LYCIAN LETTER A..LYCIAN LETTER X
+
+# Total code points: 29
+
+# ================================================
+
+102A0..102D0  ; Carian # Lo  [49] CARIAN LETTER A..CARIAN LETTER UUU3
+
+# Total code points: 49
+
+# ================================================
+
+10920..10939  ; Lydian # Lo  [26] LYDIAN LETTER A..LYDIAN LETTER C
+1093F         ; Lydian # Po       LYDIAN TRIANGULAR MARK
+
+# Total code points: 27
+
+# ================================================
+
+AA00..AA28    ; Cham # Lo  [41] CHAM LETTER A..CHAM LETTER HA
+AA29..AA2E    ; Cham # Mn   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA2F..AA30    ; Cham # Mc   [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+AA31..AA32    ; Cham # Mn   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA33..AA34    ; Cham # Mc   [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+AA35..AA36    ; Cham # Mn   [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA40..AA42    ; Cham # Lo   [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
+AA43          ; Cham # Mn       CHAM CONSONANT SIGN FINAL NG
+AA44..AA4B    ; Cham # Lo   [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
+AA4C          ; Cham # Mn       CHAM CONSONANT SIGN FINAL M
+AA4D          ; Cham # Mc       CHAM CONSONANT SIGN FINAL H
+AA50..AA59    ; Cham # Nd  [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
+AA5C..AA5F    ; Cham # Po   [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
+
+# Total code points: 83
+
+# ================================================
+
+1A20..1A54    ; Tai_Tham # Lo  [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
+1A55          ; Tai_Tham # Mc       TAI THAM CONSONANT SIGN MEDIAL RA
+1A56          ; Tai_Tham # Mn       TAI THAM CONSONANT SIGN MEDIAL LA
+1A57          ; Tai_Tham # Mc       TAI THAM CONSONANT SIGN LA TANG LAI
+1A58..1A5E    ; Tai_Tham # Mn   [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A60          ; Tai_Tham # Mn       TAI THAM SIGN SAKOT
+1A61          ; Tai_Tham # Mc       TAI THAM VOWEL SIGN A
+1A62          ; Tai_Tham # Mn       TAI THAM VOWEL SIGN MAI SAT
+1A63..1A64    ; Tai_Tham # Mc   [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+1A65..1A6C    ; Tai_Tham # Mn   [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A6D..1A72    ; Tai_Tham # Mc   [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+1A73..1A7C    ; Tai_Tham # Mn  [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F          ; Tai_Tham # Mn       TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1A80..1A89    ; Tai_Tham # Nd  [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
+1A90..1A99    ; Tai_Tham # Nd  [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+1AA0..1AA6    ; Tai_Tham # Po   [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+1AA7          ; Tai_Tham # Lm       TAI THAM SIGN MAI YAMOK
+1AA8..1AAD    ; Tai_Tham # Po   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+
+# Total code points: 127
+
+# ================================================
+
+AA80..AAAF    ; Tai_Viet # Lo  [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
+AAB0          ; Tai_Viet # Mn       TAI VIET MAI KANG
+AAB1          ; Tai_Viet # Lo       TAI VIET VOWEL AA
+AAB2..AAB4    ; Tai_Viet # Mn   [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB5..AAB6    ; Tai_Viet # Lo   [2] TAI VIET VOWEL E..TAI VIET VOWEL O
+AAB7..AAB8    ; Tai_Viet # Mn   [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AAB9..AABD    ; Tai_Viet # Lo   [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
+AABE..AABF    ; Tai_Viet # Mn   [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+AAC0          ; Tai_Viet # Lo       TAI VIET TONE MAI NUENG
+AAC1          ; Tai_Viet # Mn       TAI VIET TONE MAI THO
+AAC2          ; Tai_Viet # Lo       TAI VIET TONE MAI SONG
+AADB..AADC    ; Tai_Viet # Lo   [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
+AADD          ; Tai_Viet # Lm       TAI VIET SYMBOL SAM
+AADE..AADF    ; Tai_Viet # Po   [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
+
+# Total code points: 72
+
+# ================================================
+
+10B00..10B35  ; Avestan # Lo  [54] AVESTAN LETTER A..AVESTAN LETTER HE
+10B39..10B3F  ; Avestan # Po   [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+
+# Total code points: 61
+
+# ================================================
+
+13000..1342E  ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
+
+# Total code points: 1071
+
+# ================================================
+
+0800..0815    ; Samaritan # Lo  [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
+0816..0819    ; Samaritan # Mn   [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+081A          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER EPENTHETIC YUT
+081B..0823    ; Samaritan # Mn   [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0824          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER SHORT A
+0825..0827    ; Samaritan # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0828          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER I
+0829..082D    ; Samaritan # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+0830..083E    ; Samaritan # Po  [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+
+# Total code points: 61
+
+# ================================================
+
+A4D0..A4F7    ; Lisu # Lo  [40] LISU LETTER BA..LISU LETTER OE
+A4F8..A4FD    ; Lisu # Lm   [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
+A4FE..A4FF    ; Lisu # Po   [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+
+# Total code points: 48
+
+# ================================================
+
+A6A0..A6E5    ; Bamum # Lo  [70] BAMUM LETTER A..BAMUM LETTER KI
+A6E6..A6EF    ; Bamum # Nl  [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
+A6F0..A6F1    ; Bamum # Mn   [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A6F2..A6F7    ; Bamum # Po   [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
+
+# Total code points: 88
+
+# ================================================
+
+A980..A982    ; Javanese # Mn   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+A983          ; Javanese # Mc       JAVANESE SIGN WIGNYAN
+A984..A9B2    ; Javanese # Lo  [47] JAVANESE LETTER A..JAVANESE LETTER HA
+A9B3          ; Javanese # Mn       JAVANESE SIGN CECAK TELU
+A9B4..A9B5    ; Javanese # Mc   [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+A9B6..A9B9    ; Javanese # Mn   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BA..A9BB    ; Javanese # Mc   [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+A9BC          ; Javanese # Mn       JAVANESE VOWEL SIGN PEPET
+A9BD..A9C0    ; Javanese # Mc   [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
+A9C1..A9CD    ; Javanese # Po  [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
+A9CF          ; Javanese # Lm       JAVANESE PANGRANGKEP
+A9D0..A9D9    ; Javanese # Nd  [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
+A9DE..A9DF    ; Javanese # Po   [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
+
+# Total code points: 91
+
+# ================================================
+
+ABC0..ABE2    ; Meetei_Mayek # Lo  [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
+ABE3..ABE4    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ABE5          ; Meetei_Mayek # Mn       MEETEI MAYEK VOWEL SIGN ANAP
+ABE6..ABE7    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ABE8          ; Meetei_Mayek # Mn       MEETEI MAYEK VOWEL SIGN UNAP
+ABE9..ABEA    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+ABEB          ; Meetei_Mayek # Po       MEETEI MAYEK CHEIKHEI
+ABEC          ; Meetei_Mayek # Mc       MEETEI MAYEK LUM IYEK
+ABED          ; Meetei_Mayek # Mn       MEETEI MAYEK APUN IYEK
+ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
+
+# Total code points: 56
+
+# ================================================
+
+10840..10855  ; Imperial_Aramaic # Lo  [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
+10857         ; Imperial_Aramaic # Po       IMPERIAL ARAMAIC SECTION SIGN
+10858..1085F  ; Imperial_Aramaic # No   [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
+
+# Total code points: 31
+
+# ================================================
+
+10A60..10A7C  ; Old_South_Arabian # Lo  [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
+10A7D..10A7E  ; Old_South_Arabian # No   [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
+10A7F         ; Old_South_Arabian # Po       OLD SOUTH ARABIAN NUMERIC INDICATOR
+
+# Total code points: 32
+
+# ================================================
+
+10B40..10B55  ; Inscriptional_Parthian # Lo  [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
+10B58..10B5F  ; Inscriptional_Parthian # No   [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
+
+# Total code points: 30
+
+# ================================================
+
+10B60..10B72  ; Inscriptional_Pahlavi # Lo  [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
+10B78..10B7F  ; Inscriptional_Pahlavi # No   [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
+
+# Total code points: 27
+
+# ================================================
+
+10C00..10C48  ; Old_Turkic # Lo  [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
+
+# Total code points: 73
+
+# ================================================
+
+11080..11081  ; Kaithi # Mn   [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
+11082         ; Kaithi # Mc       KAITHI SIGN VISARGA
+11083..110AF  ; Kaithi # Lo  [45] KAITHI LETTER A..KAITHI LETTER HA
+110B0..110B2  ; Kaithi # Mc   [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
+110B3..110B6  ; Kaithi # Mn   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B7..110B8  ; Kaithi # Mc   [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
+110B9..110BA  ; Kaithi # Mn   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110BB..110BC  ; Kaithi # Po   [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
+110BD         ; Kaithi # Cf       KAITHI NUMBER SIGN
+110BE..110C1  ; Kaithi # Po   [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+
+# Total code points: 66
+
+# EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/make/tools/src/build/tools/generatecharacter/CharacterName.java	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,154 @@
+package build.tools.generatecharacter;
+
+import java.io.*;
+import java.nio.*;
+import java.util.*;
+import java.util.zip.*;
+
+/**
+ * Build-time tool that packs the character names found in UnicodeData.txt
+ * into a compressed binary table.
+ *
+ * <p>The output file is a deflated stream laid out as:
+ * <pre>
+ *   int   total      size of the code point pool plus the name pool
+ *   int   cpLen      size of the code point pool (offset of the name pool)
+ *   byte  cpPool[]   one record per named code point (see below)
+ *   byte  names[]    all names concatenated, ASCII, no separators
+ * </pre>
+ * A code point that directly follows the previous one is recorded as a
+ * single byte holding its name length.  Any other code point starts a new
+ * segment: a zero byte followed by an int whose top 8 bits are the name
+ * length and whose low 24 bits are the code point.
+ */
+public class CharacterName {
+
+    /**
+     * Generates the name table.
+     *
+     * @param args {@code args[0]} is the UnicodeData.txt file to read and
+     *             {@code args[1]} is the file to write
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
+            System.exit(1);
+        }
+
+        FileReader reader = null;
+        DataOutputStream dos = null;
+        boolean outputStarted = false;
+        boolean done = false;
+        try {
+            reader = new FileReader(args[0]);
+            BufferedReader bfr = new BufferedReader(reader);
+            String line = null;
+
+            StringBuilder namePool = new StringBuilder();
+            byte[] cpPoolBytes = new byte[0x100000];
+            ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
+            // No code point can follow this sentinel, so the first entry
+            // written always opens a segment.
+            int lastCp = -2;
+
+            while ((line = bfr.readLine()) != null) {
+                if (line.startsWith("#"))
+                    continue;
+                UnicodeSpec spec = UnicodeSpec.parse(line);
+                if (spec == null)
+                    continue;
+
+                int cp = spec.getCodePoint();
+                String name = spec.getName();
+                if (name.equals("<control>") && spec.getOldName() != null) {
+                    // Use the old name in place of "<control>"; skip the
+                    // entry when the old name is empty.
+                    if (spec.getOldName().length() == 0)
+                        continue;
+                    name = spec.getOldName();
+                } else if (name.startsWith("<")) {
+                    /*
+                      Bracketed entries are not names and are skipped, e.g.
+                      3400    <CJK Ideograph Extension A, First>
+                      4db5    <CJK Ideograph Extension A, Last>
+                      4e00    <CJK Ideograph, First>
+                      9fc3    <CJK Ideograph, Last>
+                      ac00    <Hangul Syllable, First>
+                      d7a3    <Hangul Syllable, Last>
+                      d800    <Non Private Use High Surrogate, First>
+                      db7f    <Non Private Use High Surrogate, Last>
+                      db80    <Private Use High Surrogate, First>
+                      dbff    <Private Use High Surrogate, Last>
+                      dc00    <Low Surrogate, First>
+                      dfff    <Low Surrogate, Last>
+                      e000    <Private Use, First>
+                      f8ff    <Private Use, Last>
+                     20000    <CJK Ideograph Extension B, First>
+                     2a6d6    <CJK Ideograph Extension B, Last>
+                     f0000    <Plane 15 Private Use, First>
+                     ffffd    <Plane 15 Private Use, Last>
+                    */
+                    continue;
+                }
+
+                // Zero is the segment start flag and lengths are stored
+                // in 8 bits, so other lengths cannot be represented.
+                if (name.length() == 0 || name.length() > 0xff) {
+                    throw new IllegalArgumentException(
+                        "Cannot encode name length " + name.length()
+                        + " for code point 0x" + Integer.toHexString(cp));
+                }
+
+                if (cp == lastCp + 1) {
+                    cpBB.put((byte)name.length());
+                } else {
+                    cpBB.put((byte)0);  // segment start flag
+                    cpBB.putInt((name.length() << 24) | (cp & 0xffffff));
+                }
+                namePool.append(name);
+                lastCp = cp;
+            }
+
+            byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
+            int cpLen = cpBB.position();
+            int total = cpLen + namePoolBytes.length;
+
+            outputStarted = true;
+            dos = new DataOutputStream(
+                      new DeflaterOutputStream(
+                          new FileOutputStream(args[1])));
+            dos.writeInt(total);  // total
+            dos.writeInt(cpLen);  // nameOff
+            dos.write(cpPoolBytes, 0, cpLen);
+            dos.write(namePoolBytes);
+            // Closed inside the try so that a failure to finish the
+            // compressed stream is reported like any other error.
+            dos.close();
+            dos = null;
+            done = true;
+        } catch (Throwable e) {
+            System.out.println("Unexpected exception:");
+            e.printStackTrace();
+        } finally {
+            closeAndReport(dos);
+            closeAndReport(reader);
+        }
+
+        if (!done) {
+            // Do not leave a truncated table behind, and make the
+            // failure visible to the invoking build.
+            if (outputStarted)
+                new File(args[1]).delete();
+            System.exit(1);
+        }
+    }
+
+    /** Closes {@code c} if it is non-null, printing any failure. */
+    private static void closeAndReport(Closeable c) {
+        if (c != null) {
+            try {
+                c.close();
+            } catch (Throwable t) { t.printStackTrace(); }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,241 @@
+import java.util.regex.*;
+import java.util.*;
+import java.io.*;
+
+/**
+ * Stand-alone tool that reads a Scripts.txt style file and prints Java
+ * source for an enum of the script names plus two parallel lookup tables:
+ * the first code point of each range and the script constant of that range.
+ * This is the code needed for j.l.C.UnicodeScript.
+ */
+public class CharacterScript {
+
+    /** Hook for test output; the print statement is commented out. */
+    static void fortest(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /** Writes formatted generator output to standard output. */
+    static void print(String fmt, Object... o) {
+        System.out.printf(fmt, o);
+    }
+
+    /** Hook for diagnostics; the print statement is commented out. */
+    static void debug(String fmt, Object... o) {
+        //System.out.printf(fmt, o);
+    }
+
+    /**
+     * Appends the range {@code start..end} to {@code runs}, giving
+     * {@code name} the next free id the first time it is seen.
+     */
+    private static void addRun(List<int[]> runs, Map<String,Integer> ids,
+                               int start, int end, String name) {
+        Integer id = ids.get(name);
+        if (id == null) {
+            id = ids.size();
+            ids.put(name, id);
+        }
+        runs.add(new int[] { start, end, id });
+    }
+
+    /** Returns the enum constant name for a script id, or UNKNOWN for -1. */
+    private static String constantName(String[] names, int id) {
+        return (id == -1) ? "UNKNOWN" : names[id].toUpperCase(Locale.US);
+    }
+
+    /**
+     * Runs the generator.
+     *
+     * @param args a single element, the path of the script data file to read
+     */
+    public static void main(String args[]){
+        if (args.length != 1) {
+            System.out.println("Usage: java CharacterScript Scripts.txt");
+            System.exit(1);
+        }
+
+        BufferedReader sbfr = null;
+        boolean done = false;
+        try {
+            sbfr = new BufferedReader(new FileReader(args[0]));
+            HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();
+            String line = null;
+
+            Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+
+            int prevS = -1;
+            int prevE = -1;
+            String prevN = null;
+            // One element per run: { first cp, last cp, script id }.
+            List<int[]> runs = new ArrayList<int[]>();
+
+            while ((line = sbfr.readLine()) != null) {
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2)==null)?start
+                              :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3);
+                    if (name.equals(prevN) && start == prevE + 1) {
+                        // Extends the pending run of the same script.
+                        prevE = end;
+                    } else {
+                        if (prevS != -1) {
+                            addRun(runs, scriptMap, prevS, prevE, prevN);
+                        }
+                        debug("%x-%x\t%s%n", prevS, prevE, prevN);
+                        prevS = start; prevE = end; prevN = name;
+                    }
+                } else {
+                    debug("Warning: Unrecognized line <%s>%n", line);
+                }
+            }
+
+            if (prevS == -1) {
+                throw new IOException("No script ranges found in " + args[0]);
+            }
+            //last one.
+            addRun(runs, scriptMap, prevS, prevE, prevN);
+
+            debug("%x-%x\t%s%n", prevS, prevE, prevN);
+            debug("-----------------%n");
+            debug("Total scripts=%s%n", scriptMap.size());
+            debug("-----------------%n%n");
+
+            int[][] scripts = runs.toArray(new int[runs.size()][]);
+            int scriptSize = scripts.length;
+
+            String[] names = new String[scriptMap.size()];
+            for (String name: scriptMap.keySet()) {
+                names[scriptMap.get(name).intValue()] = name;
+            }
+
+            // NOTE(review): this per code point listing goes to standard
+            // output ahead of the generated source; confirm it is wanted.
+            for (int j = 0; j < scriptSize; j++) {
+                String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);
+                for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {
+                    System.out.printf("%05X    %s%n", cp, name);
+                }
+            }
+
+            Arrays.sort(scripts, 0, scriptSize,
+                        new Comparator<int[]>() {
+                            public int compare(int[] a1, int[] a2) {
+                                return a1[0] - a2[0];
+                            }
+                         });
+
+            // Consolidation: there are lots of "reserved" code points
+            // embedded in those otherwise "sequential" blocks.
+            // To make the lookup table smaller, we combine those
+            // separated segments with the assumption that the lookup
+            // implementation checks
+            //    Character.getType() !=  Character.UNASSIGNED
+            // first (return UNKNOWN for unassigned)
+            // The gap test below asks the JVM that is running this tool.
+
+            ArrayList<int[]> list = new ArrayList<int[]>();
+            list.add(scripts[0]);
+
+            int[] last = scripts[0];
+            for (int i = 1; i < scriptSize; i++) {
+                if (scripts[i][0] != (last[1] + 1)) {
+
+                    boolean isNotUnassigned = false;
+                    for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
+                        if (Character.getType(cp) != Character.UNASSIGNED) {
+                            isNotUnassigned = true;
+                            debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
+                            break;
+                        }
+                    }
+                    if (isNotUnassigned) {
+                        // surrogates only?
+                        int[] a = new int[3];
+                        a[0] = last[1] + 1;
+                        a[1] = scripts[i][0] - 1;
+                        a[2] = -1;  // unknown
+                        list.add(a);
+                    } else {
+                        if (last[2] == scripts[i][2]) {
+                            //combine
+                            last[1] = scripts[i][1];
+                            continue;
+                        } else {
+                            // expand last
+                            last[1] = scripts[i][0] - 1;
+                        }
+                    }
+                }
+                list.add(scripts[i]);
+                last = scripts[i];
+            }
+
+            for (int[] a : list) {
+                debug("0x%05x, 0x%05x  %s%n", a[0], a[1], constantName(names, a[2]));
+            }
+            debug("--->total=%d%n", list.size());
+
+
+            //////////////////OUTPUT//////////////////////////////////
+            print("public class Scripts {%n%n");
+            print("    public static enum UnicodeScript {%n");
+            for (int i = 0; i < names.length; i++) {
+                print("        /**%n         * Unicode script \"%s\".%n         */%n", names[i]);
+                print("        %s,%n%n",  names[i].toUpperCase(Locale.US));
+            }
+            print("        /**%n         * Unicode script \"Unknown\".%n         */%n        UNKNOWN;%n%n");
+
+
+            // lookup table
+            print("        private static final int[] scriptStarts = {%n");
+            for (int[] a : list) {
+                String name = constantName(names, a[2]);
+                if (a[0] < 0x10000)
+                    print("            0x%04X,   // %04X..%04X; %s%n",
+                          a[0], a[0], a[1], name);
+                else
+                    print("            0x%05X,  // %05X..%05X; %s%n",
+                          a[0], a[0], a[1], name);
+            }
+            last = list.get(list.size() -1);
+            if (last[1] != Character.MAX_CODE_POINT)
+                print("            0x%05X   // %05X..%06X; %s%n",
+                      last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
+                      "UNKNOWN");
+            print("%n        };%n%n");
+
+            print("        private static final UnicodeScript[] scripts = {%n");
+            for (int[] a : list) {
+                print("            %s,%n", constantName(names, a[2]));
+            }
+
+            if (last[1] != Character.MAX_CODE_POINT)
+                print("            UNKNOWN%n");
+            print("        };%n");
+            print("    }%n");
+            print("}%n");
+
+            done = true;
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            if (sbfr != null) {
+                try {
+                    sbfr.close();
+                } catch (IOException ioe) {
+                    ioe.printStackTrace();
+                }
+            }
+        }
+
+        if (!done) {
+            System.exit(1);
+        }
+    }
+}
--- a/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java	Tue May 18 15:36:47 2010 -0700
@@ -35,6 +35,8 @@
 import java.io.FileWriter;
 import java.io.File;
 
+import build.tools.generatecharacter.CharacterName;
+
 /**
  * This program generates the source code for the class java.lang.Character.
  * It also generates native C code that can perform the same operations.
--- a/jdk/src/share/classes/java/lang/Character.java	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/src/share/classes/java/lang/Character.java	Tue May 18 15:36:47 2010 -0700
@@ -24,6 +24,7 @@
  */
 
 package java.lang;
+import java.util.Arrays;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Locale;
@@ -2547,6 +2548,1241 @@
 
 
     /**
+     * A family of character subsets representing the character scripts
+     * defined in the <a href="http://www.unicode.org/reports/tr24/">
+     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
+     * character is assigned to a single Unicode script, either a specific
+     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
+     * one of the following three special values,
+     * {@link Character.UnicodeScript#INHERITED Inherited},
+     * {@link Character.UnicodeScript#COMMON Common} or
+     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
+     *
+     * @since 1.7
+     */
+    public static enum UnicodeScript {
+        /**
+         * Unicode script "Common".
+         */
+        COMMON,
+
+        /**
+         * Unicode script "Latin".
+         */
+        LATIN,
+
+        /**
+         * Unicode script "Greek".
+         */
+        GREEK,
+
+        /**
+         * Unicode script "Cyrillic".
+         */
+        CYRILLIC,
+
+        /**
+         * Unicode script "Armenian".
+         */
+        ARMENIAN,
+
+        /**
+         * Unicode script "Hebrew".
+         */
+        HEBREW,
+
+        /**
+         * Unicode script "Arabic".
+         */
+        ARABIC,
+
+        /**
+         * Unicode script "Syriac".
+         */
+        SYRIAC,
+
+        /**
+         * Unicode script "Thaana".
+         */
+        THAANA,
+
+        /**
+         * Unicode script "Devanagari".
+         */
+        DEVANAGARI,
+
+        /**
+         * Unicode script "Bengali".
+         */
+        BENGALI,
+
+        /**
+         * Unicode script "Gurmukhi".
+         */
+        GURMUKHI,
+
+        /**
+         * Unicode script "Gujarati".
+         */
+        GUJARATI,
+
+        /**
+         * Unicode script "Oriya".
+         */
+        ORIYA,
+
+        /**
+         * Unicode script "Tamil".
+         */
+        TAMIL,
+
+        /**
+         * Unicode script "Telugu".
+         */
+        TELUGU,
+
+        /**
+         * Unicode script "Kannada".
+         */
+        KANNADA,
+
+        /**
+         * Unicode script "Malayalam".
+         */
+        MALAYALAM,
+
+        /**
+         * Unicode script "Sinhala".
+         */
+        SINHALA,
+
+        /**
+         * Unicode script "Thai".
+         */
+        THAI,
+
+        /**
+         * Unicode script "Lao".
+         */
+        LAO,
+
+        /**
+         * Unicode script "Tibetan".
+         */
+        TIBETAN,
+
+        /**
+         * Unicode script "Myanmar".
+         */
+        MYANMAR,
+
+        /**
+         * Unicode script "Georgian".
+         */
+        GEORGIAN,
+
+        /**
+         * Unicode script "Hangul".
+         */
+        HANGUL,
+
+        /**
+         * Unicode script "Ethiopic".
+         */
+        ETHIOPIC,
+
+        /**
+         * Unicode script "Cherokee".
+         */
+        CHEROKEE,
+
+        /**
+         * Unicode script "Canadian_Aboriginal".
+         */
+        CANADIAN_ABORIGINAL,
+
+        /**
+         * Unicode script "Ogham".
+         */
+        OGHAM,
+
+        /**
+         * Unicode script "Runic".
+         */
+        RUNIC,
+
+        /**
+         * Unicode script "Khmer".
+         */
+        KHMER,
+
+        /**
+         * Unicode script "Mongolian".
+         */
+        MONGOLIAN,
+
+        /**
+         * Unicode script "Hiragana".
+         */
+        HIRAGANA,
+
+        /**
+         * Unicode script "Katakana".
+         */
+        KATAKANA,
+
+        /**
+         * Unicode script "Bopomofo".
+         */
+        BOPOMOFO,
+
+        /**
+         * Unicode script "Han".
+         */
+        HAN,
+
+        /**
+         * Unicode script "Yi".
+         */
+        YI,
+
+        /**
+         * Unicode script "Old_Italic".
+         */
+        OLD_ITALIC,
+
+        /**
+         * Unicode script "Gothic".
+         */
+        GOTHIC,
+
+        /**
+         * Unicode script "Deseret".
+         */
+        DESERET,
+
+        /**
+         * Unicode script "Inherited".
+         */
+        INHERITED,
+
+        /**
+         * Unicode script "Tagalog".
+         */
+        TAGALOG,
+
+        /**
+         * Unicode script "Hanunoo".
+         */
+        HANUNOO,
+
+        /**
+         * Unicode script "Buhid".
+         */
+        BUHID,
+
+        /**
+         * Unicode script "Tagbanwa".
+         */
+        TAGBANWA,
+
+        /**
+         * Unicode script "Limbu".
+         */
+        LIMBU,
+
+        /**
+         * Unicode script "Tai_Le".
+         */
+        TAI_LE,
+
+        /**
+         * Unicode script "Linear_B".
+         */
+        LINEAR_B,
+
+        /**
+         * Unicode script "Ugaritic".
+         */
+        UGARITIC,
+
+        /**
+         * Unicode script "Shavian".
+         */
+        SHAVIAN,
+
+        /**
+         * Unicode script "Osmanya".
+         */
+        OSMANYA,
+
+        /**
+         * Unicode script "Cypriot".
+         */
+        CYPRIOT,
+
+        /**
+         * Unicode script "Braille".
+         */
+        BRAILLE,
+
+        /**
+         * Unicode script "Buginese".
+         */
+        BUGINESE,
+
+        /**
+         * Unicode script "Coptic".
+         */
+        COPTIC,
+
+        /**
+         * Unicode script "New_Tai_Lue".
+         */
+        NEW_TAI_LUE,
+
+        /**
+         * Unicode script "Glagolitic".
+         */
+        GLAGOLITIC,
+
+        /**
+         * Unicode script "Tifinagh".
+         */
+        TIFINAGH,
+
+        /**
+         * Unicode script "Syloti_Nagri".
+         */
+        SYLOTI_NAGRI,
+
+        /**
+         * Unicode script "Old_Persian".
+         */
+        OLD_PERSIAN,
+
+        /**
+         * Unicode script "Kharoshthi".
+         */
+        KHAROSHTHI,
+
+        /**
+         * Unicode script "Balinese".
+         */
+        BALINESE,
+
+        /**
+         * Unicode script "Cuneiform".
+         */
+        CUNEIFORM,
+
+        /**
+         * Unicode script "Phoenician".
+         */
+        PHOENICIAN,
+
+        /**
+         * Unicode script "Phags_Pa".
+         */
+        PHAGS_PA,
+
+        /**
+         * Unicode script "Nko".
+         */
+        NKO,
+
+        /**
+         * Unicode script "Sundanese".
+         */
+        SUNDANESE,
+
+        /**
+         * Unicode script "Lepcha".
+         */
+        LEPCHA,
+
+        /**
+         * Unicode script "Ol_Chiki".
+         */
+        OL_CHIKI,
+
+        /**
+         * Unicode script "Vai".
+         */
+        VAI,
+
+        /**
+         * Unicode script "Saurashtra".
+         */
+        SAURASHTRA,
+
+        /**
+         * Unicode script "Kayah_Li".
+         */
+        KAYAH_LI,
+
+        /**
+         * Unicode script "Rejang".
+         */
+        REJANG,
+
+        /**
+         * Unicode script "Lycian".
+         */
+        LYCIAN,
+
+        /**
+         * Unicode script "Carian".
+         */
+        CARIAN,
+
+        /**
+         * Unicode script "Lydian".
+         */
+        LYDIAN,
+
+        /**
+         * Unicode script "Cham".
+         */
+        CHAM,
+
+        /**
+         * Unicode script "Tai_Tham".
+         */
+        TAI_THAM,
+
+        /**
+         * Unicode script "Tai_Viet".
+         */
+        TAI_VIET,
+
+        /**
+         * Unicode script "Avestan".
+         */
+        AVESTAN,
+
+        /**
+         * Unicode script "Egyptian_Hieroglyphs".
+         */
+        EGYPTIAN_HIEROGLYPHS,
+
+        /**
+         * Unicode script "Samaritan".
+         */
+        SAMARITAN,
+
+        /**
+         * Unicode script "Lisu".
+         */
+        LISU,
+
+        /**
+         * Unicode script "Bamum".
+         */
+        BAMUM,
+
+        /**
+         * Unicode script "Javanese".
+         */
+        JAVANESE,
+
+        /**
+         * Unicode script "Meetei_Mayek".
+         */
+        MEETEI_MAYEK,
+
+        /**
+         * Unicode script "Imperial_Aramaic".
+         */
+        IMPERIAL_ARAMAIC,
+
+        /**
+         * Unicode script "Old_South_Arabian".
+         */
+        OLD_SOUTH_ARABIAN,
+
+        /**
+         * Unicode script "Inscriptional_Parthian".
+         */
+        INSCRIPTIONAL_PARTHIAN,
+
+        /**
+         * Unicode script "Inscriptional_Pahlavi".
+         */
+        INSCRIPTIONAL_PAHLAVI,
+
+        /**
+         * Unicode script "Old_Turkic".
+         */
+        OLD_TURKIC,
+
+        /**
+         * Unicode script "Kaithi".
+         */
+        KAITHI,
+
+        /**
+         * Unicode script "Unknown".
+         */
+        UNKNOWN;
+
+        private static final int[] scriptStarts = {
+            0x0000,   // 0000..0040; COMMON
+            0x0041,   // 0041..005A; LATIN
+            0x005B,   // 005B..0060; COMMON
+            0x0061,   // 0061..007A; LATIN
+            0x007B,   // 007B..00A9; COMMON
+            0x00AA,   // 00AA..00AA; LATIN
+            0x00AB,   // 00AB..00B9; COMMON
+            0x00BA,   // 00BA..00BA; LATIN
+            0x00BB,   // 00BB..00BF; COMMON
+            0x00C0,   // 00C0..00D6; LATIN
+            0x00D7,   // 00D7..00D7; COMMON
+            0x00D8,   // 00D8..00F6; LATIN
+            0x00F7,   // 00F7..00F7; COMMON
+            0x00F8,   // 00F8..02B8; LATIN
+            0x02B9,   // 02B9..02DF; COMMON
+            0x02E0,   // 02E0..02E4; LATIN
+            0x02E5,   // 02E5..02FF; COMMON
+            0x0300,   // 0300..036F; INHERITED
+            0x0370,   // 0370..0373; GREEK
+            0x0374,   // 0374..0374; COMMON
+            0x0375,   // 0375..037D; GREEK
+            0x037E,   // 037E..0383; COMMON
+            0x0384,   // 0384..0384; GREEK
+            0x0385,   // 0385..0385; COMMON
+            0x0386,   // 0386..0386; GREEK
+            0x0387,   // 0387..0387; COMMON
+            0x0388,   // 0388..03E1; GREEK
+            0x03E2,   // 03E2..03EF; COPTIC
+            0x03F0,   // 03F0..03FF; GREEK
+            0x0400,   // 0400..0484; CYRILLIC
+            0x0485,   // 0485..0486; INHERITED
+            0x0487,   // 0487..0530; CYRILLIC
+            0x0531,   // 0531..0588; ARMENIAN
+            0x0589,   // 0589..0589; COMMON
+            0x058A,   // 058A..0590; ARMENIAN
+            0x0591,   // 0591..05FF; HEBREW
+            0x0600,   // 0600..0605; COMMON
+            0x0606,   // 0606..060B; ARABIC
+            0x060C,   // 060C..060C; COMMON
+            0x060D,   // 060D..061A; ARABIC
+            0x061B,   // 061B..061D; COMMON
+            0x061E,   // 061E..061E; ARABIC
+            0x061F,   // 061F..0620; COMMON
+            0x0621,   // 0621..063F; ARABIC
+            0x0640,   // 0640..0640; COMMON
+            0x0641,   // 0641..064A; ARABIC
+            0x064B,   // 064B..0655; INHERITED
+            0x0656,   // 0656..065F; ARABIC
+            0x0660,   // 0660..0669; COMMON
+            0x066A,   // 066A..066F; ARABIC
+            0x0670,   // 0670..0670; INHERITED
+            0x0671,   // 0671..06DC; ARABIC
+            0x06DD,   // 06DD..06DD; COMMON
+            0x06DE,   // 06DE..06FF; ARABIC
+            0x0700,   // 0700..074F; SYRIAC
+            0x0750,   // 0750..077F; ARABIC
+            0x0780,   // 0780..07BF; THAANA
+            0x07C0,   // 07C0..07FF; NKO
+            0x0800,   // 0800..08FF; SAMARITAN
+            0x0900,   // 0900..0950; DEVANAGARI
+            0x0951,   // 0951..0952; INHERITED
+            0x0953,   // 0953..0963; DEVANAGARI
+            0x0964,   // 0964..0965; COMMON
+            0x0966,   // 0966..096F; DEVANAGARI
+            0x0970,   // 0970..0970; COMMON
+            0x0971,   // 0971..0980; DEVANAGARI
+            0x0981,   // 0981..0A00; BENGALI
+            0x0A01,   // 0A01..0A80; GURMUKHI
+            0x0A81,   // 0A81..0B00; GUJARATI
+            0x0B01,   // 0B01..0B81; ORIYA
+            0x0B82,   // 0B82..0C00; TAMIL
+            0x0C01,   // 0C01..0C81; TELUGU
+            0x0C82,   // 0C82..0CF0; KANNADA
+            0x0CF1,   // 0CF1..0D01; COMMON
+            0x0D02,   // 0D02..0D81; MALAYALAM
+            0x0D82,   // 0D82..0E00; SINHALA
+            0x0E01,   // 0E01..0E3E; THAI
+            0x0E3F,   // 0E3F..0E3F; COMMON
+            0x0E40,   // 0E40..0E80; THAI
+            0x0E81,   // 0E81..0EFF; LAO
+            0x0F00,   // 0F00..0FD4; TIBETAN
+            0x0FD5,   // 0FD5..0FFF; COMMON
+            0x1000,   // 1000..109F; MYANMAR
+            0x10A0,   // 10A0..10FA; GEORGIAN
+            0x10FB,   // 10FB..10FB; COMMON
+            0x10FC,   // 10FC..10FF; GEORGIAN
+            0x1100,   // 1100..11FF; HANGUL
+            0x1200,   // 1200..139F; ETHIOPIC
+            0x13A0,   // 13A0..13FF; CHEROKEE
+            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
+            0x1680,   // 1680..169F; OGHAM
+            0x16A0,   // 16A0..16EA; RUNIC
+            0x16EB,   // 16EB..16ED; COMMON
+            0x16EE,   // 16EE..16FF; RUNIC
+            0x1700,   // 1700..171F; TAGALOG
+            0x1720,   // 1720..1734; HANUNOO
+            0x1735,   // 1735..173F; COMMON
+            0x1740,   // 1740..175F; BUHID
+            0x1760,   // 1760..177F; TAGBANWA
+            0x1780,   // 1780..17FF; KHMER
+            0x1800,   // 1800..1801; MONGOLIAN
+            0x1802,   // 1802..1803; COMMON
+            0x1804,   // 1804..1804; MONGOLIAN
+            0x1805,   // 1805..1805; COMMON
+            0x1806,   // 1806..18AF; MONGOLIAN
+            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
+            0x1900,   // 1900..194F; LIMBU
+            0x1950,   // 1950..197F; TAI_LE
+            0x1980,   // 1980..19DF; NEW_TAI_LUE
+            0x19E0,   // 19E0..19FF; KHMER
+            0x1A00,   // 1A00..1A1F; BUGINESE
+            0x1A20,   // 1A20..1AFF; TAI_THAM
+            0x1B00,   // 1B00..1B7F; BALINESE
+            0x1B80,   // 1B80..1BFF; SUNDANESE
+            0x1C00,   // 1C00..1C4F; LEPCHA
+            0x1C50,   // 1C50..1CCF; OL_CHIKI
+            0x1CD0,   // 1CD0..1CD2; INHERITED
+            0x1CD3,   // 1CD3..1CD3; COMMON
+            0x1CD4,   // 1CD4..1CE0; INHERITED
+            0x1CE1,   // 1CE1..1CE1; COMMON
+            0x1CE2,   // 1CE2..1CE8; INHERITED
+            0x1CE9,   // 1CE9..1CEC; COMMON
+            0x1CED,   // 1CED..1CED; INHERITED
+            0x1CEE,   // 1CEE..1CFF; COMMON
+            0x1D00,   // 1D00..1D25; LATIN
+            0x1D26,   // 1D26..1D2A; GREEK
+            0x1D2B,   // 1D2B..1D2B; CYRILLIC
+            0x1D2C,   // 1D2C..1D5C; LATIN
+            0x1D5D,   // 1D5D..1D61; GREEK
+            0x1D62,   // 1D62..1D65; LATIN
+            0x1D66,   // 1D66..1D6A; GREEK
+            0x1D6B,   // 1D6B..1D77; LATIN
+            0x1D78,   // 1D78..1D78; CYRILLIC
+            0x1D79,   // 1D79..1DBE; LATIN
+            0x1DBF,   // 1DBF..1DBF; GREEK
+            0x1DC0,   // 1DC0..1DFF; INHERITED
+            0x1E00,   // 1E00..1EFF; LATIN
+            0x1F00,   // 1F00..1FFF; GREEK
+            0x2000,   // 2000..200B; COMMON
+            0x200C,   // 200C..200D; INHERITED
+            0x200E,   // 200E..2070; COMMON
+            0x2071,   // 2071..2073; LATIN
+            0x2074,   // 2074..207E; COMMON
+            0x207F,   // 207F..207F; LATIN
+            0x2080,   // 2080..208F; COMMON
+            0x2090,   // 2090..209F; LATIN
+            0x20A0,   // 20A0..20CF; COMMON
+            0x20D0,   // 20D0..20FF; INHERITED
+            0x2100,   // 2100..2125; COMMON
+            0x2126,   // 2126..2126; GREEK
+            0x2127,   // 2127..2129; COMMON
+            0x212A,   // 212A..212B; LATIN
+            0x212C,   // 212C..2131; COMMON
+            0x2132,   // 2132..2132; LATIN
+            0x2133,   // 2133..214D; COMMON
+            0x214E,   // 214E..214E; LATIN
+            0x214F,   // 214F..215F; COMMON
+            0x2160,   // 2160..2188; LATIN
+            0x2189,   // 2189..27FF; COMMON
+            0x2800,   // 2800..28FF; BRAILLE
+            0x2900,   // 2900..2BFF; COMMON
+            0x2C00,   // 2C00..2C5F; GLAGOLITIC
+            0x2C60,   // 2C60..2C7F; LATIN
+            0x2C80,   // 2C80..2CFF; COPTIC
+            0x2D00,   // 2D00..2D2F; GEORGIAN
+            0x2D30,   // 2D30..2D7F; TIFINAGH
+            0x2D80,   // 2D80..2DDF; ETHIOPIC
+            0x2DE0,   // 2DE0..2DFF; CYRILLIC
+            0x2E00,   // 2E00..2E7F; COMMON
+            0x2E80,   // 2E80..2FEF; HAN
+            0x2FF0,   // 2FF0..3004; COMMON
+            0x3005,   // 3005..3005; HAN
+            0x3006,   // 3006..3006; COMMON
+            0x3007,   // 3007..3007; HAN
+            0x3008,   // 3008..3020; COMMON
+            0x3021,   // 3021..3029; HAN
+            0x302A,   // 302A..302F; INHERITED
+            0x3030,   // 3030..3037; COMMON
+            0x3038,   // 3038..303B; HAN
+            0x303C,   // 303C..3040; COMMON
+            0x3041,   // 3041..3098; HIRAGANA
+            0x3099,   // 3099..309A; INHERITED
+            0x309B,   // 309B..309C; COMMON
+            0x309D,   // 309D..309F; HIRAGANA
+            0x30A0,   // 30A0..30A0; COMMON
+            0x30A1,   // 30A1..30FA; KATAKANA
+            0x30FB,   // 30FB..30FC; COMMON
+            0x30FD,   // 30FD..3104; KATAKANA
+            0x3105,   // 3105..3130; BOPOMOFO
+            0x3131,   // 3131..318F; HANGUL
+            0x3190,   // 3190..319F; COMMON
+            0x31A0,   // 31A0..31BF; BOPOMOFO
+            0x31C0,   // 31C0..31EF; COMMON
+            0x31F0,   // 31F0..31FF; KATAKANA
+            0x3200,   // 3200..321F; HANGUL
+            0x3220,   // 3220..325F; COMMON
+            0x3260,   // 3260..327E; HANGUL
+            0x327F,   // 327F..32CF; COMMON
+            0x32D0,   // 32D0..3357; KATAKANA
+            0x3358,   // 3358..33FF; COMMON
+            0x3400,   // 3400..4DBF; HAN
+            0x4DC0,   // 4DC0..4DFF; COMMON
+            0x4E00,   // 4E00..9FFF; HAN
+            0xA000,   // A000..A4CF; YI
+            0xA4D0,   // A4D0..A4FF; LISU
+            0xA500,   // A500..A63F; VAI
+            0xA640,   // A640..A69F; CYRILLIC
+            0xA6A0,   // A6A0..A6FF; BAMUM
+            0xA700,   // A700..A721; COMMON
+            0xA722,   // A722..A787; LATIN
+            0xA788,   // A788..A78A; COMMON
+            0xA78B,   // A78B..A7FF; LATIN
+            0xA800,   // A800..A82F; SYLOTI_NAGRI
+            0xA830,   // A830..A83F; COMMON
+            0xA840,   // A840..A87F; PHAGS_PA
+            0xA880,   // A880..A8DF; SAURASHTRA
+            0xA8E0,   // A8E0..A8FF; DEVANAGARI
+            0xA900,   // A900..A92F; KAYAH_LI
+            0xA930,   // A930..A95F; REJANG
+            0xA960,   // A960..A97F; HANGUL
+            0xA980,   // A980..A9FF; JAVANESE
+            0xAA00,   // AA00..AA5F; CHAM
+            0xAA60,   // AA60..AA7F; MYANMAR
+            0xAA80,   // AA80..ABBF; TAI_VIET
+            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
+            0xAC00,   // AC00..D7FB; HANGUL
+            0xD7FC,   // D7FC..F8FF; UNKNOWN
+            0xF900,   // F900..FAFF; HAN
+            0xFB00,   // FB00..FB12; LATIN
+            0xFB13,   // FB13..FB1C; ARMENIAN
+            0xFB1D,   // FB1D..FB4F; HEBREW
+            0xFB50,   // FB50..FD3D; ARABIC
+            0xFD3E,   // FD3E..FD4F; COMMON
+            0xFD50,   // FD50..FDFC; ARABIC
+            0xFDFD,   // FDFD..FDFF; COMMON
+            0xFE00,   // FE00..FE0F; INHERITED
+            0xFE10,   // FE10..FE1F; COMMON
+            0xFE20,   // FE20..FE2F; INHERITED
+            0xFE30,   // FE30..FE6F; COMMON
+            0xFE70,   // FE70..FEFE; ARABIC
+            0xFEFF,   // FEFF..FF20; COMMON
+            0xFF21,   // FF21..FF3A; LATIN
+            0xFF3B,   // FF3B..FF40; COMMON
+            0xFF41,   // FF41..FF5A; LATIN
+            0xFF5B,   // FF5B..FF65; COMMON
+            0xFF66,   // FF66..FF6F; KATAKANA
+            0xFF70,   // FF70..FF70; COMMON
+            0xFF71,   // FF71..FF9D; KATAKANA
+            0xFF9E,   // FF9E..FF9F; COMMON
+            0xFFA0,   // FFA0..FFDF; HANGUL
+            0xFFE0,   // FFE0..FFFF; COMMON
+            0x10000,  // 10000..100FF; LINEAR_B
+            0x10100,  // 10100..1013F; COMMON
+            0x10140,  // 10140..1018F; GREEK
+            0x10190,  // 10190..101FC; COMMON
+            0x101FD,  // 101FD..1027F; INHERITED
+            0x10280,  // 10280..1029F; LYCIAN
+            0x102A0,  // 102A0..102FF; CARIAN
+            0x10300,  // 10300..1032F; OLD_ITALIC
+            0x10330,  // 10330..1037F; GOTHIC
+            0x10380,  // 10380..1039F; UGARITIC
+            0x103A0,  // 103A0..103FF; OLD_PERSIAN
+            0x10400,  // 10400..1044F; DESERET
+            0x10450,  // 10450..1047F; SHAVIAN
+            0x10480,  // 10480..107FF; OSMANYA
+            0x10800,  // 10800..1083F; CYPRIOT
+            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
+            0x10900,  // 10900..1091F; PHOENICIAN
+            0x10920,  // 10920..109FF; LYDIAN
+            0x10A00,  // 10A00..10A5F; KHAROSHTHI
+            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
+            0x10B00,  // 10B00..10B3F; AVESTAN
+            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
+            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
+            0x10C00,  // 10C00..10E5F; OLD_TURKIC
+            0x10E60,  // 10E60..1107F; ARABIC
+            0x11080,  // 11080..11FFF; KAITHI
+            0x12000,  // 12000..12FFF; CUNEIFORM
+            0x13000,  // 13000..1CFFF; EGYPTIAN_HIEROGLYPHS
+            0x1D000,  // 1D000..1D166; COMMON
+            0x1D167,  // 1D167..1D169; INHERITED
+            0x1D16A,  // 1D16A..1D17A; COMMON
+            0x1D17B,  // 1D17B..1D182; INHERITED
+            0x1D183,  // 1D183..1D184; COMMON
+            0x1D185,  // 1D185..1D18B; INHERITED
+            0x1D18C,  // 1D18C..1D1A9; COMMON
+            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
+            0x1D1AE,  // 1D1AE..1D1FF; COMMON
+            0x1D200,  // 1D200..1D2FF; GREEK
+            0x1D300,  // 1D300..1F1FF; COMMON
+            0x1F200,  // 1F200..1F20F; HIRAGANA
+            0x1F210,  // 1F210..1FFFF; COMMON
+            0x20000,  // 20000..E0000; HAN
+            0xE0001,  // E0001..E00FF; COMMON
+            0xE0100,  // E0100..E01EF; INHERITED
+            0xE01F0   // E01F0..10FFFF; UNKNOWN
+
+        };
+
+        private static final UnicodeScript[] scripts = {
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            INHERITED,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COPTIC,
+            GREEK,
+            CYRILLIC,
+            INHERITED,
+            CYRILLIC,
+            ARMENIAN,
+            COMMON,
+            ARMENIAN,
+            HEBREW,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            INHERITED,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            INHERITED,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            SYRIAC,
+            ARABIC,
+            THAANA,
+            NKO,
+            SAMARITAN,
+            DEVANAGARI,
+            INHERITED,
+            DEVANAGARI,
+            COMMON,
+            DEVANAGARI,
+            COMMON,
+            DEVANAGARI,
+            BENGALI,
+            GURMUKHI,
+            GUJARATI,
+            ORIYA,
+            TAMIL,
+            TELUGU,
+            KANNADA,
+            COMMON,
+            MALAYALAM,
+            SINHALA,
+            THAI,
+            COMMON,
+            THAI,
+            LAO,
+            TIBETAN,
+            COMMON,
+            MYANMAR,
+            GEORGIAN,
+            COMMON,
+            GEORGIAN,
+            HANGUL,
+            ETHIOPIC,
+            CHEROKEE,
+            CANADIAN_ABORIGINAL,
+            OGHAM,
+            RUNIC,
+            COMMON,
+            RUNIC,
+            TAGALOG,
+            HANUNOO,
+            COMMON,
+            BUHID,
+            TAGBANWA,
+            KHMER,
+            MONGOLIAN,
+            COMMON,
+            MONGOLIAN,
+            COMMON,
+            MONGOLIAN,
+            CANADIAN_ABORIGINAL,
+            LIMBU,
+            TAI_LE,
+            NEW_TAI_LUE,
+            KHMER,
+            BUGINESE,
+            TAI_THAM,
+            BALINESE,
+            SUNDANESE,
+            LEPCHA,
+            OL_CHIKI,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            LATIN,
+            GREEK,
+            CYRILLIC,
+            LATIN,
+            GREEK,
+            LATIN,
+            GREEK,
+            LATIN,
+            CYRILLIC,
+            LATIN,
+            GREEK,
+            INHERITED,
+            LATIN,
+            GREEK,
+            COMMON,
+            INHERITED,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            INHERITED,
+            COMMON,
+            GREEK,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            BRAILLE,
+            COMMON,
+            GLAGOLITIC,
+            LATIN,
+            COPTIC,
+            GEORGIAN,
+            TIFINAGH,
+            ETHIOPIC,
+            CYRILLIC,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            INHERITED,
+            COMMON,
+            HAN,
+            COMMON,
+            HIRAGANA,
+            INHERITED,
+            COMMON,
+            HIRAGANA,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            KATAKANA,
+            BOPOMOFO,
+            HANGUL,
+            COMMON,
+            BOPOMOFO,
+            COMMON,
+            KATAKANA,
+            HANGUL,
+            COMMON,
+            HANGUL,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            YI,
+            LISU,
+            VAI,
+            CYRILLIC,
+            BAMUM,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            SYLOTI_NAGRI,
+            COMMON,
+            PHAGS_PA,
+            SAURASHTRA,
+            DEVANAGARI,
+            KAYAH_LI,
+            REJANG,
+            HANGUL,
+            JAVANESE,
+            CHAM,
+            MYANMAR,
+            TAI_VIET,
+            MEETEI_MAYEK,
+            HANGUL,
+            UNKNOWN,
+            HAN,
+            LATIN,
+            ARMENIAN,
+            HEBREW,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            ARABIC,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            HANGUL,
+            COMMON,
+            LINEAR_B,
+            COMMON,
+            GREEK,
+            COMMON,
+            INHERITED,
+            LYCIAN,
+            CARIAN,
+            OLD_ITALIC,
+            GOTHIC,
+            UGARITIC,
+            OLD_PERSIAN,
+            DESERET,
+            SHAVIAN,
+            OSMANYA,
+            CYPRIOT,
+            IMPERIAL_ARAMAIC,
+            PHOENICIAN,
+            LYDIAN,
+            KHAROSHTHI,
+            OLD_SOUTH_ARABIAN,
+            AVESTAN,
+            INSCRIPTIONAL_PARTHIAN,
+            INSCRIPTIONAL_PAHLAVI,
+            OLD_TURKIC,
+            ARABIC,
+            KAITHI,
+            CUNEIFORM,
+            EGYPTIAN_HIEROGLYPHS,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            GREEK,
+            COMMON,
+            HIRAGANA,
+            COMMON,
+            HAN,
+            COMMON,
+            INHERITED,
+            UNKNOWN
+        };
+
+        private static HashMap<String, Character.UnicodeScript> aliases;  // upper-case 4-letter alias -> constant
+        static {
+            aliases = new HashMap<String, UnicodeScript>();
+            aliases.put("ARAB", ARABIC);
+            aliases.put("ARMI", IMPERIAL_ARAMAIC);
+            aliases.put("ARMN", ARMENIAN);
+            aliases.put("AVST", AVESTAN);
+            aliases.put("BALI", BALINESE);
+            aliases.put("BAMU", BAMUM);
+            aliases.put("BENG", BENGALI);
+            aliases.put("BOPO", BOPOMOFO);
+            aliases.put("BRAI", BRAILLE);
+            aliases.put("BUGI", BUGINESE);
+            aliases.put("BUHD", BUHID);
+            aliases.put("CANS", CANADIAN_ABORIGINAL);
+            aliases.put("CARI", CARIAN);
+            aliases.put("CHAM", CHAM);
+            aliases.put("CHER", CHEROKEE);
+            aliases.put("COPT", COPTIC);
+            aliases.put("CPRT", CYPRIOT);
+            aliases.put("CYRL", CYRILLIC);
+            aliases.put("DEVA", DEVANAGARI);
+            aliases.put("DSRT", DESERET);
+            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
+            aliases.put("ETHI", ETHIOPIC);
+            aliases.put("GEOR", GEORGIAN);
+            aliases.put("GLAG", GLAGOLITIC);
+            aliases.put("GOTH", GOTHIC);
+            aliases.put("GREK", GREEK);
+            aliases.put("GUJR", GUJARATI);
+            aliases.put("GURU", GURMUKHI);
+            aliases.put("HANG", HANGUL);
+            aliases.put("HANI", HAN);
+            aliases.put("HANO", HANUNOO);
+            aliases.put("HEBR", HEBREW);
+            aliases.put("HIRA", HIRAGANA);
+            // "HRKT" is left out: there appears to be no KATAKANA_OR_HIRAGANA constant
+            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
+            aliases.put("ITAL", OLD_ITALIC);
+            aliases.put("JAVA", JAVANESE);
+            aliases.put("KALI", KAYAH_LI);
+            aliases.put("KANA", KATAKANA);
+            aliases.put("KHAR", KHAROSHTHI);
+            aliases.put("KHMR", KHMER);
+            aliases.put("KNDA", KANNADA);
+            aliases.put("KTHI", KAITHI);
+            aliases.put("LANA", TAI_THAM);
+            aliases.put("LAOO", LAO);
+            aliases.put("LATN", LATIN);
+            aliases.put("LEPC", LEPCHA);
+            aliases.put("LIMB", LIMBU);
+            aliases.put("LINB", LINEAR_B);
+            aliases.put("LISU", LISU);
+            aliases.put("LYCI", LYCIAN);
+            aliases.put("LYDI", LYDIAN);
+            aliases.put("MLYM", MALAYALAM);
+            aliases.put("MONG", MONGOLIAN);
+            aliases.put("MTEI", MEETEI_MAYEK);
+            aliases.put("MYMR", MYANMAR);
+            aliases.put("NKOO", NKO);
+            aliases.put("OGAM", OGHAM);
+            aliases.put("OLCK", OL_CHIKI);
+            aliases.put("ORKH", OLD_TURKIC);
+            aliases.put("ORYA", ORIYA);
+            aliases.put("OSMA", OSMANYA);
+            aliases.put("PHAG", PHAGS_PA);
+            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
+            aliases.put("PHNX", PHOENICIAN);
+            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
+            aliases.put("RJNG", REJANG);
+            aliases.put("RUNR", RUNIC);
+            aliases.put("SAMR", SAMARITAN);
+            aliases.put("SARB", OLD_SOUTH_ARABIAN);
+            aliases.put("SAUR", SAURASHTRA);
+            aliases.put("SHAW", SHAVIAN);
+            aliases.put("SINH", SINHALA);
+            aliases.put("SUND", SUNDANESE);
+            aliases.put("SYLO", SYLOTI_NAGRI);
+            aliases.put("SYRC", SYRIAC);
+            aliases.put("TAGB", TAGBANWA);
+            aliases.put("TALE", TAI_LE);
+            aliases.put("TALU", NEW_TAI_LUE);
+            aliases.put("TAML", TAMIL);
+            aliases.put("TAVT", TAI_VIET);
+            aliases.put("TELU", TELUGU);
+            aliases.put("TFNG", TIFINAGH);
+            aliases.put("TGLG", TAGALOG);
+            aliases.put("THAA", THAANA);
+            aliases.put("THAI", THAI);
+            aliases.put("TIBT", TIBETAN);
+            aliases.put("UGAR", UGARITIC);
+            aliases.put("VAII", VAI);
+            aliases.put("XPEO", OLD_PERSIAN);
+            aliases.put("XSUX", CUNEIFORM);
+            aliases.put("YIII", YI);
+            aliases.put("ZINH", INHERITED);
+            aliases.put("ZYYY", COMMON);
+            aliases.put("ZZZZ", UNKNOWN);
+        }
+
+        /**
+         * Returns the enum constant representing the Unicode script to which
+         * the given character (Unicode code point) is assigned.
+         *
+         * @param   codePoint the character (Unicode code point) in question.
+         * @return  The <code>UnicodeScript</code> constant representing the
+         *          Unicode script to which this character is assigned;
+         *          <code>UNKNOWN</code> if the code point is unassigned.
+         * @exception IllegalArgumentException if the specified
+         * <code>codePoint</code> is an invalid Unicode code point.
+         * @see Character#isValidCodePoint(int)
+         *
+         */
+        public static UnicodeScript of(int codePoint) {
+            if (!isValidCodePoint(codePoint))
+                throw new IllegalArgumentException();
+            int type = getType(codePoint);
+            // leave SURROGATE and PRIVATE_USE for table lookup
+            if (type == UNASSIGNED)
+                return UNKNOWN;
+            int index = Arrays.binarySearch(scriptStarts, codePoint);
+            if (index < 0)
+                index = -index - 2;   // no exact hit: (insertion point - 1), the last start <= codePoint
+            return scripts[index];    // scripts[i] is the script of the range starting at scriptStarts[i]
+        }
+
+        /**
+         * Returns the UnicodeScript constant with the given Unicode script
+         * name or the script name alias. Script names and their aliases are
+         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
+         * and PropertyValueAliases&lt;version&gt;.txt define script names
+         * and the script name aliases for a particular version of the
+         * standard. The {@link Character} class specifies the version of
+         * the standard that it supports.
+         * <p>
+         * Character case is ignored for all of the valid script names.
+         * The English locale's case mapping rules ({@code Locale.ENGLISH})
+         * are used to provide case-insensitive string comparisons for
+         * script name validation.
+         *
+         * @param scriptName A <code>UnicodeScript</code> name.
+         * @return The <code>UnicodeScript</code> constant identified
+         *         by <code>scriptName</code>
+         * @throws IllegalArgumentException if <code>scriptName</code> is an
+         *         invalid name
+         * @throws NullPointerException if <code>scriptName</code> is null
+         */
+        public static final UnicodeScript forName(String scriptName) {
+            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
+                                 //.replace(' ', '_'));
+            UnicodeScript sc = aliases.get(scriptName);   // try the short alias first
+            if (sc != null)
+                return sc;
+            return valueOf(scriptName);   // else the enum constant name; throws IllegalArgumentException if none
+        }
+    }
+
+    /**
      * The value of the <code>Character</code>.
      *
      * @serial
@@ -5042,4 +6278,51 @@
     public static char reverseBytes(char ch) {
         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
     }
+
+    /**
+     * Returns the Unicode name of the specified character
+     * <code>codePoint</code>, or null if the code point is
+     * {@link #UNASSIGNED unassigned}.
+     * <p>
+     * Note: if the specified character is not assigned a name by
+     * the <i>UnicodeData</i> file (part of the Unicode Character
+     * Database maintained by the Unicode Consortium), the returned
+     * name is the same as the result of the expression
+     *
+     * <blockquote><code>
+     *     Character.UnicodeBlock.of(codePoint)
+     *                           .toString()
+     *                           .replace('_', ' ')
+     *     + " "
+     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+     *
+     * </code></blockquote>
+     *
+     * @param  codePoint the character (Unicode code point)
+     *
+     * @return the Unicode name of the specified character, or null if
+     *         the code point is unassigned.
+     *
+     * @exception IllegalArgumentException if the specified
+     *            <code>codePoint</code> is not a valid Unicode
+     *            code point.
+     *
+     * @since 1.7
+     */
+    public static String getName(int codePoint) {
+        if (!isValidCodePoint(codePoint)) {
+            throw new IllegalArgumentException();
+        }
+        String name = CharacterName.get(codePoint);   // the name table is consulted first
+        if (name != null)
+            return name;
+        if (getType(codePoint) == UNASSIGNED)
+            return null;
+        UnicodeBlock block = UnicodeBlock.of(codePoint);
+        if (block != null)
+            return block.toString().replace('_', ' ') + " "
+                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+        // not expected to be reached; falls back to the bare upper-case hex value
+        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+    }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/java/lang/CharacterName.java	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package java.lang;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.lang.ref.SoftReference;
+import java.util.Arrays;
+import java.util.zip.InflaterInputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+class CharacterName {
+    // Maps code points to character names loaded from the "uniName.dat" resource.
+    private static SoftReference<byte[]> refStrPool;   // softly-held pool of name bytes
+    private static int[][] lookup;   // [cp >> 8][cp & 0xff] -> (name offset << 8) | name length; 0 = no name
+    // NOTE(review): get() reads both fields without holding the class lock -- confirm this is safe.
+    private static synchronized byte[] initNamePool() {
+        byte[] strPool = null;
+        if (refStrPool != null && (strPool = refStrPool.get()) != null)
+            return strPool;   // pool already loaded and still reachable
+        DataInputStream dis = null;
+        try {
+            dis = new DataInputStream(new InflaterInputStream(
+                AccessController.doPrivileged(new PrivilegedAction<InputStream>()
+                {
+                    public InputStream run() {
+                        return getClass().getResourceAsStream("uniName.dat");
+                    }
+                })));
+
+            lookup = new int[(Character.MAX_CODE_POINT + 1) >> 8][];   // one slot per 256-code-point page
+            int total = dis.readInt();   // total byte count: index (cpEnd) + name pool
+            int cpEnd = dis.readInt();   // size in bytes of the code point index
+            byte ba[] = new byte[cpEnd];
+            dis.readFully(ba);
+
+            int nameOff = 0;
+            int cpOff = 0;
+            int cp = 0;
+            do {
+                int len = ba[cpOff++] & 0xff;   // name length; 0 marks an entry with an explicit code point
+                if (len == 0) {
+                    len = ba[cpOff++] & 0xff;   // the real length follows the 0 marker
+                    // 3-byte code point, always big-endian
+                    cp = ((ba[cpOff++] & 0xff) << 16) |
+                         ((ba[cpOff++] & 0xff) <<  8) |
+                         ((ba[cpOff++] & 0xff));
+                }  else {
+                    cp++;   // otherwise the entry is for the next consecutive code point
+                }
+                int hi = cp >> 8;
+                if (lookup[hi] == null) {
+                    lookup[hi] = new int[0x100];   // pages are allocated on first use
+                }
+                lookup[hi][cp&0xff] = (nameOff << 8) | len;
+                nameOff += len;   // names are stored back to back in the pool
+            } while (cpOff < cpEnd);
+            strPool = new byte[total - cpEnd];
+            dis.readFully(strPool);
+            refStrPool = new SoftReference<byte[]>(strPool);
+        } catch (Exception x) {
+            throw new InternalError(x.getMessage());   // NOTE(review): the original exception is not chained
+        } finally {
+            try {
+                if (dis != null)
+                    dis.close();
+            } catch (Exception xx) {}   // failures while closing are ignored
+        }
+        return strPool;
+    }
+
+    public static String get(int cp) {   // returns null when no name is recorded for cp
+        byte[] strPool = null;
+        if (refStrPool == null || (strPool = refStrPool.get()) == null)
+            strPool = initNamePool();   // first use, or the soft reference was cleared
+        int off = 0;
+        if (lookup[cp>>8] == null ||
+            (off = lookup[cp>>8][cp&0xff]) == 0)
+            return null;
+        return new String(strPool, 0, off >>> 8, off & 0xff);  // ASCII: hibyte 0, offset, length
+    }
+}
--- a/jdk/src/share/classes/java/util/regex/Pattern.java	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java	Tue May 18 15:36:47 2010 -0700
@@ -29,6 +29,7 @@
 import java.security.PrivilegedAction;
 import java.text.CharacterIterator;
 import java.text.Normalizer;
+import java.util.Locale;
 import java.util.Map;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -200,8 +201,9 @@
  *     <td>Equivalent to java.lang.Character.isMirrored()</td></tr>
  *
  * <tr><th>&nbsp;</th></tr>
- * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode blocks and categories</th></tr>
- *
+ * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode scripts, blocks and categories</th></tr>
+ * <tr><td valign="top" headers="construct unicode"><tt>\p{IsLatin}</tt></td>
+ *     <td headers="matches">A Latin&nbsp;script character (simple <a href="#ubc">script</a>)</td></tr>
  * <tr><td valign="top" headers="construct unicode"><tt>\p{InGreek}</tt></td>
  *     <td headers="matches">A character in the Greek&nbsp;block (simple <a href="#ubc">block</a>)</td></tr>
  * <tr><td valign="top" headers="construct unicode"><tt>\p{Lu}</tt></td>
@@ -527,25 +529,40 @@
  * while not equal, compile into the same pattern, which matches the character
  * with hexadecimal value <tt>0x2014</tt>.
  *
- * <a name="ubc"> <p>Unicode blocks and categories are written with the
- * <tt>\p</tt> and <tt>\P</tt> constructs as in
- * Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if the input has the
- * property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt> does not match if
- * the input has that property.  Blocks are specified with the prefix
- * <tt>In</tt>, as in <tt>InMongolian</tt>.  Categories may be specified with
- * the optional prefix <tt>Is</tt>: Both <tt>\p{L}</tt> and <tt>\p{IsL}</tt>
- * denote the category of Unicode letters.  Blocks and categories can be used
- * both inside and outside of a character class.
- *
+ * <a name="ubc">
+ * <p>Unicode scripts, blocks and categories are written with the <tt>\p</tt> and
+ * <tt>\P</tt> constructs as in Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if
+ * the input has the property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt>
+ * does not match if the input has that property.
+ * <p>
+ * Scripts are specified either with the prefix {@code Is}, as in
+ * {@code IsHiragana}, or by using the {@code script} keyword (or its short
+ * form {@code sc}) as in {@code script=Hiragana} or {@code sc=Hiragana}.
+ * <p>
+ * Blocks are specified with the prefix {@code In}, as in
+ * {@code InMongolian}, or by using the keyword {@code block} (or its short
+ * form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}.
+ * <p>
+ * Categories may be specified with the optional prefix {@code Is}:
+ * Both {@code \p{L}} and {@code \p{IsL}} denote the category of Unicode
+ * letters. Like scripts and blocks, categories can also be specified
+ * by using the keyword {@code general_category} (or its short form
+ * {@code gc}) as in {@code general_category=Lu} or {@code gc=Lu}.
+ * <p>
+ * Scripts, blocks and categories can be used both inside and outside of a
+ * character class.
  * <p> The supported categories are those of
  * <a href="http://www.unicode.org/unicode/standard/standard.html">
  * <i>The Unicode Standard</i></a> in the version specified by the
  * {@link java.lang.Character Character} class. The category names are those
  * defined in the Standard, both normative and informative.
+ * The script names supported by <code>Pattern</code> are the valid script names
+ * accepted and defined by
+ * {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}.
  * The block names supported by <code>Pattern</code> are the valid block names
  * accepted and defined by
  * {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}.
- *
+ * <p>
  * <a name="jcc"> <p>Categories that behave like the java.lang.Character
  * boolean is<i>methodname</i> methods (except for the deprecated ones) are
  * available through the same <tt>\p{</tt><i>prop</i><tt>}</tt> syntax where
@@ -2488,12 +2505,34 @@
             name = new String(temp, i, j-i-1);
         }
 
-        if (name.startsWith("In")) {
-            node = unicodeBlockPropertyFor(name.substring(2));
+        int i = name.indexOf('=');
+        if (i != -1) {
+            // property construct \p{name=value}
+            String value = name.substring(i + 1);
+            name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
+            if ("sc".equals(name) || "script".equals(name)) {
+                node = unicodeScriptPropertyFor(value);
+            } else if ("blk".equals(name) || "block".equals(name)) {
+                node = unicodeBlockPropertyFor(value);
+            } else if ("gc".equals(name) || "general_category".equals(name)) {
+                node = charPropertyNodeFor(value);
+            } else {
+                throw error("Unknown Unicode property {name=<" + name + ">, "
+                             + "value=<" + value + ">}");
+            }
         } else {
-            if (name.startsWith("Is"))
+            if (name.startsWith("In")) {
+                // \p{inBlockName}
+                node = unicodeBlockPropertyFor(name.substring(2));
+            } else if (name.startsWith("Is")) {
+                // \p{isGeneralCategory} and \p{isScriptName}
                 name = name.substring(2);
-            node = charPropertyNodeFor(name);
+                node = CharPropertyNames.charPropertyFor(name);
+                if (node == null)
+                    node = unicodeScriptPropertyFor(name);
+            } else {
+                node = charPropertyNodeFor(name);
+            }
         }
         if (maybeComplement) {
             if (node instanceof Category || node instanceof Block)
@@ -2503,6 +2542,21 @@
         return node;
     }
 
+
+    /**
+     * Returns a CharProperty matching all characters that belong to
+     * the UnicodeScript identified by the given script name or alias.
+     */
+    private CharProperty unicodeScriptPropertyFor(String name) {
+        final Character.UnicodeScript script;
+        try {
+            script = Character.UnicodeScript.forName(name);
+        } catch (IllegalArgumentException iae) {
+            throw error("Unknown character script name {" + name + "}");   // rethrown via error() with the bad name
+        }
+        return new Script(script);
+    }
+
     /**
      * Returns a CharProperty matching all characters in a UnicodeBlock.
      */
@@ -3567,6 +3621,19 @@
     }
 
     /**
+     * Node class that matches characters assigned to a given Unicode script.
+     */
+    static final class Script extends CharProperty {
+        final Character.UnicodeScript script;   // the script a character must have to match
+        Script(Character.UnicodeScript script) {
+            this.script = script;
+        }
+        boolean isSatisfiedBy(int ch) {
+            return script == Character.UnicodeScript.of(ch);   // enum constants: identity comparison suffices
+        }
+    }
+
+    /**
      * Node class that matches a Unicode category.
      */
     static final class Category extends CharProperty {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/lang/Character/CheckScript.java	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,105 @@
+/**
+ * @test
+ * @bug 6945564
+ * @summary Check j.l.Character.UnicodeScript.of() against Scripts.txt
+ * @ignore don't run until #6903266 is integrated
+ */
+
+import java.io.*;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.regex.*;
+import java.lang.Character.UnicodeScript;
+
+public class CheckScript {
+    /**
+     * Verifies Character.UnicodeScript.of() against the Scripts.txt file
+     * named by args[0]: every listed range must map to its script, and every
+     * code point must either be UNKNOWN (accepted only for unassigned,
+     * surrogate and private-use code points) or lie in a listed range of
+     * the script that of() reports.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length != 1) {
+            System.out.println("java CheckScript script.txt");
+            System.exit(1);
+        }
+        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+        // lower-cased script name -> flat list of inclusive start,end pairs
+        HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
+        BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
+        try {
+            String line;
+            while ((line = sbfr.readLine()) != null) {
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2)==null)?start
+                                                :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3).toLowerCase(Locale.ENGLISH);
+                    ArrayList<Integer> ranges = scripts.get(name);
+                    if (ranges == null) {
+                        ranges = new ArrayList<Integer>();
+                        scripts.put(name, ranges);
+                    }
+                    ranges.add(start);
+                    ranges.add(end);
+                }
+            }
+        } finally {
+            sbfr.close();   // release the file even when parsing fails
+        }
+        // check all defined ranges
+        for (String name : scripts.keySet()) {
+            System.out.println("Checking " + name + "...");
+            ArrayList<Integer> ranges = scripts.get(name);
+            UnicodeScript expected = UnicodeScript.forName(name);
+            for (int off = 0; off < ranges.size(); off += 2) {
+                int start = ranges.get(off);
+                int end = ranges.get(off + 1);
+                for (int cp = start; cp <= end; cp++) {
+                    UnicodeScript script = UnicodeScript.of(cp);
+                    if (script != expected) {
+                        throw new RuntimeException(
+                            "UnicodeScript failed: cp=" + Integer.toHexString(cp) +
+                            ", of(cp)=<" + script + "> but <" +
+                            expected + "> is expected");
+                    }
+                }
+            }
+        }
+        // check all codepoints, including the last one (U+10FFFF)
+        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
+            UnicodeScript script = UnicodeScript.of(cp);
+            if (script == UnicodeScript.UNKNOWN) {
+                // UNKNOWN is acceptable only for these three general categories
+                int type = Character.getType(cp);
+                if (type != Character.UNASSIGNED &&
+                    type != Character.SURROGATE &&
+                    type != Character.PRIVATE_USE)
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" + Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but a known script is expected for an assigned cp");
+            } else {
+                // a script missing from the data file has no ranges (null)
+                ArrayList<Integer> ranges =
+                    scripts.get(script.name().toLowerCase(Locale.ENGLISH));
+                boolean found = false;
+                for (int off = 0; ranges != null && !found
+                                  && off < ranges.size(); off += 2) {
+                    found = cp >= ranges.get(off) && cp <= ranges.get(off + 1);
+                }
+                if (!found) {
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" + Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but NOT in ranges of this script");
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/lang/Character/Scripts.txt	Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,1972 @@
+# Scripts-5.2.0.txt
+# Date: 2009-08-22, 04:58:43 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+
+# ================================================
+
+# Property:	Script
+
+#  All code points not explicitly listed for Script
+#  have the value Unknown (Zzzz).
+
+# @missing: 0000..10FFFF; Unknown
+
+# ================================================
+
+0000..001F    ; Common # Cc  [32] <control-0000>..<control-001F>
+0020          ; Common # Zs       SPACE
+0021..0023    ; Common # Po   [3] EXCLAMATION MARK..NUMBER SIGN
+0024          ; Common # Sc       DOLLAR SIGN
+0025..0027    ; Common # Po   [3] PERCENT SIGN..APOSTROPHE
+0028          ; Common # Ps       LEFT PARENTHESIS
+0029          ; Common # Pe       RIGHT PARENTHESIS
+002A          ; Common # Po       ASTERISK
+002B          ; Common # Sm       PLUS SIGN
+002C          ; Common # Po       COMMA
+002D          ; Common # Pd       HYPHEN-MINUS
+002E..002F    ; Common # Po   [2] FULL STOP..SOLIDUS
+0030..0039    ; Common # Nd  [10] DIGIT ZERO..DIGIT NINE
+003A..003B    ; Common # Po   [2] COLON..SEMICOLON
+003C..003E    ; Common # Sm   [3] LESS-THAN SIGN..GREATER-THAN SIGN
+003F..0040    ; Common # Po   [2] QUESTION MARK..COMMERCIAL AT
+005B          ; Common # Ps       LEFT SQUARE BRACKET
+005C          ; Common # Po       REVERSE SOLIDUS
+005D          ; Common # Pe       RIGHT SQUARE BRACKET
+005E          ; Common # Sk       CIRCUMFLEX ACCENT
+005F          ; Common # Pc       LOW LINE
+0060          ; Common # Sk       GRAVE ACCENT
+007B          ; Common # Ps       LEFT CURLY BRACKET
+007C          ; Common # Sm       VERTICAL LINE
+007D          ; Common # Pe       RIGHT CURLY BRACKET
+007E          ; Common # Sm       TILDE
+007F..009F    ; Common # Cc  [33] <control-007F>..<control-009F>
+00A0          ; Common # Zs       NO-BREAK SPACE
+00A1          ; Common # Po       INVERTED EXCLAMATION MARK
+00A2..00A5    ; Common # Sc   [4] CENT SIGN..YEN SIGN
+00A6..00A7    ; Common # So   [2] BROKEN BAR..SECTION SIGN
+00A8          ; Common # Sk       DIAERESIS
+00A9          ; Common # So       COPYRIGHT SIGN
+00AB          ; Common # Pi       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00AC          ; Common # Sm       NOT SIGN
+00AD          ; Common # Cf       SOFT HYPHEN
+00AE          ; Common # So       REGISTERED SIGN
+00AF          ; Common # Sk       MACRON
+00B0          ; Common # So       DEGREE SIGN
+00B1          ; Common # Sm       PLUS-MINUS SIGN
+00B2..00B3    ; Common # No   [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
+00B4          ; Common # Sk       ACUTE ACCENT
+00B5          ; Common # L&       MICRO SIGN
+00B6          ; Common # So       PILCROW SIGN
+00B7          ; Common # Po       MIDDLE DOT
+00B8          ; Common # Sk       CEDILLA
+00B9          ; Common # No       SUPERSCRIPT ONE
+00BB          ; Common # Pf       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BC..00BE    ; Common # No   [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
+00BF          ; Common # Po       INVERTED QUESTION MARK
+00D7          ; Common # Sm       MULTIPLICATION SIGN
+00F7          ; Common # Sm       DIVISION SIGN
+02B9..02C1    ; Common # Lm   [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2..02C5    ; Common # Sk   [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
+02C6..02D1    ; Common # Lm  [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02D2..02DF    ; Common # Sk  [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
+02E5..02EB    ; Common # Sk   [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+02EC          ; Common # Lm       MODIFIER LETTER VOICING
+02ED          ; Common # Sk       MODIFIER LETTER UNASPIRATED
+02EE          ; Common # Lm       MODIFIER LETTER DOUBLE APOSTROPHE
+02EF..02FF    ; Common # Sk  [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0374          ; Common # Lm       GREEK NUMERAL SIGN
+037E          ; Common # Po       GREEK QUESTION MARK
+0385          ; Common # Sk       GREEK DIALYTIKA TONOS
+0387          ; Common # Po       GREEK ANO TELEIA
+0589          ; Common # Po       ARMENIAN FULL STOP
+0600..0603    ; Common # Cf   [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
+060C          ; Common # Po       ARABIC COMMA
+061B          ; Common # Po       ARABIC SEMICOLON
+061F          ; Common # Po       ARABIC QUESTION MARK
+0640          ; Common # Lm       ARABIC TATWEEL
+0660..0669    ; Common # Nd  [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+06DD          ; Common # Cf       ARABIC END OF AYAH
+0964..0965    ; Common # Po   [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0970          ; Common # Po       DEVANAGARI ABBREVIATION SIGN
+0CF1..0CF2    ; Common # So   [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+0E3F          ; Common # Sc       THAI CURRENCY SYMBOL BAHT
+0FD5..0FD8    ; Common # So   [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
+10FB          ; Common # Po       GEORGIAN PARAGRAPH SEPARATOR
+16EB..16ED    ; Common # Po   [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+1735..1736    ; Common # Po   [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+1802..1803    ; Common # Po   [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
+1805          ; Common # Po       MONGOLIAN FOUR DOTS
+1CD3          ; Common # Po       VEDIC SIGN NIHSHVASA
+1CE1          ; Common # Mc       VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CE9..1CEC    ; Common # Lo   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CEE..1CF1    ; Common # Lo   [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
+1CF2          ; Common # Mc       VEDIC SIGN ARDHAVISARGA
+2000..200A    ; Common # Zs  [11] EN QUAD..HAIR SPACE
+200B          ; Common # Cf       ZERO WIDTH SPACE
+200E..200F    ; Common # Cf   [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+2010..2015    ; Common # Pd   [6] HYPHEN..HORIZONTAL BAR
+2016..2017    ; Common # Po   [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
+2018          ; Common # Pi       LEFT SINGLE QUOTATION MARK
+2019          ; Common # Pf       RIGHT SINGLE QUOTATION MARK
+201A          ; Common # Ps       SINGLE LOW-9 QUOTATION MARK
+201B..201C    ; Common # Pi   [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
+201D          ; Common # Pf       RIGHT DOUBLE QUOTATION MARK
+201E          ; Common # Ps       DOUBLE LOW-9 QUOTATION MARK
+201F          ; Common # Pi       DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2020..2027    ; Common # Po   [8] DAGGER..HYPHENATION POINT
+2028          ; Common # Zl       LINE SEPARATOR
+2029          ; Common # Zp       PARAGRAPH SEPARATOR
+202A..202E    ; Common # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+202F          ; Common # Zs       NARROW NO-BREAK SPACE
+2030..2038    ; Common # Po   [9] PER MILLE SIGN..CARET
+2039          ; Common # Pi       SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A          ; Common # Pf       SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+203B..203E    ; Common # Po   [4] REFERENCE MARK..OVERLINE
+203F..2040    ; Common # Pc   [2] UNDERTIE..CHARACTER TIE
+2041..2043    ; Common # Po   [3] CARET INSERTION POINT..HYPHEN BULLET
+2044          ; Common # Sm       FRACTION SLASH
+2045          ; Common # Ps       LEFT SQUARE BRACKET WITH QUILL
+2046          ; Common # Pe       RIGHT SQUARE BRACKET WITH QUILL
+2047..2051    ; Common # Po  [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+2052          ; Common # Sm       COMMERCIAL MINUS SIGN
+2053          ; Common # Po       SWUNG DASH
+2054          ; Common # Pc       INVERTED UNDERTIE
+2055..205E    ; Common # Po  [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+205F          ; Common # Zs       MEDIUM MATHEMATICAL SPACE
+2060..2064    ; Common # Cf   [5] WORD JOINER..INVISIBLE PLUS
+206A..206F    ; Common # Cf   [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2070          ; Common # No       SUPERSCRIPT ZERO
+2074..2079    ; Common # No   [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
+207A..207C    ; Common # Sm   [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+207D          ; Common # Ps       SUPERSCRIPT LEFT PARENTHESIS
+207E          ; Common # Pe       SUPERSCRIPT RIGHT PARENTHESIS
+2080..2089    ; Common # No  [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
+208A..208C    ; Common # Sm   [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+208D          ; Common # Ps       SUBSCRIPT LEFT PARENTHESIS
+208E          ; Common # Pe       SUBSCRIPT RIGHT PARENTHESIS
+20A0..20B8    ; Common # Sc  [25] EURO-CURRENCY SIGN..TENGE SIGN
+2100..2101    ; Common # So   [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
+2102          ; Common # L&       DOUBLE-STRUCK CAPITAL C
+2103..2106    ; Common # So   [4] DEGREE CELSIUS..CADA UNA
+2107          ; Common # L&       EULER CONSTANT
+2108..2109    ; Common # So   [2] SCRUPLE..DEGREE FAHRENHEIT
+210A..2113    ; Common # L&  [10] SCRIPT SMALL G..SCRIPT SMALL L
+2114          ; Common # So       L B BAR SYMBOL
+2115          ; Common # L&       DOUBLE-STRUCK CAPITAL N
+2116..2118    ; Common # So   [3] NUMERO SIGN..SCRIPT CAPITAL P
+2119..211D    ; Common # L&   [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+211E..2123    ; Common # So   [6] PRESCRIPTION TAKE..VERSICLE
+2124          ; Common # L&       DOUBLE-STRUCK CAPITAL Z
+2125          ; Common # So       OUNCE SIGN
+2127          ; Common # So       INVERTED OHM SIGN
+2128          ; Common # L&       BLACK-LETTER CAPITAL Z
+2129          ; Common # So       TURNED GREEK SMALL LETTER IOTA
+212C..212D    ; Common # L&   [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
+212E          ; Common # So       ESTIMATED SYMBOL
+212F..2131    ; Common # L&   [3] SCRIPT SMALL E..SCRIPT CAPITAL F
+2133..2134    ; Common # L&   [2] SCRIPT CAPITAL M..SCRIPT SMALL O
+2135..2138    ; Common # Lo   [4] ALEF SYMBOL..DALET SYMBOL
+2139          ; Common # L&       INFORMATION SOURCE
+213A..213B    ; Common # So   [2] ROTATED CAPITAL Q..FACSIMILE SIGN
+213C..213F    ; Common # L&   [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+2140..2144    ; Common # Sm   [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
+2145..2149    ; Common # L&   [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+214A          ; Common # So       PROPERTY LINE
+214B          ; Common # Sm       TURNED AMPERSAND
+214C..214D    ; Common # So   [2] PER SIGN..AKTIESELSKAB
+214F          ; Common # So       SYMBOL FOR SAMARITAN SOURCE
+2150..215F    ; Common # No  [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
+2189          ; Common # No       VULGAR FRACTION ZERO THIRDS
+2190..2194    ; Common # Sm   [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+2195..2199    ; Common # So   [5] UP DOWN ARROW..SOUTH WEST ARROW
+219A..219B    ; Common # Sm   [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+219C..219F    ; Common # So   [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A0          ; Common # Sm       RIGHTWARDS TWO HEADED ARROW
+21A1..21A2    ; Common # So   [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A3          ; Common # Sm       RIGHTWARDS ARROW WITH TAIL
+21A4..21A5    ; Common # So   [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A6          ; Common # Sm       RIGHTWARDS ARROW FROM BAR
+21A7..21AD    ; Common # So   [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+21AE          ; Common # Sm       LEFT RIGHT ARROW WITH STROKE
+21AF..21CD    ; Common # So  [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+21CE..21CF    ; Common # Sm   [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1    ; Common # So   [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D2          ; Common # Sm       RIGHTWARDS DOUBLE ARROW
+21D3          ; Common # So       DOWNWARDS DOUBLE ARROW
+21D4          ; Common # Sm       LEFT RIGHT DOUBLE ARROW
+21D5..21F3    ; Common # So  [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
+21F4..22FF    ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
+2300..2307    ; Common # So   [8] DIAMETER SIGN..WAVY LINE
+2308..230B    ; Common # Sm   [4] LEFT CEILING..RIGHT FLOOR
+230C..231F    ; Common # So  [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
+2320..2321    ; Common # Sm   [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+2322..2328    ; Common # So   [7] FROWN..KEYBOARD
+2329          ; Common # Ps       LEFT-POINTING ANGLE BRACKET
+232A          ; Common # Pe       RIGHT-POINTING ANGLE BRACKET
+232B..237B    ; Common # So  [81] ERASE TO THE LEFT..NOT CHECK MARK
+237C          ; Common # Sm       RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+237D..239A    ; Common # So  [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+239B..23B3    ; Common # Sm  [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+23B4..23DB    ; Common # So  [40] TOP SQUARE BRACKET..FUSE
+23DC..23E1    ; Common # Sm   [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+23E2..23E8    ; Common # So   [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
+2400..2426    ; Common # So  [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2440..244A    ; Common # So  [11] OCR HOOK..OCR DOUBLE BACKSLASH
+2460..249B    ; Common # No  [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+249C..24E9    ; Common # So  [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+24EA..24FF    ; Common # No  [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
+2500..25B6    ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
+25B7          ; Common # Sm       WHITE RIGHT-POINTING TRIANGLE
+25B8..25C0    ; Common # So   [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
+25C1          ; Common # Sm       WHITE LEFT-POINTING TRIANGLE
+25C2..25F7    ; Common # So  [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+25F8..25FF    ; Common # Sm   [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
+2600..266E    ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
+266F          ; Common # Sm       MUSIC SHARP SIGN
+2670..26CD    ; Common # So  [94] WEST SYRIAC CROSS..DISABLED CAR
+26CF..26E1    ; Common # So  [19] PICK..RESTRICTED LEFT ENTRY-2
+26E3          ; Common # So       HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
+26E8..26FF    ; Common # So  [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
+2701..2704    ; Common # So   [4] UPPER BLADE SCISSORS..WHITE SCISSORS
+2706..2709    ; Common # So   [4] TELEPHONE LOCATION SIGN..ENVELOPE
+270C..2727    ; Common # So  [28] VICTORY HAND..WHITE FOUR POINTED STAR
+2729..274B    ; Common # So  [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+274D          ; Common # So       SHADOWED WHITE CIRCLE
+274F..2752    ; Common # So   [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
+2756..275E    ; Common # So   [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
+2761..2767    ; Common # So   [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
+2768          ; Common # Ps       MEDIUM LEFT PARENTHESIS ORNAMENT
+2769          ; Common # Pe       MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A          ; Common # Ps       MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B          ; Common # Pe       MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C          ; Common # Ps       MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D          ; Common # Pe       MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E          ; Common # Ps       HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F          ; Common # Pe       HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770          ; Common # Ps       HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771          ; Common # Pe       HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772          ; Common # Ps       LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773          ; Common # Pe       LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774          ; Common # Ps       MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775          ; Common # Pe       MEDIUM RIGHT CURLY BRACKET ORNAMENT
+2776..2793    ; Common # No  [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
+2794          ; Common # So       HEAVY WIDE-HEADED RIGHTWARDS ARROW
+2798..27AF    ; Common # So  [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
+27B1..27BE    ; Common # So  [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
+27C0..27C4    ; Common # Sm   [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
+27C5          ; Common # Ps       LEFT S-SHAPED BAG DELIMITER
+27C6          ; Common # Pe       RIGHT S-SHAPED BAG DELIMITER
+27C7..27CA    ; Common # Sm   [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
+27CC          ; Common # Sm       LONG DIVISION
+27D0..27E5    ; Common # Sm  [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
+27E6          ; Common # Ps       MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7          ; Common # Pe       MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8          ; Common # Ps       MATHEMATICAL LEFT ANGLE BRACKET
+27E9          ; Common # Pe       MATHEMATICAL RIGHT ANGLE BRACKET
+27EA          ; Common # Ps       MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB          ; Common # Pe       MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC          ; Common # Ps       MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED          ; Common # Pe       MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE          ; Common # Ps       MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF          ; Common # Pe       MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+27F0..27FF    ; Common # Sm  [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
+2900..2982    ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
+2983          ; Common # Ps       LEFT WHITE CURLY BRACKET
+2984          ; Common # Pe       RIGHT WHITE CURLY BRACKET
+2985          ; Common # Ps       LEFT WHITE PARENTHESIS
+2986          ; Common # Pe       RIGHT WHITE PARENTHESIS
+2987          ; Common # Ps       Z NOTATION LEFT IMAGE BRACKET
+2988          ; Common # Pe       Z NOTATION RIGHT IMAGE BRACKET
+2989          ; Common # Ps       Z NOTATION LEFT BINDING BRACKET
+298A          ; Common # Pe       Z NOTATION RIGHT BINDING BRACKET
+298B          ; Common # Ps       LEFT SQUARE BRACKET WITH UNDERBAR
+298C          ; Common # Pe       RIGHT SQUARE BRACKET WITH UNDERBAR
+298D          ; Common # Ps       LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E          ; Common # Pe       RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F          ; Common # Ps       LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990          ; Common # Pe       RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991          ; Common # Ps       LEFT ANGLE BRACKET WITH DOT
+2992          ; Common # Pe       RIGHT ANGLE BRACKET WITH DOT
+2993          ; Common # Ps       LEFT ARC LESS-THAN BRACKET
+2994          ; Common # Pe       RIGHT ARC GREATER-THAN BRACKET
+2995          ; Common # Ps       DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996          ; Common # Pe       DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997          ; Common # Ps       LEFT BLACK TORTOISE SHELL BRACKET
+2998          ; Common # Pe       RIGHT BLACK TORTOISE SHELL BRACKET
+2999..29D7    ; Common # Sm  [63] DOTTED FENCE..BLACK HOURGLASS
+29D8          ; Common # Ps       LEFT WIGGLY FENCE
+29D9          ; Common # Pe       RIGHT WIGGLY FENCE
+29DA          ; Common # Ps       LEFT DOUBLE WIGGLY FENCE
+29DB          ; Common # Pe       RIGHT DOUBLE WIGGLY FENCE
+29DC..29FB    ; Common # Sm  [32] INCOMPLETE INFINITY..TRIPLE PLUS
+29FC          ; Common # Ps       LEFT-POINTING CURVED ANGLE BRACKET
+29FD          ; Common # Pe       RIGHT-POINTING CURVED ANGLE BRACKET
+29FE..2AFF    ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
+2B00..2B2F    ; Common # So  [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
+2B30..2B44    ; Common # Sm  [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
+2B45..2B46    ; Common # So   [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
+2B47..2B4C    ; Common # Sm   [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+2B50..2B59    ; Common # So  [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
+2E00..2E01    ; Common # Po   [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
+2E02          ; Common # Pi       LEFT SUBSTITUTION BRACKET
+2E03          ; Common # Pf       RIGHT SUBSTITUTION BRACKET
+2E04          ; Common # Pi       LEFT DOTTED SUBSTITUTION BRACKET
+2E05          ; Common # Pf       RIGHT DOTTED SUBSTITUTION BRACKET
+2E06..2E08    ; Common # Po   [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
+2E09          ; Common # Pi       LEFT TRANSPOSITION BRACKET
+2E0A          ; Common # Pf       RIGHT TRANSPOSITION BRACKET
+2E0B          ; Common # Po       RAISED SQUARE
+2E0C          ; Common # Pi       LEFT RAISED OMISSION BRACKET
+2E0D          ; Common # Pf       RIGHT RAISED OMISSION BRACKET
+2E0E..2E16    ; Common # Po   [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
+2E17          ; Common # Pd       DOUBLE OBLIQUE HYPHEN
+2E18..2E19    ; Common # Po   [2] INVERTED INTERROBANG..PALM BRANCH
+2E1A          ; Common # Pd       HYPHEN WITH DIAERESIS
+2E1B          ; Common # Po       TILDE WITH RING ABOVE
+2E1C          ; Common # Pi       LEFT LOW PARAPHRASE BRACKET
+2E1D          ; Common # Pf       RIGHT LOW PARAPHRASE BRACKET
+2E1E..2E1F    ; Common # Po   [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
+2E20          ; Common # Pi       LEFT VERTICAL BAR WITH QUILL
+2E21          ; Common # Pf       RIGHT VERTICAL BAR WITH QUILL
+2E22          ; Common # Ps       TOP LEFT HALF BRACKET
+2E23          ; Common # Pe       TOP RIGHT HALF BRACKET
+2E24          ; Common # Ps       BOTTOM LEFT HALF BRACKET
+2E25          ; Common # Pe       BOTTOM RIGHT HALF BRACKET
+2E26          ; Common # Ps       LEFT SIDEWAYS U BRACKET
+2E27          ; Common # Pe       RIGHT SIDEWAYS U BRACKET
+2E28          ; Common # Ps       LEFT DOUBLE PARENTHESIS
+2E29          ; Common # Pe       RIGHT DOUBLE PARENTHESIS
+2E2A..2E2E    ; Common # Po   [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
+2E2F          ; Common # Lm       VERTICAL TILDE
+2E30..2E31    ; Common # Po   [2] RING POINT..WORD SEPARATOR MIDDLE DOT
+2FF0..2FFB    ; Common # So  [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+3000          ; Common # Zs       IDEOGRAPHIC SPACE
+3001..3003    ; Common # Po   [3] IDEOGRAPHIC COMMA..DITTO MARK
+3004          ; Common # So       JAPANESE INDUSTRIAL STANDARD SYMBOL
+3006          ; Common # Lo       IDEOGRAPHIC CLOSING MARK
+3008          ; Common # Ps       LEFT ANGLE BRACKET
+3009          ; Common # Pe       RIGHT ANGLE BRACKET
+300A          ; Common # Ps       LEFT DOUBLE ANGLE BRACKET
+300B          ; Common # Pe       RIGHT DOUBLE ANGLE BRACKET
+300C          ; Common # Ps       LEFT CORNER BRACKET
+300D          ; Common # Pe       RIGHT CORNER BRACKET
+300E          ; Common # Ps       LEFT WHITE CORNER BRACKET
+300F          ; Common # Pe       RIGHT WHITE CORNER BRACKET
+3010          ; Common # Ps       LEFT BLACK LENTICULAR BRACKET
+3011          ; Common # Pe       RIGHT BLACK LENTICULAR BRACKET
+3012..3013    ; Common # So   [2] POSTAL MARK..GETA MARK
+3014          ; Common # Ps       LEFT TORTOISE SHELL BRACKET
+3015          ; Common # Pe       RIGHT TORTOISE SHELL BRACKET
+3016          ; Common # Ps       LEFT WHITE LENTICULAR BRACKET
+3017          ; Common # Pe       RIGHT WHITE LENTICULAR BRACKET
+3018          ; Common # Ps       LEFT WHITE TORTOISE SHELL BRACKET
+3019          ; Common # Pe       RIGHT WHITE TORTOISE SHELL BRACKET
+301A          ; Common # Ps       LEFT WHITE SQUARE BRACKET
+301B          ; Common # Pe       RIGHT WHITE SQUARE BRACKET
+301C          ; Common # Pd       WAVE DASH
+301D          ; Common # Ps       REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F    ; Common # Pe   [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+3020          ; Common # So       POSTAL MARK FACE
+3030          ; Common # Pd       WAVY DASH
+3031..3035    ; Common # Lm   [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+3036..3037    ; Common # So   [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+303C          ; Common # Lo       MASU MARK
+303D          ; Common # Po       PART ALTERNATION MARK
+303E..303F    ; Common # So   [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
+309B..309C    ; Common # Sk   [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+30A0          ; Common # Pd       KATAKANA-HIRAGANA DOUBLE HYPHEN
+30FB          ; Common # Po       KATAKANA MIDDLE DOT
+30FC          ; Common # Lm       KATAKANA-HIRAGANA PROLONGED SOUND MARK
+3190..3191    ; Common # So   [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+3192..3195    ; Common # No   [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+3196..319F    ; Common # So  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31C0..31E3    ; Common # So  [36] CJK STROKE T..CJK STROKE Q
+3220..3229    ; Common # No  [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+322A..3250    ; Common # So  [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN
+3251..325F    ; Common # No  [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
+327F          ; Common # So       KOREAN STANDARD SYMBOL
+3280..3289    ; Common # No  [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+328A..32B0    ; Common # So  [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+32B1..32BF    ; Common # No  [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
+32C0..32CF    ; Common # So  [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
+3358..33FF    ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
+4DC0..4DFF    ; Common # So  [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+A700..A716    ; Common # Sk  [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
+A717..A71F    ; Common # Lm   [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A720..A721    ; Common # Sk   [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
+A788          ; Common # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A789..A78A    ; Common # Sk   [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
+A830..A835    ; Common # No   [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
+A836..A837    ; Common # So   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+A838          ; Common # Sc       NORTH INDIC RUPEE MARK
+A839          ; Common # So       NORTH INDIC QUANTITY MARK
+FD3E          ; Common # Ps       ORNATE LEFT PARENTHESIS
+FD3F          ; Common # Pe       ORNATE RIGHT PARENTHESIS
+FDFD          ; Common # So       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FE10..FE16    ; Common # Po   [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
+FE17          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
+FE18          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
+FE19          ; Common # Po       PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+FE30          ; Common # Po       PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+FE31..FE32    ; Common # Pd   [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+FE33..FE34    ; Common # Pc   [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+FE35          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+FE36          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+FE37          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+FE38          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+FE39          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
+FE3A          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
+FE3B          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
+FE3C          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
+FE3D          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
+FE3E          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
+FE3F          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
+FE40          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
+FE41          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+FE42          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+FE43          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+FE44          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+FE45..FE46    ; Common # Po   [2] SESAME DOT..WHITE SESAME DOT
+FE47          ; Common # Ps       PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
+FE48          ; Common # Pe       PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
+FE49..FE4C    ; Common # Po   [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
+FE4D..FE4F    ; Common # Pc   [3] DASHED LOW LINE..WAVY LOW LINE
+FE50..FE52    ; Common # Po   [3] SMALL COMMA..SMALL FULL STOP
+FE54..FE57    ; Common # Po   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
+FE58          ; Common # Pd       SMALL EM DASH
+FE59          ; Common # Ps       SMALL LEFT PARENTHESIS
+FE5A          ; Common # Pe       SMALL RIGHT PARENTHESIS
+FE5B          ; Common # Ps       SMALL LEFT CURLY BRACKET
+FE5C          ; Common # Pe       SMALL RIGHT CURLY BRACKET
+FE5D          ; Common # Ps       SMALL LEFT TORTOISE SHELL BRACKET
+FE5E          ; Common # Pe       SMALL RIGHT TORTOISE SHELL BRACKET
+FE5F..FE61    ; Common # Po   [3] SMALL NUMBER SIGN..SMALL ASTERISK
+FE62          ; Common # Sm       SMALL PLUS SIGN
+FE63          ; Common # Pd       SMALL HYPHEN-MINUS
+FE64..FE66    ; Common # Sm   [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
+FE68          ; Common # Po       SMALL REVERSE SOLIDUS
+FE69          ; Common # Sc       SMALL DOLLAR SIGN
+FE6A..FE6B    ; Common # Po   [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
+FEFF          ; Common # Cf       ZERO WIDTH NO-BREAK SPACE
+FF01..FF03    ; Common # Po   [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
+FF04          ; Common # Sc       FULLWIDTH DOLLAR SIGN
+FF05..FF07    ; Common # Po   [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
+FF08          ; Common # Ps       FULLWIDTH LEFT PARENTHESIS
+FF09          ; Common # Pe       FULLWIDTH RIGHT PARENTHESIS
+FF0A          ; Common # Po       FULLWIDTH ASTERISK
+FF0B          ; Common # Sm       FULLWIDTH PLUS SIGN
+FF0C          ; Common # Po       FULLWIDTH COMMA
+FF0D          ; Common # Pd       FULLWIDTH HYPHEN-MINUS
+FF0E..FF0F    ; Common # Po   [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
+FF10..FF19    ; Common # Nd  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+FF1A..FF1B    ; Common # Po   [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
+FF1C..FF1E    ; Common # Sm   [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
+FF1F..FF20    ; Common # Po   [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
+FF3B          ; Common # Ps       FULLWIDTH LEFT SQUARE BRACKET
+FF3C          ; Common # Po       FULLWIDTH REVERSE SOLIDUS
+FF3D          ; Common # Pe       FULLWIDTH RIGHT SQUARE BRACKET
+FF3E          ; Common # Sk       FULLWIDTH CIRCUMFLEX ACCENT
+FF3F          ; Common # Pc       FULLWIDTH LOW LINE
+FF40          ; Common # Sk       FULLWIDTH GRAVE ACCENT
+FF5B          ; Common # Ps       FULLWIDTH LEFT CURLY BRACKET
+FF5C          ; Common # Sm       FULLWIDTH VERTICAL LINE
+FF5D          ; Common # Pe       FULLWIDTH RIGHT CURLY BRACKET
+FF5E          ; Common # Sm       FULLWIDTH TILDE
+FF5F          ; Common # Ps       FULLWIDTH LEFT WHITE PARENTHESIS
+FF60          ; Common # Pe       FULLWIDTH RIGHT WHITE PARENTHESIS
+FF61          ; Common # Po       HALFWIDTH IDEOGRAPHIC FULL STOP
+FF62          ; Common # Ps       HALFWIDTH LEFT CORNER BRACKET
+FF63          ; Common # Pe       HALFWIDTH RIGHT CORNER BRACKET
+FF64..FF65    ; Common # Po   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+FF70          ; Common # Lm       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F    ; Common # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+FFE0..FFE1    ; Common # Sc   [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
+FFE2          ; Common # Sm       FULLWIDTH NOT SIGN
+FFE3          ; Common # Sk       FULLWIDTH MACRON
+FFE4          ; Common # So       FULLWIDTH BROKEN BAR
+FFE5..FFE6    ; Common # Sc   [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
+FFE8          ; Common # So       HALFWIDTH FORMS LIGHT VERTICAL
+FFE9..FFEC    ; Common # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+FFED..FFEE    ; Common # So   [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
+FFF9..FFFB    ; Common # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
+FFFC..FFFD    ; Common # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
+10100..10101  ; Common # Po   [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
+10102         ; Common # So       AEGEAN CHECK MARK
+10107..10133  ; Common # No  [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+10137..1013F  ; Common # So   [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+10190..1019B  ; Common # So  [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
+101D0..101FC  ; Common # So  [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
+1D000..1D0F5  ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
+1D100..1D126  ; Common # So  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
+1D129..1D164  ; Common # So  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D165..1D166  ; Common # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
+1D16A..1D16C  ; Common # So   [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
+1D16D..1D172  ; Common # Mc   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
+1D173..1D17A  ; Common # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+1D183..1D184  ; Common # So   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
+1D18C..1D1A9  ; Common # So  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
+1D1AE..1D1DD  ; Common # So  [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS
+1D300..1D356  ; Common # So  [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+1D360..1D371  ; Common # No  [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
+1D400..1D454  ; Common # L&  [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+1D456..1D49C  ; Common # L&  [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+1D49E..1D49F  ; Common # L&   [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+1D4A2         ; Common # L&       MATHEMATICAL SCRIPT CAPITAL G
+1D4A5..1D4A6  ; Common # L&   [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+1D4A9..1D4AC  ; Common # L&   [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+1D4AE..1D4B9  ; Common # L&  [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+1D4BB         ; Common # L&       MATHEMATICAL SCRIPT SMALL F
+1D4BD..1D4C3  ; Common # L&   [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+1D4C5..1D505  ; Common # L&  [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+1D507..1D50A  ; Common # L&   [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+1D50D..1D514  ; Common # L&   [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+1D516..1D51C  ; Common # L&   [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+1D51E..1D539  ; Common # L&  [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+1D53B..1D53E  ; Common # L&   [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+1D540..1D544  ; Common # L&   [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+1D546         ; Common # L&       MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+1D54A..1D550  ; Common # L&   [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+1D552..1D6A5  ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+1D6A8..1D6C0  ; Common # L&  [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+1D6C1         ; Common # Sm       MATHEMATICAL BOLD NABLA
+1D6C2..1D6DA  ; Common # L&  [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+1D6DB         ; Common # Sm       MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+1D6DC..1D6FA  ; Common # L&  [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
+1D6FB         ; Common # Sm       MATHEMATICAL ITALIC NABLA
+1D6FC..1D714  ; Common # L&  [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+1D715         ; Common # Sm       MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+1D716..1D734  ; Common # L&  [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+1D735         ; Common # Sm       MATHEMATICAL BOLD ITALIC NABLA
+1D736..1D74E  ; Common # L&  [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+1D74F         ; Common # Sm       MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+1D750..1D76E  ; Common # L&  [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+1D76F         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD NABLA
+1D770..1D788  ; Common # L&  [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+1D789         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+1D78A..1D7A8  ; Common # L&  [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+1D7A9         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
+1D7AA..1D7C2  ; Common # L&  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+1D7C3         ; Common # Sm       MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+1D7C4..1D7CB  ; Common # L&   [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
+1D7CE..1D7FF  ; Common # Nd  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1F000..1F02B  ; Common # So  [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
+1F030..1F093  ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+1F100..1F10A  ; Common # No  [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
+1F110..1F12E  ; Common # So  [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
+1F131         ; Common # So       SQUARED LATIN CAPITAL LETTER B
+1F13D         ; Common # So       SQUARED LATIN CAPITAL LETTER N
+1F13F         ; Common # So       SQUARED LATIN CAPITAL LETTER P
+1F142         ; Common # So       SQUARED LATIN CAPITAL LETTER S
+1F146         ; Common # So       SQUARED LATIN CAPITAL LETTER W
+1F14A..1F14E  ; Common # So   [5] SQUARED HV..SQUARED PPV
+1F157         ; Common # So       NEGATIVE CIRCLED LATIN CAPITAL LETTER H
+1F15F         ; Common # So       NEGATIVE CIRCLED LATIN CAPITAL LETTER P
+1F179         ; Common # So       NEGATIVE SQUARED LATIN CAPITAL LETTER J
+1F17B..1F17C  ; Common # So   [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
+1F17F         ; Common # So       NEGATIVE SQUARED LATIN CAPITAL LETTER P
+1F18A..1F18D  ; Common # So   [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
+1F190         ; Common # So       SQUARE DJ
+1F210..1F231  ; Common # So  [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
+1F240..1F248  ; Common # So   [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+E0001         ; Common # Cf       LANGUAGE TAG
+E0020..E007F  ; Common # Cf  [96] TAG SPACE..CANCEL TAG
+
+# Total code points: 5395
+
+# ================================================
+
+0041..005A    ; Latin # L&  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+0061..007A    ; Latin # L&  [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+00AA          ; Latin # L&       FEMININE ORDINAL INDICATOR
+00BA          ; Latin # L&       MASCULINE ORDINAL INDICATOR
+00C0..00D6    ; Latin # L&  [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8..00F6    ; Latin # L&  [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
+00F8..01BA    ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
+01BB          ; Latin # Lo       LATIN LETTER TWO WITH STROKE
+01BC..01BF    ; Latin # L&   [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
+01C0..01C3    ; Latin # Lo   [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
+01C4..0293    ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
+0294          ; Latin # Lo       LATIN LETTER GLOTTAL STOP
+0295..02AF    ; Latin # L&  [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+02B0..02B8    ; Latin # Lm   [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
+02E0..02E4    ; Latin # Lm   [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+1D00..1D25    ; Latin # L&  [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN
+1D2C..1D5C    ; Latin # Lm  [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN
+1D62..1D65    ; Latin # L&   [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V
+1D6B..1D77    ; Latin # L&  [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+1D79..1D9A    ; Latin # L&  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+1D9B..1DBE    ; Latin # Lm  [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH
+1E00..1EFF    ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
+2071          ; Latin # Lm       SUPERSCRIPT LATIN SMALL LETTER I
+207F          ; Latin # Lm       SUPERSCRIPT LATIN SMALL LETTER N
+2090..2094    ; Latin # Lm   [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
+212A..212B    ; Latin # L&   [2] KELVIN SIGN..ANGSTROM SIGN
+2132          ; Latin # L&       TURNED CAPITAL F
+214E          ; Latin # L&       TURNED SMALL F
+2160..2182    ; Latin # Nl  [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
+2183..2184    ; Latin # L&   [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
+2185..2188    ; Latin # Nl   [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
+2C60..2C7C    ; Latin # L&  [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
+2C7D          ; Latin # Lm       MODIFIER LETTER CAPITAL V
+2C7E..2C7F    ; Latin # L&   [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
+A722..A76F    ; Latin # L&  [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
+A770          ; Latin # Lm       MODIFIER LETTER US
+A771..A787    ; Latin # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
+A78B..A78C    ; Latin # L&   [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
+A7FB..A7FF    ; Latin # Lo   [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
+FB00..FB06    ; Latin # L&   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+FF21..FF3A    ; Latin # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
+FF41..FF5A    ; Latin # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+
+# Total code points: 1244
+
+# ================================================
+
+0370..0373    ; Greek # L&   [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
+0375          ; Greek # Sk       GREEK LOWER NUMERAL SIGN
+0376..0377    ; Greek # L&   [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+037A          ; Greek # Lm       GREEK YPOGEGRAMMENI
+037B..037D    ; Greek # L&   [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0384          ; Greek # Sk       GREEK TONOS
+0386          ; Greek # L&       GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388..038A    ; Greek # L&   [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+038C          ; Greek # L&       GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E..03A1    ; Greek # L&  [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
+03A3..03E1    ; Greek # L&  [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI
+03F0..03F5    ; Greek # L&   [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL
+03F6          ; Greek # Sm       GREEK REVERSED LUNATE EPSILON SYMBOL
+03F7..03FF    ; Greek # L&   [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+1D26..1D2A    ; Greek # L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
+1D5D..1D61    ; Greek # Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
+1D66..1D6A    ; Greek # L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
+1DBF          ; Greek # Lm       MODIFIER LETTER SMALL THETA
+1F00..1F15    ; Greek # L&  [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+1F18..1F1D    ; Greek # L&   [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F20..1F45    ; Greek # L&  [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+1F48..1F4D    ; Greek # L&   [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50..1F57    ; Greek # L&   [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F59          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D          ; Greek # L&       GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F..1F7D    ; Greek # L&  [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
+1F80..1FB4    ; Greek # L&  [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6..1FBC    ; Greek # L&   [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBD          ; Greek # Sk       GREEK KORONIS
+1FBE          ; Greek # L&       GREEK PROSGEGRAMMENI
+1FBF..1FC1    ; Greek # Sk   [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+1FC2..1FC4    ; Greek # L&   [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6..1FCC    ; Greek # L&   [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCD..1FCF    ; Greek # Sk   [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+1FD0..1FD3    ; Greek # L&   [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6..1FDB    ; Greek # L&   [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
+1FDD..1FDF    ; Greek # Sk   [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FE0..1FEC    ; Greek # L&  [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
+1FED..1FEF    ; Greek # Sk   [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+1FF2..1FF4    ; Greek # L&   [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6..1FFC    ; Greek # L&   [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFD..1FFE    ; Greek # Sk   [2] GREEK OXIA..GREEK DASIA
+2126          ; Greek # L&       OHM SIGN
+10140..10174  ; Greek # Nl  [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
+10175..10178  ; Greek # No   [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
+10179..10189  ; Greek # So  [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
+1018A         ; Greek # No       GREEK ZERO SIGN
+1D200..1D241  ; Greek # So  [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
+1D242..1D244  ; Greek # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+1D245         ; Greek # So       GREEK MUSICAL LEIMMA
+
+# Total code points: 511
+
+# ================================================
+
+0400..0481    ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA
+0482          ; Cyrillic # So       CYRILLIC THOUSANDS SIGN
+0483..0484    ; Cyrillic # Mn   [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
+0487          ; Cyrillic # Mn       COMBINING CYRILLIC POKRYTIE
+0488..0489    ; Cyrillic # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+048A..0525    ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
+1D2B          ; Cyrillic # L&       CYRILLIC LETTER SMALL CAPITAL EL
+1D78          ; Cyrillic # Lm       MODIFIER LETTER CYRILLIC EN
+2DE0..2DFF    ; Cyrillic # Mn  [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+A640..A65F    ; Cyrillic # L&  [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
+A662..A66D    ; Cyrillic # L&  [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
+A66E          ; Cyrillic # Lo       CYRILLIC LETTER MULTIOCULAR O
+A66F          ; Cyrillic # Mn       COMBINING CYRILLIC VZMET
+A670..A672    ; Cyrillic # Me   [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+A673          ; Cyrillic # Po       SLAVONIC ASTERISK
+A67C..A67D    ; Cyrillic # Mn   [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
+A67E          ; Cyrillic # Po       CYRILLIC KAVYKA
+A67F          ; Cyrillic # Lm       CYRILLIC PAYEROK
+A680..A697    ; Cyrillic # L&  [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
+
+# Total code points: 404
+
+# ================================================
+
+0531..0556    ; Armenian # L&  [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+0559          ; Armenian # Lm       ARMENIAN MODIFIER LETTER LEFT HALF RING
+055A..055F    ; Armenian # Po   [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+0561..0587    ; Armenian # L&  [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
+058A          ; Armenian # Pd       ARMENIAN HYPHEN
+FB13..FB17    ; Armenian # L&   [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+
+# Total code points: 90
+
+# ================================================
+
+0591..05BD    ; Hebrew # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BE          ; Hebrew # Pd       HEBREW PUNCTUATION MAQAF
+05BF          ; Hebrew # Mn       HEBREW POINT RAFE
+05C0          ; Hebrew # Po       HEBREW PUNCTUATION PASEQ
+05C1..05C2    ; Hebrew # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C3          ; Hebrew # Po       HEBREW PUNCTUATION SOF PASUQ
+05C4..05C5    ; Hebrew # Mn   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C6          ; Hebrew # Po       HEBREW PUNCTUATION NUN HAFUKHA
+05C7          ; Hebrew # Mn       HEBREW POINT QAMATS QATAN
+05D0..05EA    ; Hebrew # Lo  [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+05F0..05F2    ; Hebrew # Lo   [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
+05F3..05F4    ; Hebrew # Po   [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+FB1D          ; Hebrew # Lo       HEBREW LETTER YOD WITH HIRIQ
+FB1E          ; Hebrew # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
+FB1F..FB28    ; Hebrew # Lo  [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+FB29          ; Hebrew # Sm       HEBREW LETTER ALTERNATIVE PLUS SIGN
+FB2A..FB36    ; Hebrew # Lo  [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+FB38..FB3C    ; Hebrew # Lo   [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+FB3E          ; Hebrew # Lo       HEBREW LETTER MEM WITH DAGESH
+FB40..FB41    ; Hebrew # Lo   [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+FB43..FB44    ; Hebrew # Lo   [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+FB46..FB4F    ; Hebrew # Lo  [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
+
+# Total code points: 133
+
+# ================================================
+
+0606..0608    ; Arabic # Sm   [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
+0609..060A    ; Arabic # Po   [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
+060B          ; Arabic # Sc       AFGHANI SIGN
+060D          ; Arabic # Po       ARABIC DATE SEPARATOR
+060E..060F    ; Arabic # So   [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
+0610..061A    ; Arabic # Mn  [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+061E          ; Arabic # Po       ARABIC TRIPLE DOT PUNCTUATION MARK
+0621..063F    ; Arabic # Lo  [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0641..064A    ; Arabic # Lo  [10] ARABIC LETTER FEH..ARABIC LETTER YEH
+0656..065E    ; Arabic # Mn   [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
+066A..066D    ; Arabic # Po   [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+066E..066F    ; Arabic # Lo   [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+0671..06D3    ; Arabic # Lo  [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D4          ; Arabic # Po       ARABIC FULL STOP
+06D5          ; Arabic # Lo       ARABIC LETTER AE
+06D6..06DC    ; Arabic # Mn   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DE          ; Arabic # Me       ARABIC START OF RUB EL HIZB
+06DF..06E4    ; Arabic # Mn   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E5..06E6    ; Arabic # Lm   [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+06E7..06E8    ; Arabic # Mn   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06E9          ; Arabic # So       ARABIC PLACE OF SAJDAH
+06EA..06ED    ; Arabic # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+06EE..06EF    ; Arabic # Lo   [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+06F0..06F9    ; Arabic # Nd  [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+06FA..06FC    ; Arabic # Lo   [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+06FD..06FE    ; Arabic # So   [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
+06FF          ; Arabic # Lo       ARABIC LETTER HEH WITH INVERTED V
+0750..077F    ; Arabic # Lo  [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
+FB50..FBB1    ; Arabic # Lo  [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+FBD3..FD3D    ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+FD50..FD8F    ; Arabic # Lo  [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+FD92..FDC7    ; Arabic # Lo  [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+FDF0..FDFB    ; Arabic # Lo  [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
+FDFC          ; Arabic # Sc       RIAL SIGN
+FE70..FE74    ; Arabic # Lo   [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
+FE76..FEFC    ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+10E60..10E7E  ; Arabic # No  [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
+
+# Total code points: 1030
+
+# ================================================
+
+0700..070D    ; Syriac # Po  [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+070F          ; Syriac # Cf       SYRIAC ABBREVIATION MARK
+0710          ; Syriac # Lo       SYRIAC LETTER ALAPH
+0711          ; Syriac # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
+0712..072F    ; Syriac # Lo  [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
+0730..074A    ; Syriac # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+074D..074F    ; Syriac # Lo   [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
+
+# Total code points: 77
+
+# ================================================
+
+0780..07A5    ; Thaana # Lo  [38] THAANA LETTER HAA..THAANA LETTER WAAVU
+07A6..07B0    ; Thaana # Mn  [11] THAANA ABAFILI..THAANA SUKUN
+07B1          ; Thaana # Lo       THAANA LETTER NAA
+
+# Total code points: 50
+
+# ================================================
+
+0900..0902    ; Devanagari # Mn   [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+0903          ; Devanagari # Mc       DEVANAGARI SIGN VISARGA
+0904..0939    ; Devanagari # Lo  [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
+093C          ; Devanagari # Mn       DEVANAGARI SIGN NUKTA
+093D          ; Devanagari # Lo       DEVANAGARI SIGN AVAGRAHA
+093E..0940    ; Devanagari # Mc   [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+0941..0948    ; Devanagari # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+0949..094C    ; Devanagari # Mc   [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+094D          ; Devanagari # Mn       DEVANAGARI SIGN VIRAMA
+094E          ; Devanagari # Mc       DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
+0950          ; Devanagari # Lo       DEVANAGARI OM
+0953..0955    ; Devanagari # Mn   [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
+0958..0961    ; Devanagari # Lo  [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
+0962..0963    ; Devanagari # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0966..096F    ; Devanagari # Nd  [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+0971          ; Devanagari # Lm       DEVANAGARI SIGN HIGH SPACING DOT
+0972          ; Devanagari # Lo       DEVANAGARI LETTER CANDRA A
+0979..097F    ; Devanagari # Lo   [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
+A8E0..A8F1    ; Devanagari # Mn  [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A8F2..A8F7    ; Devanagari # Lo   [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
+A8F8..A8FA    ; Devanagari # Po   [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
+A8FB          ; Devanagari # Lo       DEVANAGARI HEADSTROKE
+
+# Total code points: 140
+
+# ================================================
+
+0981          ; Bengali # Mn       BENGALI SIGN CANDRABINDU
+0982..0983    ; Bengali # Mc   [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+0985..098C    ; Bengali # Lo   [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
+098F..0990    ; Bengali # Lo   [2] BENGALI LETTER E..BENGALI LETTER AI
+0993..09A8    ; Bengali # Lo  [22] BENGALI LETTER O..BENGALI LETTER NA
+09AA..09B0    ; Bengali # Lo   [7] BENGALI LETTER PA..BENGALI LETTER RA
+09B2          ; Bengali # Lo       BENGALI LETTER LA
+09B6..09B9    ; Bengali # Lo   [4] BENGALI LETTER SHA..BENGALI LETTER HA
+09BC          ; Bengali # Mn       BENGALI SIGN NUKTA
+09BD          ; Bengali # Lo       BENGALI SIGN AVAGRAHA
+09BE..09C0    ; Bengali # Mc   [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+09C1..09C4    ; Bengali # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09C7..09C8    ; Bengali # Mc   [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09CB..09CC    ; Bengali # Mc   [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+09CD          ; Bengali # Mn       BENGALI SIGN VIRAMA
+09CE          ; Bengali # Lo       BENGALI LETTER KHANDA TA
+09D7          ; Bengali # Mc       BENGALI AU LENGTH MARK
+09DC..09DD    ; Bengali # Lo   [2] BENGALI LETTER RRA..BENGALI LETTER RHA
+09DF..09E1    ; Bengali # Lo   [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
+09E2..09E3    ; Bengali # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+09E6..09EF    ; Bengali # Nd  [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+09F0..09F1    ; Bengali # Lo   [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09F2..09F3    ; Bengali # Sc   [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
+09F4..09F9    ; Bengali # No   [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
+09FA          ; Bengali # So       BENGALI ISSHAR
+09FB          ; Bengali # Sc       BENGALI GANDA MARK
+
+# Total code points: 92
+
+# ================================================
+
+0A01..0A02    ; Gurmukhi # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A03          ; Gurmukhi # Mc       GURMUKHI SIGN VISARGA
+0A05..0A0A    ; Gurmukhi # Lo   [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
+0A0F..0A10    ; Gurmukhi # Lo   [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
+0A13..0A28    ; Gurmukhi # Lo  [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
+0A2A..0A30    ; Gurmukhi # Lo   [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
+0A32..0A33    ; Gurmukhi # Lo   [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
+0A35..0A36    ; Gurmukhi # Lo   [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
+0A38..0A39    ; Gurmukhi # Lo   [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
+0A3C          ; Gurmukhi # Mn       GURMUKHI SIGN NUKTA
+0A3E..0A40    ; Gurmukhi # Mc   [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+0A41..0A42    ; Gurmukhi # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48    ; Gurmukhi # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D    ; Gurmukhi # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51          ; Gurmukhi # Mn       GURMUKHI SIGN UDAAT
+0A59..0A5C    ; Gurmukhi # Lo   [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
+0A5E          ; Gurmukhi # Lo       GURMUKHI LETTER FA
+0A66..0A6F    ; Gurmukhi # Nd  [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+0A70..0A71    ; Gurmukhi # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A72..0A74    ; Gurmukhi # Lo   [3] GURMUKHI IRI..GURMUKHI EK ONKAR
+0A75          ; Gurmukhi # Mn       GURMUKHI SIGN YAKASH
+
+# Total code points: 79
+
+# ================================================
+
+0A81..0A82    ; Gujarati # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0A83          ; Gujarati # Mc       GUJARATI SIGN VISARGA
+0A85..0A8D    ; Gujarati # Lo   [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+0A8F..0A91    ; Gujarati # Lo   [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+0A93..0AA8    ; Gujarati # Lo  [22] GUJARATI LETTER O..GUJARATI LETTER NA
+0AAA..0AB0    ; Gujarati # Lo   [7] GUJARATI LETTER PA..GUJARATI LETTER RA
+0AB2..0AB3    ; Gujarati # Lo   [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
+0AB5..0AB9    ; Gujarati # Lo   [5] GUJARATI LETTER VA..GUJARATI LETTER HA
+0ABC          ; Gujarati # Mn       GUJARATI SIGN NUKTA
+0ABD          ; Gujarati # Lo       GUJARATI SIGN AVAGRAHA
+0ABE..0AC0    ; Gujarati # Mc   [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+0AC1..0AC5    ; Gujarati # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8    ; Gujarati # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0AC9          ; Gujarati # Mc       GUJARATI VOWEL SIGN CANDRA O
+0ACB..0ACC    ; Gujarati # Mc   [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+0ACD          ; Gujarati # Mn       GUJARATI SIGN VIRAMA
+0AD0          ; Gujarati # Lo       GUJARATI OM
+0AE0..0AE1    ; Gujarati # Lo   [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
+0AE2..0AE3    ; Gujarati # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AE6..0AEF    ; Gujarati # Nd  [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+0AF1          ; Gujarati # Sc       GUJARATI RUPEE SIGN
+
+# Total code points: 83
+
+# ================================================
+
+0B01          ; Oriya # Mn       ORIYA SIGN CANDRABINDU
+0B02..0B03    ; Oriya # Mc   [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+0B05..0B0C    ; Oriya # Lo   [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
+0B0F..0B10    ; Oriya # Lo   [2] ORIYA LETTER E..ORIYA LETTER AI
+0B13..0B28    ; Oriya # Lo  [22] ORIYA LETTER O..ORIYA LETTER NA
+0B2A..0B30    ; Oriya # Lo   [7] ORIYA LETTER PA..ORIYA LETTER RA
+0B32..0B33    ; Oriya # Lo   [2] ORIYA LETTER LA..ORIYA LETTER LLA
+0B35..0B39    ; Oriya # Lo   [5] ORIYA LETTER VA..ORIYA LETTER HA
+0B3C          ; Oriya # Mn       ORIYA SIGN NUKTA
+0B3D          ; Oriya # Lo       ORIYA SIGN AVAGRAHA
+0B3E          ; Oriya # Mc       ORIYA VOWEL SIGN AA
+0B3F          ; Oriya # Mn       ORIYA VOWEL SIGN I
+0B40          ; Oriya # Mc       ORIYA VOWEL SIGN II
+0B41..0B44    ; Oriya # Mn   [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B47..0B48    ; Oriya # Mc   [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B4B..0B4C    ; Oriya # Mc   [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+0B4D          ; Oriya # Mn       ORIYA SIGN VIRAMA
+0B56          ; Oriya # Mn       ORIYA AI LENGTH MARK
+0B57          ; Oriya # Mc       ORIYA AU LENGTH MARK
+0B5C..0B5D    ; Oriya # Lo   [2] ORIYA LETTER RRA..ORIYA LETTER RHA
+0B5F..0B61    ; Oriya # Lo   [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
+0B62..0B63    ; Oriya # Mn   [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B66..0B6F    ; Oriya # Nd  [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+0B70          ; Oriya # So       ORIYA ISSHAR
+0B71          ; Oriya # Lo       ORIYA LETTER WA
+
+# Total code points: 84
+
+# ================================================
+
+0B82          ; Tamil # Mn       TAMIL SIGN ANUSVARA
+0B83          ; Tamil # Lo       TAMIL SIGN VISARGA
+0B85..0B8A    ; Tamil # Lo   [6] TAMIL LETTER A..TAMIL LETTER UU
+0B8E..0B90    ; Tamil # Lo   [3] TAMIL LETTER E..TAMIL LETTER AI
+0B92..0B95    ; Tamil # Lo   [4] TAMIL LETTER O..TAMIL LETTER KA
+0B99..0B9A    ; Tamil # Lo   [2] TAMIL LETTER NGA..TAMIL LETTER CA
+0B9C          ; Tamil # Lo       TAMIL LETTER JA
+0B9E..0B9F    ; Tamil # Lo   [2] TAMIL LETTER NYA..TAMIL LETTER TTA
+0BA3..0BA4    ; Tamil # Lo   [2] TAMIL LETTER NNA..TAMIL LETTER TA
+0BA8..0BAA    ; Tamil # Lo   [3] TAMIL LETTER NA..TAMIL LETTER PA
+0BAE..0BB9    ; Tamil # Lo  [12] TAMIL LETTER MA..TAMIL LETTER HA
+0BBE..0BBF    ; Tamil # Mc   [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+0BC0          ; Tamil # Mn       TAMIL VOWEL SIGN II
+0BC1..0BC2    ; Tamil # Mc   [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+0BC6..0BC8    ; Tamil # Mc   [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BCA..0BCC    ; Tamil # Mc   [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+0BCD          ; Tamil # Mn       TAMIL SIGN VIRAMA
+0BD0          ; Tamil # Lo       TAMIL OM
+0BD7          ; Tamil # Mc       TAMIL AU LENGTH MARK
+0BE6..0BEF    ; Tamil # Nd  [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0..0BF2    ; Tamil # No   [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0BF3..0BF8    ; Tamil # So   [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
+0BF9          ; Tamil # Sc       TAMIL RUPEE SIGN
+0BFA          ; Tamil # So       TAMIL NUMBER SIGN
+
+# Total code points: 72
+
+# ================================================
+
+0C01..0C03    ; Telugu # Mc   [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C05..0C0C    ; Telugu # Lo   [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
+0C0E..0C10    ; Telugu # Lo   [3] TELUGU LETTER E..TELUGU LETTER AI
+0C12..0C28    ; Telugu # Lo  [23] TELUGU LETTER O..TELUGU LETTER NA
+0C2A..0C33    ; Telugu # Lo  [10] TELUGU LETTER PA..TELUGU LETTER LLA
+0C35..0C39    ; Telugu # Lo   [5] TELUGU LETTER VA..TELUGU LETTER HA
+0C3D          ; Telugu # Lo       TELUGU SIGN AVAGRAHA
+0C3E..0C40    ; Telugu # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C41..0C44    ; Telugu # Mc   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+0C46..0C48    ; Telugu # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D    ; Telugu # Mn   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56    ; Telugu # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C58..0C59    ; Telugu # Lo   [2] TELUGU LETTER TSA..TELUGU LETTER DZA
+0C60..0C61    ; Telugu # Lo   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C62..0C63    ; Telugu # Mn   [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C66..0C6F    ; Telugu # Nd  [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+0C78..0C7E    ; Telugu # No   [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
+0C7F          ; Telugu # So       TELUGU SIGN TUUMU
+
+# Total code points: 93
+
+# ================================================
+
+0C82..0C83    ; Kannada # Mc   [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0C85..0C8C    ; Kannada # Lo   [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
+0C8E..0C90    ; Kannada # Lo   [3] KANNADA LETTER E..KANNADA LETTER AI
+0C92..0CA8    ; Kannada # Lo  [23] KANNADA LETTER O..KANNADA LETTER NA
+0CAA..0CB3    ; Kannada # Lo  [10] KANNADA LETTER PA..KANNADA LETTER LLA
+0CB5..0CB9    ; Kannada # Lo   [5] KANNADA LETTER VA..KANNADA LETTER HA
+0CBC          ; Kannada # Mn       KANNADA SIGN NUKTA
+0CBD          ; Kannada # Lo       KANNADA SIGN AVAGRAHA
+0CBE          ; Kannada # Mc       KANNADA VOWEL SIGN AA
+0CBF          ; Kannada # Mn       KANNADA VOWEL SIGN I
+0CC0..0CC4    ; Kannada # Mc   [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+0CC6          ; Kannada # Mn       KANNADA VOWEL SIGN E
+0CC7..0CC8    ; Kannada # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; Kannada # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CCC..0CCD    ; Kannada # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CD5..0CD6    ; Kannada # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CDE          ; Kannada # Lo       KANNADA LETTER FA
+0CE0..0CE1    ; Kannada # Lo   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
+0CE2..0CE3    ; Kannada # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0CE6..0CEF    ; Kannada # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+
+# Total code points: 84
+
+# ================================================
+
+0D02..0D03    ; Malayalam # Mc   [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D05..0D0C    ; Malayalam # Lo   [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
+0D0E..0D10    ; Malayalam # Lo   [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
+0D12..0D28    ; Malayalam # Lo  [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
+0D2A..0D39    ; Malayalam # Lo  [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
+0D3D          ; Malayalam # Lo       MALAYALAM SIGN AVAGRAHA
+0D3E..0D40    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+0D41..0D44    ; Malayalam # Mn   [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D46..0D48    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D4A..0D4C    ; Malayalam # Mc   [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+0D4D          ; Malayalam # Mn       MALAYALAM SIGN VIRAMA
+0D57          ; Malayalam # Mc       MALAYALAM AU LENGTH MARK
+0D60..0D61    ; Malayalam # Lo   [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
+0D62..0D63    ; Malayalam # Mn   [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D66..0D6F    ; Malayalam # Nd  [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+0D70..0D75    ; Malayalam # No   [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
+0D79          ; Malayalam # So       MALAYALAM DATE MARK
+0D7A..0D7F    ; Malayalam # Lo   [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+
+# Total code points: 95
+
+# ================================================
+
+0D82..0D83    ; Sinhala # Mc   [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0D85..0D96    ; Sinhala # Lo  [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+0D9A..0DB1    ; Sinhala # Lo  [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+0DB3..0DBB    ; Sinhala # Lo   [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+0DBD          ; Sinhala # Lo       SINHALA LETTER DANTAJA LAYANNA
+0DC0..0DC6    ; Sinhala # Lo   [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+0DCA          ; Sinhala # Mn       SINHALA SIGN AL-LAKUNA
+0DCF..0DD1    ; Sinhala # Mc   [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+0DD2..0DD4    ; Sinhala # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6          ; Sinhala # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DD8..0DDF    ; Sinhala # Mc   [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DF2..0DF3    ; Sinhala # Mc   [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0DF4          ; Sinhala # Po       SINHALA PUNCTUATION KUNDDALIYA
+
+# Total code points: 80
+
+# ================================================
+
+0E01..0E30    ; Thai # Lo  [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
+0E31          ; Thai # Mn       THAI CHARACTER MAI HAN-AKAT
+0E32..0E33    ; Thai # Lo   [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
+0E34..0E3A    ; Thai # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E40..0E45    ; Thai # Lo   [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
+0E46          ; Thai # Lm       THAI CHARACTER MAIYAMOK
+0E47..0E4E    ; Thai # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0E4F          ; Thai # Po       THAI CHARACTER FONGMAN
+0E50..0E59    ; Thai # Nd  [10] THAI DIGIT ZERO..THAI DIGIT NINE
+0E5A..0E5B    ; Thai # Po   [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+
+# Total code points: 86
+
+# ================================================
+
+0E81..0E82    ; Lao # Lo   [2] LAO LETTER KO..LAO LETTER KHO SUNG
+0E84          ; Lao # Lo       LAO LETTER KHO TAM
+0E87..0E88    ; Lao # Lo   [2] LAO LETTER NGO..LAO LETTER CO
+0E8A          ; Lao # Lo       LAO LETTER SO TAM
+0E8D          ; Lao # Lo       LAO LETTER NYO
+0E94..0E97    ; Lao # Lo   [4] LAO LETTER DO..LAO LETTER THO TAM
+0E99..0E9F    ; Lao # Lo   [7] LAO LETTER NO..LAO LETTER FO SUNG
+0EA1..0EA3    ; Lao # Lo   [3] LAO LETTER MO..LAO LETTER LO LING
+0EA5          ; Lao # Lo       LAO LETTER LO LOOT
+0EA7          ; Lao # Lo       LAO LETTER WO
+0EAA..0EAB    ; Lao # Lo   [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG
+0EAD..0EB0    ; Lao # Lo   [4] LAO LETTER O..LAO VOWEL SIGN A
+0EB1          ; Lao # Mn       LAO VOWEL SIGN MAI KAN
+0EB2..0EB3    ; Lao # Lo   [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
+0EB4..0EB9    ; Lao # Mn   [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBB..0EBC    ; Lao # Mn   [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
+0EBD          ; Lao # Lo       LAO SEMIVOWEL SIGN NYO
+0EC0..0EC4    ; Lao # Lo   [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+0EC6          ; Lao # Lm       LAO KO LA
+0EC8..0ECD    ; Lao # Mn   [6] LAO TONE MAI EK..LAO NIGGAHITA
+0ED0..0ED9    ; Lao # Nd  [10] LAO DIGIT ZERO..LAO DIGIT NINE
+0EDC..0EDD    ; Lao # Lo   [2] LAO HO NO..LAO HO MO
+
+# Total code points: 65
+
+# ================================================
+
+0F00          ; Tibetan # Lo       TIBETAN SYLLABLE OM
+0F01..0F03    ; Tibetan # So   [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
+0F04..0F12    ; Tibetan # Po  [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
+0F13..0F17    ; Tibetan # So   [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+0F18..0F19    ; Tibetan # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F1A..0F1F    ; Tibetan # So   [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+0F20..0F29    ; Tibetan # Nd  [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+0F2A..0F33    ; Tibetan # No  [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
+0F34          ; Tibetan # So       TIBETAN MARK BSDUS RTAGS
+0F35          ; Tibetan # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
+0F36          ; Tibetan # So       TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+0F37          ; Tibetan # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F38          ; Tibetan # So       TIBETAN MARK CHE MGO
+0F39          ; Tibetan # Mn       TIBETAN MARK TSA -PHRU
+0F3A          ; Tibetan # Ps       TIBETAN MARK GUG RTAGS GYON
+0F3B          ; Tibetan # Pe       TIBETAN MARK GUG RTAGS GYAS
+0F3C          ; Tibetan # Ps       TIBETAN MARK ANG KHANG GYON
+0F3D          ; Tibetan # Pe       TIBETAN MARK ANG KHANG GYAS
+0F3E..0F3F    ; Tibetan # Mc   [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+0F40..0F47    ; Tibetan # Lo   [8] TIBETAN LETTER KA..TIBETAN LETTER JA
+0F49..0F6C    ; Tibetan # Lo  [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
+0F71..0F7E    ; Tibetan # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F7F          ; Tibetan # Mc       TIBETAN SIGN RNAM BCAD
+0F80..0F84    ; Tibetan # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F85          ; Tibetan # Po       TIBETAN MARK PALUTA
+0F86..0F87    ; Tibetan # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F88..0F8B    ; Tibetan # Lo   [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
+0F90..0F97    ; Tibetan # Mn   [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC    ; Tibetan # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FBE..0FC5    ; Tibetan # So   [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+0FC6          ; Tibetan # Mn       TIBETAN SYMBOL PADMA GDAN
+0FC7..0FCC    ; Tibetan # So   [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+0FCE..0FCF    ; Tibetan # So   [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
+0FD0..0FD4    ; Tibetan # Po   [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
+
+# Total code points: 201
+
+# ================================================
+
+1000..102A    ; Myanmar # Lo  [43] MYANMAR LETTER KA..MYANMAR LETTER AU
+102B..102C    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+102D..1030    ; Myanmar # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1031          ; Myanmar # Mc       MYANMAR VOWEL SIGN E
+1032..1037    ; Myanmar # Mn   [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+1038          ; Myanmar # Mc       MYANMAR SIGN VISARGA
+1039..103A    ; Myanmar # Mn   [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103B..103C    ; Myanmar # Mc   [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+103D..103E    ; Myanmar # Mn   [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+103F          ; Myanmar # Lo       MYANMAR LETTER GREAT SA
+1040..1049    ; Myanmar # Nd  [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+104A..104F    ; Myanmar # Po   [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+1050..1055    ; Myanmar # Lo   [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
+1056..1057    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+1058..1059    ; Myanmar # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105A..105D    ; Myanmar # Lo   [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
+105E..1060    ; Myanmar # Mn   [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1061          ; Myanmar # Lo       MYANMAR LETTER SGAW KAREN SHA
+1062..1064    ; Myanmar # Mc   [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
+1065..1066    ; Myanmar # Lo   [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
+1067..106D    ; Myanmar # Mc   [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
+106E..1070    ; Myanmar # Lo   [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
+1071..1074    ; Myanmar # Mn   [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1075..1081    ; Myanmar # Lo  [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
+1082          ; Myanmar # Mn       MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1083..1084    ; Myanmar # Mc   [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+1085..1086    ; Myanmar # Mn   [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+1087..108C    ; Myanmar # Mc   [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+108D          ; Myanmar # Mn       MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+108E          ; Myanmar # Lo       MYANMAR LETTER RUMAI PALAUNG FA
+108F          ; Myanmar # Mc       MYANMAR SIGN RUMAI PALAUNG TONE-5
+1090..1099    ; Myanmar # Nd  [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
+109A..109C    ; Myanmar # Mc   [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+109D          ; Myanmar # Mn       MYANMAR VOWEL SIGN AITON AI
+109E..109F    ; Myanmar # So   [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
+AA60..AA6F    ; Myanmar # Lo  [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
+AA70          ; Myanmar # Lm       MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
+AA71..AA76    ; Myanmar # Lo   [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
+AA77..AA79    ; Myanmar # So   [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
+AA7A          ; Myanmar # Lo       MYANMAR LETTER AITON RA
+AA7B          ; Myanmar # Mc       MYANMAR SIGN PAO KAREN TONE
+
+# Total code points: 188
+
+# ================================================
+
+10A0..10C5    ; Georgian # L&  [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
+10D0..10FA    ; Georgian # Lo  [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+10FC          ; Georgian # Lm       MODIFIER LETTER GEORGIAN NAR
+2D00..2D25    ; Georgian # L&  [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
+
+# Total code points: 120
+
+# ================================================
+
+1100..11FF    ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
+3131..318E    ; Hangul # Lo  [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+3200..321E    ; Hangul # So  [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+3260..327E    ; Hangul # So  [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
+A960..A97C    ; Hangul # Lo  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
+AC00..D7A3    ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
+D7B0..D7C6    ; Hangul # Lo  [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+D7CB..D7FB    ; Hangul # Lo  [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+FFA0..FFBE    ; Hangul # Lo  [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+FFC2..FFC7    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+FFCA..FFCF    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+FFD2..FFD7    ; Hangul # Lo   [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+FFDA..FFDC    ; Hangul # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+
+# Total code points: 11737
+
+# ================================================
+
+1200..1248    ; Ethiopic # Lo  [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
+124A..124D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+1250..1256    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+1258          ; Ethiopic # Lo       ETHIOPIC SYLLABLE QHWA
+125A..125D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+1260..1288    ; Ethiopic # Lo  [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+128A..128D    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+1290..12B0    ; Ethiopic # Lo  [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+12B2..12B5    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+12B8..12BE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+12C0          ; Ethiopic # Lo       ETHIOPIC SYLLABLE KXWA
+12C2..12C5    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+12C8..12D6    ; Ethiopic # Lo  [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+12D8..1310    ; Ethiopic # Lo  [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+1312..1315    ; Ethiopic # Lo   [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+1318..135A    ; Ethiopic # Lo  [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+135F          ; Ethiopic # Mn       ETHIOPIC COMBINING GEMINATION MARK
+1360          ; Ethiopic # So       ETHIOPIC SECTION MARK
+1361..1368    ; Ethiopic # Po   [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
+1369..137C    ; Ethiopic # No  [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
+1380..138F    ; Ethiopic # Lo  [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+1390..1399    ; Ethiopic # So  [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+2D80..2D96    ; Ethiopic # Lo  [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
+2DA0..2DA6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
+2DA8..2DAE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
+2DB0..2DB6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
+2DB8..2DBE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
+2DC0..2DC6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
+2DC8..2DCE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
+2DD0..2DD6    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
+2DD8..2DDE    ; Ethiopic # Lo   [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
+
+# Total code points: 461
+
+# ================================================
+
+13A0..13F4    ; Cherokee # Lo  [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
+
+# Total code points: 85
+
+# ================================================
+
+1400          ; Canadian_Aboriginal # Pd       CANADIAN SYLLABICS HYPHEN
+1401..166C    ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+166D..166E    ; Canadian_Aboriginal # Po   [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
+166F..167F    ; Canadian_Aboriginal # Lo  [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+18B0..18F5    ; Canadian_Aboriginal # Lo  [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+
+# Total code points: 710
+
+# ================================================
+
+1680          ; Ogham # Zs       OGHAM SPACE MARK
+1681..169A    ; Ogham # Lo  [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
+169B          ; Ogham # Ps       OGHAM FEATHER MARK
+169C          ; Ogham # Pe       OGHAM REVERSED FEATHER MARK
+
+# Total code points: 29
+
+# ================================================
+
+16A0..16EA    ; Runic # Lo  [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+16EE..16F0    ; Runic # Nl   [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
+
+# Total code points: 78
+
+# ================================================
+
+1780..17B3    ; Khmer # Lo  [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+17B4..17B5    ; Khmer # Cf   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B6          ; Khmer # Mc       KHMER VOWEL SIGN AA
+17B7..17BD    ; Khmer # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17BE..17C5    ; Khmer # Mc   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+17C6          ; Khmer # Mn       KHMER SIGN NIKAHIT
+17C7..17C8    ; Khmer # Mc   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+17C9..17D3    ; Khmer # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17D4..17D6    ; Khmer # Po   [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17D7          ; Khmer # Lm       KHMER SIGN LEK TOO
+17D8..17DA    ; Khmer # Po   [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
+17DB          ; Khmer # Sc       KHMER CURRENCY SYMBOL RIEL
+17DC          ; Khmer # Lo       KHMER SIGN AVAKRAHASANYA
+17DD          ; Khmer # Mn       KHMER SIGN ATTHACAN
+17E0..17E9    ; Khmer # Nd  [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
+17F0..17F9    ; Khmer # No  [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+19E0..19FF    ; Khmer # So  [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
+
+# Total code points: 146
+
+# ================================================
+
+1800..1801    ; Mongolian # Po   [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS
+1804          ; Mongolian # Po       MONGOLIAN COLON
+1806          ; Mongolian # Pd       MONGOLIAN TODO SOFT HYPHEN
+1807..180A    ; Mongolian # Po   [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
+180B..180D    ; Mongolian # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E          ; Mongolian # Zs       MONGOLIAN VOWEL SEPARATOR
+1810..1819    ; Mongolian # Nd  [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+1820..1842    ; Mongolian # Lo  [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
+1843          ; Mongolian # Lm       MONGOLIAN LETTER TODO LONG VOWEL SIGN
+1844..1877    ; Mongolian # Lo  [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
+1880..18A8    ; Mongolian # Lo  [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
+18A9          ; Mongolian # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
+18AA          ; Mongolian # Lo       MONGOLIAN LETTER MANCHU ALI GALI LHA
+
+# Total code points: 153
+
+# ================================================
+
+3041..3096    ; Hiragana # Lo  [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
+309D..309E    ; Hiragana # Lm   [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+309F          ; Hiragana # Lo       HIRAGANA DIGRAPH YORI
+1F200         ; Hiragana # So       SQUARE HIRAGANA HOKA
+
+# Total code points: 90
+
+# ================================================
+
+30A1..30FA    ; Katakana # Lo  [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FD..30FE    ; Katakana # Lm   [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK
+30FF          ; Katakana # Lo       KATAKANA DIGRAPH KOTO
+31F0..31FF    ; Katakana # Lo  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
+32D0..32FE    ; Katakana # So  [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
+3300..3357    ; Katakana # So  [88] SQUARE APAATO..SQUARE WATTO
+FF66..FF6F    ; Katakana # Lo  [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
+FF71..FF9D    ; Katakana # Lo  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+
+# Total code points: 299
+
+# ================================================
+
+3105..312D    ; Bopomofo # Lo  [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
+31A0..31B7    ; Bopomofo # Lo  [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
+
+# Total code points: 65
+
+# ================================================
+
+2E80..2E99    ; Han # So  [26] CJK RADICAL REPEAT..CJK RADICAL RAP
+2E9B..2EF3    ; Han # So  [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+2F00..2FD5    ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+3005          ; Han # Lm       IDEOGRAPHIC ITERATION MARK
+3007          ; Han # Nl       IDEOGRAPHIC NUMBER ZERO
+3021..3029    ; Han # Nl   [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
+3038..303A    ; Han # Nl   [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+303B          ; Han # Lm       VERTICAL IDEOGRAPHIC ITERATION MARK
+3400..4DB5    ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
+4E00..9FCB    ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
+F900..FA2D    ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
+FA30..FA6D    ; Han # Lo  [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
+FA70..FAD9    ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+20000..2A6D6  ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
+2A700..2B734  ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+2F800..2FA1D  ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+# Total code points: 75738
+
+# ================================================
+
+A000..A014    ; Yi # Lo  [21] YI SYLLABLE IT..YI SYLLABLE E
+A015          ; Yi # Lm       YI SYLLABLE WU
+A016..A48C    ; Yi # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
+A490..A4C6    ; Yi # So  [55] YI RADICAL QOT..YI RADICAL KE
+
+# Total code points: 1220
+
+# ================================================
+
+10300..1031E  ; Old_Italic # Lo  [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
+10320..10323  ; Old_Italic # No   [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+
+# Total code points: 35
+
+# ================================================
+
+10330..10340  ; Gothic # Lo  [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
+10341         ; Gothic # Nl       GOTHIC LETTER NINETY
+10342..10349  ; Gothic # Lo   [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
+1034A         ; Gothic # Nl       GOTHIC LETTER NINE HUNDRED
+
+# Total code points: 27
+
+# ================================================
+
+10400..1044F  ; Deseret # L&  [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
+
+# Total code points: 80
+
+# ================================================
+
+0300..036F    ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0485..0486    ; Inherited # Mn   [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
+064B..0655    ; Inherited # Mn  [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
+0670          ; Inherited # Mn       ARABIC LETTER SUPERSCRIPT ALEF
+0951..0952    ; Inherited # Mn   [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
+1CD0..1CD2    ; Inherited # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD4..1CE0    ; Inherited # Mn  [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE2..1CE8    ; Inherited # Mn   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED          ; Inherited # Mn       VEDIC SIGN TIRYAK
+1DC0..1DE6    ; Inherited # Mn  [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
+1DFD..1DFF    ; Inherited # Mn   [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200C..200D    ; Inherited # Cf   [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+20D0..20DC    ; Inherited # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0    ; Inherited # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E1          ; Inherited # Mn       COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4    ; Inherited # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+20E5..20F0    ; Inherited # Mn  [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+302A..302F    ; Inherited # Mn   [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A    ; Inherited # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+FE00..FE0F    ; Inherited # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FE20..FE26    ; Inherited # Mn   [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
+101FD         ; Inherited # Mn       PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+1D167..1D169  ; Inherited # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D17B..1D182  ; Inherited # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B  ; Inherited # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD  ; Inherited # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+E0100..E01EF  ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+
+# Total code points: 523
+
+# ================================================
+
+1700..170C    ; Tagalog # Lo  [13] TAGALOG LETTER A..TAGALOG LETTER YA
+170E..1711    ; Tagalog # Lo   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
+1712..1714    ; Tagalog # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+
+# Total code points: 20
+
+# ================================================
+
+1720..1731    ; Hanunoo # Lo  [18] HANUNOO LETTER A..HANUNOO LETTER HA
+1732..1734    ; Hanunoo # Mn   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+
+# Total code points: 21
+
+# ================================================
+
+1740..1751    ; Buhid # Lo  [18] BUHID LETTER A..BUHID LETTER HA
+1752..1753    ; Buhid # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+
+# Total code points: 20
+
+# ================================================
+
+1760..176C    ; Tagbanwa # Lo  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
+176E..1770    ; Tagbanwa # Lo   [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
+1772..1773    ; Tagbanwa # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+
+# Total code points: 18
+
+# ================================================
+
+1900..191C    ; Limbu # Lo  [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
+1920..1922    ; Limbu # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1923..1926    ; Limbu # Mc   [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+1927..1928    ; Limbu # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1929..192B    ; Limbu # Mc   [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+1930..1931    ; Limbu # Mc   [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+1932          ; Limbu # Mn       LIMBU SMALL LETTER ANUSVARA
+1933..1938    ; Limbu # Mc   [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+1939..193B    ; Limbu # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1940          ; Limbu # So       LIMBU SIGN LOO
+1944..1945    ; Limbu # Po   [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1946..194F    ; Limbu # Nd  [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
+
+# Total code points: 66
+
+# ================================================
+
+1950..196D    ; Tai_Le # Lo  [30] TAI LE LETTER KA..TAI LE LETTER AI
+1970..1974    ; Tai_Le # Lo   [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+
+# Total code points: 35
+
+# ================================================
+
+10000..1000B  ; Linear_B # Lo  [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
+1000D..10026  ; Linear_B # Lo  [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
+10028..1003A  ; Linear_B # Lo  [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
+1003C..1003D  ; Linear_B # Lo   [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
+1003F..1004D  ; Linear_B # Lo  [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
+10050..1005D  ; Linear_B # Lo  [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
+10080..100FA  ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
+
+# Total code points: 211
+
+# ================================================
+
+10380..1039D  ; Ugaritic # Lo  [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
+1039F         ; Ugaritic # Po       UGARITIC WORD DIVIDER
+
+# Total code points: 31
+
+# ================================================
+
+10450..1047F  ; Shavian # Lo  [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
+
+# Total code points: 48
+
+# ================================================
+
+10480..1049D  ; Osmanya # Lo  [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
+104A0..104A9  ; Osmanya # Nd  [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+
+# Total code points: 40
+
+# ================================================
+
+10800..10805  ; Cypriot # Lo   [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
+10808         ; Cypriot # Lo       CYPRIOT SYLLABLE JO
+1080A..10835  ; Cypriot # Lo  [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
+10837..10838  ; Cypriot # Lo   [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
+1083C         ; Cypriot # Lo       CYPRIOT SYLLABLE ZA
+1083F         ; Cypriot # Lo       CYPRIOT SYLLABLE ZO
+
+# Total code points: 55
+
+# ================================================
+
+2800..28FF    ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
+
+# Total code points: 256
+
+# ================================================
+
+1A00..1A16    ; Buginese # Lo  [23] BUGINESE LETTER KA..BUGINESE LETTER HA
+1A17..1A18    ; Buginese # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A19..1A1B    ; Buginese # Mc   [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
+1A1E..1A1F    ; Buginese # Po   [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
+
+# Total code points: 30
+
+# ================================================
+
+03E2..03EF    ; Coptic # L&  [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI
+2C80..2CE4    ; Coptic # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
+2CE5..2CEA    ; Coptic # So   [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
+2CEB..2CEE    ; Coptic # L&   [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
+2CEF..2CF1    ; Coptic # Mn   [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2CF9..2CFC    ; Coptic # Po   [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
+2CFD          ; Coptic # No       COPTIC FRACTION ONE HALF
+2CFE..2CFF    ; Coptic # Po   [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
+
+# Total code points: 135
+
+# ================================================
+
+1980..19AB    ; New_Tai_Lue # Lo  [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+19B0..19C0    ; New_Tai_Lue # Mc  [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
+19C1..19C7    ; New_Tai_Lue # Lo   [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
+19C8..19C9    ; New_Tai_Lue # Mc   [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
+19D0..19DA    ; New_Tai_Lue # Nd  [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
+19DE..19DF    ; New_Tai_Lue # Po   [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
+
+# Total code points: 83
+
+# ================================================
+
+2C00..2C2E    ; Glagolitic # L&  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C30..2C5E    ; Glagolitic # L&  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+
+# Total code points: 94
+
+# ================================================
+
+2D30..2D65    ; Tifinagh # Lo  [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
+2D6F          ; Tifinagh # Lm       TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+
+# Total code points: 55
+
+# ================================================
+
+A800..A801    ; Syloti_Nagri # Lo   [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
+A802          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN DVISVARA
+A803..A805    ; Syloti_Nagri # Lo   [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
+A806          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN HASANTA
+A807..A80A    ; Syloti_Nagri # Lo   [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
+A80B          ; Syloti_Nagri # Mn       SYLOTI NAGRI SIGN ANUSVARA
+A80C..A822    ; Syloti_Nagri # Lo  [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
+A823..A824    ; Syloti_Nagri # Mc   [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+A825..A826    ; Syloti_Nagri # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+A827          ; Syloti_Nagri # Mc       SYLOTI NAGRI VOWEL SIGN OO
+A828..A82B    ; Syloti_Nagri # So   [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
+
+# Total code points: 44
+
+# ================================================
+
+103A0..103C3  ; Old_Persian # Lo  [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
+103C8..103CF  ; Old_Persian # Lo   [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
+103D0         ; Old_Persian # Po       OLD PERSIAN WORD DIVIDER
+103D1..103D5  ; Old_Persian # Nl   [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
+
+# Total code points: 50
+
+# ================================================
+
+10A00         ; Kharoshthi # Lo       KHAROSHTHI LETTER A
+10A01..10A03  ; Kharoshthi # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06  ; Kharoshthi # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F  ; Kharoshthi # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+10A10..10A13  ; Kharoshthi # Lo   [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
+10A15..10A17  ; Kharoshthi # Lo   [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
+10A19..10A33  ; Kharoshthi # Lo  [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
+10A38..10A3A  ; Kharoshthi # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F         ; Kharoshthi # Mn       KHAROSHTHI VIRAMA
+10A40..10A47  ; Kharoshthi # No   [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
+10A50..10A58  ; Kharoshthi # Po   [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
+
+# Total code points: 65
+
+# ================================================
+
+1B00..1B03    ; Balinese # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B04          ; Balinese # Mc       BALINESE SIGN BISAH
+1B05..1B33    ; Balinese # Lo  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
+1B34          ; Balinese # Mn       BALINESE SIGN REREKAN
+1B35          ; Balinese # Mc       BALINESE VOWEL SIGN TEDUNG
+1B36..1B3A    ; Balinese # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B          ; Balinese # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3C          ; Balinese # Mn       BALINESE VOWEL SIGN LA LENGA
+1B3D..1B41    ; Balinese # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+1B42          ; Balinese # Mn       BALINESE VOWEL SIGN PEPET
+1B43..1B44    ; Balinese # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B45..1B4B    ; Balinese # Lo   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
+1B50..1B59    ; Balinese # Nd  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+1B5A..1B60    ; Balinese # Po   [7] BALINESE PANTI..BALINESE PAMENENG
+1B61..1B6A    ; Balinese # So  [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
+1B6B..1B73    ; Balinese # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B74..1B7C    ; Balinese # So   [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+
+# Total code points: 121
+
+# ================================================
+
+12000..1236E  ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
+12400..12462  ; Cuneiform # Nl  [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
+12470..12473  ; Cuneiform # Po   [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
+
+# Total code points: 982
+
+# ================================================
+
+10900..10915  ; Phoenician # Lo  [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
+10916..1091B  ; Phoenician # No   [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
+1091F         ; Phoenician # Po       PHOENICIAN WORD SEPARATOR
+
+# Total code points: 29
+
+# ================================================
+
+A840..A873    ; Phags_Pa # Lo  [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
+A874..A877    ; Phags_Pa # Po   [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
+
+# Total code points: 56
+
+# ================================================
+
+07C0..07C9    ; Nko # Nd  [10] NKO DIGIT ZERO..NKO DIGIT NINE
+07CA..07EA    ; Nko # Lo  [33] NKO LETTER A..NKO LETTER JONA RA
+07EB..07F3    ; Nko # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+07F4..07F5    ; Nko # Lm   [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+07F6          ; Nko # So       NKO SYMBOL OO DENNEN
+07F7..07F9    ; Nko # Po   [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
+07FA          ; Nko # Lm       NKO LAJANYALAN
+
+# Total code points: 59
+
+# ================================================
+
+1B80..1B81    ; Sundanese # Mn   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1B82          ; Sundanese # Mc       SUNDANESE SIGN PANGWISAD
+1B83..1BA0    ; Sundanese # Lo  [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
+1BA1          ; Sundanese # Mc       SUNDANESE CONSONANT SIGN PAMINGKAL
+1BA2..1BA5    ; Sundanese # Mn   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA6..1BA7    ; Sundanese # Mc   [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+1BA8..1BA9    ; Sundanese # Mn   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA          ; Sundanese # Mc       SUNDANESE SIGN PAMAAEH
+1BAE..1BAF    ; Sundanese # Lo   [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
+1BB0..1BB9    ; Sundanese # Nd  [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
+
+# Total code points: 55
+
+# ================================================
+
+1C00..1C23    ; Lepcha # Lo  [36] LEPCHA LETTER KA..LEPCHA LETTER A
+1C24..1C2B    ; Lepcha # Mc   [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+1C2C..1C33    ; Lepcha # Mn   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C34..1C35    ; Lepcha # Mc   [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+1C36..1C37    ; Lepcha # Mn   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1C3B..1C3F    ; Lepcha # Po   [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C40..1C49    ; Lepcha # Nd  [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+1C4D..1C4F    ; Lepcha # Lo   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
+
+# Total code points: 74
+
+# ================================================
+
+1C50..1C59    ; Ol_Chiki # Nd  [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
+1C5A..1C77    ; Ol_Chiki # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
+1C78..1C7D    ; Ol_Chiki # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1C7E..1C7F    ; Ol_Chiki # Po   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+
+# Total code points: 48
+
+# ================================================
+
+A500..A60B    ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
+A60C          ; Vai # Lm       VAI SYLLABLE LENGTHENER
+A60D..A60F    ; Vai # Po   [3] VAI COMMA..VAI QUESTION MARK
+A610..A61F    ; Vai # Lo  [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
+A620..A629    ; Vai # Nd  [10] VAI DIGIT ZERO..VAI DIGIT NINE
+A62A..A62B    ; Vai # Lo   [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
+
+# Total code points: 300
+
+# ================================================
+
+A880..A881    ; Saurashtra # Mc   [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+A882..A8B3    ; Saurashtra # Lo  [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
+A8B4..A8C3    ; Saurashtra # Mc  [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+A8C4          ; Saurashtra # Mn       SAURASHTRA SIGN VIRAMA
+A8CE..A8CF    ; Saurashtra # Po   [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A8D0..A8D9    ; Saurashtra # Nd  [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
+
+# Total code points: 81
+
+# ================================================
+
+A900..A909    ; Kayah_Li # Nd  [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
+A90A..A925    ; Kayah_Li # Lo  [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
+A926..A92D    ; Kayah_Li # Mn   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+A92E..A92F    ; Kayah_Li # Po   [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
+
+# Total code points: 48
+
+# ================================================
+
+A930..A946    ; Rejang # Lo  [23] REJANG LETTER KA..REJANG LETTER A
+A947..A951    ; Rejang # Mn  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A952..A953    ; Rejang # Mc   [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+A95F          ; Rejang # Po       REJANG SECTION MARK
+
+# Total code points: 37
+
+# ================================================
+
+10280..1029C  ; Lycian # Lo  [29] LYCIAN LETTER A..LYCIAN LETTER X
+
+# Total code points: 29
+
+# ================================================
+
+102A0..102D0  ; Carian # Lo  [49] CARIAN LETTER A..CARIAN LETTER UUU3
+
+# Total code points: 49
+
+# ================================================
+
+10920..10939  ; Lydian # Lo  [26] LYDIAN LETTER A..LYDIAN LETTER C
+1093F         ; Lydian # Po       LYDIAN TRIANGULAR MARK
+
+# Total code points: 27
+
+# ================================================
+
+AA00..AA28    ; Cham # Lo  [41] CHAM LETTER A..CHAM LETTER HA
+AA29..AA2E    ; Cham # Mn   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA2F..AA30    ; Cham # Mc   [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+AA31..AA32    ; Cham # Mn   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA33..AA34    ; Cham # Mc   [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+AA35..AA36    ; Cham # Mn   [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA40..AA42    ; Cham # Lo   [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
+AA43          ; Cham # Mn       CHAM CONSONANT SIGN FINAL NG
+AA44..AA4B    ; Cham # Lo   [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
+AA4C          ; Cham # Mn       CHAM CONSONANT SIGN FINAL M
+AA4D          ; Cham # Mc       CHAM CONSONANT SIGN FINAL H
+AA50..AA59    ; Cham # Nd  [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
+AA5C..AA5F    ; Cham # Po   [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
+
+# Total code points: 83
+
+# ================================================
+
+1A20..1A54    ; Tai_Tham # Lo  [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
+1A55          ; Tai_Tham # Mc       TAI THAM CONSONANT SIGN MEDIAL RA
+1A56          ; Tai_Tham # Mn       TAI THAM CONSONANT SIGN MEDIAL LA
+1A57          ; Tai_Tham # Mc       TAI THAM CONSONANT SIGN LA TANG LAI
+1A58..1A5E    ; Tai_Tham # Mn   [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A60          ; Tai_Tham # Mn       TAI THAM SIGN SAKOT
+1A61          ; Tai_Tham # Mc       TAI THAM VOWEL SIGN A
+1A62          ; Tai_Tham # Mn       TAI THAM VOWEL SIGN MAI SAT
+1A63..1A64    ; Tai_Tham # Mc   [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+1A65..1A6C    ; Tai_Tham # Mn   [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A6D..1A72    ; Tai_Tham # Mc   [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+1A73..1A7C    ; Tai_Tham # Mn  [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F          ; Tai_Tham # Mn       TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1A80..1A89    ; Tai_Tham # Nd  [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
+1A90..1A99    ; Tai_Tham # Nd  [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+1AA0..1AA6    ; Tai_Tham # Po   [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+1AA7          ; Tai_Tham # Lm       TAI THAM SIGN MAI YAMOK
+1AA8..1AAD    ; Tai_Tham # Po   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+
+# Total code points: 127
+
+# ================================================
+
+AA80..AAAF    ; Tai_Viet # Lo  [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
+AAB0          ; Tai_Viet # Mn       TAI VIET MAI KANG
+AAB1          ; Tai_Viet # Lo       TAI VIET VOWEL AA
+AAB2..AAB4    ; Tai_Viet # Mn   [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB5..AAB6    ; Tai_Viet # Lo   [2] TAI VIET VOWEL E..TAI VIET VOWEL O
+AAB7..AAB8    ; Tai_Viet # Mn   [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AAB9..AABD    ; Tai_Viet # Lo   [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
+AABE..AABF    ; Tai_Viet # Mn   [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+AAC0          ; Tai_Viet # Lo       TAI VIET TONE MAI NUENG
+AAC1          ; Tai_Viet # Mn       TAI VIET TONE MAI THO
+AAC2          ; Tai_Viet # Lo       TAI VIET TONE MAI SONG
+AADB..AADC    ; Tai_Viet # Lo   [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
+AADD          ; Tai_Viet # Lm       TAI VIET SYMBOL SAM
+AADE..AADF    ; Tai_Viet # Po   [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
+
+# Total code points: 72
+
+# ================================================
+
+10B00..10B35  ; Avestan # Lo  [54] AVESTAN LETTER A..AVESTAN LETTER HE
+10B39..10B3F  ; Avestan # Po   [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+
+# Total code points: 61
+
+# ================================================
+
+13000..1342E  ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
+
+# Total code points: 1071
+
+# ================================================
+
+0800..0815    ; Samaritan # Lo  [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
+0816..0819    ; Samaritan # Mn   [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+081A          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER EPENTHETIC YUT
+081B..0823    ; Samaritan # Mn   [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0824          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER SHORT A
+0825..0827    ; Samaritan # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0828          ; Samaritan # Lm       SAMARITAN MODIFIER LETTER I
+0829..082D    ; Samaritan # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+0830..083E    ; Samaritan # Po  [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+
+# Total code points: 61
+
+# ================================================
+
+A4D0..A4F7    ; Lisu # Lo  [40] LISU LETTER BA..LISU LETTER OE
+A4F8..A4FD    ; Lisu # Lm   [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
+A4FE..A4FF    ; Lisu # Po   [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+
+# Total code points: 48
+
+# ================================================
+
+A6A0..A6E5    ; Bamum # Lo  [70] BAMUM LETTER A..BAMUM LETTER KI
+A6E6..A6EF    ; Bamum # Nl  [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
+A6F0..A6F1    ; Bamum # Mn   [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A6F2..A6F7    ; Bamum # Po   [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
+
+# Total code points: 88
+
+# ================================================
+
+A980..A982    ; Javanese # Mn   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+A983          ; Javanese # Mc       JAVANESE SIGN WIGNYAN
+A984..A9B2    ; Javanese # Lo  [47] JAVANESE LETTER A..JAVANESE LETTER HA
+A9B3          ; Javanese # Mn       JAVANESE SIGN CECAK TELU
+A9B4..A9B5    ; Javanese # Mc   [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+A9B6..A9B9    ; Javanese # Mn   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BA..A9BB    ; Javanese # Mc   [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+A9BC          ; Javanese # Mn       JAVANESE VOWEL SIGN PEPET
+A9BD..A9C0    ; Javanese # Mc   [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
+A9C1..A9CD    ; Javanese # Po  [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
+A9CF          ; Javanese # Lm       JAVANESE PANGRANGKEP
+A9D0..A9D9    ; Javanese # Nd  [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
+A9DE..A9DF    ; Javanese # Po   [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
+
+# Total code points: 91
+
+# ================================================
+
+ABC0..ABE2    ; Meetei_Mayek # Lo  [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
+ABE3..ABE4    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ABE5          ; Meetei_Mayek # Mn       MEETEI MAYEK VOWEL SIGN ANAP
+ABE6..ABE7    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ABE8          ; Meetei_Mayek # Mn       MEETEI MAYEK VOWEL SIGN UNAP
+ABE9..ABEA    ; Meetei_Mayek # Mc   [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+ABEB          ; Meetei_Mayek # Po       MEETEI MAYEK CHEIKHEI
+ABEC          ; Meetei_Mayek # Mc       MEETEI MAYEK LUM IYEK
+ABED          ; Meetei_Mayek # Mn       MEETEI MAYEK APUN IYEK
+ABF0..ABF9    ; Meetei_Mayek # Nd  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
+
+# Total code points: 56
+
+# ================================================
+
+10840..10855  ; Imperial_Aramaic # Lo  [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
+10857         ; Imperial_Aramaic # Po       IMPERIAL ARAMAIC SECTION SIGN
+10858..1085F  ; Imperial_Aramaic # No   [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
+
+# Total code points: 31
+
+# ================================================
+
+10A60..10A7C  ; Old_South_Arabian # Lo  [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
+10A7D..10A7E  ; Old_South_Arabian # No   [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
+10A7F         ; Old_South_Arabian # Po       OLD SOUTH ARABIAN NUMERIC INDICATOR
+
+# Total code points: 32
+
+# ================================================
+
+10B40..10B55  ; Inscriptional_Parthian # Lo  [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
+10B58..10B5F  ; Inscriptional_Parthian # No   [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
+
+# Total code points: 30
+
+# ================================================
+
+10B60..10B72  ; Inscriptional_Pahlavi # Lo  [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
+10B78..10B7F  ; Inscriptional_Pahlavi # No   [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
+
+# Total code points: 27
+
+# ================================================
+
+10C00..10C48  ; Old_Turkic # Lo  [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
+
+# Total code points: 73
+
+# ================================================
+
+11080..11081  ; Kaithi # Mn   [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
+11082         ; Kaithi # Mc       KAITHI SIGN VISARGA
+11083..110AF  ; Kaithi # Lo  [45] KAITHI LETTER A..KAITHI LETTER HA
+110B0..110B2  ; Kaithi # Mc   [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
+110B3..110B6  ; Kaithi # Mn   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B7..110B8  ; Kaithi # Mc   [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
+110B9..110BA  ; Kaithi # Mn   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110BB..110BC  ; Kaithi # Po   [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
+110BD         ; Kaithi # Cf       KAITHI NUMBER SIGN
+110BE..110C1  ; Kaithi # Po   [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+
+# Total code points: 66
+
+# EOF
--- a/jdk/test/java/util/regex/RegExTest.java	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/test/java/util/regex/RegExTest.java	Tue May 18 15:36:47 2010 -0700
@@ -32,7 +32,7 @@
  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425 6878475 6919132 6931676
+ * 6350801 6676425 6878475 6919132 6931676 6948903
  */
 
 import java.util.regex.*;
@@ -135,7 +135,7 @@
         surrogatesInClassTest();
         namedGroupCaptureTest();
         nonBmpClassComplementTest();
-
+        unicodePropertiesTest();
         if (failure)
             throw new RuntimeException("Failure in the RE handling.");
         else
@@ -3515,7 +3515,7 @@
         report("NamedGroupCapture");
     }
 
-    // This is for bug 6919132
+    // This is for bug 6919132
     private static void nonBmpClassComplementTest() throws Exception {
         Pattern p = Pattern.compile("\\P{Lu}");
         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
@@ -3539,4 +3539,79 @@
         report("NonBmpClassComplement");
     }
 
+    private static void unicodePropertiesTest() throws Exception { // \p{...} category, script and block classes
+        // Different spellings of the same class must all match; any miss is counted as a single failure.
+        if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
+            !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
+            !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
+            failCount++;
+        // Script side: "common"/"unknown" are the negative checks; lastSM/lastScript cache the previous matcher.
+        Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
+        Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
+        Matcher lastSM  = common;
+        Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);  // NOTE(review): seed assumes U+0000 is COMMON (pairs with lastSM) -- confirm
+        // Block side, same arrangement: "latin"/"greek" are the negative checks; lastBM/lastBlock are the cache.
+        Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
+        Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
+        Matcher lastBM = latin;
+        Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);  // NOTE(review): seed assumes U+0000 is in BASIC_LATIN (pairs with lastBM) -- confirm
+        // Walk code points from 1 up to, but not including, MAX_CODE_POINT; 0 only seeds the caches above.
+        for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
+            if (cp >= 0x30000 && (cp & 0x70) == 0){
+                continue;  // from 0x30000 up, skip cps whose 0x70 bits are all zero; NOTE(review): original intent was "only pick couple code points" -- confirm the mask
+            }
+
+            // Unicode script: cp must match \p{Is<script name>}; the matcher is reused while the script is unchanged.
+            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
+            Matcher m;
+            String str = new String(Character.toChars(cp));
+            if (script == lastScript) {
+                 m = lastSM;
+                 m.reset(str);
+            } else {
+                 m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
+            }
+            if (!m.matches()) {
+                failCount++;
+            }
+            Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;  // a script class that cp must NOT match
+            other.reset(str);
+            if (other.matches()) {
+                failCount++;
+            }
+            lastSM = m;
+            lastScript = script;
+
+            // Unicode block: same positive/negative checks, using \p{block=<block.toString()>}.
+            Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
+            if (block == null) {
+                //System.out.printf("Not a Block: cp=%x%n", cp);
+                continue;  // UnicodeBlock.of returned null, so there is no block class to check; block cache is left untouched
+            }
+            if (block == lastBlock) {
+                 m = lastBM;
+                 m.reset(str);
+            } else {
+                 m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
+            }
+            if (!m.matches()) {
+                failCount++;
+            }
+            other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;  // a block class that cp must NOT match
+            other.reset(str);
+            if (other.matches()) {
+                failCount++;
+            }
+            lastBM = m;
+            lastBlock = block;
+        }
+        report("unicodeProperties");  // report() is defined elsewhere in this test; presumably it evaluates failCount -- confirm
+    }
 }