author | sherman |
Tue, 18 May 2010 15:36:47 -0700 | |
changeset 5610 | fd2427610c7f |
parent 5609 | cc144006eb2a |
child 5611 | b4ef86f105ec |
--- a/jdk/make/java/java/FILES_java.gmk Tue May 18 13:12:46 2010 -0700 +++ b/jdk/make/java/java/FILES_java.gmk Tue May 18 15:36:47 2010 -0700 @@ -34,6 +34,7 @@ java/lang/Thread.java \ java/lang/Character.java \ java/lang/CharacterData.java \ + java/lang/CharacterName.java \ sun/misc/ASCIICaseInsensitiveComparator.java \ sun/misc/VM.java \ sun/misc/Signal.java \
--- a/jdk/make/java/java/Makefile Tue May 18 13:12:46 2010 -0700 +++ b/jdk/make/java/java/Makefile Tue May 18 15:36:47 2010 -0700 @@ -385,6 +385,27 @@ $(RM) $(GENSRCDIR)/java/lang/CharacterDataPrivateUse.java # +# Rules to generate classes/java/lang/uniName.dat +# + + + +UNINAME = $(CLASSBINDIR)/java/lang/uniName.dat +GENERATEUNINAME_JARFILE = $(BUILDTOOLJARDIR)/generatecharacter.jar + +build: $(UNINAME) + +$(UNINAME): $(UNICODEDATA)/UnicodeData.txt \ + $(GENERATECHARACTER_JARFILE) + @$(prep-target) + $(BOOT_JAVA_CMD) -classpath $(GENERATECHARACTER_JARFILE) \ + build.tools.generatecharacter.CharacterName \ + $(UNICODEDATA)/UnicodeData.txt $(UNINAME) + +clean:: + $(RM) $(UNINAME) + +# # End of rules to create $(GENSRCDIR)/java/lang/CharacterDataXX.java #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jdk/make/tools/UnicodeData/Scripts.txt Tue May 18 15:36:47 2010 -0700 @@ -0,0 +1,1972 @@ +# Scripts-5.2.0.txt +# Date: 2009-08-22, 04:58:43 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2009 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: Script + +# All code points not explicitly listed for Script +# have the value Unknown (Zzzz). + +# @missing: 0000..10FFFF; Unknown + +# ================================================ + +0000..001F ; Common # Cc [32] <control-0000>..<control-001F> +0020 ; Common # Zs SPACE +0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Common # Sc DOLLAR SIGN +0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Common # Ps LEFT PARENTHESIS +0029 ; Common # Pe RIGHT PARENTHESIS +002A ; Common # Po ASTERISK +002B ; Common # Sm PLUS SIGN +002C ; Common # Po COMMA +002D ; Common # Pd HYPHEN-MINUS +002E..002F ; Common # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Common # Po [2] COLON..SEMICOLON +003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Common # Ps LEFT SQUARE BRACKET +005C ; Common # Po REVERSE SOLIDUS +005D ; Common # Pe RIGHT SQUARE BRACKET +005E ; Common # Sk CIRCUMFLEX ACCENT +005F ; Common # Pc LOW LINE +0060 ; Common # Sk GRAVE ACCENT +007B ; Common # Ps LEFT CURLY BRACKET +007C ; Common # Sm VERTICAL LINE +007D ; Common # Pe RIGHT CURLY BRACKET +007E ; Common # Sm TILDE +007F..009F ; Common # Cc [33] <control-007F>..<control-009F> +00A0 ; Common # Zs NO-BREAK SPACE +00A1 ; Common # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN +00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN +00A8 ; Common # Sk DIAERESIS 
+00A9 ; Common # So COPYRIGHT SIGN +00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Common # Sm NOT SIGN +00AD ; Common # Cf SOFT HYPHEN +00AE ; Common # So REGISTERED SIGN +00AF ; Common # Sk MACRON +00B0 ; Common # So DEGREE SIGN +00B1 ; Common # Sm PLUS-MINUS SIGN +00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; Common # Sk ACUTE ACCENT +00B5 ; Common # L& MICRO SIGN +00B6 ; Common # So PILCROW SIGN +00B7 ; Common # Po MIDDLE DOT +00B8 ; Common # Sk CEDILLA +00B9 ; Common # No SUPERSCRIPT ONE +00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; Common # Po INVERTED QUESTION MARK +00D7 ; Common # Sm MULTIPLICATION SIGN +00F7 ; Common # Sm DIVISION SIGN +02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Common # Lm MODIFIER LETTER VOICING +02ED ; Common # Sk MODIFIER LETTER UNASPIRATED +02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; Common # Lm GREEK NUMERAL SIGN +037E ; Common # Po GREEK QUESTION MARK +0385 ; Common # Sk GREEK DIALYTIKA TONOS +0387 ; Common # Po GREEK ANO TELEIA +0589 ; Common # Po ARMENIAN FULL STOP +0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +060C ; Common # Po ARABIC COMMA +061B ; Common # Po ARABIC SEMICOLON +061F ; Common # Po ARABIC QUESTION MARK +0640 ; Common # Lm ARABIC TATWEEL +0660..0669 ; Common # Nd [10] 
ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +06DD ; Common # Cf ARABIC END OF AYAH +0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN +0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT +0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR +16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP +1805 ; Common # Po MONGOLIAN FOUR DOTS +1CD3 ; Common # Po VEDIC SIGN NIHSHVASA +1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA +2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE +200B ; Common # Cf ZERO WIDTH SPACE +200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Common # Pi LEFT SINGLE QUOTATION MARK +2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK +201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT +2028 ; Common # Zl LINE SEPARATOR +2029 ; Common # Zp PARAGRAPH SEPARATOR +202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +202F ; 
Common # Zs NARROW NO-BREAK SPACE +2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET +2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Common # Sm FRACTION SLASH +2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Common # Sm COMMERCIAL MINUS SIGN +2053 ; Common # Po SWUNG DASH +2054 ; Common # Pc INVERTED UNDERTIE +2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; Common # Zs MEDIUM MATHEMATICAL SPACE +2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS +206A..206F ; Common # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2070 ; Common # No SUPERSCRIPT ZERO +2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS +2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS +20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN +2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; Common # L& DOUBLE-STRUCK CAPITAL C +2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA +2107 ; Common # L& EULER CONSTANT +2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT +210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2114 ; Common # So L B BAR SYMBOL +2115 ; Common # L& DOUBLE-STRUCK CAPITAL N +2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT 
CAPITAL P +2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE +2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z +2125 ; Common # So OUNCE SIGN +2127 ; Common # So INVERTED OHM SIGN +2128 ; Common # L& BLACK-LETTER CAPITAL Z +2129 ; Common # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212E ; Common # So ESTIMATED SYMBOL +212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Common # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; Common # L& INFORMATION SOURCE +213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; Common # So PROPERTY LINE +214B ; Common # Sm TURNED AMPERSAND +214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB +214F ; Common # So SYMBOL FOR SAMARITAN SOURCE +2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; Common # No VULGAR FRACTION ZERO THIRDS +2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Common # 
Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Common # So DOWNWARDS DOUBLE ARROW +21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE +2308..230B ; Common # Sm [4] LEFT CEILING..RIGHT FLOOR +230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Common # So [7] FROWN..KEYBOARD +2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET +232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL +2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE 
+25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Common # Sm MUSIC SHARP SIGN +2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR +26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2 +26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE +26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS +2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE +270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR +2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274D ; Common # So SHADOWED WHITE CIRCLE +274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE +2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET +2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET 
ORNAMENT +2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE +27CC ; Common # Sm LONG DIVISION +27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Common # Ps LEFT WHITE CURLY BRACKET +2984 ; Common # Pe RIGHT WHITE CURLY BRACKET +2985 ; Common # Ps LEFT WHITE PARENTHESIS +2986 ; Common # Pe RIGHT WHITE PARENTHESIS +2987 ; 
Common # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Common # Ps LEFT WIGGLY FENCE +29D9 ; Common # Pe RIGHT WIGGLY FENCE +29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE +2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED 
SUBSTITUTION MARKER +2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Common # Po RAISED SQUARE +2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Common # Pd HYPHEN WITH DIAERESIS +2E1B ; Common # Po TILDE WITH RING ABOVE +2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Common # Ps TOP LEFT HALF BRACKET +2E23 ; Common # Pe TOP RIGHT HALF BRACKET +2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Common # Lm VERTICAL TILDE +2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3000 ; Common # Zs IDEOGRAPHIC SPACE +3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK +3008 ; Common # Ps LEFT ANGLE BRACKET +3009 ; Common # Pe 
RIGHT ANGLE BRACKET +300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Common # Ps LEFT CORNER BRACKET +300D ; Common # Pe RIGHT CORNER BRACKET +300E ; Common # Ps LEFT WHITE CORNER BRACKET +300F ; Common # Pe RIGHT WHITE CORNER BRACKET +3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK +3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Common # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Common # Ps LEFT WHITE SQUARE BRACKET +301B ; Common # Pe RIGHT WHITE SQUARE BRACKET +301C ; Common # Pd WAVE DASH +301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Common # So POSTAL MARK FACE +3030 ; Common # Pd WAVY DASH +3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303C ; Common # Lo MASU MARK +303D ; Common # Po PART ALTERNATION MARK +303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; Common # Po KATAKANA MIDDLE DOT +30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 
+31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327F ; Common # So KOREAN STANDARD SYMBOL +3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN +3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL +4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Common # Sc NORTH INDIC RUPEE MARK +A839 ; Common # So NORTH INDIC QUANTITY MARK +FD3E ; Common # Ps ORNATE LEFT PARENTHESIS +FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS +FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM +FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; Common # Pe PRESENTATION 
FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; Common # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP 
+FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; Common # Pd SMALL EM DASH +FE59 ; Common # Ps SMALL LEFT PARENTHESIS +FE5A ; Common # Pe SMALL RIGHT PARENTHESIS +FE5B ; Common # Ps SMALL LEFT CURLY BRACKET +FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET +FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; Common # Sm SMALL PLUS SIGN +FE63 ; Common # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Common # Po SMALL REVERSE SOLIDUS +FE69 ; Common # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; Common # Po FULLWIDTH ASTERISK +FF0B ; Common # Sm FULLWIDTH PLUS SIGN +FF0C ; Common # Po FULLWIDTH COMMA +FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; Common # Pc FULLWIDTH LOW LINE +FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT +FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; Common # Sm FULLWIDTH VERTICAL LINE +FF5D ; 
Common # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; Common # Sm FULLWIDTH TILDE +FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; Common # Sm FULLWIDTH NOT SIGN +FFE3 ; Common # Sk FULLWIDTH MACRON +FFE4 ; Common # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT +10102 ; Common # So AEGEAN CHECK MARK +10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN +101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE 
HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1DD ; Common # So [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS +1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D371 ; Common # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE +1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 
; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 
+1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ +1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B +1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N +1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P +1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S +1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W +1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV +1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H +1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P +1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J +1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M +1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA +1F190 ; Common # So SQUARE DJ +1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253 +1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +E0001 ; Common # Cf LANGUAGE TAG +E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 5395 + +# 
================================================ + +0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Latin # L& FEMININE ORDINAL INDICATOR +00BA ; Latin # L& MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN +1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN +1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V +1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH +1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP +2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N 
+2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Latin # L& TURNED CAPITAL F +214E ; Latin # L& TURNED SMALL F +2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J +2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V +2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Latin # Lm MODIFIER LETTER US +A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO +A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z + +# Total code points: 1244 + +# ================================================ + +0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Greek # Lm GREEK YPOGEGRAMMENI +037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0384 ; Greek # Sk GREEK TONOS +0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; 
Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI +03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI +1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI +1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI +1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA +1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH 
PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; Greek # Sk GREEK KORONIS +1FBE ; Greek # L& GREEK PROSGEGRAMMENI +1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA +2126 ; Greek # L& OHM SIGN +10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A ; Greek # No GREEK ZERO SIGN +1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245 ; Greek # So GREEK MUSICAL LEIMMA + +# Total code points: 511 + +# ================================================ + +0400..0481 ; Cyrillic # L& [130] 
CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA +0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN +0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION +0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE +0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER +1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL +1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN +2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN +A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673 ; Cyrillic # Po SLAVONIC ASTERISK +A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67E ; Cyrillic # Po CYRILLIC KAVYKA +A67F ; Cyrillic # Lm CYRILLIC PAYEROK +A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE + +# Total code points: 404 + +# ================================================ + +0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +058A ; Armenian # Pd ARMENIAN HYPHEN +FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH + +# Total code points: 90 + +# 
================================================ + +0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF +05BF ; Hebrew # Mn HEBREW POINT RAFE +05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ +05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ +05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA +05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 133 + +# ================================================ + +0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; Arabic # Sc AFGHANI SIGN +060D ; Arabic # Po ARABIC DATE SEPARATOR +060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +0610..061A ; Arabic # Mn [11] 
ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK +0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS +066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; Arabic # Po ARABIC FULL STOP +06D5 ; Arabic # Lo ARABIC LETTER AE +06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DE ; Arabic # Me ARABIC START OF RUB EL HIZB +06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06E9 ; Arabic # So ARABIC PLACE OF SAJDAH +06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V +0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN 
ISOLATED FORM +FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; Arabic # Sc RIAL SIGN +FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + +# Total code points: 1030 + +# ================================================ + +0700..070D ; Syriac # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F ; Syriac # Cf SYRIAC ABBREVIATION MARK +0710 ; Syriac # Lo SYRIAC LETTER ALAPH +0711 ; Syriac # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE + +# Total code points: 77 + +# ================================================ + +0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07A6..07B0 ; Thaana # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; Thaana # Lo THAANA LETTER NAA + +# Total code points: 50 + +# ================================================ + +0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA +093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 
+0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA +094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E +0950 ; Devanagari # Lo DEVANAGARI OM +0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E +0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A +0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA +A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE + +# Total code points: 140 + +# ================================================ + +0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Bengali # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Bengali # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Bengali # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Bengali # Lo BENGALI LETTER LA +09B6..09B9 ; Bengali # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; Bengali # Mn BENGALI SIGN NUKTA +09BD ; Bengali # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; Bengali # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Bengali # Mn [4] BENGALI VOWEL 
SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Bengali # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Bengali # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; Bengali # Mn BENGALI SIGN VIRAMA +09CE ; Bengali # Lo BENGALI LETTER KHANDA TA +09D7 ; Bengali # Mc BENGALI AU LENGTH MARK +09DC..09DD ; Bengali # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Bengali # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; Bengali # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; Bengali # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; Bengali # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; Bengali # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; Bengali # So BENGALI ISSHAR +09FB ; Bengali # Sc BENGALI GANDA MARK + +# Total code points: 92 + +# ================================================ + +0A01..0A02 ; Gurmukhi # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Gurmukhi # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Gurmukhi # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Gurmukhi # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Gurmukhi # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Gurmukhi # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Gurmukhi # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Gurmukhi # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Gurmukhi # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; Gurmukhi # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; Gurmukhi # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Gurmukhi # Mn [3] 
GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Gurmukhi # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; Gurmukhi # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Gurmukhi # Lo GURMUKHI LETTER FA +0A66..0A6F ; Gurmukhi # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; Gurmukhi # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; Gurmukhi # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; Gurmukhi # Mn GURMUKHI SIGN YAKASH + +# Total code points: 79 + +# ================================================ + +0A81..0A82 ; Gujarati # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Gujarati # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Gujarati # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Gujarati # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Gujarati # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Gujarati # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Gujarati # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Gujarati # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; Gujarati # Mn GUJARATI SIGN NUKTA +0ABD ; Gujarati # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Gujarati # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Gujarati # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Gujarati # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Gujarati # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; Gujarati # Mn GUJARATI SIGN VIRAMA +0AD0 ; Gujarati # Lo GUJARATI OM +0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN + +# Total code points: 83 + +# 
================================================ + +0B01 ; Oriya # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Oriya # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Oriya # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Oriya # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Oriya # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Oriya # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Oriya # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Oriya # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; Oriya # Mn ORIYA SIGN NUKTA +0B3D ; Oriya # Lo ORIYA SIGN AVAGRAHA +0B3E ; Oriya # Mc ORIYA VOWEL SIGN AA +0B3F ; Oriya # Mn ORIYA VOWEL SIGN I +0B40 ; Oriya # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Oriya # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; Oriya # Mn ORIYA SIGN VIRAMA +0B56 ; Oriya # Mn ORIYA AI LENGTH MARK +0B57 ; Oriya # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; Oriya # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; Oriya # So ORIYA ISSHAR +0B71 ; Oriya # Lo ORIYA LETTER WA + +# Total code points: 84 + +# ================================================ + +0B82 ; Tamil # Mn TAMIL SIGN ANUSVARA +0B83 ; Tamil # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Tamil # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Tamil # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Tamil # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Tamil # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Tamil # Lo TAMIL LETTER JA +0B9E..0B9F ; Tamil # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Tamil # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; 
Tamil # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Tamil # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; Tamil # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Tamil # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Tamil # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Tamil # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Tamil # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; Tamil # Mn TAMIL SIGN VIRAMA +0BD0 ; Tamil # Lo TAMIL OM +0BD7 ; Tamil # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; Tamil # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Tamil # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; Tamil # Sc TAMIL RUPEE SIGN +0BFA ; Tamil # So TAMIL NUMBER SIGN + +# Total code points: 72 + +# ================================================ + +0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA +0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA +0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Telugu # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C59 ; Telugu # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA +0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU 
DIGIT NINE +0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; Telugu # So TELUGU SIGN TUUMU + +# Total code points: 93 + +# ================================================ + +0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Kannada # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Kannada # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Kannada # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; Kannada # Mn KANNADA SIGN NUKTA +0CBD ; Kannada # Lo KANNADA SIGN AVAGRAHA +0CBE ; Kannada # Mc KANNADA VOWEL SIGN AA +0CBF ; Kannada # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Kannada # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Kannada # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Kannada # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDE ; Kannada # Lo KANNADA LETTER FA +0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE + +# Total code points: 84 + +# ================================================ + +0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA +0D2A..0D39 ; Malayalam # Lo [16] 
MALAYALAM LETTER PA..MALAYALAM LETTER HA +0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA +0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK +0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS +0D79 ; Malayalam # So MALAYALAM DATE MARK +0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K + +# Total code points: 95 + +# ================================================ + +0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Sinhala # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Sinhala # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Sinhala # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; Sinhala # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; Sinhala # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA 
GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA + +# Total code points: 80 + +# ================================================ + +0E01..0E30 ; Thai # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; Thai # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; Thai # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; Thai # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; Thai # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Thai # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; Thai # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E4F ; Thai # Po THAI CHARACTER FONGMAN +0E50..0E59 ; Thai # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; Thai # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT + +# Total code points: 86 + +# ================================================ + +0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Lao # Lo LAO LETTER KHO TAM +0E87..0E88 ; Lao # Lo [2] LAO LETTER NGO..LAO LETTER CO +0E8A ; Lao # Lo LAO LETTER SO TAM +0E8D ; Lao # Lo LAO LETTER NYO +0E94..0E97 ; Lao # Lo [4] LAO LETTER DO..LAO LETTER THO TAM +0E99..0E9F ; Lao # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG +0EA1..0EA3 ; Lao # Lo [3] LAO LETTER MO..LAO LETTER LO LING +0EA5 ; Lao # Lo LAO LETTER LO LOOT +0EA7 ; Lao # Lo LAO LETTER WO +0EAA..0EAB ; Lao # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG +0EAD..0EB0 ; Lao # Lo [4] LAO LETTER O..LAO VOWEL SIGN A +0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EB9 ; Lao # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Lao # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Lao # Lm LAO KO LA +0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 
+0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO + +# Total code points: 65 + +# ================================================ + +0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; Tibetan # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; Tibetan # So TIBETAN MARK BSDUS RTAGS +0F35 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F36 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38 ; Tibetan # So TIBETAN MARK CHE MGO +0F39 ; Tibetan # Mn TIBETAN MARK TSA -PHRU +0F3A ; Tibetan # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; Tibetan # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; Tibetan # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; Tibetan # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; Tibetan # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; Tibetan # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Tibetan # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; Tibetan # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Tibetan # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F85 ; Tibetan # Po TIBETAN MARK PALUTA +0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS 
+0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA + +# Total code points: 201 + +# ================================================ + +1000..102A ; Myanmar # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Myanmar # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Myanmar # Mc MYANMAR VOWEL SIGN E +1032..1037 ; Myanmar # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; Myanmar # Mc MYANMAR SIGN VISARGA +1039..103A ; Myanmar # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; Myanmar # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Myanmar # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; Myanmar # Lo MYANMAR LETTER GREAT SA +1040..1049 ; Myanmar # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; Myanmar # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; Myanmar # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; Myanmar # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; Myanmar # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA 
+1061 ; Myanmar # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Myanmar # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Myanmar # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Myanmar # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Myanmar # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; Myanmar # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; Myanmar # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Myanmar # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Myanmar # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; Myanmar # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Myanmar # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; Myanmar # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI +109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA +AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE + +# Total code points: 188 + +# ================================================ + +10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL 
LETTER AN..GEORGIAN CAPITAL LETTER HOE +10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR +2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE + +# Total code points: 120 + +# ================================================ + +1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U +A960..A97C ; Hangul # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +AC00..D7A3 ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Hangul # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Hangul # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +FFA0..FFBE ; Hangul # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I + +# Total code points: 11737 + +# ================================================ + +1200..1248 ; Ethiopic # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Ethiopic # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Ethiopic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE 
XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Ethiopic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Ethiopic # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK +1360 ; Ethiopic # So ETHIOPIC SECTION MARK +1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +2D80..2D96 ; Ethiopic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO + +# Total code points: 461 + +# ================================================ + +13A0..13F4 ; Cherokee # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV + +# Total 
code points: 85 + +# ================================================ + +1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D..166E ; Canadian_Aboriginal # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S + +# Total code points: 710 + +# ================================================ + +1680 ; Ogham # Zs OGHAM SPACE MARK +1681..169A ; Ogham # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; Ogham # Ps OGHAM FEATHER MARK +169C ; Ogham # Pe OGHAM REVERSED FEATHER MARK + +# Total code points: 29 + +# ================================================ + +16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL + +# Total code points: 78 + +# ================================================ + +1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; Khmer # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Khmer # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Khmer # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; Khmer # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D4..17D6 ; Khmer # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; Khmer # Lm KHMER SIGN LEK TOO +17D8..17DA ; Khmer # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB ; Khmer # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; Khmer # Lo KHMER SIGN AVAKRAHASANYA +17DD ; Khmer # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; Khmer # Nd [10] KHMER DIGIT ZERO..KHMER 
DIGIT NINE +17F0..17F9 ; Khmer # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +19E0..19FF ; Khmer # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC + +# Total code points: 146 + +# ================================================ + +1800..1801 ; Mongolian # Po [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS +1804 ; Mongolian # Po MONGOLIAN COLON +1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Mongolian # Zs MONGOLIAN VOWEL SEPARATOR +1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA +1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA + +# Total code points: 153 + +# ================================================ + +3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI +1F200 ; Hiragana # So SQUARE HIRAGANA HOKA + +# Total code points: 90 + +# ================================================ + +30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FD..30FE ; Katakana # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK +30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO +31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO 
+3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO +FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N + +# Total code points: 299 + +# ================================================ + +3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H + +# Total code points: 65 + +# ================================================ + +2E80..2E99 ; Han # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Han # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +3005 ; Han # Lm IDEOGRAPHIC ITERATION MARK +3007 ; Han # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D +FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 75738 + +# ================================================ + +A000..A014 ; Yi # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; Yi # Lm YI SYLLABLE WU +A016..A48C ; Yi # Lo [1143] 
YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE + +# Total code points: 1220 + +# ================================================ + +10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU +10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY + +# Total code points: 35 + +# ================================================ + +10330..10340 ; Gothic # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; Gothic # Nl GOTHIC LETTER NINETY +10342..10349 ; Gothic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Gothic # Nl GOTHIC LETTER NINE HUNDRED + +# Total code points: 27 + +# ================================================ + +10400..1044F ; Deseret # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW + +# Total code points: 80 + +# ================================================ + +0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA +064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW +0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF +0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA +1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Inherited # Mn VEDIC SIGN TIRYAK +1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z +1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +20D0..20DC ; Inherited # Mn [13] COMBINING LEFT 
HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON +101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 523 + +# ================================================ + +1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA +170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA +1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA + +# Total code points: 20 + +# ================================================ + +1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD + +# Total code points: 21 + +# ================================================ + +1740..1751 ; Buhid # Lo [18] BUHID 
LETTER A..BUHID LETTER HA +1752..1753 ; Buhid # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U + +# Total code points: 20 + +# ================================================ + +1760..176C ; Tagbanwa # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Tagbanwa # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; Tagbanwa # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U + +# Total code points: 18 + +# ================================================ + +1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA +1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Limbu # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Limbu # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Limbu # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Limbu # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; Limbu # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1940 ; Limbu # So LIMBU SIGN LOO +1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE + +# Total code points: 66 + +# ================================================ + +1950..196D ; Tai_Le # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Tai_Le # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 + +# Total code points: 35 + +# ================================================ + +10000..1000B ; Linear_B # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Linear_B # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Linear_B # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Linear_B # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Linear_B # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR 
B SYLLABLE B091 TWO +10050..1005D ; Linear_B # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 + +# Total code points: 211 + +# ================================================ + +10380..1039D ; Ugaritic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; Ugaritic # Po UGARITIC WORD DIVIDER + +# Total code points: 31 + +# ================================================ + +10450..1047F ; Shavian # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW + +# Total code points: 48 + +# ================================================ + +10480..1049D ; Osmanya # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9 ; Osmanya # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE + +# Total code points: 40 + +# ================================================ + +10800..10805 ; Cypriot # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Cypriot # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Cypriot # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Cypriot # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Cypriot # Lo CYPRIOT SYLLABLE ZA +1083F ; Cypriot # Lo CYPRIOT SYLLABLE ZO + +# Total code points: 55 + +# ================================================ + +2800..28FF ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 + +# Total code points: 256 + +# ================================================ + +1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1B ; Buginese # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE +1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION + +# Total code points: 30 + +# ================================================ + +03E2..03EF ; Coptic # L& [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI +2C80..2CE4 ; Coptic # L& [101] COPTIC CAPITAL LETTER 
ALFA..COPTIC SYMBOL KAI +2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; Coptic # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER + +# Total code points: 135 + +# ================================================ + +1980..19AB ; New_Tai_Lue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY +19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B +19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE +19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV + +# Total code points: 83 + +# ================================================ + +2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE + +# Total code points: 94 + +# ================================================ + +2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK + +# Total code points: 55 + +# ================================================ + +A800..A801 ; Syloti_Nagri # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; Syloti_Nagri # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN 
HASANTA +A807..A80A ; Syloti_Nagri # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; Syloti_Nagri # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 + +# Total code points: 44 + +# ================================================ + +103A0..103C3 ; Old_Persian # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Old_Persian # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; Old_Persian # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; Old_Persian # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED + +# Total code points: 50 + +# ================================================ + +10A00 ; Kharoshthi # Lo KHAROSHTHI LETTER A +10A01..10A03 ; Kharoshthi # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Kharoshthi # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Kharoshthi # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; Kharoshthi # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Kharoshthi # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A33 ; Kharoshthi # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA +10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA +10A40..10A47 ; Kharoshthi # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND +10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES + +# Total code points: 65 + +# ================================================ + +1B00..1B03 ; 
Balinese # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Balinese # Mc BALINESE SIGN BISAH +1B05..1B33 ; Balinese # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; Balinese # Mn BALINESE SIGN REREKAN +1B35 ; Balinese # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Balinese # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Balinese # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Balinese # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK +1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING + +# Total code points: 121 + +# ================================================ + +12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM +12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER +12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON + +# Total code points: 982 + +# ================================================ + +10900..10915 ; Phoenician # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; Phoenician # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; Phoenician # Po PHOENICIAN WORD SEPARATOR + +# Total code points: 
29 + +# ================================================ + +A840..A873 ; Phags_Pa # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD + +# Total code points: 56 + +# ================================================ + +07C0..07C9 ; Nko # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; Nko # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; Nko # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Nko # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; Nko # So NKO SYMBOL OO DENNEN +07F7..07F9 ; Nko # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; Nko # Lm NKO LAJANYALAN + +# Total code points: 59 + +# ================================================ + +1B80..1B81 ; Sundanese # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Sundanese # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Sundanese # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Sundanese # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Sundanese # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE + +# Total code points: 55 + +# ================================================ + +1C00..1C23 ; Lepcha # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Lepcha # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Lepcha # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Lepcha # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; Lepcha # Mn [2] LEPCHA SIGN 
RAN..LEPCHA SIGN NUKTA +1C3B..1C3F ; Lepcha # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; Lepcha # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; Lepcha # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA + +# Total code points: 74 + +# ================================================ + +1C50..1C59 ; Ol_Chiki # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; Ol_Chiki # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Ol_Chiki # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; Ol_Chiki # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD + +# Total code points: 48 + +# ================================================ + +A500..A60B ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Vai # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; Vai # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; Vai # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; Vai # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO + +# Total code points: 300 + +# ================================================ + +A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA +A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE + +# Total code points: 81 + +# ================================================ + +A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA + +# 
Total code points: 48 + +# ================================================ + +A930..A946 ; Rejang # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; Rejang # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; Rejang # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; Rejang # Po REJANG SECTION MARK + +# Total code points: 37 + +# ================================================ + +10280..1029C ; Lycian # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X + +# Total code points: 29 + +# ================================================ + +102A0..102D0 ; Carian # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 + +# Total code points: 49 + +# ================================================ + +10920..10939 ; Lydian # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; Lydian # Po LYDIAN TRIANGULAR MARK + +# Total code points: 27 + +# ================================================ + +AA00..AA28 ; Cham # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; Cham # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Cham # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Cham # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Cham # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Cham # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; Cham # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; Cham # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; Cham # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; Cham # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Cham # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; Cham # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; Cham # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA + +# Total code points: 83 + +# ================================================ + +1A20..1A54 ; Tai_Tham # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Tai_Tham # Mn TAI THAM 
CONSONANT SIGN MEDIAL LA +1A57 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Tai_Tham # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Tai_Tham # Mn TAI THAM SIGN SAKOT +1A61 ; Tai_Tham # Mc TAI THAM VOWEL SIGN A +1A62 ; Tai_Tham # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Tai_Tham # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Tai_Tham # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Tai_Tham # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; Tai_Tham # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tai_Tham # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; Tai_Tham # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Tai_Tham # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; Tai_Tham # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; Tai_Tham # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; Tai_Tham # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG + +# Total code points: 127 + +# ================================================ + +AA80..AAAF ; Tai_Viet # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0 ; Tai_Viet # Mn TAI VIET MAI KANG +AAB1 ; Tai_Viet # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; Tai_Viet # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; Tai_Viet # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; Tai_Viet # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; Tai_Viet # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; Tai_Viet # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; Tai_Viet # Lo TAI VIET TONE MAI NUENG +AAC1 ; Tai_Viet # Mn TAI VIET TONE MAI THO +AAC2 ; Tai_Viet # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Tai_Viet # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Tai_Viet # Lm TAI VIET SYMBOL SAM +AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL 
KOI KOI + +# Total code points: 72 + +# ================================================ + +10B00..10B35 ; Avestan # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; Avestan # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION + +# Total code points: 61 + +# ================================================ + +13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 + +# Total code points: 1071 + +# ================================================ + +0800..0815 ; Samaritan # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; Samaritan # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; Samaritan # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Samaritan # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Samaritan # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Samaritan # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Samaritan # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; Samaritan # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0830..083E ; Samaritan # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU + +# Total code points: 61 + +# ================================================ + +A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP + +# Total code points: 48 + +# ================================================ + +A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK + +# Total code points: 88 + +# ================================================ + +A980..A982 ; Javanese # Mn [3] JAVANESE 
SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Javanese # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Javanese # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET +A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON +A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; Javanese # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN + +# Total code points: 91 + +# ================================================ + +ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; Meetei_Mayek # Po MEETEI MAYEK CHEIKHEI +ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK +ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE + +# Total code points: 56 + +# ================================================ + +10840..10855 ; Imperial_Aramaic # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857 ; Imperial_Aramaic # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; Imperial_Aramaic # No [8] IMPERIAL ARAMAIC 
NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND + +# Total code points: 31 + +# ================================================ + +10A60..10A7C ; Old_South_Arabian # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; Old_South_Arabian # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; Old_South_Arabian # Po OLD SOUTH ARABIAN NUMERIC INDICATOR + +# Total code points: 32 + +# ================================================ + +10B40..10B55 ; Inscriptional_Parthian # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; Inscriptional_Parthian # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND + +# Total code points: 30 + +# ================================================ + +10B60..10B72 ; Inscriptional_Pahlavi # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; Inscriptional_Pahlavi # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND + +# Total code points: 27 + +# ================================================ + +10C00..10C48 ; Old_Turkic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH + +# Total code points: 73 + +# ================================================ + +11080..11081 ; Kaithi # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; Kaithi # Mc KAITHI SIGN VISARGA +11083..110AF ; Kaithi # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Kaithi # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Kaithi # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Kaithi # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; Kaithi # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; Kaithi # Cf KAITHI NUMBER SIGN +110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA + +# 
Total code points: 66 + +# EOF
// jdk/make/tools/src/build/tools/generatecharacter/CharacterName.java
package build.tools.generatecharacter;

import java.io.*;
import java.nio.*;
import java.util.*;
import java.util.zip.*;

/**
 * Build-time tool that extracts the character names from UnicodeData.txt
 * and writes them, deflate-compressed, to a binary data file.
 *
 * <p>Layout of the (uncompressed) output stream:
 * <pre>
 *   int    total     size of the code point pool plus the name pool
 *   int    cpLen     size of the code point pool (offset of the name pool)
 *   byte[] cpPool    cpLen bytes, see below
 *   byte[] namePool  all names concatenated, encoded as ASCII
 * </pre>
 * The code point pool holds one record per named code point, in input
 * order. A code point that directly follows the previously written one is
 * stored as a single byte holding the length of its name. Any other code
 * point starts a new segment: a 0 byte (the segment start flag) followed by
 * a 4-byte big-endian int whose top 8 bits are the name length and whose
 * low 24 bits are the code point.
 *
 * <p>Usage: {@code java CharacterName UnicodeData.txt uniName.dat}
 */
public class CharacterName {

    /**
     * Runs the generator.
     *
     * @param args {@code args[0]} is the path of UnicodeData.txt,
     *             {@code args[1]} the path of the data file to write.
     *             The process exits with status 1 on a usage error or if
     *             generation fails, so that the calling build stops.
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
            System.exit(1);
        }

        boolean failed = false;
        boolean outputStarted = false;
        BufferedReader bfr = null;
        DataOutputStream dos = null;
        try {
            bfr = new BufferedReader(new FileReader(args[0]));

            StringBuilder namePool = new StringBuilder();
            byte[] cpPoolBytes = new byte[0x100000];
            ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
            // No code point equals lastCp + 1 for this initial value, so the
            // very first entry always gets a full segment header.
            int lastCp = -2;

            String line;
            while ((line = bfr.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                UnicodeSpec spec = UnicodeSpec.parse(line);
                if (spec == null) {
                    continue;
                }
                int cp = spec.getCodePoint();
                String name = spec.getName();
                if (name.equals("<control>") && spec.getOldName() != null) {
                    // Control characters are named by their old name, if any.
                    if (spec.getOldName().length() == 0) {
                        continue;
                    }
                    name = spec.getOldName();
                } else if (name.startsWith("<")) {
                    // Range markers carry no individual name, e.g.
                    //   3400 <CJK Ideograph Extension A, First>
                    //   4db5 <CJK Ideograph Extension A, Last>
                    //   ac00 <Hangul Syllable, First>
                    //   d7a3 <Hangul Syllable, Last>
                    //   e000 <Private Use, First>
                    //   f8ff <Private Use, Last>
                    continue;
                }

                // The length is stored in one byte and 0 is reserved as the
                // segment start flag, so anything else cannot be encoded.
                int len = name.length();
                if (len == 0 || len > 0xff) {
                    throw new IllegalStateException(
                        "Cannot encode name of length " + len
                        + " for code point " + Integer.toHexString(cp));
                }

                if (cp == lastCp + 1) {
                    cpBB.put((byte)len);
                } else {
                    cpBB.put((byte)0);  // segment start flag
                    cpBB.putInt((len << 24) | (cp & 0xffffff));
                }
                namePool.append(name);
                lastCp = cp;
            }

            byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
            int cpLen = cpBB.position();
            int total = cpLen + namePoolBytes.length;

            outputStarted = true;
            dos = new DataOutputStream(
                      new DeflaterOutputStream(
                          new FileOutputStream(args[1])));
            dos.writeInt(total);  // total
            dos.writeInt(cpLen);  // nameOff
            dos.write(cpPoolBytes, 0, cpLen);
            dos.write(namePoolBytes);
            // Close on the success path so that a failure while finishing
            // the compressed stream is reported instead of being ignored.
            dos.close();
            dos = null;

        } catch (Throwable e) {
            System.err.println("Unexpected exception:");
            e.printStackTrace();
            failed = true;
        } finally {
            if (dos != null) {
                // Only reached after a failure that was already reported.
                try {
                    dos.close();
                } catch (Throwable ee) { ee.printStackTrace(); }
            }
            if (bfr != null) {
                try {
                    bfr.close();
                } catch (Throwable ee) { ee.printStackTrace(); }
            }
        }

        if (failed) {
            if (outputStarted) {
                // Do not leave a truncated data file behind.
                new File(args[1]).delete();
            }
            System.exit(1);
        }
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jdk/make/tools/src/build/tools/generatecharacter/CharacterScript.java Tue May 18 15:36:47 2010 -0700 @@ -0,0 +1,214 @@ +import java.util.regex.*; +import java.util.*; +import java.io.*; + +public class CharacterScript { + + // generate the code needed for j.l.C.UnicodeScript + static void fortest(String fmt, Object... o) { + //System.out.printf(fmt, o); + } + + static void print(String fmt, Object... o) { + System.out.printf(fmt, o); + } + + static void debug(String fmt, Object... o) { + //System.out.printf(fmt, o); + } + + public static void main(String args[]){ + try { + if (args.length != 1) { + System.out.println("java CharacterScript script.txt out"); + System.exit(1); + } + + int i, j; + BufferedReader sbfr = new BufferedReader(new FileReader(args[0])); + HashMap<String,Integer> scriptMap = new HashMap<String,Integer>(); + String line = null; + + Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher(""); + + int prevS = -1; + int prevE = -1; + String prevN = null; + int[][] scripts = new int[1024][3]; + int scriptSize = 0; + + while ((line = sbfr.readLine()) != null) { + if (line.length() <= 1 || line.charAt(0) == '#') { + continue; + } + m.reset(line); + if (m.matches()) { + int start = Integer.parseInt(m.group(1), 16); + int end = (m.group(2)==null)?start + :Integer.parseInt(m.group(2), 16); + String name = m.group(3); + if (name.equals(prevN) && start == prevE + 1) { + prevE = end; + } else { + if (prevS != -1) { + if (scriptMap.get(prevN) == null) { + scriptMap.put(prevN, scriptMap.size()); + } + scripts[scriptSize][0] = prevS; + scripts[scriptSize][1] = prevE; + scripts[scriptSize][2] = scriptMap.get(prevN); + scriptSize++; + } + debug("%x-%x\t%s%n", prevS, prevE, prevN); + prevS = start; prevE = end; prevN = name; + } + } else { + debug("Warning: Unrecognized line <%s>%n", line); + } + } + + //last one. 
+ if (scriptMap.get(prevN) == null) { + scriptMap.put(prevN, scriptMap.size()); + } + scripts[scriptSize][0] = prevS; + scripts[scriptSize][1] = prevE; + scripts[scriptSize][2] = scriptMap.get(prevN); + scriptSize++; + + debug("%x-%x\t%s%n", prevS, prevE, prevN); + debug("-----------------%n"); + debug("Total scripts=%s%n", scriptMap.size()); + debug("-----------------%n%n"); + + String[] names = new String[scriptMap.size()]; + for (String name: scriptMap.keySet()) { + names[scriptMap.get(name).intValue()] = name; + } + + for (j = 0; j < scriptSize; j++) { + for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) { + String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);; + if (cp > 0xffff) + System.out.printf("%05X %s%n", cp, name); + else + System.out.printf("%05X %s%n", cp, name); + } + } + + Arrays.sort(scripts, 0, scriptSize, + new Comparator<int[]>() { + public int compare(int[] a1, int[] a2) { + return a1[0] - a2[0]; + } + public boolean compare(Object obj) { + return obj == this; + } + }); + + + + // Consolidation: there are lots of "reserved" code points + // embedded in those otherwise "sequential" blocks. + // To make the lookup table smaller, we combine those + // separated segments with the assumption that the lookup + // implementation checks + // Character.getType() != Character.UNASSIGNED + // first (return UNKNOWN for unassigned) + + ArrayList<int[]> list = new ArrayList(); + list.add(scripts[0]); + + int[] last = scripts[0]; + for (i = 1; i < scriptSize; i++) { + if (scripts[i][0] != (last[1] + 1)) { + + boolean isNotUnassigned = false; + for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) { + if (Character.getType(cp) != Character.UNASSIGNED) { + isNotUnassigned = true; + debug("Warning: [%x] is ASSIGNED but in NON script%n", cp); + break; + } + } + if (isNotUnassigned) { + // surrogates only? 
+ int[] a = new int[3]; + a[0] = last[1] + 1; + a[1] = scripts[i][0] - 1; + a[2] = -1; // unknown + list.add(a); + } else { + if (last[2] == scripts[i][2]) { + //combine + last[1] = scripts[i][1]; + continue; + } else { + // expand last + last[1] = scripts[i][0] - 1; + } + } + } + list.add(scripts[i]); + last = scripts[i]; + } + + for (i = 0; i < list.size(); i++) { + int[] a = (int[])list.get(i); + String name = "UNKNOWN"; + if (a[2] != -1) + name = names[a[2]].toUpperCase(Locale.US); + debug("0x%05x, 0x%05x %s%n", a[0], a[1], name); + } + debug("--->total=%d%n", list.size()); + + + //////////////////OUTPUT////////////////////////////////// + print("public class Scripts {%n%n"); + print(" public static enum UnicodeScript {%n"); + for (i = 0; i < names.length; i++) { + print(" /**%n * Unicode script \"%s\".%n */%n", names[i]); + print(" %s,%n%n", names[i].toUpperCase(Locale.US)); + } + print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n"); + + + // lookup table + print(" private static final int[] scriptStarts = {%n"); + for (int[] a : list) { + String name = "UNKNOWN"; + if (a[2] != -1) + name = names[a[2]].toUpperCase(Locale.US); + if (a[0] < 0x10000) + print(" 0x%04X, // %04X..%04X; %s%n", + a[0], a[0], a[1], name); + else + print(" 0x%05X, // %05X..%05X; %s%n", + a[0], a[0], a[1], name); + } + last = list.get(list.size() -1); + if (last[1] != Character.MAX_CODE_POINT) + print(" 0x%05X // %05X..%06X; %s%n", + last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT, + "UNKNOWN"); + print("%n };%n%n"); + + print(" private static final UnicodeScript[] scripts = {%n"); + for (int[] a : list) { + String name = "UNKNOWN"; + if (a[2] != -1) + name = names[a[2]].toUpperCase(Locale.US); + print(" %s,%n", name); + } + + if (last[1] != Character.MAX_CODE_POINT) + print(" UNKNOWN%n"); + print(" };%n"); + print(" }%n"); + print("}%n"); + + } catch (Exception e) { + e.printStackTrace(); + } + } +}
--- a/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java Tue May 18 13:12:46 2010 -0700 +++ b/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java Tue May 18 15:36:47 2010 -0700 @@ -35,6 +35,8 @@ import java.io.FileWriter; import java.io.File; +import build.tools.generatecharacter.CharacterName; + /** * This program generates the source code for the class java.lang.Character. * It also generates native C code that can perform the same operations.
--- a/jdk/src/share/classes/java/lang/Character.java Tue May 18 13:12:46 2010 -0700 +++ b/jdk/src/share/classes/java/lang/Character.java Tue May 18 15:36:47 2010 -0700 @@ -24,6 +24,7 @@ */ package java.lang; +import java.util.Arrays; import java.util.Map; import java.util.HashMap; import java.util.Locale; @@ -2547,6 +2548,1241 @@ /** + * A family of character subsets representing the character scripts + * defined in the <a href="http://www.unicode.org/reports/tr24/"> + * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode + * character is assigned to a single Unicode script, either a specific + * script, such as {@link Character.UnicodeScript#LATIN Latin}, or + * one of the following three special values, + * {@link Character.UnicodeScript#INHERITED Inherited}, + * {@link Character.UnicodeScript#COMMON Common} or + * {@link Character.UnicodeScript#UNKNOWN Unknown}. + * + * @since 1.7 + */ + public static enum UnicodeScript { + /** + * Unicode script "Common". + */ + COMMON, + + /** + * Unicode script "Latin". + */ + LATIN, + + /** + * Unicode script "Greek". + */ + GREEK, + + /** + * Unicode script "Cyrillic". + */ + CYRILLIC, + + /** + * Unicode script "Armenian". + */ + ARMENIAN, + + /** + * Unicode script "Hebrew". + */ + HEBREW, + + /** + * Unicode script "Arabic". + */ + ARABIC, + + /** + * Unicode script "Syriac". + */ + SYRIAC, + + /** + * Unicode script "Thaana". + */ + THAANA, + + /** + * Unicode script "Devanagari". + */ + DEVANAGARI, + + /** + * Unicode script "Bengali". + */ + BENGALI, + + /** + * Unicode script "Gurmukhi". + */ + GURMUKHI, + + /** + * Unicode script "Gujarati". + */ + GUJARATI, + + /** + * Unicode script "Oriya". + */ + ORIYA, + + /** + * Unicode script "Tamil". + */ + TAMIL, + + /** + * Unicode script "Telugu". + */ + TELUGU, + + /** + * Unicode script "Kannada". + */ + KANNADA, + + /** + * Unicode script "Malayalam". + */ + MALAYALAM, + + /** + * Unicode script "Sinhala". 
+ */ + SINHALA, + + /** + * Unicode script "Thai". + */ + THAI, + + /** + * Unicode script "Lao". + */ + LAO, + + /** + * Unicode script "Tibetan". + */ + TIBETAN, + + /** + * Unicode script "Myanmar". + */ + MYANMAR, + + /** + * Unicode script "Georgian". + */ + GEORGIAN, + + /** + * Unicode script "Hangul". + */ + HANGUL, + + /** + * Unicode script "Ethiopic". + */ + ETHIOPIC, + + /** + * Unicode script "Cherokee". + */ + CHEROKEE, + + /** + * Unicode script "Canadian_Aboriginal". + */ + CANADIAN_ABORIGINAL, + + /** + * Unicode script "Ogham". + */ + OGHAM, + + /** + * Unicode script "Runic". + */ + RUNIC, + + /** + * Unicode script "Khmer". + */ + KHMER, + + /** + * Unicode script "Mongolian". + */ + MONGOLIAN, + + /** + * Unicode script "Hiragana". + */ + HIRAGANA, + + /** + * Unicode script "Katakana". + */ + KATAKANA, + + /** + * Unicode script "Bopomofo". + */ + BOPOMOFO, + + /** + * Unicode script "Han". + */ + HAN, + + /** + * Unicode script "Yi". + */ + YI, + + /** + * Unicode script "Old_Italic". + */ + OLD_ITALIC, + + /** + * Unicode script "Gothic". + */ + GOTHIC, + + /** + * Unicode script "Deseret". + */ + DESERET, + + /** + * Unicode script "Inherited". + */ + INHERITED, + + /** + * Unicode script "Tagalog". + */ + TAGALOG, + + /** + * Unicode script "Hanunoo". + */ + HANUNOO, + + /** + * Unicode script "Buhid". + */ + BUHID, + + /** + * Unicode script "Tagbanwa". + */ + TAGBANWA, + + /** + * Unicode script "Limbu". + */ + LIMBU, + + /** + * Unicode script "Tai_Le". + */ + TAI_LE, + + /** + * Unicode script "Linear_B". + */ + LINEAR_B, + + /** + * Unicode script "Ugaritic". + */ + UGARITIC, + + /** + * Unicode script "Shavian". + */ + SHAVIAN, + + /** + * Unicode script "Osmanya". + */ + OSMANYA, + + /** + * Unicode script "Cypriot". + */ + CYPRIOT, + + /** + * Unicode script "Braille". + */ + BRAILLE, + + /** + * Unicode script "Buginese". + */ + BUGINESE, + + /** + * Unicode script "Coptic". + */ + COPTIC, + + /** + * Unicode script "New_Tai_Lue". 
+ */ + NEW_TAI_LUE, + + /** + * Unicode script "Glagolitic". + */ + GLAGOLITIC, + + /** + * Unicode script "Tifinagh". + */ + TIFINAGH, + + /** + * Unicode script "Syloti_Nagri". + */ + SYLOTI_NAGRI, + + /** + * Unicode script "Old_Persian". + */ + OLD_PERSIAN, + + /** + * Unicode script "Kharoshthi". + */ + KHAROSHTHI, + + /** + * Unicode script "Balinese". + */ + BALINESE, + + /** + * Unicode script "Cuneiform". + */ + CUNEIFORM, + + /** + * Unicode script "Phoenician". + */ + PHOENICIAN, + + /** + * Unicode script "Phags_Pa". + */ + PHAGS_PA, + + /** + * Unicode script "Nko". + */ + NKO, + + /** + * Unicode script "Sundanese". + */ + SUNDANESE, + + /** + * Unicode script "Lepcha". + */ + LEPCHA, + + /** + * Unicode script "Ol_Chiki". + */ + OL_CHIKI, + + /** + * Unicode script "Vai". + */ + VAI, + + /** + * Unicode script "Saurashtra". + */ + SAURASHTRA, + + /** + * Unicode script "Kayah_Li". + */ + KAYAH_LI, + + /** + * Unicode script "Rejang". + */ + REJANG, + + /** + * Unicode script "Lycian". + */ + LYCIAN, + + /** + * Unicode script "Carian". + */ + CARIAN, + + /** + * Unicode script "Lydian". + */ + LYDIAN, + + /** + * Unicode script "Cham". + */ + CHAM, + + /** + * Unicode script "Tai_Tham". + */ + TAI_THAM, + + /** + * Unicode script "Tai_Viet". + */ + TAI_VIET, + + /** + * Unicode script "Avestan". + */ + AVESTAN, + + /** + * Unicode script "Egyptian_Hieroglyphs". + */ + EGYPTIAN_HIEROGLYPHS, + + /** + * Unicode script "Samaritan". + */ + SAMARITAN, + + /** + * Unicode script "Lisu". + */ + LISU, + + /** + * Unicode script "Bamum". + */ + BAMUM, + + /** + * Unicode script "Javanese". + */ + JAVANESE, + + /** + * Unicode script "Meetei_Mayek". + */ + MEETEI_MAYEK, + + /** + * Unicode script "Imperial_Aramaic". + */ + IMPERIAL_ARAMAIC, + + /** + * Unicode script "Old_South_Arabian". + */ + OLD_SOUTH_ARABIAN, + + /** + * Unicode script "Inscriptional_Parthian". + */ + INSCRIPTIONAL_PARTHIAN, + + /** + * Unicode script "Inscriptional_Pahlavi". 
+ */ + INSCRIPTIONAL_PAHLAVI, + + /** + * Unicode script "Old_Turkic". + */ + OLD_TURKIC, + + /** + * Unicode script "Kaithi". + */ + KAITHI, + + /** + * Unicode script "Unknown". + */ + UNKNOWN; + + private static final int[] scriptStarts = { + 0x0000, // 0000..0040; COMMON + 0x0041, // 0041..005A; LATIN + 0x005B, // 005B..0060; COMMON + 0x0061, // 0061..007A; LATIN + 0x007B, // 007B..00A9; COMMON + 0x00AA, // 00AA..00AA; LATIN + 0x00AB, // 00AB..00B9; COMMON + 0x00BA, // 00BA..00BA; LATIN + 0x00BB, // 00BB..00BF; COMMON + 0x00C0, // 00C0..00D6; LATIN + 0x00D7, // 00D7..00D7; COMMON + 0x00D8, // 00D8..00F6; LATIN + 0x00F7, // 00F7..00F7; COMMON + 0x00F8, // 00F8..02B8; LATIN + 0x02B9, // 02B9..02DF; COMMON + 0x02E0, // 02E0..02E4; LATIN + 0x02E5, // 02E5..02FF; COMMON + 0x0300, // 0300..036F; INHERITED + 0x0370, // 0370..0373; GREEK + 0x0374, // 0374..0374; COMMON + 0x0375, // 0375..037D; GREEK + 0x037E, // 037E..0383; COMMON + 0x0384, // 0384..0384; GREEK + 0x0385, // 0385..0385; COMMON + 0x0386, // 0386..0386; GREEK + 0x0387, // 0387..0387; COMMON + 0x0388, // 0388..03E1; GREEK + 0x03E2, // 03E2..03EF; COPTIC + 0x03F0, // 03F0..03FF; GREEK + 0x0400, // 0400..0484; CYRILLIC + 0x0485, // 0485..0486; INHERITED + 0x0487, // 0487..0530; CYRILLIC + 0x0531, // 0531..0588; ARMENIAN + 0x0589, // 0589..0589; COMMON + 0x058A, // 058A..0590; ARMENIAN + 0x0591, // 0591..05FF; HEBREW + 0x0600, // 0600..0605; COMMON + 0x0606, // 0606..060B; ARABIC + 0x060C, // 060C..060C; COMMON + 0x060D, // 060D..061A; ARABIC + 0x061B, // 061B..061D; COMMON + 0x061E, // 061E..061E; ARABIC + 0x061F, // 061F..0620; COMMON + 0x0621, // 0621..063F; ARABIC + 0x0640, // 0640..0640; COMMON + 0x0641, // 0641..064A; ARABIC + 0x064B, // 064B..0655; INHERITED + 0x0656, // 0656..065F; ARABIC + 0x0660, // 0660..0669; COMMON + 0x066A, // 066A..066F; ARABIC + 0x0670, // 0670..0670; INHERITED + 0x0671, // 0671..06DC; ARABIC + 0x06DD, // 06DD..06DD; COMMON + 0x06DE, // 06DE..06FF; ARABIC + 0x0700, // 
0700..074F; SYRIAC + 0x0750, // 0750..077F; ARABIC + 0x0780, // 0780..07BF; THAANA + 0x07C0, // 07C0..07FF; NKO + 0x0800, // 0800..08FF; SAMARITAN + 0x0900, // 0900..0950; DEVANAGARI + 0x0951, // 0951..0952; INHERITED + 0x0953, // 0953..0963; DEVANAGARI + 0x0964, // 0964..0965; COMMON + 0x0966, // 0966..096F; DEVANAGARI + 0x0970, // 0970..0970; COMMON + 0x0971, // 0971..0980; DEVANAGARI + 0x0981, // 0981..0A00; BENGALI + 0x0A01, // 0A01..0A80; GURMUKHI + 0x0A81, // 0A81..0B00; GUJARATI + 0x0B01, // 0B01..0B81; ORIYA + 0x0B82, // 0B82..0C00; TAMIL + 0x0C01, // 0C01..0C81; TELUGU + 0x0C82, // 0C82..0CF0; KANNADA + 0x0CF1, // 0CF1..0D01; COMMON + 0x0D02, // 0D02..0D81; MALAYALAM + 0x0D82, // 0D82..0E00; SINHALA + 0x0E01, // 0E01..0E3E; THAI + 0x0E3F, // 0E3F..0E3F; COMMON + 0x0E40, // 0E40..0E80; THAI + 0x0E81, // 0E81..0EFF; LAO + 0x0F00, // 0F00..0FD4; TIBETAN + 0x0FD5, // 0FD5..0FFF; COMMON + 0x1000, // 1000..109F; MYANMAR + 0x10A0, // 10A0..10FA; GEORGIAN + 0x10FB, // 10FB..10FB; COMMON + 0x10FC, // 10FC..10FF; GEORGIAN + 0x1100, // 1100..11FF; HANGUL + 0x1200, // 1200..139F; ETHIOPIC + 0x13A0, // 13A0..13FF; CHEROKEE + 0x1400, // 1400..167F; CANADIAN_ABORIGINAL + 0x1680, // 1680..169F; OGHAM + 0x16A0, // 16A0..16EA; RUNIC + 0x16EB, // 16EB..16ED; COMMON + 0x16EE, // 16EE..16FF; RUNIC + 0x1700, // 1700..171F; TAGALOG + 0x1720, // 1720..1734; HANUNOO + 0x1735, // 1735..173F; COMMON + 0x1740, // 1740..175F; BUHID + 0x1760, // 1760..177F; TAGBANWA + 0x1780, // 1780..17FF; KHMER + 0x1800, // 1800..1801; MONGOLIAN + 0x1802, // 1802..1803; COMMON + 0x1804, // 1804..1804; MONGOLIAN + 0x1805, // 1805..1805; COMMON + 0x1806, // 1806..18AF; MONGOLIAN + 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL + 0x1900, // 1900..194F; LIMBU + 0x1950, // 1950..197F; TAI_LE + 0x1980, // 1980..19DF; NEW_TAI_LUE + 0x19E0, // 19E0..19FF; KHMER + 0x1A00, // 1A00..1A1F; BUGINESE + 0x1A20, // 1A20..1AFF; TAI_THAM + 0x1B00, // 1B00..1B7F; BALINESE + 0x1B80, // 1B80..1BFF; SUNDANESE + 0x1C00, // 
1C00..1C4F; LEPCHA + 0x1C50, // 1C50..1CCF; OL_CHIKI + 0x1CD0, // 1CD0..1CD2; INHERITED + 0x1CD3, // 1CD3..1CD3; COMMON + 0x1CD4, // 1CD4..1CE0; INHERITED + 0x1CE1, // 1CE1..1CE1; COMMON + 0x1CE2, // 1CE2..1CE8; INHERITED + 0x1CE9, // 1CE9..1CEC; COMMON + 0x1CED, // 1CED..1CED; INHERITED + 0x1CEE, // 1CEE..1CFF; COMMON + 0x1D00, // 1D00..1D25; LATIN + 0x1D26, // 1D26..1D2A; GREEK + 0x1D2B, // 1D2B..1D2B; CYRILLIC + 0x1D2C, // 1D2C..1D5C; LATIN + 0x1D5D, // 1D5D..1D61; GREEK + 0x1D62, // 1D62..1D65; LATIN + 0x1D66, // 1D66..1D6A; GREEK + 0x1D6B, // 1D6B..1D77; LATIN + 0x1D78, // 1D78..1D78; CYRILLIC + 0x1D79, // 1D79..1DBE; LATIN + 0x1DBF, // 1DBF..1DBF; GREEK + 0x1DC0, // 1DC0..1DFF; INHERITED + 0x1E00, // 1E00..1EFF; LATIN + 0x1F00, // 1F00..1FFF; GREEK + 0x2000, // 2000..200B; COMMON + 0x200C, // 200C..200D; INHERITED + 0x200E, // 200E..2070; COMMON + 0x2071, // 2071..2073; LATIN + 0x2074, // 2074..207E; COMMON + 0x207F, // 207F..207F; LATIN + 0x2080, // 2080..208F; COMMON + 0x2090, // 2090..209F; LATIN + 0x20A0, // 20A0..20CF; COMMON + 0x20D0, // 20D0..20FF; INHERITED + 0x2100, // 2100..2125; COMMON + 0x2126, // 2126..2126; GREEK + 0x2127, // 2127..2129; COMMON + 0x212A, // 212A..212B; LATIN + 0x212C, // 212C..2131; COMMON + 0x2132, // 2132..2132; LATIN + 0x2133, // 2133..214D; COMMON + 0x214E, // 214E..214E; LATIN + 0x214F, // 214F..215F; COMMON + 0x2160, // 2160..2188; LATIN + 0x2189, // 2189..27FF; COMMON + 0x2800, // 2800..28FF; BRAILLE + 0x2900, // 2900..2BFF; COMMON + 0x2C00, // 2C00..2C5F; GLAGOLITIC + 0x2C60, // 2C60..2C7F; LATIN + 0x2C80, // 2C80..2CFF; COPTIC + 0x2D00, // 2D00..2D2F; GEORGIAN + 0x2D30, // 2D30..2D7F; TIFINAGH + 0x2D80, // 2D80..2DDF; ETHIOPIC + 0x2DE0, // 2DE0..2DFF; CYRILLIC + 0x2E00, // 2E00..2E7F; COMMON + 0x2E80, // 2E80..2FEF; HAN + 0x2FF0, // 2FF0..3004; COMMON + 0x3005, // 3005..3005; HAN + 0x3006, // 3006..3006; COMMON + 0x3007, // 3007..3007; HAN + 0x3008, // 3008..3020; COMMON + 0x3021, // 3021..3029; HAN + 0x302A, // 
302A..302F; INHERITED + 0x3030, // 3030..3037; COMMON + 0x3038, // 3038..303B; HAN + 0x303C, // 303C..3040; COMMON + 0x3041, // 3041..3098; HIRAGANA + 0x3099, // 3099..309A; INHERITED + 0x309B, // 309B..309C; COMMON + 0x309D, // 309D..309F; HIRAGANA + 0x30A0, // 30A0..30A0; COMMON + 0x30A1, // 30A1..30FA; KATAKANA + 0x30FB, // 30FB..30FC; COMMON + 0x30FD, // 30FD..3104; KATAKANA + 0x3105, // 3105..3130; BOPOMOFO + 0x3131, // 3131..318F; HANGUL + 0x3190, // 3190..319F; COMMON + 0x31A0, // 31A0..31BF; BOPOMOFO + 0x31C0, // 31C0..31EF; COMMON + 0x31F0, // 31F0..31FF; KATAKANA + 0x3200, // 3200..321F; HANGUL + 0x3220, // 3220..325F; COMMON + 0x3260, // 3260..327E; HANGUL + 0x327F, // 327F..32CF; COMMON + 0x32D0, // 32D0..3357; KATAKANA + 0x3358, // 3358..33FF; COMMON + 0x3400, // 3400..4DBF; HAN + 0x4DC0, // 4DC0..4DFF; COMMON + 0x4E00, // 4E00..9FFF; HAN + 0xA000, // A000..A4CF; YI + 0xA4D0, // A4D0..A4FF; LISU + 0xA500, // A500..A63F; VAI + 0xA640, // A640..A69F; CYRILLIC + 0xA6A0, // A6A0..A6FF; BAMUM + 0xA700, // A700..A721; COMMON + 0xA722, // A722..A787; LATIN + 0xA788, // A788..A78A; COMMON + 0xA78B, // A78B..A7FF; LATIN + 0xA800, // A800..A82F; SYLOTI_NAGRI + 0xA830, // A830..A83F; COMMON + 0xA840, // A840..A87F; PHAGS_PA + 0xA880, // A880..A8DF; SAURASHTRA + 0xA8E0, // A8E0..A8FF; DEVANAGARI + 0xA900, // A900..A92F; KAYAH_LI + 0xA930, // A930..A95F; REJANG + 0xA960, // A960..A97F; HANGUL + 0xA980, // A980..A9FF; JAVANESE + 0xAA00, // AA00..AA5F; CHAM + 0xAA60, // AA60..AA7F; MYANMAR + 0xAA80, // AA80..ABBF; TAI_VIET + 0xABC0, // ABC0..ABFF; MEETEI_MAYEK + 0xAC00, // AC00..D7FB; HANGUL + 0xD7FC, // D7FC..F8FF; UNKNOWN + 0xF900, // F900..FAFF; HAN + 0xFB00, // FB00..FB12; LATIN + 0xFB13, // FB13..FB1C; ARMENIAN + 0xFB1D, // FB1D..FB4F; HEBREW + 0xFB50, // FB50..FD3D; ARABIC + 0xFD3E, // FD3E..FD4F; COMMON + 0xFD50, // FD50..FDFC; ARABIC + 0xFDFD, // FDFD..FDFF; COMMON + 0xFE00, // FE00..FE0F; INHERITED + 0xFE10, // FE10..FE1F; COMMON + 0xFE20, // FE20..FE2F; 
INHERITED + 0xFE30, // FE30..FE6F; COMMON + 0xFE70, // FE70..FEFE; ARABIC + 0xFEFF, // FEFF..FF20; COMMON + 0xFF21, // FF21..FF3A; LATIN + 0xFF3B, // FF3B..FF40; COMMON + 0xFF41, // FF41..FF5A; LATIN + 0xFF5B, // FF5B..FF65; COMMON + 0xFF66, // FF66..FF6F; KATAKANA + 0xFF70, // FF70..FF70; COMMON + 0xFF71, // FF71..FF9D; KATAKANA + 0xFF9E, // FF9E..FF9F; COMMON + 0xFFA0, // FFA0..FFDF; HANGUL + 0xFFE0, // FFE0..FFFF; COMMON + 0x10000, // 10000..100FF; LINEAR_B + 0x10100, // 10100..1013F; COMMON + 0x10140, // 10140..1018F; GREEK + 0x10190, // 10190..101FC; COMMON + 0x101FD, // 101FD..1027F; INHERITED + 0x10280, // 10280..1029F; LYCIAN + 0x102A0, // 102A0..102FF; CARIAN + 0x10300, // 10300..1032F; OLD_ITALIC + 0x10330, // 10330..1037F; GOTHIC + 0x10380, // 10380..1039F; UGARITIC + 0x103A0, // 103A0..103FF; OLD_PERSIAN + 0x10400, // 10400..1044F; DESERET + 0x10450, // 10450..1047F; SHAVIAN + 0x10480, // 10480..107FF; OSMANYA + 0x10800, // 10800..1083F; CYPRIOT + 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC + 0x10900, // 10900..1091F; PHOENICIAN + 0x10920, // 10920..109FF; LYDIAN + 0x10A00, // 10A00..10A5F; KHAROSHTHI + 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN + 0x10B00, // 10B00..10B3F; AVESTAN + 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN + 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI + 0x10C00, // 10C00..10E5F; OLD_TURKIC + 0x10E60, // 10E60..1107F; ARABIC + 0x11080, // 11080..11FFF; KAITHI + 0x12000, // 12000..12FFF; CUNEIFORM + 0x13000, // 13000..1CFFF; EGYPTIAN_HIEROGLYPHS + 0x1D000, // 1D000..1D166; COMMON + 0x1D167, // 1D167..1D169; INHERITED + 0x1D16A, // 1D16A..1D17A; COMMON + 0x1D17B, // 1D17B..1D182; INHERITED + 0x1D183, // 1D183..1D184; COMMON + 0x1D185, // 1D185..1D18B; INHERITED + 0x1D18C, // 1D18C..1D1A9; COMMON + 0x1D1AA, // 1D1AA..1D1AD; INHERITED + 0x1D1AE, // 1D1AE..1D1FF; COMMON + 0x1D200, // 1D200..1D2FF; GREEK + 0x1D300, // 1D300..1F1FF; COMMON + 0x1F200, // 1F200..1F20F; HIRAGANA + 0x1F210, // 1F210..1FFFF; COMMON + 0x20000, // 
20000..E0000; HAN + 0xE0001, // E0001..E00FF; COMMON + 0xE0100, // E0100..E01EF; INHERITED + 0xE01F0 // E01F0..10FFFF; UNKNOWN + + }; + + private static final UnicodeScript[] scripts = { + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + INHERITED, + GREEK, + COMMON, + GREEK, + COMMON, + GREEK, + COMMON, + GREEK, + COMMON, + GREEK, + COPTIC, + GREEK, + CYRILLIC, + INHERITED, + CYRILLIC, + ARMENIAN, + COMMON, + ARMENIAN, + HEBREW, + COMMON, + ARABIC, + COMMON, + ARABIC, + COMMON, + ARABIC, + COMMON, + ARABIC, + COMMON, + ARABIC, + INHERITED, + ARABIC, + COMMON, + ARABIC, + INHERITED, + ARABIC, + COMMON, + ARABIC, + SYRIAC, + ARABIC, + THAANA, + NKO, + SAMARITAN, + DEVANAGARI, + INHERITED, + DEVANAGARI, + COMMON, + DEVANAGARI, + COMMON, + DEVANAGARI, + BENGALI, + GURMUKHI, + GUJARATI, + ORIYA, + TAMIL, + TELUGU, + KANNADA, + COMMON, + MALAYALAM, + SINHALA, + THAI, + COMMON, + THAI, + LAO, + TIBETAN, + COMMON, + MYANMAR, + GEORGIAN, + COMMON, + GEORGIAN, + HANGUL, + ETHIOPIC, + CHEROKEE, + CANADIAN_ABORIGINAL, + OGHAM, + RUNIC, + COMMON, + RUNIC, + TAGALOG, + HANUNOO, + COMMON, + BUHID, + TAGBANWA, + KHMER, + MONGOLIAN, + COMMON, + MONGOLIAN, + COMMON, + MONGOLIAN, + CANADIAN_ABORIGINAL, + LIMBU, + TAI_LE, + NEW_TAI_LUE, + KHMER, + BUGINESE, + TAI_THAM, + BALINESE, + SUNDANESE, + LEPCHA, + OL_CHIKI, + INHERITED, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + LATIN, + GREEK, + CYRILLIC, + LATIN, + GREEK, + LATIN, + GREEK, + LATIN, + CYRILLIC, + LATIN, + GREEK, + INHERITED, + LATIN, + GREEK, + COMMON, + INHERITED, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + INHERITED, + COMMON, + GREEK, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + BRAILLE, + COMMON, + GLAGOLITIC, + LATIN, + COPTIC, + GEORGIAN, + TIFINAGH, + ETHIOPIC, + CYRILLIC, + COMMON, + HAN, + COMMON, + 
HAN, + COMMON, + HAN, + COMMON, + HAN, + INHERITED, + COMMON, + HAN, + COMMON, + HIRAGANA, + INHERITED, + COMMON, + HIRAGANA, + COMMON, + KATAKANA, + COMMON, + KATAKANA, + BOPOMOFO, + HANGUL, + COMMON, + BOPOMOFO, + COMMON, + KATAKANA, + HANGUL, + COMMON, + HANGUL, + COMMON, + KATAKANA, + COMMON, + HAN, + COMMON, + HAN, + YI, + LISU, + VAI, + CYRILLIC, + BAMUM, + COMMON, + LATIN, + COMMON, + LATIN, + SYLOTI_NAGRI, + COMMON, + PHAGS_PA, + SAURASHTRA, + DEVANAGARI, + KAYAH_LI, + REJANG, + HANGUL, + JAVANESE, + CHAM, + MYANMAR, + TAI_VIET, + MEETEI_MAYEK, + HANGUL, + UNKNOWN, + HAN, + LATIN, + ARMENIAN, + HEBREW, + ARABIC, + COMMON, + ARABIC, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + ARABIC, + COMMON, + LATIN, + COMMON, + LATIN, + COMMON, + KATAKANA, + COMMON, + KATAKANA, + COMMON, + HANGUL, + COMMON, + LINEAR_B, + COMMON, + GREEK, + COMMON, + INHERITED, + LYCIAN, + CARIAN, + OLD_ITALIC, + GOTHIC, + UGARITIC, + OLD_PERSIAN, + DESERET, + SHAVIAN, + OSMANYA, + CYPRIOT, + IMPERIAL_ARAMAIC, + PHOENICIAN, + LYDIAN, + KHAROSHTHI, + OLD_SOUTH_ARABIAN, + AVESTAN, + INSCRIPTIONAL_PARTHIAN, + INSCRIPTIONAL_PAHLAVI, + OLD_TURKIC, + ARABIC, + KAITHI, + CUNEIFORM, + EGYPTIAN_HIEROGLYPHS, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + INHERITED, + COMMON, + GREEK, + COMMON, + HIRAGANA, + COMMON, + HAN, + COMMON, + INHERITED, + UNKNOWN + }; + + private static HashMap<String, Character.UnicodeScript> aliases; + static { + aliases = new HashMap<String, UnicodeScript>(); + aliases.put("ARAB", ARABIC); + aliases.put("ARMI", IMPERIAL_ARAMAIC); + aliases.put("ARMN", ARMENIAN); + aliases.put("AVST", AVESTAN); + aliases.put("BALI", BALINESE); + aliases.put("BAMU", BAMUM); + aliases.put("BENG", BENGALI); + aliases.put("BOPO", BOPOMOFO); + aliases.put("BRAI", BRAILLE); + aliases.put("BUGI", BUGINESE); + aliases.put("BUHD", BUHID); + aliases.put("CANS", CANADIAN_ABORIGINAL); + aliases.put("CARI", CARIAN); + aliases.put("CHAM", CHAM); + 
aliases.put("CHER", CHEROKEE); + aliases.put("COPT", COPTIC); + aliases.put("CPRT", CYPRIOT); + aliases.put("CYRL", CYRILLIC); + aliases.put("DEVA", DEVANAGARI); + aliases.put("DSRT", DESERET); + aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); + aliases.put("ETHI", ETHIOPIC); + aliases.put("GEOR", GEORGIAN); + aliases.put("GLAG", GLAGOLITIC); + aliases.put("GOTH", GOTHIC); + aliases.put("GREK", GREEK); + aliases.put("GUJR", GUJARATI); + aliases.put("GURU", GURMUKHI); + aliases.put("HANG", HANGUL); + aliases.put("HANI", HAN); + aliases.put("HANO", HANUNOO); + aliases.put("HEBR", HEBREW); + aliases.put("HIRA", HIRAGANA); + // it appears we don't have the KATAKANA_OR_HIRAGANA + //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); + aliases.put("ITAL", OLD_ITALIC); + aliases.put("JAVA", JAVANESE); + aliases.put("KALI", KAYAH_LI); + aliases.put("KANA", KATAKANA); + aliases.put("KHAR", KHAROSHTHI); + aliases.put("KHMR", KHMER); + aliases.put("KNDA", KANNADA); + aliases.put("KTHI", KAITHI); + aliases.put("LANA", TAI_THAM); + aliases.put("LAOO", LAO); + aliases.put("LATN", LATIN); + aliases.put("LEPC", LEPCHA); + aliases.put("LIMB", LIMBU); + aliases.put("LINB", LINEAR_B); + aliases.put("LISU", LISU); + aliases.put("LYCI", LYCIAN); + aliases.put("LYDI", LYDIAN); + aliases.put("MLYM", MALAYALAM); + aliases.put("MONG", MONGOLIAN); + aliases.put("MTEI", MEETEI_MAYEK); + aliases.put("MYMR", MYANMAR); + aliases.put("NKOO", NKO); + aliases.put("OGAM", OGHAM); + aliases.put("OLCK", OL_CHIKI); + aliases.put("ORKH", OLD_TURKIC); + aliases.put("ORYA", ORIYA); + aliases.put("OSMA", OSMANYA); + aliases.put("PHAG", PHAGS_PA); + aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); + aliases.put("PHNX", PHOENICIAN); + aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); + aliases.put("RJNG", REJANG); + aliases.put("RUNR", RUNIC); + aliases.put("SAMR", SAMARITAN); + aliases.put("SARB", OLD_SOUTH_ARABIAN); + aliases.put("SAUR", SAURASHTRA); + aliases.put("SHAW", SHAVIAN); + aliases.put("SINH", SINHALA); + 
aliases.put("SUND", SUNDANESE); + aliases.put("SYLO", SYLOTI_NAGRI); + aliases.put("SYRC", SYRIAC); + aliases.put("TAGB", TAGBANWA); + aliases.put("TALE", TAI_LE); + aliases.put("TALU", NEW_TAI_LUE); + aliases.put("TAML", TAMIL); + aliases.put("TAVT", TAI_VIET); + aliases.put("TELU", TELUGU); + aliases.put("TFNG", TIFINAGH); + aliases.put("TGLG", TAGALOG); + aliases.put("THAA", THAANA); + aliases.put("THAI", THAI); + aliases.put("TIBT", TIBETAN); + aliases.put("UGAR", UGARITIC); + aliases.put("VAII", VAI); + aliases.put("XPEO", OLD_PERSIAN); + aliases.put("XSUX", CUNEIFORM); + aliases.put("YIII", YI); + aliases.put("ZINH", INHERITED); + aliases.put("ZYYY", COMMON); + aliases.put("ZZZZ", UNKNOWN); + } + + /** + * Returns the enum constant representing the Unicode script of which + * the given character (Unicode code point) is assigned to. + * + * @param codePoint the character (Unicode code point) in question. + * @return The <code>UnicodeScript</code> constant representing the + * Unicode script of which this character is assigned to. + * + * @exception IllegalArgumentException if the specified + * <code>codePoint</code> is an invalid Unicode code point. + * @see Character#isValidCodePoint(int) + * + */ + public static UnicodeScript of(int codePoint) { + if (!isValidCodePoint(codePoint)) + throw new IllegalArgumentException(); + int type = getType(codePoint); + // leave SURROGATE and PRIVATE_USE for table lookup + if (type == UNASSIGNED) + return UNKNOWN; + int index = Arrays.binarySearch(scriptStarts, codePoint); + if (index < 0) + index = -index - 2; + return scripts[index]; + } + + /** + * Returns the UnicodeScript constant with the given Unicode script + * name or the script name alias. Script names and their aliases are + * determined by The Unicode Standard. The files Scripts<version>.txt + * and PropertyValueAliases<version>.txt define script names + * and the script name aliases for a particular version of the + * standard. 
The {@link Character} class specifies the version of + * the standard that it supports. + * <p> + * Character case is ignored for all of the valid script names. + * The en_US locale's case mapping rules are used to provide + * case-insensitive string comparisons for script name validation. + * <p> + * + * @param scriptName A <code>UnicodeScript</code> name. + * @return The <code>UnicodeScript</code> constant identified + * by <code>scriptName</code> + * @throws IllegalArgumentException if <code>scriptName</code> is an + * invalid name + * @throws NullPointerException if <code>scriptName</code> is null + */ + public static final UnicodeScript forName(String scriptName) { + scriptName = scriptName.toUpperCase(Locale.ENGLISH); + //.replace(' ', '_')); + UnicodeScript sc = aliases.get(scriptName); + if (sc != null) + return sc; + return valueOf(scriptName); + } + } + + /** * The value of the <code>Character</code>. * * @serial @@ -5042,4 +6278,51 @@ public static char reverseBytes(char ch) { return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); } + + /** + * Returns the Unicode name of the specified character + * <code>codePoint</code>, or null if the code point is + * {@link #UNASSIGNED unassigned}. + * <p> + * Note: if the specified character is not assigned a name by + * the <i>UnicodeData</i> file (part of the Unicode Character + * Database maintained by the Unicode Consortium), the returned + * name is the same as the result of expression + * + * <blockquote><code> + * Character.UnicodeBlock.of(codePoint) + * .toString() + * .replace('_', ' ') + * + " " + * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); + * + * </code></blockquote> + * + * @param codePoint the character (Unicode code point) + * + * @return the Unicode name of the specified character, or null if + * the code point is unassigned. + * + * @exception IllegalArgumentException if the specified + * <code>codePoint</code> is not a valid Unicode + * code point. 
+     *
+     * @since 1.7
+     */
+    public static String getName(int codePoint) {
+        if (!isValidCodePoint(codePoint)) {
+            throw new IllegalArgumentException();
+        }
+        String name = CharacterName.get(codePoint); // null when no name is recorded for codePoint
+        if (name != null)
+            return name;
+        if (getType(codePoint) == UNASSIGNED)
+            return null;
+        UnicodeBlock block = UnicodeBlock.of(codePoint); // assigned but unnamed: build the name from the block
+        if (block != null)
+            return block.toString().replace('_', ' ') + " "
+                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+        // defensive fallback when no block is found: the bare upper-case hex value
+        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+    }
 }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/java/lang/CharacterName.java Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+package java.lang;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.lang.ref.SoftReference;
+import java.util.Arrays;
+import java.util.zip.InflaterInputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+/**
+ * Lookup of Unicode character names backed by the compressed
+ * <code>uniName.dat</code> resource.
+ * <p>
+ * As read by {@link #initNamePool}, the inflated resource consists of two
+ * ints (<code>total</code> and <code>cpEnd</code>), <code>cpEnd</code>
+ * bytes of code point index, and <code>total - cpEnd</code> bytes of
+ * names. The names are only softly referenced and are re-read from the
+ * resource after the garbage collector has cleared them.
+ */
+class CharacterName {
+
+    // Both fields are volatile because get() reads them without locking.
+    private static volatile SoftReference<byte[]> refStrPool;
+    // lookup[cp >> 8][cp & 0xff] == (offset into name pool << 8) | name length;
+    // 0 (or a null page) means that no name is recorded.
+    private static volatile int[][] lookup;
+
+    /**
+     * Loads the name pool and the lookup table from the resource, unless
+     * another thread has already done so.
+     *
+     * @return the name pool, never null
+     * @throws InternalError if the resource is missing or cannot be read
+     */
+    private static synchronized byte[] initNamePool() {
+        byte[] strPool = null;
+        if (refStrPool != null && (strPool = refStrPool.get()) != null)
+            return strPool;
+        DataInputStream dis = null;
+        try {
+            InputStream in = AccessController.doPrivileged(
+                new PrivilegedAction<InputStream>() {
+                    public InputStream run() {
+                        return getClass().getResourceAsStream("uniName.dat");
+                    }
+                });
+            if (in == null)
+                throw new InternalError("uniName.dat not found");
+            dis = new DataInputStream(new InflaterInputStream(in));
+
+            // Fill a local table and publish it only when complete, so that
+            // an unsynchronized get() never sees a half-built table.
+            int[][] table = new int[(Character.MAX_CODE_POINT + 1) >> 8][];
+            int total = dis.readInt();
+            int cpEnd = dis.readInt();
+            byte ba[] = new byte[cpEnd];
+            dis.readFully(ba);
+
+            int nameOff = 0;
+            int cpOff = 0;
+            int cp = 0;
+            do {
+                int len = ba[cpOff++] & 0xff;
+                if (len == 0) {
+                    // escape: the real length and an explicit code point follow
+                    len = ba[cpOff++] & 0xff;
+                    // always big-endian
+                    cp = ((ba[cpOff++] & 0xff) << 16) |
+                         ((ba[cpOff++] & 0xff) << 8) |
+                         ((ba[cpOff++] & 0xff));
+                } else {
+                    // otherwise the entry names the next code point
+                    cp++;
+                }
+                int hi = cp >> 8;
+                if (table[hi] == null) {
+                    table[hi] = new int[0x100];
+                }
+                table[hi][cp&0xff] = (nameOff << 8) | len;
+                nameOff += len;
+            } while (cpOff < cpEnd);
+            strPool = new byte[total - cpEnd];
+            dis.readFully(strPool);
+            // lookup first: a reader that sees refStrPool also sees lookup
+            lookup = table;
+            refStrPool = new SoftReference<byte[]>(strPool);
+        } catch (Exception x) {
+            // keep the original failure as the cause
+            InternalError ie = new InternalError(x.getMessage());
+            ie.initCause(x);
+            throw ie;
+        } finally {
+            try {
+                if (dis != null)
+                    dis.close();
+            } catch (Exception xx) {}
+        }
+        return strPool;
+    }
+
+    /**
+     * Returns the name recorded for the given code point.
+     *
+     * @param cp the code point; not range-checked here
+     * @return the name, or null if none is recorded for <code>cp</code>
+     */
+    public static String get(int cp) {
+        SoftReference<byte[]> ref = refStrPool;
+        byte[] strPool = (ref == null) ? null : ref.get();
+        if (strPool == null)
+            strPool = initNamePool();
+        int[] page = lookup[cp >> 8];
+        int off;
+        if (page == null || (off = page[cp & 0xff]) == 0)
+            return null;
+        return new String(strPool, 0, off >>> 8, off & 0xff);  // ASCII
+    }
+}
--- a/jdk/src/share/classes/java/util/regex/Pattern.java Tue May 18 13:12:46 2010 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java Tue May 18 15:36:47 2010 -0700
@@ -29,6 +29,7 @@
 import java.security.PrivilegedAction;
 import java.text.CharacterIterator;
 import java.text.Normalizer;
+import java.util.Locale;
 import java.util.Map;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -200,8 +201,9 @@
  * <td>Equivalent to java.lang.Character.isMirrored()</td></tr>
  *
  * <tr><th> </th></tr>
- * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode blocks and categories</th></tr>
- *
+ * <tr align="left"><th colspan="2" id="unicode">Classes for Unicode scripts, blocks and categories</th></tr>
+ * <tr><td valign="top" headers="construct unicode"><tt>\p{IsLatin}</tt></td>
+ * <td headers="matches">A Latin script character (simple <a href="#ubc">script</a>)</td></tr>
  * <tr><td valign="top" headers="construct unicode"><tt>\p{InGreek}</tt></td>
  * <td headers="matches">A character in the Greek block (simple <a href="#ubc">block</a>)</td></tr>
  * <tr><td valign="top" headers="construct unicode"><tt>\p{Lu}</tt></td>
@@ -527,25 +529,40 @@
  * while not equal, compile into the same pattern, which matches the character
  * with hexadecimal value <tt>0x2014</tt>.
  *
- * <a name="ubc"> <p>Unicode blocks and categories are written with the
- * <tt>\p</tt> and <tt>\P</tt> constructs as in
- * Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if the input has the
- * property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt> does not match if
- * the input has that property. Blocks are specified with the prefix
- * <tt>In</tt>, as in <tt>InMongolian</tt>. Categories may be specified with
- * the optional prefix <tt>Is</tt>: Both <tt>\p{L}</tt> and <tt>\p{IsL}</tt>
- * denote the category of Unicode letters. Blocks and categories can be used
- * both inside and outside of a character class.
- *
+ * <a name="ubc">
+ * <p>Unicode scripts, blocks and categories are written with the <tt>\p</tt> and
+ * <tt>\P</tt> constructs as in Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if
+ * the input has the property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt>
+ * does not match if the input has that property.
+ * <p>
+ * Scripts are specified either with the prefix {@code Is}, as in
+ * {@code IsHiragana}, or by using the {@code script} keyword (or its short
+ * form {@code sc}) as in {@code script=Hiragana} or {@code sc=Hiragana}.
+ * <p>
+ * Blocks are specified with the prefix {@code In}, as in
+ * {@code InMongolian}, or by using the keyword {@code block} (or its short
+ * form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}.
+ * <p>
+ * Categories may be specified with the optional prefix {@code Is}:
+ * Both {@code \p{L}} and {@code \p{IsL}} denote the category of Unicode
+ * letters. Same as scripts and blocks, categories can also be specified
+ * by using the keyword {@code general_category} (or its short form
+ * {@code gc}) as in {@code general_category=Lu} or {@code gc=Lu}.
+ * <p>
+ * Scripts, blocks and categories can be used both inside and outside of a
+ * character class.
  * <p> The supported categories are those of
  * <a href="http://www.unicode.org/unicode/standard/standard.html">
  * <i>The Unicode Standard</i></a> in the version specified by the
  * {@link java.lang.Character Character} class. The category names are those
  * defined in the Standard, both normative and informative.
+ * The script names supported by <code>Pattern</code> are the valid script names
+ * accepted and defined by
+ * {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}.
  * The block names supported by <code>Pattern</code> are the valid block names
  * accepted and defined by
  * {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}.
- *
+ * <p>
  * <a name="jcc"> <p>Categories that behave like the java.lang.Character
  * boolean is<i>methodname</i> methods (except for the deprecated ones) are
  * available through the same <tt>\p{</tt><i>prop</i><tt>}</tt> syntax where
@@ -2488,12 +2505,34 @@
             name = new String(temp, i, j-i-1);
         }
 
-        if (name.startsWith("In")) {
-            node = unicodeBlockPropertyFor(name.substring(2));
+        int i = name.indexOf('=');
+        if (i != -1) {
+            // property construct \p{name=value}
+            String value = name.substring(i + 1);
+            name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
+            if ("sc".equals(name) || "script".equals(name)) {
+                node = unicodeScriptPropertyFor(value);
+            } else if ("blk".equals(name) || "block".equals(name)) {
+                node = unicodeBlockPropertyFor(value);
+            } else if ("gc".equals(name) || "general_category".equals(name)) {
+                node = charPropertyNodeFor(value);
+            } else {
+                throw error("Unknown Unicode property {name=<" + name + ">, "
+                             + "value=<" + value + ">}");
+            }
         } else {
-            if (name.startsWith("Is"))
+            if (name.startsWith("In")) {
+                // \p{inBlockName}
+                node = unicodeBlockPropertyFor(name.substring(2));
+            } else if (name.startsWith("Is")) {
+                // \p{isGeneralCategory} and \p{isScriptName}
                 name = name.substring(2);
-            node = charPropertyNodeFor(name);
+                node = CharPropertyNames.charPropertyFor(name);
+                if (node == null)
+                    node = unicodeScriptPropertyFor(name);
+            } else {
+                node = charPropertyNodeFor(name);
+            }
         }
         if (maybeComplement) {
             if (node instanceof Category || node instanceof Block)
@@ -2503,6 +2542,21 @@
         return node;
     }
 
+
+    /**
+     * Returns a CharProperty matching all characters belonging to
+     * a UnicodeScript; an unknown script name is reported through error().
+     */
+    private CharProperty unicodeScriptPropertyFor(String name) {
+        final Character.UnicodeScript script;
+        try {
+            script = Character.UnicodeScript.forName(name);
+        } catch (IllegalArgumentException iae) {
+            throw error("Unknown character script name {" + name + "}");
+        }
+        return new Script(script);
+    }
+
     /**
      * Returns a CharProperty matching all characters in a UnicodeBlock.
      */
@@ -3567,6 +3621,19 @@
     }
 
     /**
+     * Node class that matches code points whose Unicode script is {@code script}.
+     */
+    static final class Script extends CharProperty {
+        final Character.UnicodeScript script;
+        Script(Character.UnicodeScript script) {
+            this.script = script;
+        }
+        boolean isSatisfiedBy(int ch) {
+            return script == Character.UnicodeScript.of(ch);
+        }
+    }
+
+    /**
      * Node class that matches a Unicode category.
      */
     static final class Category extends CharProperty {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/lang/Character/CheckScript.java Tue May 18 15:36:47 2010 -0700
@@ -0,0 +1,132 @@
+/**
+ * @test
+ * @bug 6945564
+ * @summary Check Character.UnicodeScript.of() against the ranges listed in Scripts.txt
+ * @ignore don't run until #6903266 is integrated
+ */
+
+import java.io.*;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.regex.*;
+import java.lang.Character.UnicodeScript;
+
+public class CheckScript {
+
+    /**
+     * Verifies Character.UnicodeScript.of() against a Scripts.txt data file.
+     *
+     * @param args exactly one element: the path of the Scripts.txt file
+     * @throws RuntimeException on the first code point whose script disagrees
+     *         with the data file
+     */
+    public static void main(String[] args) throws Exception {
+
+        if (args.length != 1) {
+            System.out.println("java CheckScript Scripts.txt");
+            System.exit(1);
+        }
+        Map<String, List<Integer>> scripts = readRanges(args[0]);
+
+        // check all defined ranges
+        for (Map.Entry<String, List<Integer>> e : scripts.entrySet()) {
+            String name = e.getKey();
+            List<Integer> ranges = e.getValue();
+            System.out.println("Checking " + name + "...");
+            UnicodeScript expected = UnicodeScript.forName(name);
+
+            for (int off = 0; off < ranges.size(); off += 2) {
+                int start = ranges.get(off);
+                int end = ranges.get(off + 1);
+                for (int cp = start; cp <= end; cp++) {
+                    UnicodeScript script = UnicodeScript.of(cp);
+                    if (script != expected) {
+                        throw new RuntimeException(
+                            "UnicodeScript failed: cp=" +
+                            Integer.toHexString(cp) +
+                            ", of(cp)=<" + script + "> but <" +
+                            expected + "> is expected");
+                    }
+                }
+            }
+        }
+
+        // check all codepoints, U+10FFFF included
+        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
+            UnicodeScript script = UnicodeScript.of(cp);
+            if (script == UnicodeScript.UNKNOWN) {
+                // UNKNOWN is accepted only for unassigned, surrogate and
+                // private-use code points
+                int type = Character.getType(cp);
+                if (type != Character.UNASSIGNED &&
+                    type != Character.SURROGATE &&
+                    type != Character.PRIVATE_USE)
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" +
+                        Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but the code point is assigned, type=" + type);
+            } else {
+                // a script missing from the data file is a failure, not an NPE
+                List<Integer> ranges =
+                    scripts.get(script.name().toLowerCase(Locale.ENGLISH));
+                if (ranges == null || !contains(ranges, cp)) {
+                    throw new RuntimeException(
+                        "UnicodeScript failed: cp=" +
+                        Integer.toHexString(cp) +
+                        ", of(cp)=<" + script +
+                        "> but NOT in ranges of this script");
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses the data lines of a Scripts.txt file.
+     *
+     * @param file path of the file to read
+     * @return map from lower-cased script name to a flat list of
+     *         inclusive range bounds: start0, end0, start1, end1, ...
+     */
+    private static Map<String, List<Integer>> readRanges(String file)
+        throws IOException
+    {
+        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
+        Map<String, List<Integer>> scripts = new HashMap<>();
+        // try-with-resources closes the reader even when parsing fails
+        try (BufferedReader sbfr = new BufferedReader(new FileReader(file))) {
+            String line;
+            while ((line = sbfr.readLine()) != null) {
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2) == null) ? start
+                              : Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3).toLowerCase(Locale.ENGLISH);
+                    List<Integer> ranges = scripts.get(name);
+                    if (ranges == null) {
+                        ranges = new ArrayList<Integer>();
+                        scripts.put(name, ranges);
+                    }
+                    ranges.add(start);
+                    ranges.add(end);
+                }
+            }
+        }
+        return scripts;
+    }
+
+    /**
+     * Tells whether cp lies inside one of the inclusive [start, end]
+     * pairs stored back to back in ranges.
+     */
+    private static boolean contains(List<Integer> ranges, int cp) {
+        for (int off = 0; off < ranges.size(); off += 2) {
+            if (cp >= ranges.get(off) && cp <= ranges.get(off + 1))
+                return true;
+        }
+        return false;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jdk/test/java/lang/Character/Scripts.txt Tue May 18 15:36:47 2010 -0700 @@ -0,0 +1,1972 @@ +# Scripts-5.2.0.txt +# Date: 2009-08-22, 04:58:43 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2009 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: Script + +# All code points not explicitly listed for Script +# have the value Unknown (Zzzz). + +# @missing: 0000..10FFFF; Unknown + +# ================================================ + +0000..001F ; Common # Cc [32] <control-0000>..<control-001F> +0020 ; Common # Zs SPACE +0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Common # Sc DOLLAR SIGN +0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Common # Ps LEFT PARENTHESIS +0029 ; Common # Pe RIGHT PARENTHESIS +002A ; Common # Po ASTERISK +002B ; Common # Sm PLUS SIGN +002C ; Common # Po COMMA +002D ; Common # Pd HYPHEN-MINUS +002E..002F ; Common # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Common # Po [2] COLON..SEMICOLON +003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Common # Ps LEFT SQUARE BRACKET +005C ; Common # Po REVERSE SOLIDUS +005D ; Common # Pe RIGHT SQUARE BRACKET +005E ; Common # Sk CIRCUMFLEX ACCENT +005F ; Common # Pc LOW LINE +0060 ; Common # Sk GRAVE ACCENT +007B ; Common # Ps LEFT CURLY BRACKET +007C ; Common # Sm VERTICAL LINE +007D ; Common # Pe RIGHT CURLY BRACKET +007E ; Common # Sm TILDE +007F..009F ; Common # Cc [33] <control-007F>..<control-009F> +00A0 ; Common # Zs NO-BREAK SPACE +00A1 ; Common # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN +00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN +00A8 ; Common # Sk DIAERESIS 
+00A9 ; Common # So COPYRIGHT SIGN +00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Common # Sm NOT SIGN +00AD ; Common # Cf SOFT HYPHEN +00AE ; Common # So REGISTERED SIGN +00AF ; Common # Sk MACRON +00B0 ; Common # So DEGREE SIGN +00B1 ; Common # Sm PLUS-MINUS SIGN +00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; Common # Sk ACUTE ACCENT +00B5 ; Common # L& MICRO SIGN +00B6 ; Common # So PILCROW SIGN +00B7 ; Common # Po MIDDLE DOT +00B8 ; Common # Sk CEDILLA +00B9 ; Common # No SUPERSCRIPT ONE +00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; Common # Po INVERTED QUESTION MARK +00D7 ; Common # Sm MULTIPLICATION SIGN +00F7 ; Common # Sm DIVISION SIGN +02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Common # Lm MODIFIER LETTER VOICING +02ED ; Common # Sk MODIFIER LETTER UNASPIRATED +02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; Common # Lm GREEK NUMERAL SIGN +037E ; Common # Po GREEK QUESTION MARK +0385 ; Common # Sk GREEK DIALYTIKA TONOS +0387 ; Common # Po GREEK ANO TELEIA +0589 ; Common # Po ARMENIAN FULL STOP +0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +060C ; Common # Po ARABIC COMMA +061B ; Common # Po ARABIC SEMICOLON +061F ; Common # Po ARABIC QUESTION MARK +0640 ; Common # Lm ARABIC TATWEEL +0660..0669 ; Common # Nd [10] 
ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +06DD ; Common # Cf ARABIC END OF AYAH +0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN +0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT +0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR +16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP +1805 ; Common # Po MONGOLIAN FOUR DOTS +1CD3 ; Common # Po VEDIC SIGN NIHSHVASA +1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA +2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE +200B ; Common # Cf ZERO WIDTH SPACE +200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Common # Pi LEFT SINGLE QUOTATION MARK +2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK +201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT +2028 ; Common # Zl LINE SEPARATOR +2029 ; Common # Zp PARAGRAPH SEPARATOR +202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +202F ; 
Common # Zs NARROW NO-BREAK SPACE +2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET +2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Common # Sm FRACTION SLASH +2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Common # Sm COMMERCIAL MINUS SIGN +2053 ; Common # Po SWUNG DASH +2054 ; Common # Pc INVERTED UNDERTIE +2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; Common # Zs MEDIUM MATHEMATICAL SPACE +2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS +206A..206F ; Common # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2070 ; Common # No SUPERSCRIPT ZERO +2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS +2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS +20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN +2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; Common # L& DOUBLE-STRUCK CAPITAL C +2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA +2107 ; Common # L& EULER CONSTANT +2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT +210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2114 ; Common # So L B BAR SYMBOL +2115 ; Common # L& DOUBLE-STRUCK CAPITAL N +2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT 
CAPITAL P +2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE +2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z +2125 ; Common # So OUNCE SIGN +2127 ; Common # So INVERTED OHM SIGN +2128 ; Common # L& BLACK-LETTER CAPITAL Z +2129 ; Common # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212E ; Common # So ESTIMATED SYMBOL +212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Common # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; Common # L& INFORMATION SOURCE +213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; Common # So PROPERTY LINE +214B ; Common # Sm TURNED AMPERSAND +214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB +214F ; Common # So SYMBOL FOR SAMARITAN SOURCE +2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; Common # No VULGAR FRACTION ZERO THIRDS +2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Common # 
Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Common # So DOWNWARDS DOUBLE ARROW +21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE +2308..230B ; Common # Sm [4] LEFT CEILING..RIGHT FLOOR +230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Common # So [7] FROWN..KEYBOARD +2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET +232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL +2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE 
+25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Common # Sm MUSIC SHARP SIGN +2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR +26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2 +26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE +26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS +2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE +270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR +2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274D ; Common # So SHADOWED WHITE CIRCLE +274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE +2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET +2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET 
ORNAMENT +2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE +27CC ; Common # Sm LONG DIVISION +27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Common # Ps LEFT WHITE CURLY BRACKET +2984 ; Common # Pe RIGHT WHITE CURLY BRACKET +2985 ; Common # Ps LEFT WHITE PARENTHESIS +2986 ; Common # Pe RIGHT WHITE PARENTHESIS +2987 ; 
Common # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Common # Ps LEFT WIGGLY FENCE +29D9 ; Common # Pe RIGHT WIGGLY FENCE +29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE +2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED 
SUBSTITUTION MARKER +2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Common # Po RAISED SQUARE +2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Common # Pd HYPHEN WITH DIAERESIS +2E1B ; Common # Po TILDE WITH RING ABOVE +2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Common # Ps TOP LEFT HALF BRACKET +2E23 ; Common # Pe TOP RIGHT HALF BRACKET +2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Common # Lm VERTICAL TILDE +2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3000 ; Common # Zs IDEOGRAPHIC SPACE +3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK +3008 ; Common # Ps LEFT ANGLE BRACKET +3009 ; Common # Pe 
RIGHT ANGLE BRACKET +300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Common # Ps LEFT CORNER BRACKET +300D ; Common # Pe RIGHT CORNER BRACKET +300E ; Common # Ps LEFT WHITE CORNER BRACKET +300F ; Common # Pe RIGHT WHITE CORNER BRACKET +3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK +3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Common # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Common # Ps LEFT WHITE SQUARE BRACKET +301B ; Common # Pe RIGHT WHITE SQUARE BRACKET +301C ; Common # Pd WAVE DASH +301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Common # So POSTAL MARK FACE +3030 ; Common # Pd WAVY DASH +3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303C ; Common # Lo MASU MARK +303D ; Common # Po PART ALTERNATION MARK +303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; Common # Po KATAKANA MIDDLE DOT +30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 
+31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327F ; Common # So KOREAN STANDARD SYMBOL +3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN +3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL +4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Common # Sc NORTH INDIC RUPEE MARK +A839 ; Common # So NORTH INDIC QUANTITY MARK +FD3E ; Common # Ps ORNATE LEFT PARENTHESIS +FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS +FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM +FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; Common # Pe PRESENTATION 
FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; Common # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP 
+FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; Common # Pd SMALL EM DASH +FE59 ; Common # Ps SMALL LEFT PARENTHESIS +FE5A ; Common # Pe SMALL RIGHT PARENTHESIS +FE5B ; Common # Ps SMALL LEFT CURLY BRACKET +FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET +FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; Common # Sm SMALL PLUS SIGN +FE63 ; Common # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Common # Po SMALL REVERSE SOLIDUS +FE69 ; Common # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; Common # Po FULLWIDTH ASTERISK +FF0B ; Common # Sm FULLWIDTH PLUS SIGN +FF0C ; Common # Po FULLWIDTH COMMA +FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; Common # Pc FULLWIDTH LOW LINE +FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT +FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; Common # Sm FULLWIDTH VERTICAL LINE +FF5D ; 
Common # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; Common # Sm FULLWIDTH TILDE +FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; Common # Sm FULLWIDTH NOT SIGN +FFE3 ; Common # Sk FULLWIDTH MACRON +FFE4 ; Common # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT +10102 ; Common # So AEGEAN CHECK MARK +10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN +101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE 
HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1DD ; Common # So [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS +1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D371 ; Common # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE +1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 
; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 
+1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ +1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B +1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N +1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P +1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S +1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W +1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV +1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H +1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P +1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J +1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M +1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA +1F190 ; Common # So SQUARE DJ +1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253 +1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +E0001 ; Common # Cf LANGUAGE TAG +E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 5395 + +# 
================================================ + +0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Latin # L& FEMININE ORDINAL INDICATOR +00BA ; Latin # L& MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN +1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN +1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V +1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH +1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP +2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N 
+2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Latin # L& TURNED CAPITAL F +214E ; Latin # L& TURNED SMALL F +2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J +2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V +2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Latin # Lm MODIFIER LETTER US +A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO +A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z + +# Total code points: 1244 + +# ================================================ + +0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Greek # Lm GREEK YPOGEGRAMMENI +037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0384 ; Greek # Sk GREEK TONOS +0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; 
Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI +03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI +1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI +1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI +1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA +1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH 
PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; Greek # Sk GREEK KORONIS +1FBE ; Greek # L& GREEK PROSGEGRAMMENI +1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA +2126 ; Greek # L& OHM SIGN +10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A ; Greek # No GREEK ZERO SIGN +1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245 ; Greek # So GREEK MUSICAL LEIMMA + +# Total code points: 511 + +# ================================================ + +0400..0481 ; Cyrillic # L& [130] 
CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA +0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN +0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION +0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE +0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER +1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL +1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN +2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN +A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673 ; Cyrillic # Po SLAVONIC ASTERISK +A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67E ; Cyrillic # Po CYRILLIC KAVYKA +A67F ; Cyrillic # Lm CYRILLIC PAYEROK +A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE + +# Total code points: 404 + +# ================================================ + +0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +058A ; Armenian # Pd ARMENIAN HYPHEN +FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH + +# Total code points: 90 + +# 
================================================ + +0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF +05BF ; Hebrew # Mn HEBREW POINT RAFE +05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ +05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ +05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA +05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 133 + +# ================================================ + +0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; Arabic # Sc AFGHANI SIGN +060D ; Arabic # Po ARABIC DATE SEPARATOR +060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +0610..061A ; Arabic # Mn [11] 
ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK +0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS +066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; Arabic # Po ARABIC FULL STOP +06D5 ; Arabic # Lo ARABIC LETTER AE +06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DE ; Arabic # Me ARABIC START OF RUB EL HIZB +06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06E9 ; Arabic # So ARABIC PLACE OF SAJDAH +06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V +0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN 
ISOLATED FORM +FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; Arabic # Sc RIAL SIGN +FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + +# Total code points: 1030 + +# ================================================ + +0700..070D ; Syriac # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F ; Syriac # Cf SYRIAC ABBREVIATION MARK +0710 ; Syriac # Lo SYRIAC LETTER ALAPH +0711 ; Syriac # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE + +# Total code points: 77 + +# ================================================ + +0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07A6..07B0 ; Thaana # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; Thaana # Lo THAANA LETTER NAA + +# Total code points: 50 + +# ================================================ + +0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA +093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 
+0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA +094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E +0950 ; Devanagari # Lo DEVANAGARI OM +0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E +0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A +0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA +A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE + +# Total code points: 140 + +# ================================================ + +0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Bengali # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Bengali # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Bengali # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Bengali # Lo BENGALI LETTER LA +09B6..09B9 ; Bengali # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; Bengali # Mn BENGALI SIGN NUKTA +09BD ; Bengali # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; Bengali # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Bengali # Mn [4] BENGALI VOWEL 
SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Bengali # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Bengali # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; Bengali # Mn BENGALI SIGN VIRAMA +09CE ; Bengali # Lo BENGALI LETTER KHANDA TA +09D7 ; Bengali # Mc BENGALI AU LENGTH MARK +09DC..09DD ; Bengali # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Bengali # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; Bengali # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; Bengali # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; Bengali # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; Bengali # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; Bengali # So BENGALI ISSHAR +09FB ; Bengali # Sc BENGALI GANDA MARK + +# Total code points: 92 + +# ================================================ + +0A01..0A02 ; Gurmukhi # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Gurmukhi # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Gurmukhi # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Gurmukhi # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Gurmukhi # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Gurmukhi # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Gurmukhi # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Gurmukhi # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Gurmukhi # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; Gurmukhi # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; Gurmukhi # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Gurmukhi # Mn [3] 
GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Gurmukhi # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; Gurmukhi # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Gurmukhi # Lo GURMUKHI LETTER FA +0A66..0A6F ; Gurmukhi # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; Gurmukhi # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; Gurmukhi # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; Gurmukhi # Mn GURMUKHI SIGN YAKASH + +# Total code points: 79 + +# ================================================ + +0A81..0A82 ; Gujarati # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Gujarati # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Gujarati # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Gujarati # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Gujarati # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Gujarati # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Gujarati # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Gujarati # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; Gujarati # Mn GUJARATI SIGN NUKTA +0ABD ; Gujarati # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Gujarati # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Gujarati # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Gujarati # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Gujarati # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; Gujarati # Mn GUJARATI SIGN VIRAMA +0AD0 ; Gujarati # Lo GUJARATI OM +0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN + +# Total code points: 83 + +# 
================================================ + +0B01 ; Oriya # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Oriya # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Oriya # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Oriya # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Oriya # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Oriya # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Oriya # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Oriya # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; Oriya # Mn ORIYA SIGN NUKTA +0B3D ; Oriya # Lo ORIYA SIGN AVAGRAHA +0B3E ; Oriya # Mc ORIYA VOWEL SIGN AA +0B3F ; Oriya # Mn ORIYA VOWEL SIGN I +0B40 ; Oriya # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Oriya # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; Oriya # Mn ORIYA SIGN VIRAMA +0B56 ; Oriya # Mn ORIYA AI LENGTH MARK +0B57 ; Oriya # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; Oriya # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; Oriya # So ORIYA ISSHAR +0B71 ; Oriya # Lo ORIYA LETTER WA + +# Total code points: 84 + +# ================================================ + +0B82 ; Tamil # Mn TAMIL SIGN ANUSVARA +0B83 ; Tamil # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Tamil # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Tamil # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Tamil # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Tamil # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Tamil # Lo TAMIL LETTER JA +0B9E..0B9F ; Tamil # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Tamil # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; 
Tamil # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Tamil # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; Tamil # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Tamil # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Tamil # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Tamil # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Tamil # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; Tamil # Mn TAMIL SIGN VIRAMA +0BD0 ; Tamil # Lo TAMIL OM +0BD7 ; Tamil # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; Tamil # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Tamil # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; Tamil # Sc TAMIL RUPEE SIGN +0BFA ; Tamil # So TAMIL NUMBER SIGN + +# Total code points: 72 + +# ================================================ + +0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA +0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA +0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Telugu # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C59 ; Telugu # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA +0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU 
DIGIT NINE +0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; Telugu # So TELUGU SIGN TUUMU + +# Total code points: 93 + +# ================================================ + +0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Kannada # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Kannada # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Kannada # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; Kannada # Mn KANNADA SIGN NUKTA +0CBD ; Kannada # Lo KANNADA SIGN AVAGRAHA +0CBE ; Kannada # Mc KANNADA VOWEL SIGN AA +0CBF ; Kannada # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Kannada # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Kannada # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Kannada # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDE ; Kannada # Lo KANNADA LETTER FA +0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE + +# Total code points: 84 + +# ================================================ + +0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA +0D2A..0D39 ; Malayalam # Lo [16] 
MALAYALAM LETTER PA..MALAYALAM LETTER HA +0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA +0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK +0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS +0D79 ; Malayalam # So MALAYALAM DATE MARK +0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K + +# Total code points: 95 + +# ================================================ + +0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Sinhala # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Sinhala # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Sinhala # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; Sinhala # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; Sinhala # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA 
GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA + +# Total code points: 80 + +# ================================================ + +0E01..0E30 ; Thai # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; Thai # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; Thai # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; Thai # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; Thai # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Thai # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; Thai # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E4F ; Thai # Po THAI CHARACTER FONGMAN +0E50..0E59 ; Thai # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; Thai # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT + +# Total code points: 86 + +# ================================================ + +0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Lao # Lo LAO LETTER KHO TAM +0E87..0E88 ; Lao # Lo [2] LAO LETTER NGO..LAO LETTER CO +0E8A ; Lao # Lo LAO LETTER SO TAM +0E8D ; Lao # Lo LAO LETTER NYO +0E94..0E97 ; Lao # Lo [4] LAO LETTER DO..LAO LETTER THO TAM +0E99..0E9F ; Lao # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG +0EA1..0EA3 ; Lao # Lo [3] LAO LETTER MO..LAO LETTER LO LING +0EA5 ; Lao # Lo LAO LETTER LO LOOT +0EA7 ; Lao # Lo LAO LETTER WO +0EAA..0EAB ; Lao # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG +0EAD..0EB0 ; Lao # Lo [4] LAO LETTER O..LAO VOWEL SIGN A +0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EB9 ; Lao # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Lao # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Lao # Lm LAO KO LA +0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 
+0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO + +# Total code points: 65 + +# ================================================ + +0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; Tibetan # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; Tibetan # So TIBETAN MARK BSDUS RTAGS +0F35 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F36 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38 ; Tibetan # So TIBETAN MARK CHE MGO +0F39 ; Tibetan # Mn TIBETAN MARK TSA -PHRU +0F3A ; Tibetan # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; Tibetan # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; Tibetan # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; Tibetan # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; Tibetan # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; Tibetan # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Tibetan # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; Tibetan # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Tibetan # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F85 ; Tibetan # Po TIBETAN MARK PALUTA +0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS 
+0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA + +# Total code points: 201 + +# ================================================ + +1000..102A ; Myanmar # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Myanmar # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Myanmar # Mc MYANMAR VOWEL SIGN E +1032..1037 ; Myanmar # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; Myanmar # Mc MYANMAR SIGN VISARGA +1039..103A ; Myanmar # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; Myanmar # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Myanmar # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; Myanmar # Lo MYANMAR LETTER GREAT SA +1040..1049 ; Myanmar # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; Myanmar # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; Myanmar # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; Myanmar # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; Myanmar # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA 
+1061 ; Myanmar # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Myanmar # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Myanmar # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Myanmar # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Myanmar # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; Myanmar # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; Myanmar # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Myanmar # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Myanmar # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; Myanmar # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Myanmar # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; Myanmar # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI +109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA +AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE + +# Total code points: 188 + +# ================================================ + +10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL 
LETTER AN..GEORGIAN CAPITAL LETTER HOE +10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR +2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE + +# Total code points: 120 + +# ================================================ + +1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U +A960..A97C ; Hangul # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +AC00..D7A3 ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Hangul # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Hangul # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +FFA0..FFBE ; Hangul # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I + +# Total code points: 11737 + +# ================================================ + +1200..1248 ; Ethiopic # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Ethiopic # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Ethiopic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE 
XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Ethiopic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Ethiopic # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK +1360 ; Ethiopic # So ETHIOPIC SECTION MARK +1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +2D80..2D96 ; Ethiopic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO + +# Total code points: 461 + +# ================================================ + +13A0..13F4 ; Cherokee # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV + +# Total 
code points: 85 + +# ================================================ + +1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D..166E ; Canadian_Aboriginal # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S + +# Total code points: 710 + +# ================================================ + +1680 ; Ogham # Zs OGHAM SPACE MARK +1681..169A ; Ogham # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; Ogham # Ps OGHAM FEATHER MARK +169C ; Ogham # Pe OGHAM REVERSED FEATHER MARK + +# Total code points: 29 + +# ================================================ + +16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL + +# Total code points: 78 + +# ================================================ + +1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; Khmer # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Khmer # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Khmer # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; Khmer # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D4..17D6 ; Khmer # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; Khmer # Lm KHMER SIGN LEK TOO +17D8..17DA ; Khmer # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB ; Khmer # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; Khmer # Lo KHMER SIGN AVAKRAHASANYA +17DD ; Khmer # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; Khmer # Nd [10] KHMER DIGIT ZERO..KHMER 
DIGIT NINE +17F0..17F9 ; Khmer # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +19E0..19FF ; Khmer # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC + +# Total code points: 146 + +# ================================================ + +1800..1801 ; Mongolian # Po [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS +1804 ; Mongolian # Po MONGOLIAN COLON +1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Mongolian # Zs MONGOLIAN VOWEL SEPARATOR +1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA +1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA + +# Total code points: 153 + +# ================================================ + +3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI +1F200 ; Hiragana # So SQUARE HIRAGANA HOKA + +# Total code points: 90 + +# ================================================ + +30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FD..30FE ; Katakana # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK +30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO +31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO 
+3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO +FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N + +# Total code points: 299 + +# ================================================ + +3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH +31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H + +# Total code points: 65 + +# ================================================ + +2E80..2E99 ; Han # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Han # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +3005 ; Han # Lm IDEOGRAPHIC ITERATION MARK +3007 ; Han # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D +FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 75738 + +# ================================================ + +A000..A014 ; Yi # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; Yi # Lm YI SYLLABLE WU +A016..A48C ; Yi # Lo [1143] 
YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE + +# Total code points: 1220 + +# ================================================ + +10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU +10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY + +# Total code points: 35 + +# ================================================ + +10330..10340 ; Gothic # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; Gothic # Nl GOTHIC LETTER NINETY +10342..10349 ; Gothic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Gothic # Nl GOTHIC LETTER NINE HUNDRED + +# Total code points: 27 + +# ================================================ + +10400..1044F ; Deseret # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW + +# Total code points: 80 + +# ================================================ + +0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA +064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW +0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF +0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA +1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Inherited # Mn VEDIC SIGN TIRYAK +1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z +1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +20D0..20DC ; Inherited # Mn [13] COMBINING LEFT 
HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON +101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 523 + +# ================================================ + +1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA +170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA +1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA + +# Total code points: 20 + +# ================================================ + +1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD + +# Total code points: 21 + +# ================================================ + +1740..1751 ; Buhid # Lo [18] BUHID 
LETTER A..BUHID LETTER HA +1752..1753 ; Buhid # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U + +# Total code points: 20 + +# ================================================ + +1760..176C ; Tagbanwa # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Tagbanwa # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; Tagbanwa # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U + +# Total code points: 18 + +# ================================================ + +1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA +1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Limbu # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Limbu # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Limbu # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Limbu # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; Limbu # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1940 ; Limbu # So LIMBU SIGN LOO +1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE + +# Total code points: 66 + +# ================================================ + +1950..196D ; Tai_Le # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Tai_Le # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 + +# Total code points: 35 + +# ================================================ + +10000..1000B ; Linear_B # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Linear_B # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Linear_B # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Linear_B # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Linear_B # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR 
B SYLLABLE B091 TWO +10050..1005D ; Linear_B # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 + +# Total code points: 211 + +# ================================================ + +10380..1039D ; Ugaritic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; Ugaritic # Po UGARITIC WORD DIVIDER + +# Total code points: 31 + +# ================================================ + +10450..1047F ; Shavian # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW + +# Total code points: 48 + +# ================================================ + +10480..1049D ; Osmanya # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9 ; Osmanya # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE + +# Total code points: 40 + +# ================================================ + +10800..10805 ; Cypriot # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Cypriot # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Cypriot # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Cypriot # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Cypriot # Lo CYPRIOT SYLLABLE ZA +1083F ; Cypriot # Lo CYPRIOT SYLLABLE ZO + +# Total code points: 55 + +# ================================================ + +2800..28FF ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 + +# Total code points: 256 + +# ================================================ + +1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1B ; Buginese # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE +1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION + +# Total code points: 30 + +# ================================================ + +03E2..03EF ; Coptic # L& [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI +2C80..2CE4 ; Coptic # L& [101] COPTIC CAPITAL LETTER 
ALFA..COPTIC SYMBOL KAI +2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; Coptic # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER + +# Total code points: 135 + +# ================================================ + +1980..19AB ; New_Tai_Lue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY +19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B +19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE +19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV + +# Total code points: 83 + +# ================================================ + +2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE + +# Total code points: 94 + +# ================================================ + +2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK + +# Total code points: 55 + +# ================================================ + +A800..A801 ; Syloti_Nagri # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; Syloti_Nagri # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN 
HASANTA +A807..A80A ; Syloti_Nagri # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; Syloti_Nagri # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 + +# Total code points: 44 + +# ================================================ + +103A0..103C3 ; Old_Persian # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Old_Persian # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; Old_Persian # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; Old_Persian # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED + +# Total code points: 50 + +# ================================================ + +10A00 ; Kharoshthi # Lo KHAROSHTHI LETTER A +10A01..10A03 ; Kharoshthi # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Kharoshthi # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Kharoshthi # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; Kharoshthi # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Kharoshthi # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A33 ; Kharoshthi # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA +10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA +10A40..10A47 ; Kharoshthi # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND +10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES + +# Total code points: 65 + +# ================================================ + +1B00..1B03 ; 
Balinese # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Balinese # Mc BALINESE SIGN BISAH +1B05..1B33 ; Balinese # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; Balinese # Mn BALINESE SIGN REREKAN +1B35 ; Balinese # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Balinese # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Balinese # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Balinese # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK +1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING + +# Total code points: 121 + +# ================================================ + +12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM +12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER +12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON + +# Total code points: 982 + +# ================================================ + +10900..10915 ; Phoenician # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; Phoenician # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; Phoenician # Po PHOENICIAN WORD SEPARATOR + +# Total code points: 
29 + +# ================================================ + +A840..A873 ; Phags_Pa # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD + +# Total code points: 56 + +# ================================================ + +07C0..07C9 ; Nko # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; Nko # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; Nko # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Nko # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; Nko # So NKO SYMBOL OO DENNEN +07F7..07F9 ; Nko # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; Nko # Lm NKO LAJANYALAN + +# Total code points: 59 + +# ================================================ + +1B80..1B81 ; Sundanese # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Sundanese # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Sundanese # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Sundanese # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Sundanese # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE + +# Total code points: 55 + +# ================================================ + +1C00..1C23 ; Lepcha # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Lepcha # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Lepcha # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Lepcha # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; Lepcha # Mn [2] LEPCHA SIGN 
RAN..LEPCHA SIGN NUKTA +1C3B..1C3F ; Lepcha # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; Lepcha # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; Lepcha # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA + +# Total code points: 74 + +# ================================================ + +1C50..1C59 ; Ol_Chiki # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; Ol_Chiki # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Ol_Chiki # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; Ol_Chiki # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD + +# Total code points: 48 + +# ================================================ + +A500..A60B ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Vai # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; Vai # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; Vai # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; Vai # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO + +# Total code points: 300 + +# ================================================ + +A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA +A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE + +# Total code points: 81 + +# ================================================ + +A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA + +# 
Total code points: 48 + +# ================================================ + +A930..A946 ; Rejang # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; Rejang # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; Rejang # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; Rejang # Po REJANG SECTION MARK + +# Total code points: 37 + +# ================================================ + +10280..1029C ; Lycian # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X + +# Total code points: 29 + +# ================================================ + +102A0..102D0 ; Carian # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 + +# Total code points: 49 + +# ================================================ + +10920..10939 ; Lydian # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; Lydian # Po LYDIAN TRIANGULAR MARK + +# Total code points: 27 + +# ================================================ + +AA00..AA28 ; Cham # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; Cham # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Cham # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Cham # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Cham # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Cham # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; Cham # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; Cham # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; Cham # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; Cham # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Cham # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; Cham # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; Cham # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA + +# Total code points: 83 + +# ================================================ + +1A20..1A54 ; Tai_Tham # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Tai_Tham # Mn TAI THAM 
CONSONANT SIGN MEDIAL LA +1A57 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Tai_Tham # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Tai_Tham # Mn TAI THAM SIGN SAKOT +1A61 ; Tai_Tham # Mc TAI THAM VOWEL SIGN A +1A62 ; Tai_Tham # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Tai_Tham # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Tai_Tham # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Tai_Tham # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; Tai_Tham # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tai_Tham # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; Tai_Tham # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Tai_Tham # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; Tai_Tham # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; Tai_Tham # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; Tai_Tham # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG + +# Total code points: 127 + +# ================================================ + +AA80..AAAF ; Tai_Viet # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0 ; Tai_Viet # Mn TAI VIET MAI KANG +AAB1 ; Tai_Viet # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; Tai_Viet # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; Tai_Viet # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; Tai_Viet # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; Tai_Viet # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; Tai_Viet # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; Tai_Viet # Lo TAI VIET TONE MAI NUENG +AAC1 ; Tai_Viet # Mn TAI VIET TONE MAI THO +AAC2 ; Tai_Viet # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Tai_Viet # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Tai_Viet # Lm TAI VIET SYMBOL SAM +AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL 
KOI KOI + +# Total code points: 72 + +# ================================================ + +10B00..10B35 ; Avestan # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; Avestan # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION + +# Total code points: 61 + +# ================================================ + +13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 + +# Total code points: 1071 + +# ================================================ + +0800..0815 ; Samaritan # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; Samaritan # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; Samaritan # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Samaritan # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Samaritan # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Samaritan # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Samaritan # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; Samaritan # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0830..083E ; Samaritan # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU + +# Total code points: 61 + +# ================================================ + +A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP + +# Total code points: 48 + +# ================================================ + +A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK + +# Total code points: 88 + +# ================================================ + +A980..A982 ; Javanese # Mn [3] JAVANESE 
SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Javanese # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Javanese # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET +A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON +A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; Javanese # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN + +# Total code points: 91 + +# ================================================ + +ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; Meetei_Mayek # Po MEETEI MAYEK CHEIKHEI +ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK +ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE + +# Total code points: 56 + +# ================================================ + +10840..10855 ; Imperial_Aramaic # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857 ; Imperial_Aramaic # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; Imperial_Aramaic # No [8] IMPERIAL ARAMAIC 
NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND + +# Total code points: 31 + +# ================================================ + +10A60..10A7C ; Old_South_Arabian # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; Old_South_Arabian # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; Old_South_Arabian # Po OLD SOUTH ARABIAN NUMERIC INDICATOR + +# Total code points: 32 + +# ================================================ + +10B40..10B55 ; Inscriptional_Parthian # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; Inscriptional_Parthian # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND + +# Total code points: 30 + +# ================================================ + +10B60..10B72 ; Inscriptional_Pahlavi # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; Inscriptional_Pahlavi # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND + +# Total code points: 27 + +# ================================================ + +10C00..10C48 ; Old_Turkic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH + +# Total code points: 73 + +# ================================================ + +11080..11081 ; Kaithi # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; Kaithi # Mc KAITHI SIGN VISARGA +11083..110AF ; Kaithi # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Kaithi # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Kaithi # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Kaithi # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; Kaithi # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; Kaithi # Cf KAITHI NUMBER SIGN +110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA + +# 
Total code points: 66 + +# EOF
--- a/jdk/test/java/util/regex/RegExTest.java	Tue May 18 13:12:46 2010 -0700
+++ b/jdk/test/java/util/regex/RegExTest.java	Tue May 18 15:36:47 2010 -0700
@@ -32,7 +32,7 @@
  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
- * 6350801 6676425 6878475 6919132 6931676
+ * 6350801 6676425 6878475 6919132 6931676 6948903
  */
 
 import java.util.regex.*;
@@ -135,7 +135,7 @@
         surrogatesInClassTest();
         namedGroupCaptureTest();
         nonBmpClassComplementTest();
-
+        unicodePropertiesTest();
         if (failure)
             throw new RuntimeException("Failure in the RE handling.");
         else
@@ -3539,4 +3539,98 @@
         report("NonBmpClassComplement");
     }
 
+    /**
+     * Exercises the Unicode property forms accepted inside \p{...}.
+     *
+     * Part one checks that the alternative spellings of a general
+     * category (Lu), a script (Latin) and a block (BasicLatin) each match
+     * sample ASCII letters.  Part two walks the code point range and, for
+     * each code point visited, checks that the patterns built from
+     * Character.UnicodeScript.of(cp) and Character.UnicodeBlock.of(cp)
+     * match it while a pattern for a different script/block does not.
+     * Every mismatch increments failCount; report() is called at the end.
+     */
+    private static void unicodePropertiesTest() throws Exception {
+        // different forms
+        if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
+            !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
+            !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
+            !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
+            !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
+            failCount++;
+
+        // Matchers are reused across iterations: a new Pattern is compiled
+        // only when the script (or block) differs from the previous one.
+        Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
+        Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
+        Matcher lastSM = common;
+        Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
+
+        Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
+        Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
+        Matcher lastBM = latin;
+        Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
+
+        // Both ends of the range, U+0000 and U+10FFFF, are included.
+        for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
+            // From U+30000 up, skip the 16 code points of every 128 whose
+            // bits 4-6 are all zero; all others are still checked.
+            // NOTE(review): original remark was "only pick couple code points, they are the same".
+            if (cp >= 0x30000 && (cp & 0x70) == 0) {
+                continue;
+            }
+
+            // Unicode Script
+            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
+            Matcher m;
+            String str = new String(Character.toChars(cp));
+            if (script == lastScript) {
+                m = lastSM;
+                m.reset(str);
+            } else {
+                m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
+            }
+            if (!m.matches()) {
+                failCount++;
+            }
+            // Negative check: COMMON against Unknown, any other script against Common.
+            Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
+            other.reset(str);
+            if (other.matches()) {
+                failCount++;
+            }
+            lastSM = m;
+            lastScript = script;
+
+            // Unicode Block
+            Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
+            if (block == null) {
+                //System.out.printf("Not a Block: cp=%x%n", cp);
+                continue;
+            }
+            if (block == lastBlock) {
+                m = lastBM;
+                m.reset(str);
+            } else {
+                m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
+            }
+            if (!m.matches()) {
+                failCount++;
+            }
+            // Negative check: BASIC_LATIN against Greek, any other block against Basic Latin.
+            other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
+            other.reset(str);
+            if (other.matches()) {
+                failCount++;
+            }
+            lastBM = m;
+            lastBlock = block;
+        }
+        report("unicodeProperties");
+    }
 }