jdk/src/java.base/share/classes/java/util/regex/CharPredicates.java
author sherman
Fri, 20 May 2016 12:47:41 -0700
changeset 38450 516990ff3a4c
parent 37882 e7f3cf12e739
child 43502 aec39566b45e
permissions -rw-r--r--
8143282: \p{Cn} unassigned code points should be included in \p{C} Summary: to add unassigned cp support into \p{C} Reviewed-by: martin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     1
/*
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
     2
 * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     4
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    10
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    15
 * accompanied this code).
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    16
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    20
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    23
 * questions.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    24
 */
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    25
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    26
package java.util.regex;
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    27
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    28
import java.util.HashMap;
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    29
import java.util.Locale;
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    30
import java.util.regex.Pattern.CharPredicate;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    31
import java.util.regex.Pattern.BmpCharPredicate;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    32
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    33
class CharPredicates {
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    34
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    35
    static final CharPredicate ALPHABETIC  = Character::isAlphabetic;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    36
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    37
    // \p{gc=Decimal_Number}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    38
    static final CharPredicate DIGIT       = Character::isDigit;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    39
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    40
    static final CharPredicate LETTER      = Character::isLetter;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    41
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    42
    static final CharPredicate IDEOGRAPHIC = Character::isIdeographic;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    43
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    44
    static final CharPredicate LOWERCASE   = Character::isLowerCase;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    45
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    46
    static final CharPredicate UPPERCASE   = Character::isUpperCase;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    47
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    48
    static final CharPredicate TITLECASE   = Character::isTitleCase;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    49
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    50
    // \p{Whitespace}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    51
    static final CharPredicate WHITE_SPACE = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    52
        ((((1 << Character.SPACE_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    53
           (1 << Character.LINE_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    54
           (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    55
        != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    56
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    57
    // \p{gc=Control}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    58
    static final CharPredicate CONTROL     = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    59
        Character.getType(ch) == Character.CONTROL;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    60
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    61
    // \p{gc=Punctuation}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    62
    static final CharPredicate PUNCTUATION = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    63
        ((((1 << Character.CONNECTOR_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    64
           (1 << Character.DASH_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    65
           (1 << Character.START_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    66
           (1 << Character.END_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    67
           (1 << Character.OTHER_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    68
           (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    69
           (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    70
        != 0;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    71
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    72
    // \p{gc=Decimal_Number}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    73
    // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    74
    static final CharPredicate HEX_DIGIT = DIGIT.union(
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    75
        ch -> (ch >= 0x0030 && ch <= 0x0039) ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    76
              (ch >= 0x0041 && ch <= 0x0046) ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    77
              (ch >= 0x0061 && ch <= 0x0066) ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    78
              (ch >= 0xFF10 && ch <= 0xFF19) ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    79
              (ch >= 0xFF21 && ch <= 0xFF26) ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    80
              (ch >= 0xFF41 && ch <= 0xFF46));
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    81
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    82
    static final CharPredicate ASSIGNED = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    83
        Character.getType(ch) != Character.UNASSIGNED;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    84
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    85
    // PropList.txt:Noncharacter_Code_Point
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    86
    static final CharPredicate NONCHARACTER_CODE_POINT = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    87
        (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    88
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    89
    // \p{alpha}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    90
    // \p{digit}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    91
    static final CharPredicate ALNUM = ALPHABETIC.union(DIGIT);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    92
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    93
    // \p{Whitespace} --
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    94
    // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    95
    //  \p{gc=Line_Separator}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    96
    //  \p{gc=Paragraph_Separator}]
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    97
    static final CharPredicate BLANK = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    98
        Character.getType(ch) == Character.SPACE_SEPARATOR ||
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
    99
        ch == 0x9; // \N{HT}
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   100
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   101
    // [^
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   102
    //  \p{space}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   103
    //  \p{gc=Control}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   104
    //  \p{gc=Surrogate}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   105
    //  \p{gc=Unassigned}]
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   106
    static final CharPredicate GRAPH = ch ->
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   107
        ((((1 << Character.SPACE_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   108
           (1 << Character.LINE_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   109
           (1 << Character.PARAGRAPH_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   110
           (1 << Character.CONTROL) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   111
           (1 << Character.SURROGATE) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   112
           (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   113
        == 0;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   114
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   115
    // \p{graph}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   116
    // \p{blank}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   117
    // -- \p{cntrl}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   118
    static final CharPredicate PRINT = GRAPH.union(BLANK).and(CONTROL.negate());
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   119
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   120
    //  200C..200D    PropList.txt:Join_Control
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   121
    static final CharPredicate JOIN_CONTROL = ch -> ch == 0x200C || ch == 0x200D;
17434
4a04d7127e80 8013252: Regex Matcher .start and .end should be accessible by group name
sherman
parents: 9536
diff changeset
   122
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   123
    //  \p{alpha}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   124
    //  \p{gc=Mark}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   125
    //  \p{digit}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   126
    //  \p{gc=Connector_Punctuation}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   127
    //  \p{Join_Control}    200C..200D
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   128
    static final CharPredicate WORD =
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   129
        ALPHABETIC.union(ch -> ((((1 << Character.NON_SPACING_MARK) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   130
                                  (1 << Character.ENCLOSING_MARK) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   131
                                  (1 << Character.COMBINING_SPACING_MARK) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   132
                                  (1 << Character.DECIMAL_DIGIT_NUMBER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   133
                                  (1 << Character.CONNECTOR_PUNCTUATION))
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   134
                                 >> Character.getType(ch)) & 1) != 0,
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   135
                         JOIN_CONTROL);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   136
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   137
    /////////////////////////////////////////////////////////////////////////////
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   138
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   139
    private static final HashMap<String, CharPredicate> posix = new HashMap<>(12);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   140
    private static final HashMap<String, CharPredicate> uprops = new HashMap<>(18);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   141
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   142
    private static void defPosix(String name, CharPredicate p) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   143
        posix.put(name, p);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   144
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   145
    private static void defUProp(String name, CharPredicate p) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   146
        uprops.put(name, p);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   147
    }
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   148
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   149
    // Populates both lookup tables. Keys are upper-case because the lookup
    // methods upper-case the requested name before querying the maps.
    static {
        // POSIX character classes.
        defPosix("ALPHA", ALPHABETIC);
        defPosix("LOWER", LOWERCASE);
        defPosix("UPPER", UPPERCASE);
        defPosix("SPACE", WHITE_SPACE);
        defPosix("PUNCT", PUNCTUATION);
        defPosix("XDIGIT", HEX_DIGIT);
        defPosix("ALNUM", ALNUM);
        defPosix("CNTRL", CONTROL);
        defPosix("DIGIT", DIGIT);
        defPosix("BLANK", BLANK);
        defPosix("GRAPH", GRAPH);
        defPosix("PRINT", PRINT);

        // Unicode properties, names written without separators.
        defUProp("ALPHABETIC", ALPHABETIC);
        defUProp("ASSIGNED", ASSIGNED);
        defUProp("CONTROL", CONTROL);
        defUProp("HEXDIGIT", HEX_DIGIT);
        defUProp("IDEOGRAPHIC", IDEOGRAPHIC);
        defUProp("JOINCONTROL", JOIN_CONTROL);
        defUProp("LETTER", LETTER);
        defUProp("LOWERCASE", LOWERCASE);
        defUProp("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
        defUProp("TITLECASE", TITLECASE);
        defUProp("PUNCTUATION", PUNCTUATION);
        defUProp("UPPERCASE", UPPERCASE);
        defUProp("WHITESPACE", WHITE_SPACE);
        defUProp("WORD", WORD);

        // Underscore-separated spellings of the multi-word names above.
        defUProp("WHITE_SPACE", WHITE_SPACE);
        defUProp("HEX_DIGIT", HEX_DIGIT);
        defUProp("NONCHARACTER_CODE_POINT", NONCHARACTER_CODE_POINT);
        defUProp("JOIN_CONTROL", JOIN_CONTROL);
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   182
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   183
    public static CharPredicate forUnicodeProperty(String propName) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   184
        propName = propName.toUpperCase(Locale.ROOT);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   185
        CharPredicate p = uprops.get(propName);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   186
        if (p != null)
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   187
            return p;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   188
        return posix.get(propName);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   189
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   190
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   191
    public static CharPredicate forPOSIXName(String propName) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   192
        return posix.get(propName.toUpperCase(Locale.ENGLISH));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   193
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   194
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   195
    /////////////////////////////////////////////////////////////////////////////
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   196
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   197
    /**
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   198
     * Returns a predicate matching all characters belong to a named
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   199
     * UnicodeScript.
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   200
     */
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   201
    static CharPredicate forUnicodeScript(String name) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   202
        final Character.UnicodeScript script;
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   203
        try {
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   204
            script = Character.UnicodeScript.forName(name);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   205
            return ch -> script == Character.UnicodeScript.of(ch);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   206
        } catch (IllegalArgumentException iae) {}
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   207
        return null;
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   208
    }
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   209
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   210
    /**
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   211
     * Returns a predicate matching all characters in a UnicodeBlock.
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   212
     */
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   213
    static CharPredicate forUnicodeBlock(String name) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   214
        final Character.UnicodeBlock block;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   215
        try {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   216
            block = Character.UnicodeBlock.forName(name);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   217
            return ch -> block == Character.UnicodeBlock.of(ch);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   218
        } catch (IllegalArgumentException iae) {}
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   219
         return null;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   220
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   221
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   222
    /////////////////////////////////////////////////////////////////////////////
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   223
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   224
    // unicode categories, aliases, properties, java methods ...
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   225
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   226
    // Registry of named predicates, keyed by the exact (case-sensitive) name;
    // filled by the def* helpers and the static initializer, read by
    // forProperty(). Created with an initial capacity of 128.
    private static final HashMap<String, CharPredicate> props = new HashMap<>(128);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   227
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   228
    /**
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   229
     * Returns a predicate matching all characters in a named property.
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   230
     */
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   231
    static CharPredicate forProperty(String name) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   232
        return props.get(name);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   233
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   234
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   235
    private static void defProp(String name, CharPredicate p) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   236
        props.put(name, p);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   237
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   238
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   239
    private static void defCategory(String name, final int typeMask) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   240
        CharPredicate p = ch -> (typeMask & (1 << Character.getType(ch))) != 0;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   241
        props.put(name, p);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   242
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   243
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   244
    private static void defRange(String name, final int lower, final int upper) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   245
        BmpCharPredicate p = ch -> lower <= ch && ch <= upper;
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   246
        props.put(name, p);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   247
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   248
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   249
    private static void defCtype(String name, final int ctype) {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   250
        BmpCharPredicate p = ch -> ch < 128 && ASCII.isType(ch, ctype);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   251
        // PrintPattern.pmap.put(p, name);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   252
        props.put(name, p);
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   253
    }
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   254
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   255
    static {
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   256
        // Unicode character property aliases, defined in
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   257
        // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   258
        defCategory("Cn", 1<<Character.UNASSIGNED);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   259
        defCategory("Lu", 1<<Character.UPPERCASE_LETTER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   260
        defCategory("Ll", 1<<Character.LOWERCASE_LETTER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   261
        defCategory("Lt", 1<<Character.TITLECASE_LETTER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   262
        defCategory("Lm", 1<<Character.MODIFIER_LETTER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   263
        defCategory("Lo", 1<<Character.OTHER_LETTER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   264
        defCategory("Mn", 1<<Character.NON_SPACING_MARK);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   265
        defCategory("Me", 1<<Character.ENCLOSING_MARK);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   266
        defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   267
        defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   268
        defCategory("Nl", 1<<Character.LETTER_NUMBER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   269
        defCategory("No", 1<<Character.OTHER_NUMBER);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   270
        defCategory("Zs", 1<<Character.SPACE_SEPARATOR);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   271
        defCategory("Zl", 1<<Character.LINE_SEPARATOR);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   272
        defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   273
        defCategory("Cc", 1<<Character.CONTROL);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   274
        defCategory("Cf", 1<<Character.FORMAT);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   275
        defCategory("Co", 1<<Character.PRIVATE_USE);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   276
        defCategory("Cs", 1<<Character.SURROGATE);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   277
        defCategory("Pd", 1<<Character.DASH_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   278
        defCategory("Ps", 1<<Character.START_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   279
        defCategory("Pe", 1<<Character.END_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   280
        defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   281
        defCategory("Po", 1<<Character.OTHER_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   282
        defCategory("Sm", 1<<Character.MATH_SYMBOL);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   283
        defCategory("Sc", 1<<Character.CURRENCY_SYMBOL);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   284
        defCategory("Sk", 1<<Character.MODIFIER_SYMBOL);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   285
        defCategory("So", 1<<Character.OTHER_SYMBOL);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   286
        defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   287
        defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   288
        defCategory("L", ((1<<Character.UPPERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   289
                          (1<<Character.LOWERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   290
                          (1<<Character.TITLECASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   291
                          (1<<Character.MODIFIER_LETTER)  |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   292
                          (1<<Character.OTHER_LETTER)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   293
        defCategory("M", ((1<<Character.NON_SPACING_MARK) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   294
                          (1<<Character.ENCLOSING_MARK)   |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   295
                          (1<<Character.COMBINING_SPACING_MARK)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   296
        defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   297
                          (1<<Character.LETTER_NUMBER)        |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   298
                          (1<<Character.OTHER_NUMBER)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   299
        defCategory("Z", ((1<<Character.SPACE_SEPARATOR) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   300
                          (1<<Character.LINE_SEPARATOR)  |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   301
                          (1<<Character.PARAGRAPH_SEPARATOR)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   302
        defCategory("C", ((1<<Character.CONTROL)     |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   303
                          (1<<Character.FORMAT)      |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   304
                          (1<<Character.PRIVATE_USE) |
38450
516990ff3a4c 8143282: \p{Cn} unassigned code points should be included in \p{C}
sherman
parents: 37882
diff changeset
   305
                          (1<<Character.SURROGATE)   |
516990ff3a4c 8143282: \p{Cn} unassigned code points should be included in \p{C}
sherman
parents: 37882
diff changeset
   306
                          (1<<Character.UNASSIGNED))); // Other
37882
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   307
        defCategory("P", ((1<<Character.DASH_PUNCTUATION)      |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   308
                          (1<<Character.START_PUNCTUATION)     |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   309
                          (1<<Character.END_PUNCTUATION)       |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   310
                          (1<<Character.CONNECTOR_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   311
                          (1<<Character.OTHER_PUNCTUATION)     |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   312
                          (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   313
                          (1<<Character.FINAL_QUOTE_PUNCTUATION)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   314
        defCategory("S", ((1<<Character.MATH_SYMBOL)     |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   315
                          (1<<Character.CURRENCY_SYMBOL) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   316
                          (1<<Character.MODIFIER_SYMBOL) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   317
                          (1<<Character.OTHER_SYMBOL)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   318
        defCategory("LC", ((1<<Character.UPPERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   319
                           (1<<Character.LOWERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   320
                           (1<<Character.TITLECASE_LETTER)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   321
        defCategory("LD", ((1<<Character.UPPERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   322
                           (1<<Character.LOWERCASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   323
                           (1<<Character.TITLECASE_LETTER) |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   324
                           (1<<Character.MODIFIER_LETTER)  |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   325
                           (1<<Character.OTHER_LETTER)     |
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   326
                           (1<<Character.DECIMAL_DIGIT_NUMBER)));
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   327
        defRange("L1", 0x00, 0xFF); // Latin-1
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   328
        props.put("all", ch -> true);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   329
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   330
        // Posix regular expression character classes, defined in
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   331
        // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   332
        defRange("ASCII", 0x00, 0x7F);   // ASCII
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   333
        defCtype("Alnum", ASCII.ALNUM);  // Alphanumeric characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   334
        defCtype("Alpha", ASCII.ALPHA);  // Alphabetic characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   335
        defCtype("Blank", ASCII.BLANK);  // Space and tab characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   336
        defCtype("Cntrl", ASCII.CNTRL);  // Control characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   337
        defRange("Digit", '0', '9');     // Numeric characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   338
        defCtype("Graph", ASCII.GRAPH);  // printable and visible
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   339
        defRange("Lower", 'a', 'z');     // Lower-case alphabetic
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   340
        defRange("Print", 0x20, 0x7E);   // Printable characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   341
        defCtype("Punct", ASCII.PUNCT);  // Punctuation characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   342
        defCtype("Space", ASCII.SPACE);  // Space characters
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   343
        defRange("Upper", 'A', 'Z');     // Upper-case alphabetic
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   344
        defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   345
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   346
        // Java character properties, defined by methods in Character.java
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   347
        defProp("javaLowerCase", java.lang.Character::isLowerCase);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   348
        defProp("javaUpperCase",  Character::isUpperCase);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   349
        defProp("javaAlphabetic", java.lang.Character::isAlphabetic);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   350
        defProp("javaIdeographic", java.lang.Character::isIdeographic);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   351
        defProp("javaTitleCase", java.lang.Character::isTitleCase);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   352
        defProp("javaDigit", java.lang.Character::isDigit);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   353
        defProp("javaDefined", java.lang.Character::isDefined);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   354
        defProp("javaLetter", java.lang.Character::isLetter);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   355
        defProp("javaLetterOrDigit", java.lang.Character::isLetterOrDigit);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   356
        defProp("javaJavaIdentifierStart", java.lang.Character::isJavaIdentifierStart);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   357
        defProp("javaJavaIdentifierPart", java.lang.Character::isJavaIdentifierPart);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   358
        defProp("javaUnicodeIdentifierStart", java.lang.Character::isUnicodeIdentifierStart);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   359
        defProp("javaUnicodeIdentifierPart", java.lang.Character::isUnicodeIdentifierPart);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   360
        defProp("javaIdentifierIgnorable", java.lang.Character::isIdentifierIgnorable);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   361
        defProp("javaSpaceChar", java.lang.Character::isSpaceChar);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   362
        defProp("javaWhitespace", java.lang.Character::isWhitespace);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   363
        defProp("javaISOControl", java.lang.Character::isISOControl);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   364
        defProp("javaMirrored", java.lang.Character::isMirrored);
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   365
    }
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   366
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   367
    /////////////////////////////////////////////////////////////////////////////
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   368
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   369
    /**
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   370
     * POSIX ASCII variants, not in the lookup map
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   371
     */
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   372
    // Accepts a char only when its value is below 128 and ASCII.isDigit
    // reports true for it; anything at or above 128 is rejected up front.
    static final BmpCharPredicate ASCII_DIGIT = c -> {
        if (c >= 128) {
            return false;
        }
        return ASCII.isDigit(c);
    };
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   373
    // Accepts a char only when its value is below 128 and ASCII.isWord
    // reports true for it; anything at or above 128 is rejected up front.
    static final BmpCharPredicate ASCII_WORD = c -> {
        if (c >= 128) {
            return false;
        }
        return ASCII.isWord(c);
    };
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   374
    // Accepts a char only when its value is below 128 and ASCII.isSpace
    // reports true for it; anything at or above 128 is rejected up front.
    static final BmpCharPredicate ASCII_SPACE = c -> {
        if (c >= 128) {
            return false;
        }
        return ASCII.isSpace(c);
    };
e7f3cf12e739 6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents: 32649
diff changeset
   375
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
   376
}