author | naoto |
Thu, 23 May 2019 12:21:21 -0700 | |
changeset 55013 | 8dae495a59e7 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
1 |
/* |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
2 |
* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
4 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. Oracle designates this |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
8 |
* particular file as subject to the "Classpath" exception as provided |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
10 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
15 |
* accompanied this code). |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
16 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
17 |
* You should have received a copy of the GNU General Public License version |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
20 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
22 |
* or visit www.oracle.com if you need additional information or have any |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
23 |
* questions. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
24 |
*/ |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
25 |
|
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
26 |
package java.util.regex; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
27 |
|
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
28 |
import java.util.HashMap; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
29 |
import java.util.Locale; |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
30 |
import java.util.regex.Pattern.CharPredicate; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
31 |
import java.util.regex.Pattern.BmpCharPredicate; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
32 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
33 |
class CharPredicates { |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
34 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
35 |
static final CharPredicate ALPHABETIC() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
36 |
return Character::isAlphabetic; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
37 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
38 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
39 |
// \p{gc=Decimal_Number} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
40 |
static final CharPredicate DIGIT() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
41 |
return Character::isDigit; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
42 |
} |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
43 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
44 |
static final CharPredicate LETTER() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
45 |
return Character::isLetter; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
46 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
47 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
48 |
static final CharPredicate IDEOGRAPHIC() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
49 |
return Character::isIdeographic; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
50 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
51 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
52 |
static final CharPredicate LOWERCASE() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
53 |
return Character::isLowerCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
54 |
} |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
55 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
56 |
static final CharPredicate UPPERCASE() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
57 |
return Character::isUpperCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
58 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
59 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
60 |
static final CharPredicate TITLECASE() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
61 |
return Character::isTitleCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
62 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
63 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
64 |
// \p{Whitespace} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
65 |
static final CharPredicate WHITE_SPACE() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
66 |
return ch -> |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
67 |
((((1 << Character.SPACE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
68 |
(1 << Character.LINE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
69 |
(1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1) |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
70 |
!= 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
71 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
72 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
73 |
// \p{gc=Control} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
74 |
static final CharPredicate CONTROL() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
75 |
return ch -> Character.getType(ch) == Character.CONTROL; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
76 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
77 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
78 |
// \p{gc=Punctuation} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
79 |
static final CharPredicate PUNCTUATION() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
80 |
return ch -> |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
81 |
((((1 << Character.CONNECTOR_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
82 |
(1 << Character.DASH_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
83 |
(1 << Character.START_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
84 |
(1 << Character.END_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
85 |
(1 << Character.OTHER_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
86 |
(1 << Character.INITIAL_QUOTE_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
87 |
(1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1) |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
88 |
!= 0; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
89 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
90 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
91 |
// \p{gc=Decimal_Number} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
92 |
// \p{Hex_Digit} -> PropList.txt: Hex_Digit |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
93 |
static final CharPredicate HEX_DIGIT() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
94 |
return DIGIT().union(ch -> (ch >= 0x0030 && ch <= 0x0039) || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
95 |
(ch >= 0x0041 && ch <= 0x0046) || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
96 |
(ch >= 0x0061 && ch <= 0x0066) || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
97 |
(ch >= 0xFF10 && ch <= 0xFF19) || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
98 |
(ch >= 0xFF21 && ch <= 0xFF26) || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
99 |
(ch >= 0xFF41 && ch <= 0xFF46)); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
100 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
101 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
102 |
static final CharPredicate ASSIGNED() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
103 |
return ch -> Character.getType(ch) != Character.UNASSIGNED; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
104 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
105 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
106 |
// PropList.txt:Noncharacter_Code_Point |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
107 |
static final CharPredicate NONCHARACTER_CODE_POINT() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
108 |
return ch -> (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
109 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
110 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
111 |
// \p{alpha} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
112 |
// \p{digit} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
113 |
static final CharPredicate ALNUM() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
114 |
return ALPHABETIC().union(DIGIT()); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
115 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
116 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
117 |
// \p{Whitespace} -- |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
118 |
// [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85 |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
119 |
// \p{gc=Line_Separator} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
120 |
// \p{gc=Paragraph_Separator}] |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
121 |
static final CharPredicate BLANK() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
122 |
return ch -> |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
123 |
Character.getType(ch) == Character.SPACE_SEPARATOR || |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
124 |
ch == 0x9; // \N{HT} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
125 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
126 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
127 |
// [^ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
128 |
// \p{space} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
129 |
// \p{gc=Control} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
130 |
// \p{gc=Surrogate} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
131 |
// \p{gc=Unassigned}] |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
132 |
static final CharPredicate GRAPH() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
133 |
return ch -> |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
134 |
((((1 << Character.SPACE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
135 |
(1 << Character.LINE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
136 |
(1 << Character.PARAGRAPH_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
137 |
(1 << Character.CONTROL) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
138 |
(1 << Character.SURROGATE) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
139 |
(1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1) |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
140 |
== 0; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
141 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
142 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
143 |
// \p{graph} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
144 |
// \p{blank} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
145 |
// -- \p{cntrl} |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
146 |
static final CharPredicate PRINT() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
147 |
return GRAPH().union(BLANK()).and(CONTROL().negate()); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
148 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
149 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
150 |
// 200C..200D PropList.txt:Join_Control |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
151 |
static final CharPredicate JOIN_CONTROL() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
152 |
return ch -> ch == 0x200C || ch == 0x200D; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
153 |
} |
17434
4a04d7127e80
8013252: Regex Matcher .start and .end should be accessible by group name
sherman
parents:
9536
diff
changeset
|
154 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
155 |
// \p{alpha} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
156 |
// \p{gc=Mark} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
157 |
// \p{digit} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
158 |
// \p{gc=Connector_Punctuation} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
159 |
// \p{Join_Control} 200C..200D |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
160 |
static final CharPredicate WORD() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
161 |
return ALPHABETIC().union(ch -> ((((1 << Character.NON_SPACING_MARK) | |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
162 |
(1 << Character.ENCLOSING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
163 |
(1 << Character.COMBINING_SPACING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
164 |
(1 << Character.DECIMAL_DIGIT_NUMBER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
165 |
(1 << Character.CONNECTOR_PUNCTUATION)) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
166 |
>> Character.getType(ch)) & 1) != 0, |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
167 |
JOIN_CONTROL()); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
168 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
169 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
170 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
171 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
172 |
private static CharPredicate getPosixPredicate(String name) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
173 |
switch (name) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
174 |
case "ALPHA": return ALPHABETIC(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
175 |
case "LOWER": return LOWERCASE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
176 |
case "UPPER": return UPPERCASE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
177 |
case "SPACE": return WHITE_SPACE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
178 |
case "PUNCT": return PUNCTUATION(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
179 |
case "XDIGIT": return HEX_DIGIT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
180 |
case "ALNUM": return ALNUM(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
181 |
case "CNTRL": return CONTROL(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
182 |
case "DIGIT": return DIGIT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
183 |
case "BLANK": return BLANK(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
184 |
case "GRAPH": return GRAPH(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
185 |
case "PRINT": return PRINT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
186 |
default: return null; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
187 |
} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
188 |
} |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
189 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
190 |
private static CharPredicate getUnicodePredicate(String name) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
191 |
switch (name) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
192 |
case "ALPHABETIC": return ALPHABETIC(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
193 |
case "ASSIGNED": return ASSIGNED(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
194 |
case "CONTROL": return CONTROL(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
195 |
case "HEXDIGIT": return HEX_DIGIT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
196 |
case "IDEOGRAPHIC": return IDEOGRAPHIC(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
197 |
case "JOINCONTROL": return JOIN_CONTROL(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
198 |
case "LETTER": return LETTER(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
199 |
case "LOWERCASE": return LOWERCASE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
200 |
case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
201 |
case "TITLECASE": return TITLECASE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
202 |
case "PUNCTUATION": return PUNCTUATION(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
203 |
case "UPPERCASE": return UPPERCASE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
204 |
case "WHITESPACE": return WHITE_SPACE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
205 |
case "WORD": return WORD(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
206 |
case "WHITE_SPACE": return WHITE_SPACE(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
207 |
case "HEX_DIGIT": return HEX_DIGIT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
208 |
case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
209 |
case "JOIN_CONTROL": return JOIN_CONTROL(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
210 |
default: return null; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
211 |
} |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
212 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
213 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
214 |
public static CharPredicate forUnicodeProperty(String propName) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
215 |
propName = propName.toUpperCase(Locale.ROOT); |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
216 |
CharPredicate p = getUnicodePredicate(propName); |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
217 |
if (p != null) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
218 |
return p; |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
219 |
return getPosixPredicate(propName); |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
220 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
221 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
222 |
public static CharPredicate forPOSIXName(String propName) { |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
223 |
return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH)); |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
224 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
225 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
226 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
227 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
228 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
229 |
* Returns a predicate matching all characters belong to a named |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
230 |
* UnicodeScript. |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
231 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
232 |
static CharPredicate forUnicodeScript(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
233 |
final Character.UnicodeScript script; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
234 |
try { |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
235 |
script = Character.UnicodeScript.forName(name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
236 |
return ch -> script == Character.UnicodeScript.of(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
237 |
} catch (IllegalArgumentException iae) {} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
238 |
return null; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
239 |
} |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
240 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
241 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
242 |
* Returns a predicate matching all characters in a UnicodeBlock. |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
243 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
244 |
static CharPredicate forUnicodeBlock(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
245 |
final Character.UnicodeBlock block; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
246 |
try { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
247 |
block = Character.UnicodeBlock.forName(name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
248 |
return ch -> block == Character.UnicodeBlock.of(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
249 |
} catch (IllegalArgumentException iae) {} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
250 |
return null; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
251 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
252 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
253 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
254 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
255 |
// unicode categories, aliases, properties, java methods ... |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
256 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
257 |
static CharPredicate forProperty(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
258 |
// Unicode character property aliases, defined in |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
259 |
// http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
260 |
switch (name) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
261 |
case "Cn": return category(1<<Character.UNASSIGNED); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
262 |
case "Lu": return category(1<<Character.UPPERCASE_LETTER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
263 |
case "Ll": return category(1<<Character.LOWERCASE_LETTER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
264 |
case "Lt": return category(1<<Character.TITLECASE_LETTER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
265 |
case "Lm": return category(1<<Character.MODIFIER_LETTER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
266 |
case "Lo": return category(1<<Character.OTHER_LETTER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
267 |
case "Mn": return category(1<<Character.NON_SPACING_MARK); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
268 |
case "Me": return category(1<<Character.ENCLOSING_MARK); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
269 |
case "Mc": return category(1<<Character.COMBINING_SPACING_MARK); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
270 |
case "Nd": return category(1<<Character.DECIMAL_DIGIT_NUMBER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
271 |
case "Nl": return category(1<<Character.LETTER_NUMBER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
272 |
case "No": return category(1<<Character.OTHER_NUMBER); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
273 |
case "Zs": return category(1<<Character.SPACE_SEPARATOR); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
274 |
case "Zl": return category(1<<Character.LINE_SEPARATOR); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
275 |
case "Zp": return category(1<<Character.PARAGRAPH_SEPARATOR); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
276 |
case "Cc": return category(1<<Character.CONTROL); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
277 |
case "Cf": return category(1<<Character.FORMAT); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
278 |
case "Co": return category(1<<Character.PRIVATE_USE); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
279 |
case "Cs": return category(1<<Character.SURROGATE); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
280 |
case "Pd": return category(1<<Character.DASH_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
281 |
case "Ps": return category(1<<Character.START_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
282 |
case "Pe": return category(1<<Character.END_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
283 |
case "Pc": return category(1<<Character.CONNECTOR_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
284 |
case "Po": return category(1<<Character.OTHER_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
285 |
case "Sm": return category(1<<Character.MATH_SYMBOL); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
286 |
case "Sc": return category(1<<Character.CURRENCY_SYMBOL); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
287 |
case "Sk": return category(1<<Character.MODIFIER_SYMBOL); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
288 |
case "So": return category(1<<Character.OTHER_SYMBOL); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
289 |
case "Pi": return category(1<<Character.INITIAL_QUOTE_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
290 |
case "Pf": return category(1<<Character.FINAL_QUOTE_PUNCTUATION); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
291 |
case "L": return category(((1<<Character.UPPERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
292 |
(1<<Character.LOWERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
293 |
(1<<Character.TITLECASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
294 |
(1<<Character.MODIFIER_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
295 |
(1<<Character.OTHER_LETTER))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
296 |
case "M": return category(((1<<Character.NON_SPACING_MARK) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
297 |
(1<<Character.ENCLOSING_MARK) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
298 |
(1<<Character.COMBINING_SPACING_MARK))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
299 |
case "N": return category(((1<<Character.DECIMAL_DIGIT_NUMBER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
300 |
(1<<Character.LETTER_NUMBER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
301 |
(1<<Character.OTHER_NUMBER))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
302 |
case "Z": return category(((1<<Character.SPACE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
303 |
(1<<Character.LINE_SEPARATOR) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
304 |
(1<<Character.PARAGRAPH_SEPARATOR))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
305 |
case "C": return category(((1<<Character.CONTROL) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
306 |
(1<<Character.FORMAT) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
307 |
(1<<Character.PRIVATE_USE) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
308 |
(1<<Character.SURROGATE) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
309 |
(1<<Character.UNASSIGNED))); // Other |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
310 |
case "P": return category(((1<<Character.DASH_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
311 |
(1<<Character.START_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
312 |
(1<<Character.END_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
313 |
(1<<Character.CONNECTOR_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
314 |
(1<<Character.OTHER_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
315 |
(1<<Character.INITIAL_QUOTE_PUNCTUATION) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
316 |
(1<<Character.FINAL_QUOTE_PUNCTUATION))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
317 |
case "S": return category(((1<<Character.MATH_SYMBOL) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
318 |
(1<<Character.CURRENCY_SYMBOL) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
319 |
(1<<Character.MODIFIER_SYMBOL) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
320 |
(1<<Character.OTHER_SYMBOL))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
321 |
case "LC": return category(((1<<Character.UPPERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
322 |
(1<<Character.LOWERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
323 |
(1<<Character.TITLECASE_LETTER))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
324 |
case "LD": return category(((1<<Character.UPPERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
325 |
(1<<Character.LOWERCASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
326 |
(1<<Character.TITLECASE_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
327 |
(1<<Character.MODIFIER_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
328 |
(1<<Character.OTHER_LETTER) | |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
329 |
(1<<Character.DECIMAL_DIGIT_NUMBER))); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
330 |
case "L1": return range(0x00, 0xFF); // Latin-1 |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
331 |
case "all": return Pattern.ALL(); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
332 |
// Posix regular expression character classes, defined in |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
333 |
// http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
334 |
case "ASCII": return range(0x00, 0x7F); // ASCII |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
335 |
case "Alnum": return ctype(ASCII.ALNUM); // Alphanumeric characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
336 |
case "Alpha": return ctype(ASCII.ALPHA); // Alphabetic characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
337 |
case "Blank": return ctype(ASCII.BLANK); // Space and tab characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
338 |
case "Cntrl": return ctype(ASCII.CNTRL); // Control characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
339 |
case "Digit": return range('0', '9'); // Numeric characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
340 |
case "Graph": return ctype(ASCII.GRAPH); // printable and visible |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
341 |
case "Lower": return range('a', 'z'); // Lower-case alphabetic |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
342 |
case "Print": return range(0x20, 0x7E); // Printable characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
343 |
case "Punct": return ctype(ASCII.PUNCT); // Punctuation characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
344 |
case "Space": return ctype(ASCII.SPACE); // Space characters |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
345 |
case "Upper": return range('A', 'Z'); // Upper-case alphabetic |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
346 |
case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
347 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
348 |
// Java character properties, defined by methods in Character.java |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
349 |
case "javaLowerCase": return java.lang.Character::isLowerCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
350 |
case "javaUpperCase": return Character::isUpperCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
351 |
case "javaAlphabetic": return java.lang.Character::isAlphabetic; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
352 |
case "javaIdeographic": return java.lang.Character::isIdeographic; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
353 |
case "javaTitleCase": return java.lang.Character::isTitleCase; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
354 |
case "javaDigit": return java.lang.Character::isDigit; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
355 |
case "javaDefined": return java.lang.Character::isDefined; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
356 |
case "javaLetter": return java.lang.Character::isLetter; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
357 |
case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
358 |
case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
359 |
case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
360 |
case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
361 |
case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
362 |
case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
363 |
case "javaSpaceChar": return java.lang.Character::isSpaceChar; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
364 |
case "javaWhitespace": return java.lang.Character::isWhitespace; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
365 |
case "javaISOControl": return java.lang.Character::isISOControl; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
366 |
case "javaMirrored": return java.lang.Character::isMirrored; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
367 |
default: return null; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
368 |
} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
369 |
} |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
370 |
|
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
371 |
private static CharPredicate category(final int typeMask) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
372 |
return ch -> (typeMask & (1 << Character.getType(ch))) != 0; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
373 |
} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
374 |
|
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
375 |
private static CharPredicate range(final int lower, final int upper) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
376 |
return (BmpCharPredicate)ch -> lower <= ch && ch <= upper; |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
377 |
} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
378 |
|
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
379 |
private static CharPredicate ctype(final int ctype) { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
380 |
return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype); |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
381 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
382 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
383 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
384 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
385 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
386 |
* Posix ASCII variants, not in the lookup map |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
387 |
*/ |
43502
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
388 |
static final BmpCharPredicate ASCII_DIGIT() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
389 |
return ch -> ch < 128 && ASCII.isDigit(ch); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
390 |
} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
391 |
static final BmpCharPredicate ASCII_WORD() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
392 |
return ch -> ch < 128 && ASCII.isWord(ch); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
393 |
} |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
394 |
static final BmpCharPredicate ASCII_SPACE() { |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
395 |
return ch -> ch < 128 && ASCII.isSpace(ch); |
aec39566b45e
8160302: Reduce number of lambdas created when loading java.util.regex.Pattern
redestad
parents:
38450
diff
changeset
|
396 |
} |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
397 |
|
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
398 |
} |