author | sherman |
Tue, 10 May 2016 21:19:25 -0700 | |
changeset 37882 | e7f3cf12e739 |
parent 32649 | jdk/src/java.base/share/classes/java/util/regex/UnicodeProp.java@2ee9017c7597 |
child 38450 | 516990ff3a4c |
permissions | -rw-r--r-- |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
1 |
/* |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
2 |
* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
4 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. Oracle designates this |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
8 |
* particular file as subject to the "Classpath" exception as provided |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
10 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
15 |
* accompanied this code). |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
16 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
17 |
* You should have received a copy of the GNU General Public License version |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
20 |
* |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
22 |
* or visit www.oracle.com if you need additional information or have any |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
23 |
* questions. |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
24 |
*/ |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
25 |
|
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
26 |
package java.util.regex; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
27 |
|
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
28 |
import java.util.HashMap; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
29 |
import java.util.Locale; |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
30 |
import java.util.regex.Pattern.CharPredicate; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
31 |
import java.util.regex.Pattern.BmpCharPredicate; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
32 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
33 |
class CharPredicates { |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
34 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
35 |
static final CharPredicate ALPHABETIC = Character::isAlphabetic; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
36 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
37 |
// \p{gc=Decimal_Number} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
38 |
static final CharPredicate DIGIT = Character::isDigit; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
39 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
40 |
static final CharPredicate LETTER = Character::isLetter; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
41 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
42 |
static final CharPredicate IDEOGRAPHIC = Character::isIdeographic; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
43 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
44 |
static final CharPredicate LOWERCASE = Character::isLowerCase; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
45 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
46 |
static final CharPredicate UPPERCASE = Character::isUpperCase; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
47 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
48 |
static final CharPredicate TITLECASE = Character::isTitleCase; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
49 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
50 |
// \p{Whitespace} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
51 |
static final CharPredicate WHITE_SPACE = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
52 |
((((1 << Character.SPACE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
53 |
(1 << Character.LINE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
54 |
(1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
55 |
!= 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
56 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
57 |
// \p{gc=Control} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
58 |
static final CharPredicate CONTROL = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
59 |
Character.getType(ch) == Character.CONTROL; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
60 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
61 |
// \p{gc=Punctuation} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
62 |
static final CharPredicate PUNCTUATION = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
63 |
((((1 << Character.CONNECTOR_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
64 |
(1 << Character.DASH_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
65 |
(1 << Character.START_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
66 |
(1 << Character.END_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
67 |
(1 << Character.OTHER_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
68 |
(1 << Character.INITIAL_QUOTE_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
69 |
(1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
70 |
!= 0; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
71 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
72 |
// \p{gc=Decimal_Number} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
73 |
// \p{Hex_Digit} -> PropList.txt: Hex_Digit |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
74 |
static final CharPredicate HEX_DIGIT = DIGIT.union( |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
75 |
ch -> (ch >= 0x0030 && ch <= 0x0039) || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
76 |
(ch >= 0x0041 && ch <= 0x0046) || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
77 |
(ch >= 0x0061 && ch <= 0x0066) || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
78 |
(ch >= 0xFF10 && ch <= 0xFF19) || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
79 |
(ch >= 0xFF21 && ch <= 0xFF26) || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
80 |
(ch >= 0xFF41 && ch <= 0xFF46)); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
81 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
82 |
static final CharPredicate ASSIGNED = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
83 |
Character.getType(ch) != Character.UNASSIGNED; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
84 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
85 |
// PropList.txt:Noncharacter_Code_Point |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
86 |
static final CharPredicate NONCHARACTER_CODE_POINT = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
87 |
(ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
88 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
89 |
// \p{alpha} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
90 |
// \p{digit} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
91 |
static final CharPredicate ALNUM = ALPHABETIC.union(DIGIT); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
92 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
93 |
// \p{Whitespace} -- |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
94 |
// [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85 |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
95 |
// \p{gc=Line_Separator} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
96 |
// \p{gc=Paragraph_Separator}] |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
97 |
static final CharPredicate BLANK = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
98 |
Character.getType(ch) == Character.SPACE_SEPARATOR || |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
99 |
ch == 0x9; // \N{HT} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
100 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
101 |
// [^ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
102 |
// \p{space} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
103 |
// \p{gc=Control} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
104 |
// \p{gc=Surrogate} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
105 |
// \p{gc=Unassigned}] |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
106 |
static final CharPredicate GRAPH = ch -> |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
107 |
((((1 << Character.SPACE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
108 |
(1 << Character.LINE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
109 |
(1 << Character.PARAGRAPH_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
110 |
(1 << Character.CONTROL) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
111 |
(1 << Character.SURROGATE) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
112 |
(1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
113 |
== 0; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
114 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
115 |
// \p{graph} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
116 |
// \p{blank} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
117 |
// -- \p{cntrl} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
118 |
static final CharPredicate PRINT = GRAPH.union(BLANK).and(CONTROL.negate()); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
119 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
120 |
// 200C..200D PropList.txt:Join_Control |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
121 |
static final CharPredicate JOIN_CONTROL = ch -> ch == 0x200C || ch == 0x200D; |
17434
4a04d7127e80
8013252: Regex Matcher .start and .end should be accessible by group name
sherman
parents:
9536
diff
changeset
|
122 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
123 |
// \p{alpha} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
124 |
// \p{gc=Mark} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
125 |
// \p{digit} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
126 |
// \p{gc=Connector_Punctuation} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
127 |
// \p{Join_Control} 200C..200D |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
128 |
static final CharPredicate WORD = |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
129 |
ALPHABETIC.union(ch -> ((((1 << Character.NON_SPACING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
130 |
(1 << Character.ENCLOSING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
131 |
(1 << Character.COMBINING_SPACING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
132 |
(1 << Character.DECIMAL_DIGIT_NUMBER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
133 |
(1 << Character.CONNECTOR_PUNCTUATION)) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
134 |
>> Character.getType(ch)) & 1) != 0, |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
135 |
JOIN_CONTROL); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
136 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
137 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
138 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
139 |
private static final HashMap<String, CharPredicate> posix = new HashMap<>(12); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
140 |
private static final HashMap<String, CharPredicate> uprops = new HashMap<>(18); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
141 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
142 |
private static void defPosix(String name, CharPredicate p) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
143 |
posix.put(name, p); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
144 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
145 |
private static void defUProp(String name, CharPredicate p) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
146 |
uprops.put(name, p); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
147 |
} |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
148 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
149 |
// Populates both lookup tables. All keys are upper case because the lookup
// methods upper-case the requested name before consulting the tables.
static {
    // POSIX character class names.
    defPosix("ALPHA", ALPHABETIC);
    defPosix("LOWER", LOWERCASE);
    defPosix("UPPER", UPPERCASE);
    defPosix("SPACE", WHITE_SPACE);
    defPosix("PUNCT", PUNCTUATION);
    defPosix("XDIGIT", HEX_DIGIT);
    defPosix("ALNUM", ALNUM);
    defPosix("CNTRL", CONTROL);
    defPosix("DIGIT", DIGIT);
    defPosix("BLANK", BLANK);
    defPosix("GRAPH", GRAPH);
    defPosix("PRINT", PRINT);

    // Unicode binary property names, spelled without separators.
    defUProp("ALPHABETIC", ALPHABETIC);
    defUProp("ASSIGNED", ASSIGNED);
    defUProp("CONTROL", CONTROL);
    defUProp("HEXDIGIT", HEX_DIGIT);
    defUProp("IDEOGRAPHIC", IDEOGRAPHIC);
    defUProp("JOINCONTROL", JOIN_CONTROL);
    defUProp("LETTER", LETTER);
    defUProp("LOWERCASE", LOWERCASE);
    defUProp("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
    defUProp("TITLECASE", TITLECASE);
    defUProp("PUNCTUATION", PUNCTUATION);
    defUProp("UPPERCASE", UPPERCASE);
    defUProp("WHITESPACE", WHITE_SPACE);
    defUProp("WORD", WORD);

    // Underscore-separated aliases for the multi-word property names above.
    defUProp("WHITE_SPACE", WHITE_SPACE);
    defUProp("HEX_DIGIT", HEX_DIGIT);
    defUProp("NONCHARACTER_CODE_POINT", NONCHARACTER_CODE_POINT);
    defUProp("JOIN_CONTROL", JOIN_CONTROL);
}
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
182 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
183 |
public static CharPredicate forUnicodeProperty(String propName) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
184 |
propName = propName.toUpperCase(Locale.ROOT); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
185 |
CharPredicate p = uprops.get(propName); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
186 |
if (p != null) |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
187 |
return p; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
188 |
return posix.get(propName); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
189 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
190 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
191 |
public static CharPredicate forPOSIXName(String propName) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
192 |
return posix.get(propName.toUpperCase(Locale.ENGLISH)); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
193 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
194 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
195 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
196 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
197 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
198 |
* Returns a predicate matching all characters belong to a named |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
199 |
* UnicodeScript. |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
200 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
201 |
static CharPredicate forUnicodeScript(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
202 |
final Character.UnicodeScript script; |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
203 |
try { |
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
204 |
script = Character.UnicodeScript.forName(name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
205 |
return ch -> script == Character.UnicodeScript.of(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
206 |
} catch (IllegalArgumentException iae) {} |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
207 |
return null; |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
208 |
} |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
209 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
210 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
211 |
* Returns a predicate matching all characters in a UnicodeBlock. |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
212 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
213 |
static CharPredicate forUnicodeBlock(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
214 |
final Character.UnicodeBlock block; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
215 |
try { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
216 |
block = Character.UnicodeBlock.forName(name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
217 |
return ch -> block == Character.UnicodeBlock.of(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
218 |
} catch (IllegalArgumentException iae) {} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
219 |
return null; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
220 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
221 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
222 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
223 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
224 |
// unicode categories, aliases, properties, java methods ... |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
225 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
226 |
private static final HashMap<String, CharPredicate> props = new HashMap<>(128); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
227 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
228 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
229 |
* Returns a predicate matching all characters in a named property. |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
230 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
231 |
static CharPredicate forProperty(String name) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
232 |
return props.get(name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
233 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
234 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
235 |
private static void defProp(String name, CharPredicate p) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
236 |
props.put(name, p); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
237 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
238 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
239 |
private static void defCategory(String name, final int typeMask) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
240 |
CharPredicate p = ch -> (typeMask & (1 << Character.getType(ch))) != 0; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
241 |
props.put(name, p); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
242 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
243 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
244 |
private static void defRange(String name, final int lower, final int upper) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
245 |
BmpCharPredicate p = ch -> lower <= ch && ch <= upper; |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
246 |
props.put(name, p); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
247 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
248 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
249 |
private static void defCtype(String name, final int ctype) { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
250 |
BmpCharPredicate p = ch -> ch < 128 && ASCII.isType(ch, ctype); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
251 |
// PrintPattern.pmap.put(p, name); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
252 |
props.put(name, p); |
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
253 |
} |
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
254 |
|
37882
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
255 |
static { |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
256 |
// Unicode character property aliases, defined in |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
257 |
// http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
258 |
defCategory("Cn", 1<<Character.UNASSIGNED); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
259 |
defCategory("Lu", 1<<Character.UPPERCASE_LETTER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
260 |
defCategory("Ll", 1<<Character.LOWERCASE_LETTER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
261 |
defCategory("Lt", 1<<Character.TITLECASE_LETTER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
262 |
defCategory("Lm", 1<<Character.MODIFIER_LETTER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
263 |
defCategory("Lo", 1<<Character.OTHER_LETTER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
264 |
defCategory("Mn", 1<<Character.NON_SPACING_MARK); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
265 |
defCategory("Me", 1<<Character.ENCLOSING_MARK); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
266 |
defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
267 |
defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
268 |
defCategory("Nl", 1<<Character.LETTER_NUMBER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
269 |
defCategory("No", 1<<Character.OTHER_NUMBER); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
270 |
defCategory("Zs", 1<<Character.SPACE_SEPARATOR); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
271 |
defCategory("Zl", 1<<Character.LINE_SEPARATOR); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
272 |
defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
273 |
defCategory("Cc", 1<<Character.CONTROL); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
274 |
defCategory("Cf", 1<<Character.FORMAT); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
275 |
defCategory("Co", 1<<Character.PRIVATE_USE); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
276 |
defCategory("Cs", 1<<Character.SURROGATE); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
277 |
defCategory("Pd", 1<<Character.DASH_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
278 |
defCategory("Ps", 1<<Character.START_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
279 |
defCategory("Pe", 1<<Character.END_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
280 |
defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
281 |
defCategory("Po", 1<<Character.OTHER_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
282 |
defCategory("Sm", 1<<Character.MATH_SYMBOL); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
283 |
defCategory("Sc", 1<<Character.CURRENCY_SYMBOL); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
284 |
defCategory("Sk", 1<<Character.MODIFIER_SYMBOL); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
285 |
defCategory("So", 1<<Character.OTHER_SYMBOL); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
286 |
defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
287 |
defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
288 |
defCategory("L", ((1<<Character.UPPERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
289 |
(1<<Character.LOWERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
290 |
(1<<Character.TITLECASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
291 |
(1<<Character.MODIFIER_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
292 |
(1<<Character.OTHER_LETTER))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
293 |
defCategory("M", ((1<<Character.NON_SPACING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
294 |
(1<<Character.ENCLOSING_MARK) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
295 |
(1<<Character.COMBINING_SPACING_MARK))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
296 |
defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
297 |
(1<<Character.LETTER_NUMBER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
298 |
(1<<Character.OTHER_NUMBER))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
299 |
defCategory("Z", ((1<<Character.SPACE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
300 |
(1<<Character.LINE_SEPARATOR) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
301 |
(1<<Character.PARAGRAPH_SEPARATOR))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
302 |
defCategory("C", ((1<<Character.CONTROL) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
303 |
(1<<Character.FORMAT) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
304 |
(1<<Character.PRIVATE_USE) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
305 |
(1<<Character.SURROGATE))); // Other |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
306 |
defCategory("P", ((1<<Character.DASH_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
307 |
(1<<Character.START_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
308 |
(1<<Character.END_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
309 |
(1<<Character.CONNECTOR_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
310 |
(1<<Character.OTHER_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
311 |
(1<<Character.INITIAL_QUOTE_PUNCTUATION) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
312 |
(1<<Character.FINAL_QUOTE_PUNCTUATION))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
313 |
defCategory("S", ((1<<Character.MATH_SYMBOL) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
314 |
(1<<Character.CURRENCY_SYMBOL) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
315 |
(1<<Character.MODIFIER_SYMBOL) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
316 |
(1<<Character.OTHER_SYMBOL))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
317 |
defCategory("LC", ((1<<Character.UPPERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
318 |
(1<<Character.LOWERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
319 |
(1<<Character.TITLECASE_LETTER))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
320 |
defCategory("LD", ((1<<Character.UPPERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
321 |
(1<<Character.LOWERCASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
322 |
(1<<Character.TITLECASE_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
323 |
(1<<Character.MODIFIER_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
324 |
(1<<Character.OTHER_LETTER) | |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
325 |
(1<<Character.DECIMAL_DIGIT_NUMBER))); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
326 |
defRange("L1", 0x00, 0xFF); // Latin-1 |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
327 |
props.put("all", ch -> true); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
328 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
329 |
// Posix regular expression character classes, defined in |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
330 |
// http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
331 |
defRange("ASCII", 0x00, 0x7F); // ASCII |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
332 |
defCtype("Alnum", ASCII.ALNUM); // Alphanumeric characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
333 |
defCtype("Alpha", ASCII.ALPHA); // Alphabetic characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
334 |
defCtype("Blank", ASCII.BLANK); // Space and tab characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
335 |
defCtype("Cntrl", ASCII.CNTRL); // Control characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
336 |
defRange("Digit", '0', '9'); // Numeric characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
337 |
defCtype("Graph", ASCII.GRAPH); // printable and visible |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
338 |
defRange("Lower", 'a', 'z'); // Lower-case alphabetic |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
339 |
defRange("Print", 0x20, 0x7E); // Printable characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
340 |
defCtype("Punct", ASCII.PUNCT); // Punctuation characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
341 |
defCtype("Space", ASCII.SPACE); // Space characters |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
342 |
defRange("Upper", 'A', 'Z'); // Upper-case alphabetic |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
343 |
defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
344 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
345 |
// Java character properties, defined by methods in Character.java |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
346 |
defProp("javaLowerCase", java.lang.Character::isLowerCase); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
347 |
defProp("javaUpperCase", Character::isUpperCase); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
348 |
defProp("javaAlphabetic", java.lang.Character::isAlphabetic); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
349 |
defProp("javaIdeographic", java.lang.Character::isIdeographic); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
350 |
defProp("javaTitleCase", java.lang.Character::isTitleCase); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
351 |
defProp("javaDigit", java.lang.Character::isDigit); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
352 |
defProp("javaDefined", java.lang.Character::isDefined); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
353 |
defProp("javaLetter", java.lang.Character::isLetter); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
354 |
defProp("javaLetterOrDigit", java.lang.Character::isLetterOrDigit); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
355 |
defProp("javaJavaIdentifierStart", java.lang.Character::isJavaIdentifierStart); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
356 |
defProp("javaJavaIdentifierPart", java.lang.Character::isJavaIdentifierPart); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
357 |
defProp("javaUnicodeIdentifierStart", java.lang.Character::isUnicodeIdentifierStart); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
358 |
defProp("javaUnicodeIdentifierPart", java.lang.Character::isUnicodeIdentifierPart); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
359 |
defProp("javaIdentifierIgnorable", java.lang.Character::isIdentifierIgnorable); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
360 |
defProp("javaSpaceChar", java.lang.Character::isSpaceChar); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
361 |
defProp("javaWhitespace", java.lang.Character::isWhitespace); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
362 |
defProp("javaISOControl", java.lang.Character::isISOControl); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
363 |
defProp("javaMirrored", java.lang.Character::isMirrored); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
364 |
} |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
365 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
366 |
///////////////////////////////////////////////////////////////////////////// |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
367 |
|
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
368 |
/** |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
369 |
* Posix ASCII variants, not in the lookup map |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
370 |
*/ |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
371 |
static final BmpCharPredicate ASCII_DIGIT = ch -> ch < 128 && ASCII.isDigit(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
372 |
static final BmpCharPredicate ASCII_WORD = ch -> ch < 128 && ASCII.isWord(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
373 |
static final BmpCharPredicate ASCII_SPACE = ch -> ch < 128 && ASCII.isSpace(ch); |
e7f3cf12e739
6328855: String: Matches hangs at short and easy Strings containing \r \n
sherman
parents:
32649
diff
changeset
|
374 |
|
9536
648c9add2a74
7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff
changeset
|
375 |
} |