jdk/test/java/util/regex/TestCases.txt
author sherman
Sun, 14 Feb 2016 14:30:48 -0800
changeset 35785 d17d49d794e6
parent 5506 202f599c92aa
child 37882 e7f3cf12e739
permissions -rw-r--r--
8149787: test/java/util/regex/GraphemeTest.java source file has non-ascii character u+00f7 Reviewed-by: igerasim

//
// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
// This file contains test cases for regular expressions.
// A test case consists of three lines:
// The first line is a pattern used in the test
// The second line is the input to search for the pattern in
// The third line is a concatenation of the match, the number of groups,
//     and the contents of the first four subexpressions.
// Empty lines and lines beginning with comment slashes are ignored.
//
// Test unsetting of backed off groups
^(a)?a
a
true a 1

^(aa(bb)?)+$
aabbaa
true aabbaa 2 aa bb

((a|b)?b)+
b
true b 2 b

(aaa)?aaa
aaa
true aaa 1

^(a(b)?)+$
aba
true aba 2 a b

^(a(b(c)?)?)?abc
abc
true abc 3

^(a(b(c))).*
abc
true abc 3 abc bc c

// use of x modifier
abc(?x)blah
abcblah
true abcblah 0

abc(?x)  blah
abcblah
true abcblah 0

abc(?x)  blah  blech
abcblahblech
true abcblahblech 0

abc(?x)  blah # ignore comment
abcblah
true abcblah 0

// Simple alternation
a|b
a
true a 0

a|b
z
false 0

a|b
b
true b 0

a|b|cd
cd
true cd 0

a|ad
ad
true a 0

z(a|ac)b
zacb
true zacb 1 ac

// Simple char class
[abc]+
ababab
true ababab 0

[abc]+
defg
false 0

[abc]+[def]+[ghi]+
zzzaaddggzzz
true aaddgg 0

// Range char class
[a-g]+
zzzggg
true ggg 0

[a-g]+
mmm
false 0

[a-]+
za-9z
true a- 0

[a-\\u4444]+
za-9z
true za 0

// Negated char class
[^abc]+
ababab
false 0

[^abc]+
aaabbbcccdefg
true defg 0

// Making sure a ^ not in first position matches literal ^
[abc^b]
b
true b 0

[abc^b]
^
true ^ 0

// Class union and intersection
[abc[def]]
b
true b 0

[abc[def]]
e
true e 0

[a-d[0-9][m-p]]
a
true a 0

[a-d[0-9][m-p]]
o
true o 0

[a-d[0-9][m-p]]
4
true 4 0

[a-d[0-9][m-p]]
e
false 0

[a-d[0-9][m-p]]
u
false 0

[[a-d][0-9][m-p]]
b
true b 0

[[a-d][0-9][m-p]]
z
false 0

[a-c[d-f[g-i]]]
a
true a 0

[a-c[d-f[g-i]]]
e
true e 0

[a-c[d-f[g-i]]]
h
true h 0

[a-c[d-f[g-i]]]
m
false 0

[a-c[d-f[g-i]]m]
m
true m 0

[abc[def]ghi]
a
true a 0

[abc[def]ghi]
d
true d 0

[abc[def]ghi]
h
true h 0

[abc[def]ghi]
w
false 0

[a-c&&[d-f]]
a
false 0

[a-c&&[d-f]]
e
false 0

[a-c&&[d-f]]
z
false 0

[[a-c]&&[d-f]]
a
false 0

[[a-c]&&[d-f]]
e
false 0

[[a-c]&&[d-f]]
z
false 0

[a-c&&d-f]
a
false 0

[a-m&&m-z]
m
true m 0

[a-m&&m-z&&a-c]
m
false 0

[a-m&&m-z&&a-z]
m
true m 0

[[a-m]&&[m-z]]
a
false 0

[[a-m]&&[m-z]]
m
true m 0

[[a-m]&&[m-z]]
z
false 0

[[a-m]&&[^a-c]]
a
false 0

[[a-m]&&[^a-c]]
d
true d 0

[a-m&&[^a-c]]
a
false 0

[a-m&&[^a-c]]
d
true d 0

[a-cd-f&&[d-f]]
a
false 0

[a-cd-f&&[d-f]]
e
true e 0

[[a-c]&&d-fa-c]
a
true a 0

[[a-c]&&[d-f][a-c]]
a
true a 0

[[a-c][d-f]&&abc]
a
true a 0

[[a-c][d-f]&&abc[def]]
e
true e 0

[[a-c]&&[b-d]&&[c-e]]
a
false 0

[[a-c]&&[b-d]&&[c-e]]
c
true c 0

[[a-c]&&[b-d][c-e]&&[u-z]]
c
false 0

[abc[^bcd]]
a
true a 0

[abc[^bcd]]
d
false 0

[a-c&&a-d&&a-eghi]
b
true b 0

[a-c&&a-d&&a-eghi]
g
false 0

[[a[b]]&&[b[a]]]
a
true a 0

[[a]&&[b][c][a]&&[^d]]
a
true a 0

[[a]&&[b][c][a]&&[^d]]
d
false 0

[[[a-d]&&[c-f]]]
a
false 0

[[[a-d]&&[c-f]]]
c
true c 0

[[[a-d]&&[c-f]]&&[c]]
c
true c 0

[[[a-d]&&[c-f]]&&[c]&&c]
c
true c 0

[[[a-d]&&[c-f]]&&[c]&&c&&c]
c
true c 0

[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
c
true c 0

[z[abc&&bcd]]
c
true c 0

[z[abc&&bcd]&&[u-z]]
z
true z 0

[x[abc&&bcd[z]]&&[u-z]]
z
false 0

[x[[wz]abc&&bcd[z]]&&[u-z]]
z
true z 0

[[abc]&&[def]abc]
a
true a 0

[[abc]&&[def]xyz[abc]]
a
true a 0

\pL
a
true a 0

\pL
7
false 0

\p{L}
a
true a 0

\p{LC}
a
true a 0

\p{LC}
A
true A 0

\p{IsL}
a
true a 0

\p{IsLC}
a
true a 0

\p{IsLC}
A
true A 0

\p{IsLC}
9
false 0

\P{IsLC}
9
true 9 0

// Guillemet left is initial quote punctuation
\p{Pi}
\u00ab
true \u00ab 0

\P{Pi}
\u00ac
true \u00ac 0

// Guillemet right is final quote punctuation
\p{IsPf}
\u00bb
true \u00bb 0

\p{P}
\u00bb
true \u00bb 0

\p{P}+
\u00bb
true \u00bb 0

\P{IsPf}
\u00bc
true \u00bc 0

\P{IsP}
\u00bc
true \u00bc 0

\p{L1}
\u00bc
true \u00bc 0

\p{L1}+
\u00bc
true \u00bc 0

\p{L1}
\u02bc
false 0

\p{ASCII}
a
true a 0

\p{IsASCII}
a
true a 0

\p{IsASCII}
\u0370
false 0

\pLbc
abc
true abc 0

a[r\p{InGreek}]c
a\u0370c
true a\u0370c 0

a\p{InGreek}
a\u0370
true a\u0370 0

a\P{InGreek}
a\u0370
false 0

a\P{InGreek}
ab
true ab 0

a{^InGreek}
-
error

a\p{^InGreek}
-
error

a\P{^InGreek}
-
error

a\p{InGreek}
a\u0370
true a\u0370 0

a[\p{InGreek}]c
a\u0370c
true a\u0370c 0

a[\P{InGreek}]c
a\u0370c
false 0

a[\P{InGreek}]c
abc
true abc 0

a[{^InGreek}]c
anc
true anc 0

a[{^InGreek}]c
azc
false 0

a[\p{^InGreek}]c
-
error

a[\P{^InGreek}]c
-
error

a[\p{InGreek}]
a\u0370
true a\u0370 0

a[r\p{InGreek}]c
arc
true arc 0

a[\p{InGreek}r]c
arc
true arc 0

a[r\p{InGreek}]c
arc
true arc 0

a[^\p{InGreek}]c
a\u0370c
false 0

a[^\P{InGreek}]c
a\u0370c
true a\u0370c 0

a[\p{InGreek}&&[^\u0370]]c
a\u0370c
false 0

// Test the dot metacharacter
a.c.+
a#c%&
true a#c%& 0

ab.
ab\n
false 0

(?s)ab.
ab\n
true ab\n 0

a[\p{L}&&[\P{InGreek}]]c
a\u6000c
true a\u6000c 0

a[\p{L}&&[\P{InGreek}]]c
arc
true arc 0

a[\p{L}&&[\P{InGreek}]]c
a\u0370c
false 0

a\p{InGreek}c
a\u0370c
true a\u0370c 0

a\p{Sc}
a$
true a$ 0

// Test the word char escape sequence
ab\wc
abcc
true abcc 0

\W\w\W
#r#
true #r# 0

\W\w\W
rrrr#ggg
false 0

abc[\w]
abcd
true abcd 0

abc[\sdef]*
abc  def
true abc  def 0

abc[\sy-z]*
abc y z
true abc y z 0

abc[a-d\sm-p]*
abcaa mn  p
true abcaa mn  p 0

// Test the whitespace escape sequence
ab\sc
ab c
true ab c 0

\s\s\s
blah  err
false 0

\S\S\s
blah  err
true ah  0

// Test the digit escape sequence
ab\dc
ab9c
true ab9c 0

\d\d\d
blah45
false 0

// Test the caret metacharacter
^abc
abcdef
true abc 0

^abc
bcdabc
false 0

// Greedy ? metacharacter
a?b
aaaab
true ab 0

a?b
b
true b 0

a?b
aaaccc
false 0

.?b
aaaab
true ab 0

// Reluctant ? metacharacter
a??b
aaaab
true ab 0

a??b
b
true b 0

a??b
aaaccc
false 0

.??b
aaaab
true ab 0

// Possessive ? metacharacter
a?+b
aaaab
true ab 0

a?+b
b
true b 0

a?+b
aaaccc
false 0

.?+b
aaaab
true ab 0

// Greedy + metacharacter
a+b
aaaab
true aaaab 0

a+b
b
false 0

a+b
aaaccc
false 0

.+b
aaaab
true aaaab 0

// Reluctant + metacharacter
a+?b
aaaab
true aaaab 0

a+?b
b
false 0

a+?b
aaaccc
false 0

.+?b
aaaab
true aaaab 0

// Possessive + metacharacter
a++b
aaaab
true aaaab 0

a++b
b
false 0

a++b
aaaccc
false 0

.++b
aaaab
false 0

// Greedy Repetition
a{2,3}
a
false 0

a{2,3}
aa
true aa 0

a{2,3}
aaa
true aaa 0

a{2,3}
aaaa
true aaa 0

a{3,}
zzzaaaazzz
true aaaa 0

a{3,}
zzzaazzz
false 0

// Reluctant Repetition
a{2,3}?
a
false 0

a{2,3}?
aa
true aa 0

a{2,3}?
aaa
true aa 0

a{2,3}?
aaaa
true aa 0

// Zero width Positive lookahead
abc(?=d)
zzzabcd
true abc 0

abc(?=d)
zzzabced
false 0

// Zero width Negative lookahead
abc(?!d)
zzabcd
false 0

abc(?!d)
zzabced
true abc 0

// Zero width Positive lookbehind
\w(?<=a)
###abc###
true a 0

\w(?<=a)
###ert###
false 0

// Zero width Negative lookbehind
(?<!a)\w
###abc###
true a 0

(?<!a)c
bc
true c 0

(?<!a)c
ac
false 0

// Nondeterministic group
(a+b)+
ababab
true ababab 1 ab

(a|b)+
ccccd
false 1

// Deterministic group
(ab)+
ababab
true ababab 1 ab

(ab)+
accccd
false 1

(ab)*
ababab
true ababab 1 ab

(ab)(cd*)
zzzabczzz
true abc 2 ab c

abc(d)*abc
abcdddddabc
true abcdddddabc 1 d

// Escaped metacharacter
\*
*
true * 0

\\
\
true \ 0

\\
\\\\
true \ 0

// Back references
(a*)bc\1
zzzaabcaazzz
true aabcaa 1 aa

(a*)bc\1
zzzaabcazzz
true abca 1 a

(gt*)(dde)*(yu)\1\3(vv)
zzzgttddeddeyugttyuvvzzz
true gttddeddeyugttyuvv 4 gtt dde yu vv

// Greedy * metacharacter
a*b
aaaab
true aaaab 0

a*b
b
true b 0

a*b
aaaccc
false 0

.*b
aaaab
true aaaab 0

// Reluctant * metacharacter
a*?b
aaaab
true aaaab 0

a*?b
b
true b 0

a*?b
aaaccc
false 0

.*?b
aaaab
true aaaab 0

// Possessive * metacharacter
a*+b
aaaab
true aaaab 0

a*+b
b
true b 0

a*+b
aaaccc
false 0

.*+b
aaaab
false 0

// Case insensitivity
(?i)foobar
fOobAr
true fOobAr 0

f(?i)oobar
fOobAr
true fOobAr 0

foo(?i)bar
fOobAr
false 0

(?i)foo[bar]+
foObAr
true foObAr 0

(?i)foo[a-r]+
foObAr
true foObAr 0

// Disable metacharacters - test both length <= 3 and > 3
// so that the Boyer-Moore (BM) optimization is part of the test
\Q***\Eabc
***abc
true ***abc 0

bl\Q***\Eabc
bl***abc
true bl***abc 0

\Q***abc
***abc
true ***abc 0

blah\Q***\Eabc
blah***abc
true blah***abc 0

\Q***abc
***abc
true ***abc 0

\Q*ab
*ab
true *ab 0

blah\Q***abc
blah***abc
true blah***abc 0

bla\Q***abc
bla***abc
true bla***abc 0

// Escapes in char classes
[ab\Qdef\E]
d
true d 0

[ab\Q[\E]
[
true [ 0

[\Q]\E]
]
true ] 0

[\Q\\E]
\
true \ 0

[\Q(\E]
(
true ( 0

[\n-#]
!
true ! 0

[\n-#]
-
false 0

[\w-#]
!
false 0

[\w-#]
a
true a 0

[\w-#]
-
true - 0

[\w-#]
#
true # 0

[\043]+
blahblah#blech
true # 0

[\042-\044]+
blahblah#blech
true # 0

[\u1234-\u1236]
blahblah\u1235blech
true \u1235 0

[^\043]*
blahblah#blech
true blahblah 0

(|f)?+
foo
true  1