30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 |
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 |
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 |
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 |
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 |
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 |
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 |
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 |
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 |
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 |
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 |
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 |
36 * @library /lib/testlibrary |
36 * @library /lib/testlibrary |
37 * @build jdk.testlibrary.* |
37 * @build jdk.testlibrary.* |
38 * @run main RegExTest |
38 * @run main RegExTest |
39 * @key randomness |
39 * @key randomness |
40 */ |
40 */ |
41 |
41 |
42 import java.util.function.Function; |
42 import java.util.function.Function; |
43 import java.util.regex.*; |
43 import java.util.regex.*; |
44 import java.util.Random; |
44 import java.util.Random; |
|
45 import java.util.Scanner; |
45 import java.io.*; |
46 import java.io.*; |
|
47 import java.nio.file.*; |
46 import java.util.*; |
48 import java.util.*; |
47 import java.nio.CharBuffer; |
49 import java.nio.CharBuffer; |
48 import java.util.function.Predicate; |
50 import java.util.function.Predicate; |
49 import jdk.testlibrary.RandomFactory; |
51 import jdk.testlibrary.RandomFactory; |
50 |
52 |
149 namedGroupCaptureTest(); |
151 namedGroupCaptureTest(); |
150 nonBmpClassComplementTest(); |
152 nonBmpClassComplementTest(); |
151 unicodePropertiesTest(); |
153 unicodePropertiesTest(); |
152 unicodeHexNotationTest(); |
154 unicodeHexNotationTest(); |
153 unicodeClassesTest(); |
155 unicodeClassesTest(); |
|
156 unicodeCharacterNameTest(); |
154 horizontalAndVerticalWSTest(); |
157 horizontalAndVerticalWSTest(); |
155 linebreakTest(); |
158 linebreakTest(); |
156 branchTest(); |
159 branchTest(); |
157 groupCurlyNotFoundSuppTest(); |
160 groupCurlyNotFoundSuppTest(); |
158 groupCurlyBackoffTest(); |
161 groupCurlyBackoffTest(); |
159 patternAsPredicate(); |
162 patternAsPredicate(); |
160 invalidFlags(); |
163 invalidFlags(); |
|
164 grapheme(); |
161 |
165 |
162 if (failure) { |
166 if (failure) { |
163 throw new |
167 throw new |
164 RuntimeException("RegExTest failed, 1st failure: " + |
168 RuntimeException("RegExTest failed, 1st failure: " + |
165 firstFailure); |
169 firstFailure); |
4370 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) |
4374 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) |
4371 failCount++; |
4375 failCount++; |
4372 report("unicodePredefinedClasses"); |
4376 report("unicodePredefinedClasses"); |
4373 } |
4377 } |
4374 |
4378 |
|
4379 private static void unicodeCharacterNameTest() throws Exception { |
|
4380 |
|
4381 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { |
|
4382 if (!Character.isValidCodePoint(cp) || |
|
4383 Character.getType(cp) == Character.UNASSIGNED) |
|
4384 continue; |
|
4385 String str = new String(Character.toChars(cp)); |
|
4386 // single |
|
4387 String p = "\\N{" + Character.getName(cp) + "}"; |
|
4388 if (!Pattern.compile(p).matcher(str).matches()) { |
|
4389 failCount++; |
|
4390 } |
|
4391 // class[c] |
|
4392 p = "[\\N{" + Character.getName(cp) + "}]"; |
|
4393 if (!Pattern.compile(p).matcher(str).matches()) { |
|
4394 failCount++; |
|
4395 } |
|
4396 } |
|
4397 |
|
4398 // range |
|
4399 for (int i = 0; i < 10; i++) { |
|
4400 int start = generator.nextInt(20); |
|
4401 int end = start + generator.nextInt(200); |
|
4402 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; |
|
4403 String str; |
|
4404 for (int cp = start; cp < end; cp++) { |
|
4405 str = new String(Character.toChars(cp)); |
|
4406 if (!Pattern.compile(p).matcher(str).matches()) { |
|
4407 failCount++; |
|
4408 } |
|
4409 } |
|
4410 str = new String(Character.toChars(end + 10)); |
|
4411 if (Pattern.compile(p).matcher(str).matches()) { |
|
4412 failCount++; |
|
4413 } |
|
4414 } |
|
4415 |
|
4416 // slice |
|
4417 for (int i = 0; i < 10; i++) { |
|
4418 int n = generator.nextInt(256); |
|
4419 int[] buf = new int[n]; |
|
4420 StringBuffer sb = new StringBuffer(1024); |
|
4421 for (int j = 0; j < n; j++) { |
|
4422 int cp = generator.nextInt(1000); |
|
4423 if (!Character.isValidCodePoint(cp) || |
|
4424 Character.getType(cp) == Character.UNASSIGNED) |
|
4425 cp = 0x4e00; // just use 4e00 |
|
4426 sb.append("\\N{" + Character.getName(cp) + "}"); |
|
4427 buf[j] = cp; |
|
4428 } |
|
4429 String p = sb.toString(); |
|
4430 String str = new String(buf, 0, buf.length); |
|
4431 if (!Pattern.compile(p).matcher(str).matches()) { |
|
4432 failCount++; |
|
4433 } |
|
4434 } |
|
4435 report("unicodeCharacterName"); |
|
4436 } |
|
4437 |
4375 private static void horizontalAndVerticalWSTest() throws Exception { |
4438 private static void horizontalAndVerticalWSTest() throws Exception { |
4376 String hws = new String (new char[] { |
4439 String hws = new String (new char[] { |
4377 0x09, 0x20, 0xa0, 0x1680, 0x180e, |
4440 0x09, 0x20, 0xa0, 0x1680, 0x180e, |
4378 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, |
4441 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, |
4379 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, |
4442 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, |
4543 } |
4606 } |
4544 } |
4607 } |
4545 } |
4608 } |
4546 report("Invalid compile flags"); |
4609 report("Invalid compile flags"); |
4547 } |
4610 } |
|
4611 |
|
4612 private static void grapheme() throws Exception { |
|
4613 Files.lines(Paths.get(System.getProperty("test.src", "."), |
|
4614 "GraphemeBreakTest.txt")) |
|
4615 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) |
|
4616 .forEach( ln -> { |
|
4617 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); |
|
4618 // System.out.println(str); |
|
4619 String[] strs = ln.split("\u00f7|\u00d7"); |
|
4620 StringBuilder src = new StringBuilder(); |
|
4621 ArrayList<String> graphemes = new ArrayList<>(); |
|
4622 StringBuilder buf = new StringBuilder(); |
|
4623 int offBk = 0; |
|
4624 for (String str : strs) { |
|
4625 if (str.length() == 0) // first empty str |
|
4626 continue; |
|
4627 int cp = Integer.parseInt(str, 16); |
|
4628 src.appendCodePoint(cp); |
|
4629 buf.appendCodePoint(cp); |
|
4630 offBk += (str.length() + 1); |
|
4631 if (ln.charAt(offBk) == '\u00f7') { // DIV |
|
4632 graphemes.add(buf.toString()); |
|
4633 buf = new StringBuilder(); |
|
4634 } |
|
4635 } |
|
4636 Pattern p = Pattern.compile("\\X"); |
|
4637 Matcher m = p.matcher(src.toString()); |
|
4638 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); |
|
4639 for (String g : graphemes) { |
|
4640 // System.out.printf(" grapheme:=[%s]%n", g); |
|
4641 // (1) test \\X directly |
|
4642 if (!m.find() || !m.group().equals(g)) { |
|
4643 System.out.println("Failed \\X [" + ln + "] : " + g); |
|
4644 failCount++; |
|
4645 } |
|
4646 // (2) test \\b{g} + \\X via Scanner |
|
4647 boolean hasNext = s.hasNext(p); |
|
4648 // if (!s.hasNext() || !s.next().equals(next)) { |
|
4649 if (!s.hasNext(p) || !s.next(p).equals(g)) { |
|
4650 System.out.println("Failed b{g} [" + ln + "] : " + g); |
|
4651 failCount++; |
|
4652 } |
|
4653 } |
|
4654 }); |
|
4655 // some sanity checks |
|
4656 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || |
|
4657 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || |
|
4658 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) |
|
4659 failCount++; |
|
4660 // make sure "\b{n}" still works |
|
4661 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) |
|
4662 failCount++; |
|
4663 report("Unicode extended grapheme cluster"); |
|
4664 } |
4548 } |
4665 } |