jdk/test/java/util/regex/RegExTest.java
changeset 35783 2690535d72cc
parent 34436 33c20335507c
child 37882 e7f3cf12e739
equal deleted inserted replaced
35782:cce69c0777dc 35783:2690535d72cc
    30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
    30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
    31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
    31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
    32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
    32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
    33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
    33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
    34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
    34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
    35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854
    35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
    36  * @library /lib/testlibrary
    36  * @library /lib/testlibrary
    37  * @build jdk.testlibrary.*
    37  * @build jdk.testlibrary.*
    38  * @run main RegExTest
    38  * @run main RegExTest
    39  * @key randomness
    39  * @key randomness
    40  */
    40  */
    41 
    41 
    42 import java.util.function.Function;
    42 import java.util.function.Function;
    43 import java.util.regex.*;
    43 import java.util.regex.*;
    44 import java.util.Random;
    44 import java.util.Random;
       
    45 import java.util.Scanner;
    45 import java.io.*;
    46 import java.io.*;
       
    47 import java.nio.file.*;
    46 import java.util.*;
    48 import java.util.*;
    47 import java.nio.CharBuffer;
    49 import java.nio.CharBuffer;
    48 import java.util.function.Predicate;
    50 import java.util.function.Predicate;
    49 import jdk.testlibrary.RandomFactory;
    51 import jdk.testlibrary.RandomFactory;
    50 
    52 
   149         namedGroupCaptureTest();
   151         namedGroupCaptureTest();
   150         nonBmpClassComplementTest();
   152         nonBmpClassComplementTest();
   151         unicodePropertiesTest();
   153         unicodePropertiesTest();
   152         unicodeHexNotationTest();
   154         unicodeHexNotationTest();
   153         unicodeClassesTest();
   155         unicodeClassesTest();
       
   156         unicodeCharacterNameTest();
   154         horizontalAndVerticalWSTest();
   157         horizontalAndVerticalWSTest();
   155         linebreakTest();
   158         linebreakTest();
   156         branchTest();
   159         branchTest();
   157         groupCurlyNotFoundSuppTest();
   160         groupCurlyNotFoundSuppTest();
   158         groupCurlyBackoffTest();
   161         groupCurlyBackoffTest();
   159         patternAsPredicate();
   162         patternAsPredicate();
   160         invalidFlags();
   163         invalidFlags();
       
   164         grapheme();
   161 
   165 
   162         if (failure) {
   166         if (failure) {
   163             throw new
   167             throw new
   164                 RuntimeException("RegExTest failed, 1st failure: " +
   168                 RuntimeException("RegExTest failed, 1st failure: " +
   165                                  firstFailure);
   169                                  firstFailure);
  4370         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
  4374         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
  4371             failCount++;
  4375             failCount++;
  4372         report("unicodePredefinedClasses");
  4376         report("unicodePredefinedClasses");
  4373     }
  4377     }
  4374 
  4378 
       
  4379     private static void unicodeCharacterNameTest() throws Exception {
       
  4380 
       
  4381         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
       
  4382             if (!Character.isValidCodePoint(cp) ||
       
  4383                 Character.getType(cp) == Character.UNASSIGNED)
       
  4384                 continue;
       
  4385             String str = new String(Character.toChars(cp));
       
  4386             // single
       
  4387             String p = "\\N{" + Character.getName(cp) + "}";
       
  4388             if (!Pattern.compile(p).matcher(str).matches()) {
       
  4389                 failCount++;
       
  4390             }
       
  4391             // class[c]
       
  4392             p = "[\\N{" + Character.getName(cp) + "}]";
       
  4393             if (!Pattern.compile(p).matcher(str).matches()) {
       
  4394                 failCount++;
       
  4395             }
       
  4396         }
       
  4397 
       
  4398         // range
       
  4399         for (int i = 0; i < 10; i++) {
       
  4400             int start = generator.nextInt(20);
       
  4401             int end = start + generator.nextInt(200);
       
  4402             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
       
  4403             String str;
       
  4404             for (int cp = start; cp < end; cp++) {
       
  4405                 str = new String(Character.toChars(cp));
       
  4406                 if (!Pattern.compile(p).matcher(str).matches()) {
       
  4407                     failCount++;
       
  4408                 }
       
  4409             }
       
  4410             str = new String(Character.toChars(end + 10));
       
  4411             if (Pattern.compile(p).matcher(str).matches()) {
       
  4412                 failCount++;
       
  4413             }
       
  4414         }
       
  4415 
       
  4416         // slice
       
  4417         for (int i = 0; i < 10; i++) {
       
  4418             int n = generator.nextInt(256);
       
  4419             int[] buf = new int[n];
       
  4420             StringBuffer sb = new StringBuffer(1024);
       
  4421             for (int j = 0; j < n; j++) {
       
  4422                 int cp = generator.nextInt(1000);
       
  4423                 if (!Character.isValidCodePoint(cp) ||
       
  4424                     Character.getType(cp) == Character.UNASSIGNED)
       
  4425                     cp = 0x4e00;    // just use 4e00
       
  4426                 sb.append("\\N{" + Character.getName(cp) + "}");
       
  4427                 buf[j] = cp;
       
  4428             }
       
  4429             String p = sb.toString();
       
  4430             String str = new String(buf, 0, buf.length);
       
  4431             if (!Pattern.compile(p).matcher(str).matches()) {
       
  4432                 failCount++;
       
  4433             }
       
  4434         }
       
  4435         report("unicodeCharacterName");
       
  4436     }
       
  4437 
  4375     private static void horizontalAndVerticalWSTest() throws Exception {
  4438     private static void horizontalAndVerticalWSTest() throws Exception {
  4376         String hws = new String (new char[] {
  4439         String hws = new String (new char[] {
  4377                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
  4440                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
  4378                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
  4441                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
  4379                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
  4442                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
  4543                 }
  4606                 }
  4544             }
  4607             }
  4545         }
  4608         }
  4546         report("Invalid compile flags");
  4609         report("Invalid compile flags");
  4547     }
  4610     }
       
  4611 
       
  4612     private static void grapheme() throws Exception {
       
  4613         Files.lines(Paths.get(System.getProperty("test.src", "."),
       
  4614                               "GraphemeBreakTest.txt"))
       
  4615             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
       
  4616             .forEach( ln -> {
       
  4617                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
       
  4618                     // System.out.println(str);
       
  4619                     String[] strs = ln.split("\u00f7|\u00d7");
       
  4620                     StringBuilder src = new StringBuilder();
       
  4621                     ArrayList<String> graphemes = new ArrayList<>();
       
  4622                     StringBuilder buf = new StringBuilder();
       
  4623                     int offBk = 0;
       
  4624                     for (String str : strs) {
       
  4625                         if (str.length() == 0)  // first empty str
       
  4626                             continue;
       
  4627                         int cp = Integer.parseInt(str, 16);
       
  4628                         src.appendCodePoint(cp);
       
  4629                         buf.appendCodePoint(cp);
       
  4630                         offBk += (str.length() + 1);
       
  4631                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
       
  4632                             graphemes.add(buf.toString());
       
  4633                             buf = new StringBuilder();
       
  4634                         }
       
  4635                     }
       
  4636                     Pattern p = Pattern.compile("\\X");
       
  4637                     Matcher m = p.matcher(src.toString());
       
  4638                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
       
  4639                     for (String g : graphemes) {
       
  4640                         // System.out.printf("     grapheme:=[%s]%n", g);
       
  4641                         // (1) test \\X directly
       
  4642                         if (!m.find() || !m.group().equals(g)) {
       
  4643                             System.out.println("Failed \\X [" + ln + "] : " + g);
       
  4644                             failCount++;
       
  4645                         }
       
  4646                         // (2) test \\b{g} + \\X  via Scanner
       
  4647                         boolean hasNext = s.hasNext(p);
       
  4648                         // if (!s.hasNext() || !s.next().equals(next)) {
       
  4649                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
       
  4650                             System.out.println("Failed b{g} [" + ln + "] : " + g);
       
  4651                             failCount++;
       
  4652                         }
       
  4653                     }
       
  4654                 });
       
  4655         // some sanity checks
       
  4656         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
       
  4657             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
       
  4658             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
       
  4659             failCount++;
       
  4660         // make sure "\b{n}" still works
       
  4661         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
       
  4662             failCount++;
       
  4663         report("Unicode extended grapheme cluster");
       
  4664     }
  4548 }
  4665 }