jdk/test/java/util/regex/RegExTest.java
changeset 9536 648c9add2a74
parent 9035 1255eb81cc2f
child 11287 3db172a5433c
equal deleted inserted replaced
9535:d930011fd275 9536:648c9add2a74
    30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
    30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
    31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
    31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
    32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
    32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
    33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
    33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
    34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
    34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
    35  * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
    35  * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
    36  */
    36  */
    37 
    37 
    38 import java.util.regex.*;
    38 import java.util.regex.*;
    39 import java.util.Random;
    39 import java.util.Random;
    40 import java.io.*;
    40 import java.io.*;
   135         surrogatesInClassTest();
   135         surrogatesInClassTest();
   136         namedGroupCaptureTest();
   136         namedGroupCaptureTest();
   137         nonBmpClassComplementTest();
   137         nonBmpClassComplementTest();
   138         unicodePropertiesTest();
   138         unicodePropertiesTest();
   139         unicodeHexNotationTest();
   139         unicodeHexNotationTest();
       
   140         unicodeClassesTest();
   140         if (failure)
   141         if (failure)
   141             throw new RuntimeException("Failure in the RE handling.");
   142             throw new RuntimeException("Failure in the RE handling.");
   142         else
   143         else
   143             System.err.println("OKAY: All tests passed.");
   144             System.err.println("OKAY: All tests passed.");
   144     }
   145     }
  3654                  failCount++;
  3655                  failCount++;
  3655              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
  3656              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
  3656                  failCount++;
  3657                  failCount++;
  3657          }
  3658          }
  3658          report("unicodeHexNotation");
  3659          report("unicodeHexNotation");
  3659      }
  3660     }
       
  3661 
       
  3662     private static void unicodeClassesTest() throws Exception {
       
  3663 
       
  3664         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
       
  3665         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
       
  3666         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
       
  3667         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
       
  3668         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
       
  3669         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
       
  3670         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
       
  3671         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
       
  3672         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
       
  3673         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
       
  3674         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
       
  3675         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
       
  3676         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
       
  3677         Matcher bound  = Pattern.compile("\\b").matcher("");
       
  3678         Matcher word   = Pattern.compile("\\w++").matcher("");
       
  3679         // UNICODE_CHARACTER_CLASS
       
  3680         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3681         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3682         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3683         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3684         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3685         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3686         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3687         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3688         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3689         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3690         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3691         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3692         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3693         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3694         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3695         // embedded flag (?U)
       
  3696         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3697         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3698         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3699 
       
  3700         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
       
  3701         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3702         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
       
  3703         // properties
       
  3704         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
       
  3705         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
       
  3706         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
       
  3707         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
       
  3708         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
       
  3709         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
       
  3710         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
       
  3711         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
       
  3712         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
       
  3713         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
       
  3714 
       
  3715         // javaMethod
       
  3716         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
       
  3717         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
       
  3718         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
       
  3719         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
       
  3720 
       
  3721         for (int cp = 1; cp < 0x30000; cp++) {
       
  3722             String str = new String(Character.toChars(cp));
       
  3723             int type = Character.getType(cp);
       
  3724             if (// lower
       
  3725                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
       
  3726                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
       
  3727                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
       
  3728                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
       
  3729                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
       
  3730                 // upper
       
  3731                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
       
  3732                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
       
  3733                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
       
  3734                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
       
  3735                 // alpha
       
  3736                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
       
  3737                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
       
  3738                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
       
  3739                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
       
  3740                 // digit
       
  3741                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
       
  3742                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
       
  3743                 // alnum
       
  3744                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
       
  3745                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
       
  3746                 // punct
       
  3747                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
       
  3748                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
       
  3749                 // graph
       
  3750                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
       
  3751                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
       
  3752                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
       
  3753                 // blank
       
  3754                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
       
  3755                                           != blank.reset(str).matches()  ||
       
  3756                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
       
  3757                 // print
       
  3758                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
       
  3759                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
       
  3760                 // cntrl
       
  3761                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
       
  3762                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
       
  3763                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
       
  3764                 // hexdigit
       
  3765                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
       
  3766                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
       
  3767                 // space
       
  3768                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
       
  3769                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
       
  3770                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
       
  3771                 // word
       
  3772                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
       
  3773                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
       
  3774                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
       
  3775                 // bwordb
       
  3776                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
       
  3777                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
       
  3778                 // properties
       
  3779                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
       
  3780                 Character.isLetter(cp)    != letterP.reset(str).matches()||
       
  3781                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
       
  3782                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
       
  3783                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
       
  3784                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
       
  3785                 failCount++;
       
  3786         }
       
  3787 
       
  3788         // bounds/word align
       
  3789         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
       
  3790         if (!bwbU.reset("\u0180sherman\u0400").matches())
       
  3791             failCount++;
       
  3792         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
       
  3793         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
       
  3794             failCount++;
       
  3795         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
       
  3796         if (!bwbU.reset("\u0724\u0739\u0724").matches())
       
  3797             failCount++;
       
  3798         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
       
  3799             failCount++;
       
  3800         report("unicodePredefinedClasses");
       
  3801     }
  3660 }
  3802 }