src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/xpath/regex/Token.java
changeset 50186 5ec7380f671d
parent 47216 71c04702a3d5
equal deleted inserted replaced
50185:97be261ebcdd 50186:5ec7380f671d
     1 /*
     1 /*
     2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
     3  */
     3  */
     4 /*
     4 /*
     5  * Licensed to the Apache Software Foundation (ASF) under one or more
     5  * Licensed to the Apache Software Foundation (ASF) under one or more
     6  * contributor license agreements.  See the NOTICE file distributed with
     6  * contributor license agreements.  See the NOTICE file distributed with
     7  * this work for additional information regarding copyright ownership.
     7  * this work for additional information regarding copyright ownership.
    35 
    35 
    36 /**
    36 /**
    37  * This class represents a node in the parse tree.
    37  * This class represents a node in the parse tree.
    38  *
    38  *
    39  * @xerces.internal
    39  * @xerces.internal
    40  *
    40  * @LastModified: May 2018
    41  */
    41  */
    42 class Token implements java.io.Serializable {
    42 class Token implements java.io.Serializable {
    43 
    43 
    44     private static final long serialVersionUID = 8484976002585487481L;
    44     private static final long serialVersionUID = 8484976002585487481L;
    45 
    45 
   590     boolean match(int ch) {
   590     boolean match(int ch) {
   591         throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
   591         throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
   592     }
   592     }
   593 
   593 
   594     // ------------------------------------------------------
   594     // ------------------------------------------------------
   595     private final static Map<String, Token> categories = new HashMap<>();
   595     private static volatile Map<String, Token> categories = null;
   596     private final static Map<String, Token> categories2 = new HashMap<>();
   596     private static volatile Map<String, Token> categories2 = null;
       
   597     private static final Object lock = new Object();
   597     private static final String[] categoryNames = {
   598     private static final String[] categoryNames = {
   598         "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
   599         "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
   599         "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
   600         "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
   600         "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
   601         "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
   601         "Pi", "Pf",  // 29, 30
   602         "Pi", "Pf",  // 29, 30
   740         0xE0000, 0xE007F
   741         0xE0000, 0xE007F
   741     };
   742     };
   742     private static final int NONBMP_BLOCK_START = 84;
   743     private static final int NONBMP_BLOCK_START = 84;
   743 
   744 
   744     static protected RangeToken getRange(String name, boolean positive) {
   745     static protected RangeToken getRange(String name, boolean positive) {
   745         if (Token.categories.size() == 0) {
   746         // use local variable for better performance
   746             synchronized (Token.categories) {
   747         Map<String, Token> localCat = Token.categories;
   747                 Token[] ranges = new Token[Token.categoryNames.length];
   748         if (localCat == null) {
   748                 for (int i = 0;  i < ranges.length;  i ++) {
   749             synchronized (lock) {
   749                     ranges[i] = Token.createRange();
   750                 localCat = Token.categories;
   750                 }
   751                 if (localCat == null) {
   751                 int type;
   752                     Map<String, Token> tmpCat = new HashMap<>();
   752                 for (int i = 0;  i < 0x10000;  i ++) {
   753                     Map<String, Token> tmpCat2 = new HashMap<>();
   753                     type = Character.getType((char)i);
   754 
   754                     if (type == Character.START_PUNCTUATION ||
   755                     Token[] ranges = new Token[Token.categoryNames.length];
   755                         type == Character.END_PUNCTUATION) {
   756                     for (int i = 0;  i < ranges.length;  i ++) {
   756                         //build table of Pi values
   757                         ranges[i] = Token.createRange();
   757                         if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
   758                     }
   758                             i == 0x201F || i == 0x2039) {
   759                     int type;
   759                             type = CHAR_INIT_QUOTE;
   760                     for (int i = 0;  i < 0x10000;  i ++) {
       
   761                         type = Character.getType((char)i);
       
   762                         if (type == Character.START_PUNCTUATION ||
       
   763                             type == Character.END_PUNCTUATION) {
       
   764                             //build table of Pi values
       
   765                             if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
       
   766                                 i == 0x201F || i == 0x2039) {
       
   767                                 type = CHAR_INIT_QUOTE;
       
   768                             }
       
   769                             //build table of Pf values
       
   770                             if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
       
   771                                 type = CHAR_FINAL_QUOTE;
       
   772                             }
   760                         }
   773                         }
   761                         //build table of Pf values
   774                         ranges[type].addRange(i, i);
   762                         if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
   775                         switch (type) {
   763                             type = CHAR_FINAL_QUOTE;
   776                           case Character.UPPERCASE_LETTER:
       
   777                           case Character.LOWERCASE_LETTER:
       
   778                           case Character.TITLECASE_LETTER:
       
   779                           case Character.MODIFIER_LETTER:
       
   780                           case Character.OTHER_LETTER:
       
   781                             type = CHAR_LETTER;
       
   782                             break;
       
   783                           case Character.NON_SPACING_MARK:
       
   784                           case Character.COMBINING_SPACING_MARK:
       
   785                           case Character.ENCLOSING_MARK:
       
   786                             type = CHAR_MARK;
       
   787                             break;
       
   788                           case Character.DECIMAL_DIGIT_NUMBER:
       
   789                           case Character.LETTER_NUMBER:
       
   790                           case Character.OTHER_NUMBER:
       
   791                             type = CHAR_NUMBER;
       
   792                             break;
       
   793                           case Character.SPACE_SEPARATOR:
       
   794                           case Character.LINE_SEPARATOR:
       
   795                           case Character.PARAGRAPH_SEPARATOR:
       
   796                             type = CHAR_SEPARATOR;
       
   797                             break;
       
   798                           case Character.CONTROL:
       
   799                           case Character.FORMAT:
       
   800                           case Character.SURROGATE:
       
   801                           case Character.PRIVATE_USE:
       
   802                           case Character.UNASSIGNED:
       
   803                             type = CHAR_OTHER;
       
   804                             break;
       
   805                           case Character.CONNECTOR_PUNCTUATION:
       
   806                           case Character.DASH_PUNCTUATION:
       
   807                           case Character.START_PUNCTUATION:
       
   808                           case Character.END_PUNCTUATION:
       
   809                           case CHAR_INIT_QUOTE:
       
   810                           case CHAR_FINAL_QUOTE:
       
   811                           case Character.OTHER_PUNCTUATION:
       
   812                             type = CHAR_PUNCTUATION;
       
   813                             break;
       
   814                           case Character.MATH_SYMBOL:
       
   815                           case Character.CURRENCY_SYMBOL:
       
   816                           case Character.MODIFIER_SYMBOL:
       
   817                           case Character.OTHER_SYMBOL:
       
   818                             type = CHAR_SYMBOL;
       
   819                             break;
       
   820                           default:
       
   821                             throw new RuntimeException("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type);
       
   822                         }
       
   823                         ranges[type].addRange(i, i);
       
   824                     } // for all characters
       
   825                     ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
       
   826 
       
   827                     for (int i = 0;  i < ranges.length;  i ++) {
       
   828                         if (Token.categoryNames[i] != null) {
       
   829                             if (i == Character.UNASSIGNED) { // Unassigned
       
   830                                 ranges[i].addRange(0x10000, Token.UTF16_MAX);
       
   831                             }
       
   832                             tmpCat.put(Token.categoryNames[i], ranges[i]);
       
   833                             tmpCat2.put(Token.categoryNames[i],
       
   834                                                   Token.complementRanges(ranges[i]));
   764                         }
   835                         }
   765                     }
   836                     }
   766                     ranges[type].addRange(i, i);
   837                     //REVISIT: do we really need to support block names as in Unicode 3.1
   767                     switch (type) {
   838                     //         or we can just create all the names in IsBLOCKNAME format (XML Schema REC)?
   768                       case Character.UPPERCASE_LETTER:
   839                     //
   769                       case Character.LOWERCASE_LETTER:
   840                     StringBuilder buffer = new StringBuilder(50);
   770                       case Character.TITLECASE_LETTER:
   841                     for (int i = 0;  i < Token.blockNames.length;  i ++) {
   771                       case Character.MODIFIER_LETTER:
   842                         Token r1 = Token.createRange();
   772                       case Character.OTHER_LETTER:
   843                         int location;
   773                         type = CHAR_LETTER;
   844                         if (i < NONBMP_BLOCK_START) {
   774                         break;
   845                             location = i*2;
   775                       case Character.NON_SPACING_MARK:
   846                             int rstart = Token.blockRanges.charAt(location);
   776                       case Character.COMBINING_SPACING_MARK:
   847                             int rend = Token.blockRanges.charAt(location+1);
   777                       case Character.ENCLOSING_MARK:
   848                             //DEBUGGING
   778                         type = CHAR_MARK;
   849                             //System.out.println(n+" " +Integer.toHexString(rstart)
   779                         break;
   850                             //                     +"-"+ Integer.toHexString(rend));
   780                       case Character.DECIMAL_DIGIT_NUMBER:
   851                             r1.addRange(rstart, rend);
   781                       case Character.LETTER_NUMBER:
   852                         } else {
   782                       case Character.OTHER_NUMBER:
   853                             location = (i - NONBMP_BLOCK_START) * 2;
   783                         type = CHAR_NUMBER;
   854                             r1.addRange(Token.nonBMPBlockRanges[location],
   784                         break;
   855                                         Token.nonBMPBlockRanges[location + 1]);
   785                       case Character.SPACE_SEPARATOR:
   856                         }
   786                       case Character.LINE_SEPARATOR:
   857                         String n = Token.blockNames[i];
   787                       case Character.PARAGRAPH_SEPARATOR:
   858                         if (n.equals("Specials"))
   788                         type = CHAR_SEPARATOR;
   859                             r1.addRange(0xfff0, 0xfffd);
   789                         break;
   860                         if (n.equals("Private Use")) {
   790                       case Character.CONTROL:
   861                             r1.addRange(0xF0000,0xFFFFD);
   791                       case Character.FORMAT:
   862                             r1.addRange(0x100000,0x10FFFD);
   792                       case Character.SURROGATE:
   863                         }
   793                       case Character.PRIVATE_USE:
   864                         tmpCat.put(n, r1);
   794                       case Character.UNASSIGNED:
   865                         tmpCat2.put(n, Token.complementRanges(r1));
   795                         type = CHAR_OTHER;
   866                         buffer.setLength(0);
   796                         break;
   867                         buffer.append("Is");
   797                       case Character.CONNECTOR_PUNCTUATION:
   868                         if (n.indexOf(' ') >= 0) {
   798                       case Character.DASH_PUNCTUATION:
   869                             for (int ci = 0;  ci < n.length();  ci ++)
   799                       case Character.START_PUNCTUATION:
   870                                 if (n.charAt(ci) != ' ')  buffer.append(n.charAt(ci));
   800                       case Character.END_PUNCTUATION:
   871                         }
   801                       case CHAR_INIT_QUOTE:
   872                         else {
   802                       case CHAR_FINAL_QUOTE:
   873                             buffer.append(n);
   803                       case Character.OTHER_PUNCTUATION:
   874                         }
   804                         type = CHAR_PUNCTUATION;
   875                         Token.setAlias(tmpCat, tmpCat2, buffer.toString(), n, true);
   805                         break;
       
   806                       case Character.MATH_SYMBOL:
       
   807                       case Character.CURRENCY_SYMBOL:
       
   808                       case Character.MODIFIER_SYMBOL:
       
   809                       case Character.OTHER_SYMBOL:
       
   810                         type = CHAR_SYMBOL;
       
   811                         break;
       
   812                       default:
       
   813                         throw new RuntimeException("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type);
       
   814                     }
   876                     }
   815                     ranges[type].addRange(i, i);
   877 
   816                 } // for all characters
   878                     // TR#18 1.2
   817                 ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
   879                     Token.setAlias(tmpCat, tmpCat2, "ASSIGNED", "Cn", false);
   818 
   880                     Token.setAlias(tmpCat, tmpCat2, "UNASSIGNED", "Cn", true);
   819                 for (int i = 0;  i < ranges.length;  i ++) {
   881                     Token all = Token.createRange();
   820                     if (Token.categoryNames[i] != null) {
   882                     all.addRange(0, Token.UTF16_MAX);
   821                         if (i == Character.UNASSIGNED) { // Unassigned
   883                     tmpCat.put("ALL", all);
   822                             ranges[i].addRange(0x10000, Token.UTF16_MAX);
   884                     tmpCat2.put("ALL", Token.complementRanges(all));
   823                         }
   885                     Token.registerNonXS("ASSIGNED");
   824                         Token.categories.put(Token.categoryNames[i], ranges[i]);
   886                     Token.registerNonXS("UNASSIGNED");
   825                         Token.categories2.put(Token.categoryNames[i],
   887                     Token.registerNonXS("ALL");
   826                                               Token.complementRanges(ranges[i]));
   888 
   827                     }
   889                     Token isalpha = Token.createRange();
   828                 }
   890                     isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); // Lu
   829                 //REVISIT: do we really need to support block names as in Unicode 3.1
   891                     isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); // Ll
   830                 //         or we can just create all the names in IsBLOCKNAME format (XML Schema REC)?
   892                     isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); // Lo
   831                 //
   893                     tmpCat.put("IsAlpha", isalpha);
   832                 StringBuilder buffer = new StringBuilder(50);
   894                     tmpCat2.put("IsAlpha", Token.complementRanges(isalpha));
   833                 for (int i = 0;  i < Token.blockNames.length;  i ++) {
   895                     Token.registerNonXS("IsAlpha");
   834                     Token r1 = Token.createRange();
   896 
   835                     int location;
   897                     Token isalnum = Token.createRange();
   836                     if (i < NONBMP_BLOCK_START) {
   898                     isalnum.mergeRanges(isalpha);   // Lu Ll Lo
   837                         location = i*2;
   899                     isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); // Nd
   838                         int rstart = Token.blockRanges.charAt(location);
   900                     tmpCat.put("IsAlnum", isalnum);
   839                         int rend = Token.blockRanges.charAt(location+1);
   901                     tmpCat2.put("IsAlnum", Token.complementRanges(isalnum));
   840                         //DEBUGGING
   902                     Token.registerNonXS("IsAlnum");
   841                         //System.out.println(n+" " +Integer.toHexString(rstart)
   903 
   842                         //                     +"-"+ Integer.toHexString(rend));
   904                     Token isspace = Token.createRange();
   843                         r1.addRange(rstart, rend);
   905                     isspace.mergeRanges(Token.token_spaces);
   844                     } else {
   906                     isspace.mergeRanges(ranges[CHAR_SEPARATOR]); // Z
   845                         location = (i - NONBMP_BLOCK_START) * 2;
   907                     tmpCat.put("IsSpace", isspace);
   846                         r1.addRange(Token.nonBMPBlockRanges[location],
   908                     tmpCat2.put("IsSpace", Token.complementRanges(isspace));
   847                                     Token.nonBMPBlockRanges[location + 1]);
   909                     Token.registerNonXS("IsSpace");
   848                     }
   910 
   849                     String n = Token.blockNames[i];
   911                     Token isword = Token.createRange();
   850                     if (n.equals("Specials"))
   912                     isword.mergeRanges(isalnum);     // Lu Ll Lo Nd
   851                         r1.addRange(0xfff0, 0xfffd);
   913                     isword.addRange('_', '_');
   852                     if (n.equals("Private Use")) {
   914                     tmpCat.put("IsWord", isword);
   853                         r1.addRange(0xF0000,0xFFFFD);
   915                     tmpCat2.put("IsWord", Token.complementRanges(isword));
   854                         r1.addRange(0x100000,0x10FFFD);
   916                     Token.registerNonXS("IsWord");
   855                     }
   917 
   856                     Token.categories.put(n, r1);
   918                     Token isascii = Token.createRange();
   857                     Token.categories2.put(n, Token.complementRanges(r1));
   919                     isascii.addRange(0, 127);
   858                     buffer.setLength(0);
   920                     tmpCat.put("IsASCII", isascii);
   859                     buffer.append("Is");
   921                     tmpCat2.put("IsASCII", Token.complementRanges(isascii));
   860                     if (n.indexOf(' ') >= 0) {
   922                     Token.registerNonXS("IsASCII");
   861                         for (int ci = 0;  ci < n.length();  ci ++)
   923 
   862                             if (n.charAt(ci) != ' ')  buffer.append(n.charAt(ci));
   924                     Token isnotgraph = Token.createRange();
   863                     }
   925                     isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
   864                     else {
   926                     isnotgraph.addRange(' ', ' ');
   865                         buffer.append(n);
   927                     tmpCat.put("IsGraph", Token.complementRanges(isnotgraph));
   866                     }
   928                     tmpCat2.put("IsGraph", isnotgraph);
   867                     Token.setAlias(buffer.toString(), n, true);
   929                     Token.registerNonXS("IsGraph");
   868                 }
   930 
   869 
   931                     Token isxdigit = Token.createRange();
   870                 // TR#18 1.2
   932                     isxdigit.addRange('0', '9');
   871                 Token.setAlias("ASSIGNED", "Cn", false);
   933                     isxdigit.addRange('A', 'F');
   872                 Token.setAlias("UNASSIGNED", "Cn", true);
   934                     isxdigit.addRange('a', 'f');
   873                 Token all = Token.createRange();
   935                     tmpCat.put("IsXDigit", Token.complementRanges(isxdigit));
   874                 all.addRange(0, Token.UTF16_MAX);
   936                     tmpCat2.put("IsXDigit", isxdigit);
   875                 Token.categories.put("ALL", all);
   937                     Token.registerNonXS("IsXDigit");
   876                 Token.categories2.put("ALL", Token.complementRanges(all));
   938 
   877                 Token.registerNonXS("ASSIGNED");
   939                     Token.setAlias(tmpCat, tmpCat2, "IsDigit", "Nd", true);
   878                 Token.registerNonXS("UNASSIGNED");
   940                     Token.setAlias(tmpCat, tmpCat2, "IsUpper", "Lu", true);
   879                 Token.registerNonXS("ALL");
   941                     Token.setAlias(tmpCat, tmpCat2, "IsLower", "Ll", true);
   880 
   942                     Token.setAlias(tmpCat, tmpCat2, "IsCntrl", "C", true);
   881                 Token isalpha = Token.createRange();
   943                     Token.setAlias(tmpCat, tmpCat2, "IsPrint", "C", false);
   882                 isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); // Lu
   944                     Token.setAlias(tmpCat, tmpCat2, "IsPunct", "P", true);
   883                 isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); // Ll
   945                     Token.registerNonXS("IsDigit");
   884                 isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); // Lo
   946                     Token.registerNonXS("IsUpper");
   885                 Token.categories.put("IsAlpha", isalpha);
   947                     Token.registerNonXS("IsLower");
   886                 Token.categories2.put("IsAlpha", Token.complementRanges(isalpha));
   948                     Token.registerNonXS("IsCntrl");
   887                 Token.registerNonXS("IsAlpha");
   949                     Token.registerNonXS("IsPrint");
   888 
   950                     Token.registerNonXS("IsPunct");
   889                 Token isalnum = Token.createRange();
   951 
   890                 isalnum.mergeRanges(isalpha);   // Lu Ll Lo
   952                     Token.setAlias(tmpCat, tmpCat2, "alpha", "IsAlpha", true);
   891                 isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); // Nd
   953                     Token.setAlias(tmpCat, tmpCat2, "alnum", "IsAlnum", true);
   892                 Token.categories.put("IsAlnum", isalnum);
   954                     Token.setAlias(tmpCat, tmpCat2, "ascii", "IsASCII", true);
   893                 Token.categories2.put("IsAlnum", Token.complementRanges(isalnum));
   955                     Token.setAlias(tmpCat, tmpCat2, "cntrl", "IsCntrl", true);
   894                 Token.registerNonXS("IsAlnum");
   956                     Token.setAlias(tmpCat, tmpCat2, "digit", "IsDigit", true);
   895 
   957                     Token.setAlias(tmpCat, tmpCat2, "graph", "IsGraph", true);
   896                 Token isspace = Token.createRange();
   958                     Token.setAlias(tmpCat, tmpCat2, "lower", "IsLower", true);
   897                 isspace.mergeRanges(Token.token_spaces);
   959                     Token.setAlias(tmpCat, tmpCat2, "print", "IsPrint", true);
   898                 isspace.mergeRanges(ranges[CHAR_SEPARATOR]); // Z
   960                     Token.setAlias(tmpCat, tmpCat2, "punct", "IsPunct", true);
   899                 Token.categories.put("IsSpace", isspace);
   961                     Token.setAlias(tmpCat, tmpCat2, "space", "IsSpace", true);
   900                 Token.categories2.put("IsSpace", Token.complementRanges(isspace));
   962                     Token.setAlias(tmpCat, tmpCat2, "upper", "IsUpper", true);
   901                 Token.registerNonXS("IsSpace");
   963                     Token.setAlias(tmpCat, tmpCat2, "word", "IsWord", true); // Perl extension
   902 
   964                     Token.setAlias(tmpCat, tmpCat2, "xdigit", "IsXDigit", true);
   903                 Token isword = Token.createRange();
   965                     Token.registerNonXS("alpha");
   904                 isword.mergeRanges(isalnum);     // Lu Ll Lo Nd
   966                     Token.registerNonXS("alnum");
   905                 isword.addRange('_', '_');
   967                     Token.registerNonXS("ascii");
   906                 Token.categories.put("IsWord", isword);
   968                     Token.registerNonXS("cntrl");
   907                 Token.categories2.put("IsWord", Token.complementRanges(isword));
   969                     Token.registerNonXS("digit");
   908                 Token.registerNonXS("IsWord");
   970                     Token.registerNonXS("graph");
   909 
   971                     Token.registerNonXS("lower");
   910                 Token isascii = Token.createRange();
   972                     Token.registerNonXS("print");
   911                 isascii.addRange(0, 127);
   973                     Token.registerNonXS("punct");
   912                 Token.categories.put("IsASCII", isascii);
   974                     Token.registerNonXS("space");
   913                 Token.categories2.put("IsASCII", Token.complementRanges(isascii));
   975                     Token.registerNonXS("upper");
   914                 Token.registerNonXS("IsASCII");
   976                     Token.registerNonXS("word");
   915 
   977                     Token.registerNonXS("xdigit");
   916                 Token isnotgraph = Token.createRange();
   978                     Token.categories = localCat = Collections.unmodifiableMap(tmpCat);
   917                 isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
   979                     Token.categories2 = Collections.unmodifiableMap(tmpCat2);
   918                 isnotgraph.addRange(' ', ' ');
   980                 } // localCat == null
   919                 Token.categories.put("IsGraph", Token.complementRanges(isnotgraph));
       
   920                 Token.categories2.put("IsGraph", isnotgraph);
       
   921                 Token.registerNonXS("IsGraph");
       
   922 
       
   923                 Token isxdigit = Token.createRange();
       
   924                 isxdigit.addRange('0', '9');
       
   925                 isxdigit.addRange('A', 'F');
       
   926                 isxdigit.addRange('a', 'f');
       
   927                 Token.categories.put("IsXDigit", Token.complementRanges(isxdigit));
       
   928                 Token.categories2.put("IsXDigit", isxdigit);
       
   929                 Token.registerNonXS("IsXDigit");
       
   930 
       
   931                 Token.setAlias("IsDigit", "Nd", true);
       
   932                 Token.setAlias("IsUpper", "Lu", true);
       
   933                 Token.setAlias("IsLower", "Ll", true);
       
   934                 Token.setAlias("IsCntrl", "C", true);
       
   935                 Token.setAlias("IsPrint", "C", false);
       
   936                 Token.setAlias("IsPunct", "P", true);
       
   937                 Token.registerNonXS("IsDigit");
       
   938                 Token.registerNonXS("IsUpper");
       
   939                 Token.registerNonXS("IsLower");
       
   940                 Token.registerNonXS("IsCntrl");
       
   941                 Token.registerNonXS("IsPrint");
       
   942                 Token.registerNonXS("IsPunct");
       
   943 
       
   944                 Token.setAlias("alpha", "IsAlpha", true);
       
   945                 Token.setAlias("alnum", "IsAlnum", true);
       
   946                 Token.setAlias("ascii", "IsASCII", true);
       
   947                 Token.setAlias("cntrl", "IsCntrl", true);
       
   948                 Token.setAlias("digit", "IsDigit", true);
       
   949                 Token.setAlias("graph", "IsGraph", true);
       
   950                 Token.setAlias("lower", "IsLower", true);
       
   951                 Token.setAlias("print", "IsPrint", true);
       
   952                 Token.setAlias("punct", "IsPunct", true);
       
   953                 Token.setAlias("space", "IsSpace", true);
       
   954                 Token.setAlias("upper", "IsUpper", true);
       
   955                 Token.setAlias("word", "IsWord", true); // Perl extension
       
   956                 Token.setAlias("xdigit", "IsXDigit", true);
       
   957                 Token.registerNonXS("alpha");
       
   958                 Token.registerNonXS("alnum");
       
   959                 Token.registerNonXS("ascii");
       
   960                 Token.registerNonXS("cntrl");
       
   961                 Token.registerNonXS("digit");
       
   962                 Token.registerNonXS("graph");
       
   963                 Token.registerNonXS("lower");
       
   964                 Token.registerNonXS("print");
       
   965                 Token.registerNonXS("punct");
       
   966                 Token.registerNonXS("space");
       
   967                 Token.registerNonXS("upper");
       
   968                 Token.registerNonXS("word");
       
   969                 Token.registerNonXS("xdigit");
       
   970             } // synchronized
   981             } // synchronized
   971         } // if null
   982         } // if null
   972         RangeToken tok = positive ? (RangeToken)Token.categories.get(name)
   983         return positive ? (RangeToken)localCat.get(name)
   973             : (RangeToken)Token.categories2.get(name);
   984             : (RangeToken)Token.categories2.get(name);
   974         //if (tok == null) System.out.println(name);
       
   975         return tok;
       
   976     }
   985     }
   977     static protected RangeToken getRange(String name, boolean positive, boolean xs) {
   986     static protected RangeToken getRange(String name, boolean positive, boolean xs) {
   978         RangeToken range = Token.getRange(name, positive);
   987         RangeToken range = Token.getRange(name, positive);
   979         if (xs && range != null && Token.isRegisterNonXS(name))
   988         if (xs && range != null && Token.isRegisterNonXS(name))
   980             range = null;
   989             range = null;
   992 
  1001 
   993     static protected boolean isRegisterNonXS(String name) {
  1002     static protected boolean isRegisterNonXS(String name) {
   994         return Token.nonxs.contains(name);
  1003         return Token.nonxs.contains(name);
   995     }
  1004     }
   996 
  1005 
   997     private static void setAlias(String newName, String name, boolean positive) {
  1006     private static void setAlias(Map<String, Token> tmpCat, Map<String, Token> tmpCat2,
   998         Token t1 = Token.categories.get(name);
  1007             String newName, String name, boolean positive) {
   999         Token t2 = Token.categories2.get(name);
  1008         Token t1 = tmpCat.get(name);
       
  1009         Token t2 = tmpCat2.get(name);
  1000         if (positive) {
  1010         if (positive) {
  1001             Token.categories.put(newName, t1);
  1011             tmpCat.put(newName, t1);
  1002             Token.categories2.put(newName, t2);
  1012             tmpCat2.put(newName, t2);
  1003         } else {
  1013         } else {
  1004             Token.categories2.put(newName, t1);
  1014             tmpCat2.put(newName, t1);
  1005             Token.categories.put(newName, t2);
  1015             tmpCat.put(newName, t2);
  1006         }
  1016         }
  1007     }
  1017     }
  1008 
  1018 
  1009     // ------------------------------------------------------
  1019     // ------------------------------------------------------
  1010 
  1020