# HG changeset patch # User redestad # Date 1486034903 -3600 # Node ID aec39566b45ea06f1c571ea73ffc1cde77c29f95 # Parent 243c346dc9055316507a30c7a0a46cfdf7bdd0f8 8160302: Reduce number of lambdas created when loading java.util.regex.Pattern Reviewed-by: sherman, martin diff -r 243c346dc905 -r aec39566b45e jdk/src/java.base/share/classes/java/util/regex/CharPredicates.java --- a/jdk/src/java.base/share/classes/java/util/regex/CharPredicates.java Thu Feb 02 10:28:47 2017 +0100 +++ b/jdk/src/java.base/share/classes/java/util/regex/CharPredicates.java Thu Feb 02 12:28:23 2017 +0100 @@ -32,164 +32,195 @@ class CharPredicates { - static final CharPredicate ALPHABETIC = Character::isAlphabetic; + static final CharPredicate ALPHABETIC() { + return Character::isAlphabetic; + } // \p{gc=Decimal_Number} - static final CharPredicate DIGIT = Character::isDigit; + static final CharPredicate DIGIT() { + return Character::isDigit; + } - static final CharPredicate LETTER = Character::isLetter; + static final CharPredicate LETTER() { + return Character::isLetter; + } - static final CharPredicate IDEOGRAPHIC = Character::isIdeographic; + static final CharPredicate IDEOGRAPHIC() { + return Character::isIdeographic; + } - static final CharPredicate LOWERCASE = Character::isLowerCase; + static final CharPredicate LOWERCASE() { + return Character::isLowerCase; + } - static final CharPredicate UPPERCASE = Character::isUpperCase; + static final CharPredicate UPPERCASE() { + return Character::isUpperCase; + } - static final CharPredicate TITLECASE = Character::isTitleCase; + static final CharPredicate TITLECASE() { + return Character::isTitleCase; + } // \p{Whitespace} - static final CharPredicate WHITE_SPACE = ch -> - ((((1 << Character.SPACE_SEPARATOR) | - (1 << Character.LINE_SEPARATOR) | - (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1) - != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85); + static final CharPredicate WHITE_SPACE() { + return ch -> + ((((1 << Character.SPACE_SEPARATOR) | + (1 << Character.LINE_SEPARATOR) | + (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1) + != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85); + } // \p{gc=Control} - static final CharPredicate CONTROL = ch -> - Character.getType(ch) == Character.CONTROL; + static final CharPredicate CONTROL() { + return ch -> Character.getType(ch) == Character.CONTROL; + } // \p{gc=Punctuation} - static final CharPredicate PUNCTUATION = ch -> - ((((1 << Character.CONNECTOR_PUNCTUATION) | - (1 << Character.DASH_PUNCTUATION) | - (1 << Character.START_PUNCTUATION) | - (1 << Character.END_PUNCTUATION) | - (1 << Character.OTHER_PUNCTUATION) | - (1 << Character.INITIAL_QUOTE_PUNCTUATION) | - (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1) - != 0; + static final CharPredicate PUNCTUATION() { + return ch -> + ((((1 << Character.CONNECTOR_PUNCTUATION) | + (1 << Character.DASH_PUNCTUATION) | + (1 << Character.START_PUNCTUATION) | + (1 << Character.END_PUNCTUATION) | + (1 << Character.OTHER_PUNCTUATION) | + (1 << Character.INITIAL_QUOTE_PUNCTUATION) | + (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1) + != 0; + } // \p{gc=Decimal_Number} // \p{Hex_Digit} -> PropList.txt: Hex_Digit - static final CharPredicate HEX_DIGIT = DIGIT.union( - ch -> (ch >= 0x0030 && ch <= 0x0039) || - (ch >= 0x0041 && ch <= 0x0046) || - (ch >= 0x0061 && ch <= 0x0066) || - (ch >= 0xFF10 && ch <= 0xFF19) || - (ch >= 0xFF21 && ch <= 0xFF26) || - (ch >= 0xFF41 && ch <= 0xFF46)); + static final CharPredicate HEX_DIGIT() { + return DIGIT().union(ch -> (ch >= 0x0030 && ch <= 0x0039) || + (ch >= 0x0041 && ch <= 0x0046) || + (ch >= 0x0061 && ch <= 0x0066) || + (ch >= 0xFF10 && ch <= 0xFF19) || + (ch >= 0xFF21 && ch <= 0xFF26) || + (ch >= 0xFF41 && ch <= 0xFF46)); + } - static final CharPredicate ASSIGNED = ch -> - Character.getType(ch) != Character.UNASSIGNED; + static final CharPredicate ASSIGNED() { + return ch -> Character.getType(ch) != Character.UNASSIGNED; + } // PropList.txt:Noncharacter_Code_Point - static final CharPredicate NONCHARACTER_CODE_POINT = ch -> - (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); + static final CharPredicate NONCHARACTER_CODE_POINT() { + return ch -> (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); + } // \p{alpha} // \p{digit} - static final CharPredicate ALNUM = ALPHABETIC.union(DIGIT); + static final CharPredicate ALNUM() { + return ALPHABETIC().union(DIGIT()); + } // \p{Whitespace} -- // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} -> 0xa, 0xb, 0xc, 0xd, 0x85 // \p{gc=Line_Separator} // \p{gc=Paragraph_Separator}] - static final CharPredicate BLANK = ch -> - Character.getType(ch) == Character.SPACE_SEPARATOR || - ch == 0x9; // \N{HT} + static final CharPredicate BLANK() { + return ch -> + Character.getType(ch) == Character.SPACE_SEPARATOR || + ch == 0x9; // \N{HT} + } // [^ // \p{space} // \p{gc=Control} // \p{gc=Surrogate} // \p{gc=Unassigned}] - static final CharPredicate GRAPH = ch -> - ((((1 << Character.SPACE_SEPARATOR) | - (1 << Character.LINE_SEPARATOR) | - (1 << Character.PARAGRAPH_SEPARATOR) | - (1 << Character.CONTROL) | - (1 << Character.SURROGATE) | - (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1) - == 0; + static final CharPredicate GRAPH() { + return ch -> + ((((1 << Character.SPACE_SEPARATOR) | + (1 << Character.LINE_SEPARATOR) | + (1 << Character.PARAGRAPH_SEPARATOR) | + (1 << Character.CONTROL) | + (1 << Character.SURROGATE) | + (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1) + == 0; + } // \p{graph} // \p{blank} // -- \p{cntrl} - static final CharPredicate PRINT = GRAPH.union(BLANK).and(CONTROL.negate()); + static final CharPredicate PRINT() { + return GRAPH().union(BLANK()).and(CONTROL().negate()); + } // 200C..200D PropList.txt:Join_Control - static final CharPredicate JOIN_CONTROL = ch -> ch == 0x200C || ch == 0x200D; + static final CharPredicate JOIN_CONTROL() { + return ch -> ch == 0x200C || ch == 0x200D; + } // \p{alpha} // \p{gc=Mark} // \p{digit} // \p{gc=Connector_Punctuation} // \p{Join_Control} 200C..200D - static final CharPredicate WORD = - ALPHABETIC.union(ch -> ((((1 << Character.NON_SPACING_MARK) | + static final CharPredicate WORD() { + return ALPHABETIC().union(ch -> ((((1 << Character.NON_SPACING_MARK) | (1 << Character.ENCLOSING_MARK) | (1 << Character.COMBINING_SPACING_MARK) | (1 << Character.DECIMAL_DIGIT_NUMBER) | (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1) != 0, - JOIN_CONTROL); + JOIN_CONTROL()); + } ///////////////////////////////////////////////////////////////////////////// - private static final HashMap posix = new HashMap<>(12); - private static final HashMap uprops = new HashMap<>(18); - - private static void defPosix(String name, CharPredicate p) { - posix.put(name, p); - } - private static void defUProp(String name, CharPredicate p) { - uprops.put(name, p); + private static CharPredicate getPosixPredicate(String name) { + switch (name) { + case "ALPHA": return ALPHABETIC(); + case "LOWER": return LOWERCASE(); + case "UPPER": return UPPERCASE(); + case "SPACE": return WHITE_SPACE(); + case "PUNCT": return PUNCTUATION(); + case "XDIGIT": return HEX_DIGIT(); + case "ALNUM": return ALNUM(); + case "CNTRL": return CONTROL(); + case "DIGIT": return DIGIT(); + case "BLANK": return BLANK(); + case "GRAPH": return GRAPH(); + case "PRINT": return PRINT(); + default: return null; + } } - static { - defPosix("ALPHA", ALPHABETIC); - defPosix("LOWER", LOWERCASE); - defPosix("UPPER", UPPERCASE); - defPosix("SPACE", WHITE_SPACE); - defPosix("PUNCT", PUNCTUATION); - defPosix("XDIGIT",HEX_DIGIT); - defPosix("ALNUM", ALNUM); - defPosix("CNTRL", CONTROL); - defPosix("DIGIT", DIGIT); - defPosix("BLANK", BLANK); - defPosix("GRAPH", GRAPH); - defPosix("PRINT", PRINT); - - defUProp("ALPHABETIC", ALPHABETIC); - defUProp("ASSIGNED", ASSIGNED); - defUProp("CONTROL", CONTROL); - defUProp("HEXDIGIT", HEX_DIGIT); - defUProp("IDEOGRAPHIC", IDEOGRAPHIC); - defUProp("JOINCONTROL", JOIN_CONTROL); - defUProp("LETTER", LETTER); - defUProp("LOWERCASE", LOWERCASE); - defUProp("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT); - defUProp("TITLECASE", TITLECASE); - defUProp("PUNCTUATION", PUNCTUATION); - defUProp("UPPERCASE", UPPERCASE); - defUProp("WHITESPACE", WHITE_SPACE); - defUProp("WORD", WORD); - defUProp("WHITE_SPACE", WHITE_SPACE); - defUProp("HEX_DIGIT", HEX_DIGIT); - defUProp("NONCHARACTER_CODE_POINT", NONCHARACTER_CODE_POINT); - defUProp("JOIN_CONTROL", JOIN_CONTROL); + private static CharPredicate getUnicodePredicate(String name) { + switch (name) { + case "ALPHABETIC": return ALPHABETIC(); + case "ASSIGNED": return ASSIGNED(); + case "CONTROL": return CONTROL(); + case "HEXDIGIT": return HEX_DIGIT(); + case "IDEOGRAPHIC": return IDEOGRAPHIC(); + case "JOINCONTROL": return JOIN_CONTROL(); + case "LETTER": return LETTER(); + case "LOWERCASE": return LOWERCASE(); + case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT(); + case "TITLECASE": return TITLECASE(); + case "PUNCTUATION": return PUNCTUATION(); + case "UPPERCASE": return UPPERCASE(); + case "WHITESPACE": return WHITE_SPACE(); + case "WORD": return WORD(); + case "WHITE_SPACE": return WHITE_SPACE(); + case "HEX_DIGIT": return HEX_DIGIT(); + case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT(); + case "JOIN_CONTROL": return JOIN_CONTROL(); + default: return null; + } } public static CharPredicate forUnicodeProperty(String propName) { propName = propName.toUpperCase(Locale.ROOT); - CharPredicate p = uprops.get(propName); + CharPredicate p = getUnicodePredicate(propName); if (p != null) return p; - return posix.get(propName); + return getPosixPredicate(propName); } public static CharPredicate forPOSIXName(String propName) { - return posix.get(propName.toUpperCase(Locale.ENGLISH)); + return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH)); } ///////////////////////////////////////////////////////////////////////////// @@ -223,145 +254,130 @@ // unicode categories, aliases, properties, java methods ... - private static final HashMap props = new HashMap<>(128); - - /** - * Returns a predicate matching all characters in a named property. - */ static CharPredicate forProperty(String name) { - return props.get(name); - } - - private static void defProp(String name, CharPredicate p) { - props.put(name, p); - } - - private static void defCategory(String name, final int typeMask) { - CharPredicate p = ch -> (typeMask & (1 << Character.getType(ch))) != 0; - props.put(name, p); - } - - private static void defRange(String name, final int lower, final int upper) { - BmpCharPredicate p = ch -> lower <= ch && ch <= upper; - props.put(name, p); - } - - private static void defCtype(String name, final int ctype) { - BmpCharPredicate p = ch -> ch < 128 && ASCII.isType(ch, ctype); - // PrintPattern.pmap.put(p, name); - props.put(name, p); - } - - static { // Unicode character property aliases, defined in // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt - defCategory("Cn", 1< true); + switch (name) { + case "Cn": return category(1< (typeMask & (1 << Character.getType(ch))) != 0; + } + + private static CharPredicate range(final int lower, final int upper) { + return (BmpCharPredicate)ch -> lower <= ch && ch <= upper; + } + + private static CharPredicate ctype(final int ctype) { + return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype); } ///////////////////////////////////////////////////////////////////////////// @@ -369,8 +385,14 @@ /** * Posix ASCII variants, not in the lookup map */ - static final BmpCharPredicate ASCII_DIGIT = ch -> ch < 128 && ASCII.isDigit(ch); - static final BmpCharPredicate ASCII_WORD = ch -> ch < 128 && ASCII.isWord(ch); - static final BmpCharPredicate ASCII_SPACE = ch -> ch < 128 && ASCII.isSpace(ch); + static final BmpCharPredicate ASCII_DIGIT() { + return ch -> ch < 128 && ASCII.isDigit(ch); + } + static final BmpCharPredicate ASCII_WORD() { + return ch -> ch < 128 && ASCII.isWord(ch); + } + static final BmpCharPredicate ASCII_SPACE() { + return ch -> ch < 128 && ASCII.isSpace(ch); + } } diff -r 243c346dc905 -r aec39566b45e jdk/src/java.base/share/classes/java/util/regex/Pattern.java --- a/jdk/src/java.base/share/classes/java/util/regex/Pattern.java Thu Feb 02 10:28:47 2017 +0100 +++ b/jdk/src/java.base/share/classes/java/util/regex/Pattern.java Thu Feb 02 12:28:23 2017 +0100 @@ -1495,7 +1495,7 @@ altns.add(seq); produceEquivalentAlternation(nfd, altns); dst.append("(?:"); - altns.forEach( s -> dst.append(s + "|")); + altns.forEach( s -> dst.append(s).append('|')); dst.delete(dst.length() - 1, dst.length()); dst.append(")"); continue; @@ -2142,12 +2142,12 @@ case '.': next(); if (has(DOTALL)) { - node = new CharProperty(ALL); + node = new CharProperty(ALL()); } else { if (has(UNIX_LINES)) { - node = new CharProperty(UNIXDOT); + node = new CharProperty(UNIXDOT()); } else { - node = new CharProperty(DOT); + node = new CharProperty(DOT()); } } break; @@ -2376,7 +2376,7 @@ case 'D': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT; + CharPredicates.DIGIT() : CharPredicates.ASCII_DIGIT(); predicate = predicate.negate(); if (!inclass) root = newCharProperty(predicate); @@ -2391,7 +2391,7 @@ return -1; case 'H': if (create) { - predicate = HorizWS.negate(); + predicate = HorizWS().negate(); if (!inclass) root = newCharProperty(predicate); } @@ -2415,7 +2415,7 @@ case 'S': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE; + CharPredicates.WHITE_SPACE() : CharPredicates.ASCII_SPACE(); predicate = predicate.negate(); if (!inclass) root = newCharProperty(predicate); @@ -2426,7 +2426,7 @@ break; case 'V': if (create) { - predicate = VertWS.negate(); + predicate = VertWS().negate(); if (!inclass) root = newCharProperty(predicate); } @@ -2434,7 +2434,7 @@ case 'W': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.WORD : CharPredicates.ASCII_WORD; + CharPredicates.WORD() : CharPredicates.ASCII_WORD(); predicate = predicate.negate(); if (!inclass) root = newCharProperty(predicate); @@ -2480,7 +2480,7 @@ case 'd': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT; + CharPredicates.DIGIT() : CharPredicates.ASCII_DIGIT(); if (!inclass) root = newCharProperty(predicate); } @@ -2493,7 +2493,7 @@ break; case 'h': if (create) { - predicate = HorizWS; + predicate = HorizWS(); if (!inclass) root = newCharProperty(predicate); } @@ -2531,7 +2531,7 @@ case 's': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE; + CharPredicates.WHITE_SPACE() : CharPredicates.ASCII_SPACE(); if (!inclass) root = newCharProperty(predicate); } @@ -2552,7 +2552,7 @@ if (isrange) return '\013'; if (create) { - predicate = VertWS; + predicate = VertWS(); if (!inclass) root = newCharProperty(predicate); } @@ -2560,7 +2560,7 @@ case 'w': if (create) { predicate = has(UNICODE_CHARACTER_CLASS) ? - CharPredicates.WORD : CharPredicates.ASCII_WORD; + CharPredicates.WORD() : CharPredicates.ASCII_WORD(); if (!inclass) root = newCharProperty(predicate); } @@ -2704,7 +2704,6 @@ (6)AngstromSign u+212b toLowerCase(u+212b) ==> u+00e5 */ - int d; if (ch < 256 && !(has(CASE_INSENSITIVE) && has(UNICODE_CASE) && (ch == 0xff || ch == 0xb5 || @@ -5384,7 +5383,7 @@ } boolean isWord(int ch) { - return useUWORD ? CharPredicates.WORD.is(ch) + return useUWORD ? CharPredicates.WORD().is(ch) : (ch == '_' || Character.isLetterOrDigit(ch)); } @@ -5680,33 +5679,45 @@ /** * matches a Perl vertical whitespace */ - static BmpCharPredicate VertWS = cp -> - (cp >= 0x0A && cp <= 0x0D) || cp == 0x85 || cp == 0x2028 || cp == 0x2029; + static BmpCharPredicate VertWS() { + return cp -> (cp >= 0x0A && cp <= 0x0D) || + cp == 0x85 || cp == 0x2028 || cp == 0x2029; + } /** * matches a Perl horizontal whitespace */ - static BmpCharPredicate HorizWS = cp -> - cp == 0x09 || cp == 0x20 || cp == 0xa0 || cp == 0x1680 || - cp == 0x180e || cp >= 0x2000 && cp <= 0x200a || cp == 0x202f || - cp == 0x205f || cp == 0x3000; + static BmpCharPredicate HorizWS() { + return cp -> + cp == 0x09 || cp == 0x20 || cp == 0xa0 || cp == 0x1680 || + cp == 0x180e || cp >= 0x2000 && cp <= 0x200a || cp == 0x202f || + cp == 0x205f || cp == 0x3000; + } /** * for the Unicode category ALL and the dot metacharacter when * in dotall mode. */ - static CharPredicate ALL = ch -> true; + static CharPredicate ALL() { + return ch -> true; + } /** * for the dot metacharacter when dotall is not enabled. */ - static CharPredicate DOT = ch -> (ch != '\n' && ch != '\r' - && (ch|1) != '\u2029' - && ch != '\u0085'); + static CharPredicate DOT() { + return ch -> + (ch != '\n' && ch != '\r' + && (ch|1) != '\u2029' + && ch != '\u0085'); + } + /** * the dot metacharacter when dotall is not enabled but UNIX_LINES is enabled. */ - static CharPredicate UNIXDOT = ch -> ch != '\n'; + static CharPredicate UNIXDOT() { + return ch -> ch != '\n'; + } /** * Indicate that matches a Supplementary Unicode character diff -r 243c346dc905 -r aec39566b45e jdk/src/java.base/share/classes/java/util/regex/PrintPattern.java --- a/jdk/src/java.base/share/classes/java/util/regex/PrintPattern.java Thu Feb 02 10:28:47 2017 +0100 +++ b/jdk/src/java.base/share/classes/java/util/regex/PrintPattern.java Thu Feb 02 12:28:23 2017 +0100 @@ -27,7 +27,6 @@ import java.util.HashMap; import java.util.regex.Pattern.CharPredicate; -import java.util.regex.CharPredicates; import static java.util.regex.ASCII.*; /** @@ -106,15 +105,15 @@ static HashMap pmap; static { pmap = new HashMap<>(); - pmap.put(Pattern.ALL, "All"); - pmap.put(Pattern.DOT, "Dot"); - pmap.put(Pattern.UNIXDOT, "UnixDot"); - pmap.put(Pattern.VertWS, "VertWS"); - pmap.put(Pattern.HorizWS, "HorizWS"); + pmap.put(Pattern.ALL(), "All"); + pmap.put(Pattern.DOT(), "Dot"); + pmap.put(Pattern.UNIXDOT(), "UnixDot"); + pmap.put(Pattern.VertWS(), "VertWS"); + pmap.put(Pattern.HorizWS(), "HorizWS"); - pmap.put(CharPredicates.ASCII_DIGIT, "ASCII.DIGIT"); - pmap.put(CharPredicates.ASCII_WORD, "ASCII.WORD"); - pmap.put(CharPredicates.ASCII_SPACE, "ASCII.SPACE"); + pmap.put(CharPredicates.ASCII_DIGIT(), "ASCII.DIGIT"); + pmap.put(CharPredicates.ASCII_WORD(), "ASCII.WORD"); + pmap.put(CharPredicates.ASCII_SPACE(), "ASCII.SPACE"); } static void walk(Pattern.Node node, int depth) {