# HG changeset patch # User hannesw # Date 1365184210 -7200 # Node ID 41eadf003eff0c67448c6688fd6689e4949c1f2c # Parent f23743ec1a93d417039e75eb64f1cfe01629c14f 8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines Reviewed-by: jlaskey, lagergren diff -r f23743ec1a93 -r 41eadf003eff nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java --- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Thu Apr 04 18:32:00 2013 +0200 +++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Fri Apr 05 19:50:10 2013 +0200 @@ -26,11 +26,10 @@ package jdk.nashorn.internal.runtime.regexp; import java.util.HashMap; -import java.util.LinkedHashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.PatternSyntaxException; import jdk.nashorn.internal.parser.Lexer; @@ -58,7 +57,7 @@ private final List caps = new LinkedList<>(); /** Forward references to capturing parenthesis to be resolved later.*/ - private final Set forwardReferences = new LinkedHashSet<>(); + private final LinkedList forwardReferences = new LinkedList<>(); /** Current level of zero-width negative lookahead assertions. */ private int negativeLookaheadLevel; @@ -104,10 +103,20 @@ return; } - for (final Integer ref : forwardReferences) { - if (ref.intValue() > caps.size()) { - neverMatches = true; - break; + Iterator iterator = forwardReferences.descendingIterator(); + while (iterator.hasNext()) { + final int pos = iterator.next(); + final int num = iterator.next(); + if (num > caps.size()) { + // Non-existing reference should never match, if smaller than 8 convert to octal escape + // to be compatible with other engines. + if (num < 8) { + String escape = "\\x0" + num; + sb.insert(pos, escape); + } else { + neverMatches = true; + break; + } } } @@ -402,6 +411,10 @@ if (ch0 == '}') { pop('}'); commit(1); + } else { + // Bad quantifier should be rejected but is accepted by all major engines + restart(startIn, startOut); + return false; } return true; @@ -637,7 +650,16 @@ throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException } // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here. - if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) { + if (ch0 == 'c') { + // Ignore invalid control letter escape if within a character class + if (inCharClass && ch1 != ']') { + sb.setLength(sb.length() - 1); + skip(2); + return true; + } else { + sb.append('\\'); // Treat invalid \c control sequence as \\c + } + } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) { sb.setLength(sb.length() - 1); } return commit(1); @@ -677,8 +699,9 @@ // Forward reference to a capture group. Forward references are always undefined so we // can omit it from the output buffer. Additionally, if the capture group does not exist // the whole regexp becomes invalid, so register the reference for later processing. + sb.setLength(sb.length() - 1); forwardReferences.add(num); - sb.setLength(sb.length() - 1); + forwardReferences.add(sb.length()); skip(1); return true; } diff -r f23743ec1a93 -r 41eadf003eff nashorn/test/script/basic/JDK-8009230.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/test/script/basic/JDK-8009230.js Fri Apr 05 19:50:10 2013 +0200 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * JDK-8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines + * + * @test + * @run + */ + + +// Invalid ControlEscape/IdentityEscape character treated as literal. +print(/\z/.exec("z")); // Invalid escape, same as /z/ +// Incomplete/Invalid ControlEscape treated as "\\c" +print(/\c/.exec("\\c")); // same as /\\c/ +print(/\c2/.exec("\\c2")); // same as /\\c2/ +print(/\C/.exec("C")); // same as /C/ +print(/\C2/.exec("C2")); // same as /C2/ +// Incomplete HexEscapeSequence escape treated as "x". +print(/\x/.exec("x")); // incomplete x-escape +print(/\x1/.exec("x1")); // incomplete x-escape +print(/\x1z/.exec("x1z")); // incomplete x-escape +// Incomplete UnicodeEscapeSequence escape treated as "u". +print(/\u/.exec("u")); // incomplete u-escape +print(/\uz/.exec("uz")); // incomplete u-escape +print(/\u1/.exec("u1")); // incomplete u-escape +print(/\u1z/.exec("u1z")); // incomplete u-escape +print(/\u12/.exec("u12")); // incomplete u-escape +print(/\u12z/.exec("u12z")); // incomplete u-escape +print(/\u123/.exec("u123")); // incomplete u-escape +print(/\u123z/.exec("u123z")); // incomplete u-escape +// Bad quantifier range: +print(/x{z/.exec("x{z")); // same as /x\{z/ +print(/x{1z/.exec("x{1z")); // same as /x\{1z/ +print(/x{1,z/.exec("x{1,z")); // same as /x\{1,z/ +print(/x{1,2z/.exec("x{1,2z")); // same as /x\{1,2z/ +print(/x{10000,20000z/.exec("x{10000,20000z")); // same as /x\{10000,20000z/ +// Notice: It needs arbitrary lookahead to determine the invalidity, +// except Mozilla that limits the numbers. + +// Zero-initialized Octal escapes. +/\012/; // same as /\x0a/ + +// Nonexisting back-references smaller than 8 treated as octal escapes: +print(/\5/.exec("\u0005")); // same as /\x05/ +print(/\7/.exec("\u0007")); // same as /\x07/ +print(/\8/.exec("\u0008")); // does not match + +// Invalid PatternCharacter accepted unescaped +print(/]/.exec("]")); +print(/{/.exec("{")); +print(/}/.exec("}")); + +// Bad escapes also inside CharacterClass. +print(/[\z]/.exec("z")); +print(/[\c]/.exec("c")); +print(/[\c2]/.exec("c")); +print(/[\x]/.exec("x")); +print(/[\x1]/.exec("x1")); +print(/[\x1z]/.exec("x1z")); +print(/[\u]/.exec("u")); +print(/[\uz]/.exec("u")); +print(/[\u1]/.exec("u")); +print(/[\u1z]/.exec("u")); +print(/[\u12]/.exec("u")); +print(/[\u12z]/.exec("u")); +print(/[\u123]/.exec("u")); +print(/[\u123z]/.exec("u")); +print(/[\012]/.exec("0")); +print(/[\5]/.exec("5")); +// And in addition: +print(/[\B]/.exec("B")); +print(/()()[\2]/.exec("")); // Valid backreference should be invalid. diff -r f23743ec1a93 -r 41eadf003eff nashorn/test/script/basic/JDK-8009230.js.EXPECTED --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/test/script/basic/JDK-8009230.js.EXPECTED Fri Apr 05 19:50:10 2013 +0200 @@ -0,0 +1,45 @@ +z +\c +\c2 +C +C2 +x +x1 +x1z +u +uz +u1 +u1z +u12 +u12z +u123 +u123z +x{z +x{1z +x{1,z +x{1,2z +x{10000,20000z + + +null +] +{ +} +z +c +null +x +x +x +u +u +u +u +u +u +u +u +null +null +B +null