# HG changeset patch # User hannesw # Date 1373446459 -7200 # Node ID f582e6cdeae5110d502320aadd923d180e9b18ea # Parent 8b6a01b38cb84026b0cf8ec741f49f44fa85d7c8 8016681: regex capture behaves differently than on V8 Reviewed-by: lagergren, sundar diff -r 8b6a01b38cb8 -r f582e6cdeae5 nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java --- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Wed Jul 10 13:25:07 2013 +0530 +++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Wed Jul 10 10:54:19 2013 +0200 @@ -57,7 +57,10 @@ private final LinkedList forwardReferences = new LinkedList<>(); /** Current level of zero-width negative lookahead assertions. */ - private int negativeLookaheadLevel; + private int negLookaheadLevel; + + /** Sequential id of current top-level zero-width negative lookahead assertion. */ + private int negLookaheadGroup; /** Are we currently inside a character class? */ private boolean inCharClass = false; @@ -68,17 +71,18 @@ private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?-"; private static class Capture { - /** - * Zero-width negative lookaheads enclosing the capture. - */ - private final int negativeLookaheadLevel; + /** Zero-width negative lookaheads enclosing the capture. */ + private final int negLookaheadLevel; + /** Sequential id of top-level negative lookaheads containing the capture. 
*/ + private final int negLookaheadGroup; - Capture(final int negativeLookaheadLevel) { - this.negativeLookaheadLevel = negativeLookaheadLevel; + Capture(final int negLookaheadGroup, final int negLookaheadLevel) { + this.negLookaheadGroup = negLookaheadGroup; + this.negLookaheadLevel = negLookaheadLevel; } - public int getNegativeLookaheadLevel() { - return negativeLookaheadLevel; + boolean isContained(final int group, final int level) { + return group == this.negLookaheadGroup && level >= this.negLookaheadLevel; } } @@ -152,7 +156,7 @@ BitVector vec = null; for (int i = 0; i < caps.size(); i++) { final Capture cap = caps.get(i); - if (cap.getNegativeLookaheadLevel() > 0) { + if (cap.negLookaheadLevel > 0) { if (vec == null) { vec = new BitVector(caps.size() + 1); } @@ -311,11 +315,14 @@ commit(3); if (isNegativeLookahead) { - negativeLookaheadLevel++; + if (negLookaheadLevel == 0) { + negLookaheadGroup++; + } + negLookaheadLevel++; } disjunction(); if (isNegativeLookahead) { - negativeLookaheadLevel--; + negLookaheadLevel--; } if (ch0 == ')') { @@ -432,20 +439,17 @@ } if (ch0 == '(') { - boolean capturingParens = true; commit(1); if (ch0 == '?' && ch1 == ':') { - capturingParens = false; commit(2); + } else { + caps.add(new Capture(negLookaheadGroup, negLookaheadLevel)); } disjunction(); if (ch0 == ')') { commit(1); - if (capturingParens) { - caps.add(new Capture(negativeLookaheadLevel)); - } return true; } } @@ -675,24 +679,22 @@ sb.setLength(sb.length() - 1); octalOrLiteral(Integer.toString(decimalValue), sb); - } else if (decimalValue <= caps.size() && caps.get(decimalValue - 1).getNegativeLookaheadLevel() > 0) { - // Captures that live inside a negative lookahead are dead after the - // lookahead and will be undefined if referenced from outside. 
- if (caps.get(decimalValue - 1).getNegativeLookaheadLevel() > negativeLookaheadLevel) { + } else if (decimalValue <= caps.size()) { + // Captures inside a negative lookahead are undefined when referenced from the outside. + if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) { + // Reference to capture in negative lookahead, omit from output buffer. sb.setLength(sb.length() - 1); } else { + // Append backreference to output buffer. sb.append(decimalValue); } - } else if (decimalValue > caps.size()) { - // Forward reference to a capture group. Forward references are always undefined so we can omit - // it from the output buffer. However, if the target capture does not exist, we need to rewrite - // the reference as hex escape or literal string, so register the reference for later processing. + } else { + // Forward references to a capture group are always undefined so we can omit it from the output buffer. + // However, if the target capture does not exist, we need to rewrite the reference as hex escape + // or literal string, so register the reference for later processing. sb.setLength(sb.length() - 1); forwardReferences.add(decimalValue); forwardReferences.add(sb.length()); - } else { - // Append as backreference - sb.append(decimalValue); } } diff -r 8b6a01b38cb8 -r f582e6cdeae5 nashorn/test/script/basic/JDK-8016681.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/test/script/basic/JDK-8016681.js Wed Jul 10 10:54:19 2013 +0200 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * JDK-8016681: regex capture behaves differently than on V8 + * + * @test + * @run + */ + +// regexp similar to the one used in marked.js +/^((?:[^\n]+\n?(?!( *[-*_]){3,} *(?:\n+|$)| *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)|([^\n]+)\n *(=|-){3,} *\n*))+)\n*/ + .exec("a\n\nb") + .forEach(function(e) { print(e); }); + +// simplified regexp +/(x(?!(a))(?!(b))y)/ + .exec("xy") + .forEach(function(e) { print(e); }); + +// should not match as cross-negative-lookahead backreference \2 should be undefined +print(/(x(?!(a))(?!(b)\2))/.exec("xbc")); diff -r 8b6a01b38cb8 -r f582e6cdeae5 nashorn/test/script/basic/JDK-8016681.js.EXPECTED --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/test/script/basic/JDK-8016681.js.EXPECTED Wed Jul 10 10:54:19 2013 +0200 @@ -0,0 +1,15 @@ +a + + +a + +undefined +undefined +undefined +undefined +undefined +xy +xy +undefined +undefined +null