8137240: Negative lookahead in RegEx breaks backreference
author hannesw
Fri, 24 Jun 2016 12:39:42 +0200
changeset 39167 c543a06f002f
parent 39166 ae7797bbdf86
child 39168 e02c166da4dd
8137240: Negative lookahead in RegEx breaks backreference Reviewed-by: mhaupt
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java
nashorn/test/script/basic/JDK-8137240.js
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Thu Jun 23 21:13:33 2016 +0000
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Fri Jun 24 12:39:42 2016 +0200
@@ -80,8 +80,17 @@
             this.negLookaheadLevel = negLookaheadLevel;
         }
 
-        boolean isContained(final int group, final int level) {
-            return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
+        /**
+         * Returns true if this Capture can be referenced from the position specified by the
+         * group and level parameters. This is the case if either the group is not within
+         * a negative lookahead, or the position of the referrer is in the same negative lookahead.
+         *
+         * @param group current negative lookahead group
+         * @param level current negative lookahead level
+         * @return true if this capture group can be referenced from the given position
+         */
+        boolean canBeReferencedFrom(final int group, final int level) {
+            return this.negLookaheadLevel == 0 || (group == this.negLookaheadGroup && level >= this.negLookaheadLevel);
         }
 
     }
@@ -671,8 +680,9 @@
 
                 } else if (decimalValue <= caps.size()) {
                     //  Captures inside a negative lookahead are undefined when referenced from the outside.
-                    if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
-                        // Reference to capture in negative lookahead, omit from output buffer.
+                    final Capture capture = caps.get(decimalValue - 1);
+                    if (!capture.canBeReferencedFrom(negLookaheadGroup, negLookaheadLevel)) {
+                        // Outside reference to capture in negative lookahead, omit from output buffer.
                         sb.setLength(sb.length() - 1);
                     } else {
                         // Append backreference to output buffer.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8137240.js	Fri Jun 24 12:39:42 2016 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8137240: Negative lookahead in RegEx breaks backreference
+ *
+ * @test
+ * @run
+ */
+
+
+Assert.assertEquals('aa'.replace(/(a)(?!b)\1/gm, 'c'), 'c');
+
+var result = 'aa'.match(/(a)(?!b)\1/);
+Assert.assertTrue(result.length === 2);
+Assert.assertTrue(result[0] === 'aa');
+Assert.assertTrue(result[1] === 'a');
+
+result = 'aa'.match(/(a)(?!(b))\2(a)/);
+Assert.assertTrue(result.length === 4);
+Assert.assertTrue(result[0] === 'aa');
+Assert.assertTrue(result[1] === 'a');
+Assert.assertTrue(result[2] === undefined);
+Assert.assertTrue(result[3] === 'a');