8011749: Bugs with empty character class handling
author: hannesw
Wed, 10 Apr 2013 14:05:11 +0200
changeset 16939 9e3a9eda5775
parent 16938 1a8ffed97564
child 16940 d5c597aa3d47
8011749: Bugs with empty character class handling
Reviewed-by: lagergren, attila
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java
nashorn/test/script/basic/JDK-8011749.js
nashorn/test/script/basic/JDK-8011749.js.EXPECTED
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:00:11 2013 +0200
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:05:11 2013 +0200
@@ -47,9 +47,6 @@
      */
     private final StringBuilder sb;
 
-    /** Is this the special case of a regexp that never matches anything */
-    private boolean neverMatches;
-
     /** Expected token table */
     private final Map<Character, Integer> expected = new HashMap<>();
 
@@ -99,9 +96,6 @@
     }
 
     private void processForwardReferences() {
-        if (neverMatches()) {
-            return;
-        }
 
         Iterator<Integer> iterator = forwardReferences.descendingIterator();
         while (iterator.hasNext()) {
@@ -136,9 +130,6 @@
         }
 
         scanner.processForwardReferences();
-        if (scanner.neverMatches()) {
-            return null; // never matches
-        }
 
         // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors
         if (scanner.position != string.length()) {
@@ -147,16 +138,6 @@
         }
 
         return scanner;
-     }
-
-    /**
-     * Does this regexp ever match anything? Use of e.g. [], which is legal in JavaScript,
-     * is an example where we never match
-     *
-     * @return boolean
-     */
-    private boolean neverMatches() {
-        return neverMatches;
     }
 
     final StringBuilder getStringBuilder() {
@@ -278,23 +259,16 @@
         }
 
         if (atom()) {
-            boolean emptyCharacterClass = false;
+            // Check for character classes that never or always match
             if (sb.toString().endsWith("[]")) {
-                emptyCharacterClass = true;
+                sb.setLength(sb.length() - 1);
+                sb.append("^\\s\\S]");
             } else if (sb.toString().endsWith("[^]")) {
                 sb.setLength(sb.length() - 2);
                 sb.append("\\s\\S]");
             }
 
-            boolean quantifier = quantifier();
-
-            if (emptyCharacterClass) {
-                if (!quantifier) {
-                    neverMatches = true; //never matches ever.
-                }
-                // Note: we could check if quantifier has min zero to mark empty character class as dead.
-            }
-
+            quantifier();
             return true;
         }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8011749.js	Wed Apr 10 14:05:11 2013 +0200
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8011749: Bugs with empty character class handling
+ *
+ * @test
+ * @run
+ */
+
+// empty class in alternative
+print(/[]|[^]/.exec("a"));
+print(/[]|[]/.test("a"));
+print(/[]|[]|[a]/.exec("a"));
+
+// empty class in negative lookahead
+print(/(?![])/.test(""));
+print(/(?![])./.exec("a"));
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8011749.js.EXPECTED	Wed Apr 10 14:05:11 2013 +0200
@@ -0,0 +1,5 @@
+a
+false
+a
+true
+a