8011756: Wrong characters supported in RegExp \c escape
author hannesw
Wed, 10 Apr 2013 14:08:00 +0200
changeset 16940 d5c597aa3d47
parent 16939 9e3a9eda5775
child 16941 f5088aaca810
8011756: Wrong characters supported in RegExp \c escape
Reviewed-by: lagergren, attila
nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java
nashorn/test/script/basic/JDK-8011756.js
nashorn/test/script/basic/JDK-8011756.js.EXPECTED
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:05:11 2013 +0200
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:08:00 2013 +0200
@@ -596,13 +596,14 @@
      *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
      */
     private boolean controlLetter() {
-        final char c = Character.toUpperCase(ch0);
-        if (c >= 'A' && c <= 'Z') {
+        // To match other engines we also accept '0'..'9' and '_' as control letters inside a character class.
+        if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z')
+                || (inCharClass && (isDecimalDigit(ch0) || ch0 == '_'))) {
             // for some reason java regexps don't like control characters on the
             // form "\\ca".match([string with ascii 1 at char0]). Translating
             // them to unicode does it though.
             sb.setLength(sb.length() - 1);
-            unicode(c - 'A' + 1, sb);
+            unicode(ch0 % 32, sb);
             skip(1);
             return true;
         }
@@ -621,14 +622,7 @@
         }
         // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
         if (ch0 == 'c') {
-            // Ignore invalid control letter escape if within a character class
-            if (inCharClass && ch1 != ']') {
-                sb.setLength(sb.length() - 1);
-                skip(2);
-                return true;
-            } else {
-                sb.append('\\'); // Treat invalid \c control sequence as \\c
-            }
+            sb.append('\\'); // Treat invalid \c control sequence as \\c
         } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
             sb.setLength(sb.length() - 1);
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8011756.js	Wed Apr 10 14:08:00 2013 +0200
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * 
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * 
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8011756: Wrong characters supported in RegExp \c escape
+ *
+ * @test
+ * @run
+ */
+
+
+// Invalid control letters should be escaped:
+print(/\cı/.test("\x09"));
+print(/\cı/.test("\\cı"));
+
+print(/\cſ/.test("\x13"));
+print(/\cſ/.test("\\cſ"));
+
+print(/[\cſ]/.test("\x13"));
+print(/[\cſ]/.test("\\"));
+print(/[\cſ]/.test("c"));
+print(/[\cſ]/.test("ſ"));
+
+print(/[\c#]/.test("\\"));
+print(/[\c#]/.test("c"));
+print(/[\c#]/.test("#"));
+
+// The characters that are supported by other engines are '0'-'9', '_':
+print(/[\c0]/.test("\x10"));
+print(/[\c1]/.test("\x11"));
+print(/[\c2]/.test("\x12"));
+print(/[\c3]/.test("\x13"));
+print(/[\c4]/.test("\x14"));
+print(/[\c5]/.test("\x15"));
+print(/[\c6]/.test("\x16"));
+print(/[\c7]/.test("\x17"));
+print(/[\c8]/.test("\x18"));
+print(/[\c9]/.test("\x19"));
+print(/[\c_]/.test("\x1F"));
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8011756.js.EXPECTED	Wed Apr 10 14:08:00 2013 +0200
@@ -0,0 +1,22 @@
+false
+true
+false
+true
+false
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true
+true