8019963: empty char range in regex
author hannesw
Mon, 08 Jul 2013 19:34:55 +0200
changeset 18858 802ac572529a
parent 18856 ef883ef43731
child 18859 7c4d0146ccd5
8019963: empty char range in regex
Reviewed-by: jlaskey, sundar
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java
nashorn/test/script/basic/JDK-8019963.js
nashorn/test/script/basic/JDK-8019963.js.EXPECTED
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java	Mon Jul 08 18:36:10 2013 +0530
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java	Mon Jul 08 19:34:55 2013 +0200
@@ -183,7 +183,7 @@
 
     // add_code_range, be aware of it returning null!
     public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
-        if (from >to) {
+        if (from > to) {
             if (env.syntax.allowEmptyRangeInCC()) {
                 return pbuf;
             } else {
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java	Mon Jul 08 18:36:10 2013 +0530
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java	Mon Jul 08 19:34:55 2013 +0200
@@ -125,32 +125,8 @@
                 break;
 
             case RAW_BYTE:
-                if (token.base != 0) { /* tok->base != 0 : octal or hexadec. */
-                    byte[] buf = new byte[4];
-                    int psave = p;
-                    int base = token.base;
-                    buf[0] = (byte)token.getC();
-                    int i;
-                    for (i=1; i<4; i++) {
-                        fetchTokenInCC();
-                        if (token.type != TokenType.RAW_BYTE || token.base != base) {
-                            fetched = true;
-                            break;
-                        }
-                        buf[i] = (byte)token.getC();
-                    }
-
-                    if (i == 1) {
-                        arg.v = buf[0] & 0xff;
-                        arg.inType = CCVALTYPE.SB; // goto raw_single
-                    } else {
-                        arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length);
-                        arg.inType = CCVALTYPE.CODE_POINT;
-                    }
-                } else {
-                    arg.v = token.getC();
-                    arg.inType = CCVALTYPE.SB; // raw_single:
-                }
+                arg.v = token.getC();
+                arg.inType = CCVALTYPE.SB; // raw_single:
                 arg.vIsRaw = true;
                 parseCharClassValEntry2(cc, arg); // goto val_entry2
                 break;
@@ -615,31 +591,10 @@
         StringNode node = new StringNode((char)token.getC());
         node.setRaw();
 
-        int len = 1;
-        while (true) {
-            if (len >= 1) {
-                if (len == 1) {
-                    fetchToken();
-                    node.clearRaw();
-                    // !goto string_end;!
-                    return parseExpRepeat(node, group);
-                }
-            }
-
-            fetchToken();
-            if (token.type != TokenType.RAW_BYTE) {
-                /* Don't use this, it is wrong for little endian encodings. */
-                // USE_PAD_TO_SHORT_BYTE_CHAR ...
-
-                newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
-            }
-
-            // important: we don't use 0xff mask here neither in the compiler
-            // (in the template string) so we won't have to mask target
-            // strings when comparing against them in the matcher
-            node.cat((char)token.getC());
-            len++;
-        } // while
+        fetchToken();
+        node.clearRaw();
+        // !goto string_end;!
+        return parseExpRepeat(node, group);
     }
 
     private Node parseExpRepeat(Node target, boolean group) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8019963.js	Mon Jul 08 19:34:55 2013 +0200
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * 
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * 
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8019963: empty char range in regex
+ *
+ * @test
+ * @run
+ */
+
+var re1 = /[\x00-\x08\x0B\x0C\x0E-\x9F\uD800-\uDFFF\uFFFE\uFFFF]/; // \x00-\x9F minus \x09, \x0A, \x0D; plus surrogates, \uFFFE, \uFFFF
+
+print(re1.test("\x00")); // lower bound of first range
+print(re1.test("\x04"));
+print(re1.test("\x08")); // upper bound of first range
+print(re1.test("\x0a")); // false: \x0A is not in the class
+print(re1.test("\x0B"));
+print(re1.test("\x0C"));
+print(re1.test("\x0E")); // lower bound of \x0E-\x9F
+print(re1.test("\x10"));
+print(re1.test("\x1A"));
+print(re1.test("\x2F"));
+print(re1.test("\x8E"));
+print(re1.test("\x8F"));
+print(re1.test("\x9F")); // upper bound of \x0E-\x9F
+print(re1.test("\xA0")); // false: first code unit past \x9F
+print(re1.test("\xAF")); // false
+print(re1.test("\uD800")); // lower bound of the surrogate range
+print(re1.test("\uDA00")); // four-digit code units need \u; \x consumes only two hex digits
+print(re1.test("\uDCFF"));
+print(re1.test("\uDFFF")); // upper bound of the surrogate range
+print(re1.test("\uFFFE"));
+print(re1.test("\uFFFF"));
+
+var re2 = /[\x1F\x7F-\x84\x86]/; // single \x1F, range \x7F-\x84, single \x86
+
+print(re2.test("\x1F"));
+print(re2.test("\x2F")); // false: not in the class
+print(re2.test("\x3F")); // false: not in the class
+print(re2.test("\x7F")); // lower bound of \x7F-\x84
+print(re2.test("\x80"));
+print(re2.test("\x84")); // upper bound of \x7F-\x84
+print(re2.test("\x85")); // false: falls in the gap between \x84 and \x86
+print(re2.test("\x86"));
+
+var re3 = /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/; // never used below; NOTE(review): presumably only checks that these \x ranges parse without an empty-range error - confirm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8019963.js.EXPECTED	Mon Jul 08 19:34:55 2013 +0200
@@ -0,0 +1,29 @@
+true
+true
+true
+false
+true
+true
+true
+true
+true
+true
+true
+true
+true
+false
+false
+true
+true
+true
+true
+true
+true
+true
+false
+false
+true
+true
+true
+false
+true