--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java Mon Jul 08 18:36:10 2013 +0530
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/CodeRangeBuffer.java Mon Jul 08 19:34:55 2013 +0200
@@ -183,7 +183,7 @@
// add_code_range, be aware of it returning null!
public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
- if (from >to) {
+ if (from > to) {
if (env.syntax.allowEmptyRangeInCC()) {
return pbuf;
} else {
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java Mon Jul 08 18:36:10 2013 +0530
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Parser.java Mon Jul 08 19:34:55 2013 +0200
@@ -125,32 +125,8 @@
break;
case RAW_BYTE:
- if (token.base != 0) { /* tok->base != 0 : octal or hexadec. */
- byte[] buf = new byte[4];
- int psave = p;
- int base = token.base;
- buf[0] = (byte)token.getC();
- int i;
- for (i=1; i<4; i++) {
- fetchTokenInCC();
- if (token.type != TokenType.RAW_BYTE || token.base != base) {
- fetched = true;
- break;
- }
- buf[i] = (byte)token.getC();
- }
-
- if (i == 1) {
- arg.v = buf[0] & 0xff;
- arg.inType = CCVALTYPE.SB; // goto raw_single
- } else {
- arg.v = EncodingHelper.mbcToCode(buf, 0, buf.length);
- arg.inType = CCVALTYPE.CODE_POINT;
- }
- } else {
- arg.v = token.getC();
- arg.inType = CCVALTYPE.SB; // raw_single:
- }
+ arg.v = token.getC();
+ arg.inType = CCVALTYPE.SB; // raw_single:
arg.vIsRaw = true;
parseCharClassValEntry2(cc, arg); // goto val_entry2
break;
@@ -615,31 +591,10 @@
StringNode node = new StringNode((char)token.getC());
node.setRaw();
- int len = 1;
- while (true) {
- if (len >= 1) {
- if (len == 1) {
- fetchToken();
- node.clearRaw();
- // !goto string_end;!
- return parseExpRepeat(node, group);
- }
- }
-
- fetchToken();
- if (token.type != TokenType.RAW_BYTE) {
- /* Don't use this, it is wrong for little endian encodings. */
- // USE_PAD_TO_SHORT_BYTE_CHAR ...
-
- newValueException(ERR_TOO_SHORT_MULTI_BYTE_STRING);
- }
-
- // important: we don't use 0xff mask here neither in the compiler
- // (in the template string) so we won't have to mask target
- // strings when comparing against them in the matcher
- node.cat((char)token.getC());
- len++;
- } // while
+ fetchToken();
+ node.clearRaw();
+ // !goto string_end;!
+ return parseExpRepeat(node, group);
}
private Node parseExpRepeat(Node target, boolean group) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8019963.js Mon Jul 08 19:34:55 2013 +0200
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8019963: empty char range in regex
+ *
+ * @test
+ * @run
+ */
+
+var re1 = /[\x00-\x08\x0B\x0C\x0E-\x9F\uD800-\uDFFF\uFFFE\uFFFF]/; // class omits \x09, \x0A and \x0D
+
+print(re1.test("\x00")); // lower bound of \x00-\x08
+print(re1.test("\x04"));
+print(re1.test("\x08")); // upper bound of \x00-\x08
+print(re1.test("\x0a")); // LF is not in the class
+print(re1.test("\x0B"));
+print(re1.test("\x0C"));
+print(re1.test("\x0E")); // lower bound of \x0E-\x9F
+print(re1.test("\x10"));
+print(re1.test("\x1A"));
+print(re1.test("\x2F"));
+print(re1.test("\x8E"));
+print(re1.test("\x8F"));
+print(re1.test("\x9F")); // upper bound of \x0E-\x9F
+print(re1.test("\xA0")); // first code unit above \x0E-\x9F
+print(re1.test("\xAF"));
+print(re1.test("\uD800")); // lower bound of \uD800-\uDFFF
+print(re1.test("\uDA00")); // must be \u: \x takes only two hex digits, so "\xDA00" is U+00DA followed by "00"
+print(re1.test("\uDCFF")); // with \x these matched only because the trailing "0"/"F" lies in \x0E-\x9F
+print(re1.test("\uDFFF")); // upper bound of \uD800-\uDFFF
+print(re1.test("\uFFFE"));
+print(re1.test("\uFFFF"));
+
+var re2 = /[\x1F\x7F-\x84\x86]/; // \x85 is not a member: it sits between the \x7F-\x84 range and \x86
+
+print(re2.test("\x1F")); // listed literally in the class
+print(re2.test("\x2F")); // not in the class
+print(re2.test("\x3F")); // not in the class
+print(re2.test("\x7F")); // lower bound of \x7F-\x84
+print(re2.test("\x80"));
+print(re2.test("\x84")); // upper bound of \x7F-\x84
+print(re2.test("\x85")); // the gap between \x84 and \x86
+print(re2.test("\x86")); // listed literally in the class
+
+var re3 = /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/; // never used after this line; presumably only checks that the literal compiles - TODO confirm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8019963.js.EXPECTED Mon Jul 08 19:34:55 2013 +0200
@@ -0,0 +1,29 @@
+true
+true
+true
+false
+true
+true
+true
+true
+true
+true
+true
+true
+true
+false
+false
+true
+true
+true
+true
+true
+true
+true
+false
+false
+true
+true
+true
+false
+true