8023650: Regexp m flag does not recognize CRNL or CR
Reviewed-by: jlaskey, lagergren
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Mon Aug 26 15:59:41 2013 +0200
@@ -26,7 +26,6 @@
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
-import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl;
import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
@@ -500,7 +499,7 @@
private void opAnyChar() {
if (s >= range) {opFail(); return;}
- if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;}
+ if (isNewLine(chars[s])) {opFail(); return;}
s++;
sprev = sbegin; // break;
}
@@ -538,7 +537,7 @@
while (s < range) {
char b = chars[s];
if (c == b) pushAlt(ip + 1, s, sprev);
- if (b == EncodingHelper.NEW_LINE) {opFail(); return;}
+ if (isNewLine(b)) {opFail(); return;}
sprev = s;
s++;
}
@@ -617,7 +616,7 @@
if (s == str) {
if (isNotBol(msaOptions)) opFail();
return;
- } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) {
+ } else if (isNewLine(chars, sprev, end) && s != end) {
return;
}
opFail();
@@ -626,7 +625,7 @@
private void opEndLine() {
if (s == end) {
if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
- if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) {
+ if (str == end || !isNewLine(chars, sprev, end)) {
if (isNotEol(msaOptions)) opFail();
}
return;
@@ -634,7 +633,7 @@
if (isNotEol(msaOptions)) opFail();
return;
}
- } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) {
+ } else if (isNewLine(chars, s, end)) {
return;
}
opFail();
@@ -653,9 +652,6 @@
}
} else if (isNewLine(chars, s, end) && s + 1 == end) {
return;
- } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) {
- int ss = s + 2;
- if (ss == end) return;
}
opFail();
}
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Mon Aug 26 15:59:41 2013 +0200
@@ -29,7 +29,6 @@
final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
- final boolean USE_CRNL_AS_LINE_TERMINATOR = false;
final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Mon Aug 26 15:59:41 2013 +0200
@@ -24,10 +24,12 @@
import java.util.Arrays;
-public class EncodingHelper {
+public final class EncodingHelper {
- public final static char NEW_LINE = 0xa;
- public final static char RETURN = 0xd;
+ final static int NEW_LINE = 0x000a;
+ final static int RETURN = 0x000d;
+ final static int LINE_SEPARATOR = 0x2028;
+ final static int PARAGRAPH_SEPARATOR = 0x2029;
final static char[] EMPTYCHARS = new char[0];
final static int[][] codeRanges = new int[15][];
@@ -64,15 +66,11 @@
}
public static boolean isNewLine(int code) {
- return code == NEW_LINE;
+ return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR;
}
public static boolean isNewLine(char[] chars, int p, int end) {
- return p < end && chars[p] == NEW_LINE;
- }
-
- public static boolean isCrnl(char[] chars, int p, int end) {
- return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
+ return p < end && isNewLine(chars[p]);
}
// Encoding.prevCharHead
@@ -194,7 +192,7 @@
int type;
switch (ctype) {
case CharacterType.NEWLINE:
- return code == EncodingHelper.NEW_LINE;
+ return isNewLine(code);
case CharacterType.ALPHA:
return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
case CharacterType.BLANK:
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Mon Aug 26 15:59:41 2013 +0200
@@ -732,7 +732,7 @@
if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
break;
case '$':
- if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
+ if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
break;
case '[':
if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Mon Aug 26 15:59:41 2013 +0200
@@ -141,7 +141,7 @@
continue retry;
}
}
- } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+ } else if (!EncodingHelper.isNewLine(chars, p, end)) {
//if () break;
// goto retry_gate;
pprev = p;
@@ -226,7 +226,7 @@
continue retry;
}
}
- } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+ } else if (!EncodingHelper.isNewLine(chars, p, end)) {
p = EncodingHelper.prevCharHead(adjrange, p);
if (p == -1) return false;
continue retry;
@@ -330,12 +330,6 @@
maxSemiEnd = end;
if (EncodingHelper.isNewLine(chars, preEnd, end)) {
minSemiEnd = preEnd;
- if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
- preEnd = EncodingHelper.stepBack(str, preEnd, 1);
- if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
- minSemiEnd = preEnd;
- }
- }
if (minSemiEnd > str && start <= minSemiEnd) {
// !goto end_buf;!
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
--- a/nashorn/src/jdk/nashorn/tools/Shell.java Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/tools/Shell.java Mon Aug 26 15:59:41 2013 +0200
@@ -445,7 +445,7 @@
continue;
}
- if (res != null && res != ScriptRuntime.UNDEFINED) {
+ if (res != ScriptRuntime.UNDEFINED) {
err.println(JSType.toString(res));
}
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8023650.js Mon Aug 26 15:59:41 2013 +0200
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8023650: Regexp m flag does not recognize CRNL or CR
+ *
+ * @test
+ * @run
+ */
+
+if (!/^Connection: close$/m.test('\r\n\r\nConnection: close\r\n\r\n')) { // m flag: ^ and $ must match around CR LF
+ throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\n\nConnection: close\n\n')) { // m flag: same check with LF only
+ throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\r\rConnection: close\r\r')) { // m flag: same check with CR only
+ throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\u2028\u2028Connection: close\u2028\u2028')) { // m flag: same check with U+2028 (line separator)
+ throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\u2029\u2029Connection: close\u2029\u2029')) { // m flag: same check with U+2029 (paragraph separator)
+ throw new Error();
+}
+// Below: '.' must not consume a line terminator, so the match is just 'a' and the (.*) group is empty.
+var result = /a(.*)/.exec("a\r"); // CR, no m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/m.exec("a\r"); // CR, m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/.exec("a\n"); // LF, no m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/m.exec("a\n"); // LF, m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/.exec("a\r\n"); // CR LF, no m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/m.exec("a\r\n"); // CR LF, m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/.exec("a\u2028"); // U+2028, no m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+
+result = /a(.*)/m.exec("a\u2029"); // U+2029, m flag
+if (!result || result[0] != 'a' || result[1] != '') {
+ throw new Error();
+}
+// Below: without the m flag, '$' must not match in front of a trailing line terminator.
+if (/a$/.test("a\n")) { // trailing LF
+ throw new Error();
+}
+
+if (/a$/.test("a\r")) { // trailing CR
+ throw new Error();
+}
+
+if (/a$/.test("a\r\n")) { // trailing CR LF
+ throw new Error();
+}
+
+if (/a$/.test("a\u2028")) { // trailing U+2028
+ throw new Error();
+}
+
+if (/a$/.test("a\u2029")) { // trailing U+2029
+ throw new Error();
+}