8023650: Regexp m flag does not recognize CRNL or CR
author hannesw
Mon, 26 Aug 2013 15:59:41 +0200
changeset 19634 67426988370f
parent 19633 e1158153db51
child 19635 b1a895c53316
8023650: Regexp m flag does not recognize CRNL or CR
Reviewed-by: jlaskey, lagergren
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java
nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java
nashorn/src/jdk/nashorn/tools/Shell.java
nashorn/test/script/basic/JDK-8023650.js
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java	Mon Aug 26 15:59:41 2013 +0200
@@ -26,7 +26,6 @@
 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
-import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl;
 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
 
 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
@@ -500,7 +499,7 @@
 
     private void opAnyChar() {
         if (s >= range) {opFail(); return;}
-        if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;}
+        if (isNewLine(chars[s])) {opFail(); return;}
         s++;
         sprev = sbegin; // break;
     }
@@ -538,7 +537,7 @@
         while (s < range) {
             char b = chars[s];
             if (c == b) pushAlt(ip + 1, s, sprev);
-            if (b == EncodingHelper.NEW_LINE) {opFail(); return;}
+            if (isNewLine(b)) {opFail(); return;}
             sprev = s;
             s++;
         }
@@ -617,7 +616,7 @@
         if (s == str) {
             if (isNotBol(msaOptions)) opFail();
             return;
-        } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) {
+        } else if (isNewLine(chars, sprev, end) && s != end) {
             return;
         }
         opFail();
@@ -626,7 +625,7 @@
     private void opEndLine()  {
         if (s == end) {
             if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
-                if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) {
+                if (str == end || !isNewLine(chars, sprev, end)) {
                     if (isNotEol(msaOptions)) opFail();
                 }
                 return;
@@ -634,7 +633,7 @@
                 if (isNotEol(msaOptions)) opFail();
                 return;
             }
-        } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) {
+        } else if (isNewLine(chars, s, end)) {
             return;
         }
         opFail();
@@ -653,9 +652,6 @@
             }
         } else if (isNewLine(chars, s, end) && s + 1 == end) {
             return;
-        } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) {
-            int ss = s + 2;
-            if (ss == end) return;
         }
         opFail();
     }
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java	Mon Aug 26 15:59:41 2013 +0200
@@ -29,7 +29,6 @@
     final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
     final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
     final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
-    final boolean USE_CRNL_AS_LINE_TERMINATOR = false;
 
     final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
     final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true;     /* /\n$/ =~ "\n" */
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java	Mon Aug 26 15:59:41 2013 +0200
@@ -24,10 +24,12 @@
 
 import java.util.Arrays;
 
-public class EncodingHelper {
+public final class EncodingHelper {
 
-    public final static char NEW_LINE = 0xa;
-    public final static char RETURN   = 0xd;
+    final static int NEW_LINE            = 0x000a;
+    final static int RETURN              = 0x000d;
+    final static int LINE_SEPARATOR      = 0x2028;
+    final static int PARAGRAPH_SEPARATOR = 0x2029;
 
     final static char[] EMPTYCHARS = new char[0];
     final static int[][] codeRanges = new int[15][];
@@ -64,15 +66,11 @@
     }
 
     public static boolean isNewLine(int code) {
-        return code == NEW_LINE;
+        return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR;
     }
 
     public static boolean isNewLine(char[] chars, int p, int end) {
-        return p < end && chars[p] == NEW_LINE;
-    }
-
-    public static boolean isCrnl(char[] chars, int p, int end) {
-        return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
+        return p < end && isNewLine(chars[p]);
     }
 
     // Encoding.prevCharHead
@@ -194,7 +192,7 @@
         int type;
         switch (ctype) {
             case CharacterType.NEWLINE:
-                return code == EncodingHelper.NEW_LINE;
+                return isNewLine(code);
             case CharacterType.ALPHA:
                 return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
             case CharacterType.BLANK:
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java	Mon Aug 26 15:59:41 2013 +0200
@@ -732,7 +732,7 @@
                         if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
                         break;
                     case '$':
-                        if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
+                        if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
                         break;
                     case '[':
                         if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;
--- a/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java	Mon Aug 26 15:59:41 2013 +0200
@@ -141,7 +141,7 @@
                                     continue retry;
                                 }
                             }
-                        } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
                             //if () break;
                             // goto retry_gate;
                             pprev = p;
@@ -226,7 +226,7 @@
                                     continue retry;
                                 }
                             }
-                        } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
+                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
                             p = EncodingHelper.prevCharHead(adjrange, p);
                             if (p == -1) return false;
                             continue retry;
@@ -330,12 +330,6 @@
                 maxSemiEnd = end;
                 if (EncodingHelper.isNewLine(chars, preEnd, end)) {
                     minSemiEnd = preEnd;
-                    if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
-                        preEnd = EncodingHelper.stepBack(str, preEnd, 1);
-                        if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
-                            minSemiEnd = preEnd;
-                        }
-                    }
                     if (minSemiEnd > str && start <= minSemiEnd) {
                         // !goto end_buf;!
                         if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
--- a/nashorn/src/jdk/nashorn/tools/Shell.java	Fri Aug 23 12:20:19 2013 -0300
+++ b/nashorn/src/jdk/nashorn/tools/Shell.java	Mon Aug 26 15:59:41 2013 +0200
@@ -445,7 +445,7 @@
                     continue;
                 }
 
-                if (res != null && res != ScriptRuntime.UNDEFINED) {
+                if (res != ScriptRuntime.UNDEFINED) {
                     err.println(JSType.toString(res));
                 }
             }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8023650.js	Mon Aug 26 15:59:41 2013 +0200
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * 
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * 
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8023650: Regexp m flag does not recognize CRNL or CR
+ *
+ * @test
+ * @run
+ */
+
+if (!/^Connection: close$/m.test('\r\n\r\nConnection: close\r\n\r\n')) {
+    throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\n\nConnection: close\n\n')) {
+    throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\r\rConnection: close\r\r')) {
+    throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\u2028\u2028Connection: close\u2028\u2028')) {
+    throw new Error();
+}
+
+if (!/^Connection: close$/m.test('\u2029\u2029Connection: close\u2029\u2029')) {
+    throw new Error();
+}
+
+var result = /a(.*)/.exec("a\r");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/m.exec("a\r");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/.exec("a\n");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/m.exec("a\n");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/.exec("a\r\n");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/m.exec("a\r\n");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/.exec("a\u2028");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+result = /a(.*)/m.exec("a\u2029");
+if (!result || result[0] != 'a' || result[1] != '') {
+    throw new Error();
+}
+
+if (/a$/.test("a\n")) {
+    throw new Error();
+}
+
+if (/a$/.test("a\r")) {
+    throw new Error();
+}
+
+if (/a$/.test("a\r\n")) {
+    throw new Error();
+}
+
+if (/a$/.test("a\u2028")) {
+    throw new Error();
+}
+
+if (/a$/.test("a\u2029")) {
+    throw new Error();
+}