8016518: Parsing of octal string escapes is broken
author hannesw
Thu, 13 Jun 2013 12:52:09 +0200
changeset 18329 392aaae366d6
parent 18328 ebd24057f163
child 18330 0d7d60a0eec6
8016518: Parsing of octal string escapes is broken
Reviewed-by: sundar, lagergren
nashorn/src/jdk/nashorn/internal/parser/Lexer.java
nashorn/test/script/basic/JDK-8016518.js
nashorn/test/script/basic/JDK-8016518.js.EXPECTED
--- a/nashorn/src/jdk/nashorn/internal/parser/Lexer.java	Thu Jun 13 16:08:35 2013 +0530
+++ b/nashorn/src/jdk/nashorn/internal/parser/Lexer.java	Thu Jun 13 12:52:09 2013 +0200
@@ -666,37 +666,24 @@
 
 
     /**
-     * Get the value of a numeric sequence.
+     * Get the value of a hexadecimal numeric sequence.
      *
-     * @param base  Numeric base.
-     * @param max   Maximum number of digits.
-     * @param skip  Skip over escape first.
-     * @param check Tells whether to throw error if a digit is invalid for the given base.
-     * @param type  Type of token to report against.
-     *
+     * @param length Number of digits.
+     * @param type   Type of token to report against.
      * @return Value of sequence or < 0 if no digits.
      */
-    private int valueOfSequence(final int base, final int max, final boolean skip, final boolean check, final TokenType type) {
-        assert base == 16 || base == 8 : "base other than 16 or 8";
-        final boolean isHex = base == 16;
-        final int shift = isHex ? 4 : 3;
+    private int hexSequence(final int length, final TokenType type) {
         int value = 0;
 
-        if (skip) {
-            skip(2);
-        }
-
-        for (int i = 0; i < max; i++) {
-            final int digit = convertDigit(ch0, base);
+        for (int i = 0; i < length; i++) {
+            final int digit = convertDigit(ch0, 16);
 
             if (digit == -1) {
-                if (check) {
-                    error(Lexer.message("invalid." + (isHex ? "hex" : "octal")), type, position, limit);
-                }
+                error(Lexer.message("invalid.hex"), type, position, limit);
                 return i == 0 ? -1 : value;
             }
 
-            value = value << shift | digit;
+            value = digit | value << 4;
             skip(1);
         }
 
@@ -704,6 +691,30 @@
     }
 
     /**
+     * Get the value of an octal escape sequence. This parses up to 3 octal digits, stopping early so the value never exceeds 255.
+     *
+     * @return Value of sequence, 0 if no octal digit was consumed.
+     */
+    private int octalSequence() {
+        int value = 0;
+
+        for (int i = 0; i < 3; i++) {
+            final int digit = convertDigit(ch0, 8);
+
+            if (digit == -1) { // convertDigit found no octal digit at ch0: the sequence ends here
+                break;
+            }
+            value = digit | value << 3; // << binds tighter than |: make room for 3 more bits, then add the digit
+            skip(1);
+
+            if (i == 1 && value >= 32) { // two digits already >= 040: a third digit would push the value past 0377 (255)
+                break;
+            }
+        }
+        return value;
+    }
+
+    /**
      * Convert a string to a JavaScript identifier.
      *
      * @param start  Position in source content.
@@ -724,7 +735,8 @@
         while (!atEOF() && position < end && !isEOL(ch0)) {
             // If escape character.
             if (ch0 == '\\' && ch1 == 'u') {
-                final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
+                skip(2);
+                final int ch = hexSequence(4, TokenType.IDENT);
                 if (isWhitespace((char)ch)) {
                     return null;
                 }
@@ -815,7 +827,7 @@
                     }
                     reset(afterSlash);
                     // Octal sequence.
-                    final int ch = valueOfSequence(8, 3, false, false, STRING);
+                    final int ch = octalSequence();
 
                     if (ch < 0) {
                         sb.append('\\');
@@ -862,7 +874,7 @@
                     break;
                 case 'x': {
                     // Hex sequence.
-                    final int ch = valueOfSequence(16, 2, false, true, STRING);
+                    final int ch = hexSequence(2, STRING);
 
                     if (ch < 0) {
                         sb.append('\\');
@@ -874,7 +886,7 @@
                     break;
                 case 'u': {
                     // Unicode sequence.
-                    final int ch = valueOfSequence(16, 4, false, true, STRING);
+                    final int ch = hexSequence(4, STRING);
 
                     if (ch < 0) {
                         sb.append('\\');
@@ -1191,7 +1203,8 @@
 
         // Make sure first character is valid start character.
         if (ch0 == '\\' && ch1 == 'u') {
-            final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
+            skip(2);
+            final int ch = hexSequence(4, TokenType.IDENT);
 
             if (!Character.isJavaIdentifierStart(ch)) {
                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
@@ -1204,7 +1217,8 @@
         // Make sure remaining characters are valid part characters.
         while (!atEOF()) {
             if (ch0 == '\\' && ch1 == 'u') {
-                final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
+                skip(2);
+                final int ch = hexSequence(4, TokenType.IDENT);
 
                 if (!Character.isJavaIdentifierPart(ch)) {
                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8016518.js	Thu Jun 13 12:52:09 2013 +0200
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * 
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * 
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8016518: Parsing of octal string escapes is broken
+ *
+ * @test
+ * @run
+ */
+
+print("\471".charCodeAt(0)); // expect 39: only "\47" forms the octal escape, since a third digit would exceed 255
+print("\471".charCodeAt(1)); // expect 49: the trailing "1" stays a literal character
+
+print("\377".length); // expect 1: 0377 == 255 still fits in a single escape
+print("\377".charCodeAt(0)); // expect 255
+print("\400".length); // expect 2: 0400 == 256 is too large, so only "\40" forms the escape
+print("\400".charCodeAt(0)); // expect 32
+print("\400".charCodeAt(1)); // expect 48: the trailing "0" stays a literal character
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8016518.js.EXPECTED	Thu Jun 13 12:52:09 2013 +0200
@@ -0,0 +1,7 @@
+39
+49
+1
+255
+2
+32
+48