8015347: Parsing issue with decodeURIComponent
author sundar
Wed, 19 Jun 2013 21:07:59 +0530
changeset 18607 e85e37a9ac0a
parent 18606 5704d7a4a0a8
child 18608 cb5ba414975f
8015347: Parsing issue with decodeURIComponent
Reviewed-by: jlaskey, hannesw
nashorn/src/jdk/nashorn/internal/runtime/URIUtils.java
nashorn/test/script/basic/JDK-8015347.js
--- a/nashorn/src/jdk/nashorn/internal/runtime/URIUtils.java	Wed Jun 19 09:10:49 2013 -0300
+++ b/nashorn/src/jdk/nashorn/internal/runtime/URIUtils.java	Wed Jun 19 21:07:59 2013 +0530
@@ -27,8 +27,6 @@
 
 import static jdk.nashorn.internal.runtime.ECMAErrors.uriError;
 
-import java.io.UnsupportedEncodingException;
-
 /**
  * URI handling global functions. ECMA 15.1.3 URI Handling Function Properties
  *
@@ -127,6 +125,7 @@
 
             k += 2;
             char C;
+            // Most significant bit is zero
             if ((B & 0x80) == 0) {
                 C = (char) B;
                 if (!component && URI_RESERVED.indexOf(C) >= 0) {
@@ -137,49 +136,68 @@
                     sb.append(C);
                 }
             } else {
-                int n;
-                for (n = 1; n < 6; n++) {
-                    if (((B << n) & 0x80) == 0) {
-                        break;
-                    }
-                }
+                // n is utf8 length, V is codepoint and minV is lower bound
+                int n, V, minV;
 
-                if (n == 1 || n > 4) {
+                if ((B & 0xC0) == 0x80) {
+                    // 10xxxxxx - illegal first byte
+                    return error(string, k);
+                } else if ((B & 0x20) == 0) {
+                    // 110xxxxx 10xxxxxx
+                    n = 2;
+                    V = B & 0x1F;
+                    minV = 0x80;
+                } else if ((B & 0x10) == 0) {
+                    // 1110xxxx 10xxxxxx 10xxxxxx
+                    n = 3;
+                    V = B & 0x0F;
+                    minV = 0x800;
+                } else if ((B & 0x08) == 0) {
+                    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    n = 4;
+                    V = B & 0x07;
+                    minV = 0x10000;
+                } else if ((B & 0x04) == 0) {
+                    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    n = 5;
+                    V =  B & 0x03;
+                    minV = 0x200000;
+                } else if ((B & 0x02) == 0) {
+                    // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                    n = 6;
+                    V = B & 0x01;
+                    minV = 0x4000000;
+                } else {
                     return error(string, k);
                 }
 
-                if ((k + (3 * (n - 1))) >= len) {
+                // check bound for sufficient chars
+                if (k + (3*(n-1)) >= len) {
                     return error(string, k);
                 }
 
-                final byte[] bbuf = new byte[n];
-                bbuf[0] = (byte) B;
-
                 for (int j = 1; j < n; j++) {
                     k++;
                     if (string.charAt(k) != '%') {
                         return error(string, k);
                     }
 
-                    if (k + 2 == len) {
-                        return error(string, k);
-                    }
-
                     B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
                     if (B < 0 || (B & 0xC0) != 0x80) {
                         return error(string, k + 1);
                     }
 
+                    V = (V << 6) | (B & 0x3F);
                     k += 2;
-                    bbuf[j] = (byte) B;
                 }
 
-                int V;
-                try {
-                    V = ucs4Char(bbuf);
-                } catch (final Exception e) {
-                    throw uriError(e, "bad.uri", string, Integer.toString(k));
+                // Check for overlongs and invalid codepoints.
+                // The high and low surrogate halves used by UTF-16
+                // (U+D800 through U+DFFF) are not legal Unicode values.
+                if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) {
+                    V = Integer.MAX_VALUE;
                 }
+
                 if (V < 0x10000) {
                     C = (char) V;
                     if (!component && URI_RESERVED.indexOf(C) >= 0) {
@@ -224,10 +242,6 @@
         return -1;
     }
 
-    private static int ucs4Char(final byte[] utf8) throws UnsupportedEncodingException {
-        return new String(utf8, "UTF-8").codePointAt(0);
-    }
-
     private static String toHexEscape(final int u0) {
         int u = u0;
         int len;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/basic/JDK-8015347.js	Wed Jun 19 21:07:59 2013 +0530
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * 
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * 
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * 
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * 
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * JDK-8015347: Parsing issue with decodeURIComponent
+ *
+ * @test
+ * @run
+ */
+
+try {
+    decodeURIComponent("%C0%80");
+    fail("Should have thrown URIError");
+} catch (e) {
+    if (! (e instanceof URIError)) {
+        fail("Expected URIError, but got " + e);
+    }
+}
+