8144221: fix Nashorn shebang argument handling on Mac/Linux
authormhaupt
Mon, 14 Dec 2015 14:02:59 +0100
changeset 34731 585a112b3719
parent 34572 4edcff1b9a88
child 34732 6605efbe8447
8144221: fix Nashorn shebang argument handling on Mac/Linux Reviewed-by: jlaskey, lagergren
nashorn/make/build.xml
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptingFunctions.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/tools/Shell.java
nashorn/test/script/nosecurity/JDK-8144221.js
nashorn/test/script/nosecurity/JDK-8144221.js.EXPECTED
nashorn/test/script/nosecurity/os-not-windows.js
--- a/nashorn/make/build.xml	Wed Jul 05 21:07:43 2017 +0200
+++ b/nashorn/make/build.xml	Mon Dec 14 14:02:59 2015 +0100
@@ -48,12 +48,12 @@
     <condition property="git.executable" value="/usr/local/bin/git" else="git">
       <available file="/usr/local/bin/git"/>
     </condition>
-    <!-- check if testng.jar is avaiable, and download it if it isn't -->
+    <!-- check if testng.jar is available, and download it if it isn't -->
     <available property="testng.already.present" file="${file.reference.testng.jar}"/>
     <antcall target="get-testng"/>
     <available property="testng.available" file="${file.reference.testng.jar}"/>
 
-    <!-- check if asmtools-6.0.jar is avaiable, and download it if it isn't -->
+    <!-- check if asmtools-6.0.jar is available, and download it if it isn't -->
     <!--
     <available property="asmtools.already.present" file="${file.reference.asmtools.jar}"/>
     <antcall target="get-asmtools"/>
@@ -84,6 +84,12 @@
     <condition property="jfr.options" value="${run.test.jvmargs.jfr}" else="">
       <istrue value="${jfr}"/>
     </condition>
+
+    <condition property="test-sys-prop-no-security.os.not.windows">
+      <not>
+        <os family="windows"/>
+      </not>
+    </condition>
   </target>
 
   <!-- check minimum ant version required to be 1.8.4 -->
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptingFunctions.java	Wed Jul 05 21:07:43 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptingFunctions.java	Mon Dec 14 14:02:59 2015 +0100
@@ -278,9 +278,8 @@
      * @param str a {@link String} to tokenize.
      * @return a {@link List} of {@link String}s representing the tokens that
      * constitute the string.
-     * @throws IOException in case {@link StreamTokenizer#nextToken()} raises it.
      */
-    public static List<String> tokenizeString(final String str) throws IOException {
+    public static List<String> tokenizeString(final String str) {
         final StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(str));
         tokenizer.resetSyntax();
         tokenizer.wordChars(0, 255);
@@ -290,7 +289,7 @@
         tokenizer.quoteChar('\'');
         final List<String> tokenList = new ArrayList<>();
         final StringBuilder toAppend = new StringBuilder();
-        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
+        while (nextToken(tokenizer) != StreamTokenizer.TT_EOF) {
             final String s = tokenizer.sval;
             // The tokenizer understands about honoring quoted strings and recognizes
             // them as one token that possibly contains multiple space-separated words.
@@ -309,4 +308,12 @@
         }
         return tokenList;
     }
+
+    private static int nextToken(final StreamTokenizer tokenizer) {
+        try {
+            return tokenizer.nextToken();
+        } catch (final IOException ioe) {
+            return StreamTokenizer.TT_EOF;
+        }
+    }
 }
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/tools/Shell.java	Wed Jul 05 21:07:43 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/tools/Shell.java	Mon Dec 14 14:02:59 2015 +0100
@@ -25,23 +25,6 @@
 
 package jdk.nashorn.tools;
 
-import static jdk.nashorn.internal.runtime.Source.sourceFor;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.io.PrintWriter;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.*;
-import java.util.stream.Collectors;
-
 import jdk.nashorn.api.scripting.NashornException;
 import jdk.nashorn.internal.codegen.Compiler;
 import jdk.nashorn.internal.codegen.Compiler.CompilationPhases;
@@ -60,10 +43,32 @@
 import jdk.nashorn.internal.runtime.ScriptFunction;
 import jdk.nashorn.internal.runtime.ScriptObject;
 import jdk.nashorn.internal.runtime.ScriptRuntime;
+import jdk.nashorn.internal.runtime.ScriptingFunctions;
 import jdk.nashorn.internal.runtime.Symbol;
 import jdk.nashorn.internal.runtime.arrays.ArrayLikeIterator;
 import jdk.nashorn.internal.runtime.options.Options;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import static jdk.nashorn.internal.runtime.Source.sourceFor;
+
 /**
  * Command line Shell for processing JavaScript files.
  */
@@ -203,8 +208,7 @@
         // parse options
         if (args != null) {
             try {
-                // FIXME: preprocessArgs does not yet work fine
-                final String[] prepArgs = args; // preprocessArgs(args);
+                final String[] prepArgs = preprocessArgs(args);
                 options.process(prepArgs);
             } catch (final IllegalArgumentException e) {
                 werr.println(bundle.getString("shell.usage"));
@@ -236,35 +240,53 @@
     }
 
     /**
-     * Preprocess the command line arguments passed in by the shell. This checks, for each of the arguments, whether it
-     * can be a file name, and if so, whether the file exists. If the file exists and begins with a shebang line, and
-     * the arguments on that line are a prefix of {@code args} with the file removed, it is assumed that a script file
-     * being executed via shebang was found, and it is moved to the appropriate position in the argument list. The first
-     * such match is used.
+     * Preprocess the command line arguments passed in by the shell. This method checks, for the first non-option
+     * argument, whether the file denoted by it begins with a shebang line. If so, it is assumed that execution in
+     * shebang mode is intended. The consequence of this is that the identified script file will be treated as the
+     * <em>only</em> script file, and all subsequent arguments will be regarded as arguments to the script.
      * <p>
-     * This method canonicalizes the command line arguments to the form {@code <options> <scripts> -- <arguments>},
-     * where the last of the {@code scripts} is the one being run in shebang fashion.
+     * This method canonicalizes the command line arguments to the form {@code <options> <script> -- <arguments>} if a
+     * shebang script is identified. On platforms that pass shebang arguments as single strings, the shebang arguments
+     * will be broken down into single arguments; whitespace is used as separator.
+     * <p>
+     * Shebang mode is entered regardless of whether the script is actually run directly from the shell, or indirectly
+     * via the {@code jjs} executable. It is the user's / script author's responsibility to ensure that the arguments
+     * given on the shebang line do not lead to a malformed argument sequence. In particular, the shebang arguments
+     * should not contain any whitespace for purposes other than separating arguments, as the different platforms deal
+     * with whitespace in different and incompatible ways.
      * <p>
      * @implNote Example:<ul>
-     * <li>Shebang line in {@code script.js}: {@code #!/path/to/jjs --language=es6 other.js -- arg1}</li>
+     * <li>Shebang line in {@code script.js}: {@code #!/path/to/jjs --language=es6}</li>
      * <li>Command line: {@code ./script.js arg2}</li>
-     * <li>{@code args} array passed to Nashorn: {@code --language=es6,other.js,--,arg1,./script.js,arg2}</li>
-     * <li>Required canonicalized arguments array: {@code --language=es6,other.js,./script.js,--,arg1,arg2}</li>
+     * <li>{@code args} array passed to Nashorn: {@code --language=es6,./script.js,arg}</li>
+     * <li>Required canonicalized arguments array: {@code --language=es6,./script.js,--,arg2}</li>
      * </ul>
      *
      * @param args the command line arguments as passed into Nashorn.
-     * @return a properly ordered argument list
+     * @return the passed and possibly canonicalized argument list
      */
     private static String[] preprocessArgs(final String[] args) {
-        final List<String> largs = new ArrayList<>();
-        Collections.addAll(largs, args);
-        final List<String> pa = new ArrayList<>();
-        String scriptFile = null;
-        boolean found = false;
-        for (int i = 0; i < args.length; ++i) {
-            final String a = args[i];
-            final Path p = Paths.get(a);
-            if (!found && (!a.startsWith("-") || a.length() == 1) && Files.exists(p)) {
+        if (args.length == 0) {
+            return args;
+        }
+
+        final List<String> processedArgs = new ArrayList<>();
+        processedArgs.addAll(Arrays.asList(args));
+
+        // Nashorn supports passing multiple shebang arguments. On platforms that pass anything following the
+        // shebang interpreter notice as one argument, the first element of the argument array needs to be special-cased
+        // as it might actually contain several arguments. Mac OS X splits shebang arguments, other platforms don't.
+        // This special handling is also only necessary if the first argument actually starts with an option.
+        if (args[0].startsWith("-") && !System.getProperty("os.name", "generic").startsWith("Mac OS X")) {
+            processedArgs.addAll(0, ScriptingFunctions.tokenizeString(processedArgs.remove(0)));
+        }
+
+        int shebangFilePos = -1; // -1 signifies "none found"
+        // identify a shebang file and its position in the arguments array (if any)
+        for (int i = 0; i < processedArgs.size(); ++i) {
+            final String a = processedArgs.get(i);
+            if (!a.startsWith("-")) {
+                final Path p = Paths.get(a);
                 String l = "";
                 try (final BufferedReader r = Files.newBufferedReader(p)) {
                     l = r.readLine();
@@ -272,35 +294,18 @@
                     // ignore
                 }
                 if (l.startsWith("#!")) {
-                    List<String> shebangArgs = Arrays.asList(l.split(" "));
-                    shebangArgs = shebangArgs.subList(1, shebangArgs.size()); // remove #! part
-                    final int ssize = shebangArgs.size();
-                    final List<String> filteredArgs = largs.stream().filter(x -> !x.equals(a)).collect(Collectors.toList());
-                    if (filteredArgs.size() >= ssize && shebangArgs.equals(filteredArgs.subList(0, ssize))) {
-                        scriptFile = a;
-                        found = true;
-                        continue;
-                    }
+                    shebangFilePos = i;
                 }
+                // We're only checking the first non-option argument. If it's not a shebang file, we're in normal
+                // execution mode.
+                break;
             }
-            pa.add(a);
         }
-        if (scriptFile != null) {
-            // Insert the found script file name either before a -- argument, or at the end of the options list, before
-            // any other arguments, with an extra --.
-            int argidx = pa.indexOf("--");
-            if (argidx == -1) {
-                for (String s : pa) {
-                    ++argidx;
-                    if (s.charAt(0) != '-') {
-                        pa.add(argidx, "--");
-                        break;
-                    }
-                }
-            }
-            pa.add(argidx, scriptFile);
+        if (shebangFilePos != -1) {
+            // Insert the argument separator after the shebang script file.
+            processedArgs.add(shebangFilePos + 1, "--");
         }
-        return pa.stream().toArray(String[]::new);
+        return processedArgs.stream().toArray(String[]::new);
     }
 
     /**
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/nosecurity/JDK-8144221.js	Mon Dec 14 14:02:59 2015 +0100
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Test that shebang handling works properly.
+ *
+ * @test
+ * @runif os.not.windows
+ * @option -scripting
+ * @run
+ */
+
+// The test generates three different JavaScript source files. The first two
+// are generated at the beginning of the test and do not change.
+// * a.js
+//   print("A: " + arguments)
+// * b.js
+//   #!<path_to_jjs> -lalelu -- ignore
+//   print("B: " + arguments)
+//
+// The third file, shebang.js, is generated differently for each particular
+// test case, containing different shebang lines and one statement:
+// * shebang.js
+//   #!<path_to_jjs> <shebang_line>
+//   print("S: " + arguments)
+//
+// The path_to_jjs is extracted from the environment based on JAVA_HOME, so the
+// latter must be set properly.
+//
+// Each shebang.js is run four times, in all possible combinations of values
+// from the following two axes:
+// * without passing any arguments, and passing the arguments 'a.js' and
+//   '"hello world"' (the latter being a quoted string);
+// * run via jjs, and via direct shell execution (using shebang).
+
+var pseudosheb  = "#!${jjs} -lalelu -- ignore",
+    System      = Java.type('java.lang.System'),
+    Paths       = Java.type('java.nio.file.Paths'),
+    Files       = Java.type('java.nio.file.Files'),
+    Opt         = Java.type('java.nio.file.StandardOpenOption'),
+    Arrays      = Java.type('java.util.Arrays')
+
+var sep      = Java.type('java.io.File').separator,
+    win      = System.getProperty("os.name").startsWith("Windows"),
+    jjsName  = "jjs" + (win ? ".exe" : ""),
+    javaHome = System.getProperty("java.home")
+
+var jjs = javaHome + "/../bin/".replace(/\//g, sep) + jjsName
+if (!Files.exists(Paths.get(jjs))) {
+    jjs = javaHome + "/bin/".replace(/\//g, sep) + jjsName
+}
+
+// Create and cwd to a temporary directory.
+
+var tmpdir = Files.createTempDirectory(null),
+    tmp    = tmpdir.toAbsolutePath().toString(),
+    curpwd = $ENV.PWD
+
+$ENV.PWD = tmp
+
+// Test cases. Each case is documented with the expected output for the four
+// different executions.
+
+var shebs = [
+        // No arguments on the shebang line.
+        // noargs jjs/shebang -> no output but "S" prefix
+        // args jjs/shebang   -> output the arguments with "S" prefix
+        "",
+        // One interpreter argument.
+        // noargs jjs/shebang -> no output but "S" prefix
+        // args jjs/shebang   -> output the arguments with "S" prefix
+        "--language=es6",
+        // Two interpreter arguments.
+        // noargs jjs/shebang -> no output but "S" prefix
+        // args jjs/shebang   -> output the arguments with "S" prefix
+        "--language=es6 -scripting",
+        // One interpreter argument and a JavaScript file without shebang.
+        // (For shebang execution, this is a pathological example, as the
+        // JavaScript file passed as a shebang argument will be analyzed and
+        // shebang mode will not be entered.)
+        // noargs jjs     -> no output but "S" prefix
+        // args jjs       -> output the arguments with "S" prefix
+        // noargs shebang -> no output but "A" and "S" prefixes
+        // args shebang   -> output "A", "S", and "A" prefixes, then the error
+        //                   message:
+        //                   "java.io.IOException: hello world is not a file"
+        "-scripting a.js",
+        // One interpreter argument and a JavaScript file with shebang. (This
+        // is another pathological example, as it will force shebang mode,
+        // leading to all subsequent arguments, including shebang.js, being
+        // treated as arguments to the script b.js.)
+        // noargs jjs     -> no output but the "S" prefix
+        // args jjs       -> output the arguments with "S" prefix
+        // noargs shebang -> output shebang.js with "B" prefix
+        // args shebang   -> output shebang.js and the arguments with "B"
+        //                   prefix
+        "-scripting b.js"
+    ]
+
+function write(file, lines) {
+    Files.write(Paths.get(tmp, file), Arrays.asList(lines), Opt.CREATE, Opt.WRITE)
+}
+
+function insn(name) {
+    return "print('${name}:' + arguments)"
+}
+
+function run(viajjs, name, arg1, arg2) {
+    var prefix = viajjs ? "${jjs} -scripting " : ''
+    $EXEC("${prefix}./shebang.js ${arg1} ${arg2}")
+    print("* ${name} via ${viajjs ? 'jjs' : 'shebang'}")
+    print($OUT.trim())
+    print($ERR.trim())
+}
+
+write('a.js', insn('A'))
+write('b.js', [pseudosheb, insn('B')])
+
+shebs.forEach(function(sheb) {
+    var shebang = "#!${jjs} ${sheb}"
+    print("<<< ${sheb} >>>")
+    write('shebang.js', [shebang, insn('S')])
+    $EXEC('chmod +x shebang.js')
+    run(false, 'noargs', '', '')
+    run(true, 'noargs', '', '')
+    run(false, 'withargs', 'a.js', '"hello world"')
+    run(true, 'withargs', 'a.js', '"hello world"')
+    $EXEC('rm shebang.js')
+})
+
+// Cleanup.
+
+$EXEC('rm a.js b.js')
+$ENV.PWD = curpwd
+Files.delete(tmpdir)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/nosecurity/JDK-8144221.js.EXPECTED	Mon Dec 14 14:02:59 2015 +0100
@@ -0,0 +1,68 @@
+<<<  >>>
+* noargs via shebang
+S:
+
+* noargs via jjs
+S:
+
+* withargs via shebang
+S:a.js,hello world
+
+* withargs via jjs
+S:a.js,hello world
+
+<<< --language=es6 >>>
+* noargs via shebang
+S:
+
+* noargs via jjs
+S:
+
+* withargs via shebang
+S:a.js,hello world
+
+* withargs via jjs
+S:a.js,hello world
+
+<<< --language=es6 -scripting >>>
+* noargs via shebang
+S:
+
+* noargs via jjs
+S:
+
+* withargs via shebang
+S:a.js,hello world
+
+* withargs via jjs
+S:a.js,hello world
+
+<<< -scripting a.js >>>
+* noargs via shebang
+A:
+S:
+
+* noargs via jjs
+S:
+
+* withargs via shebang
+A:
+S:
+A:
+java.io.IOException: hello world is not a file
+* withargs via jjs
+S:a.js,hello world
+
+<<< -scripting b.js >>>
+* noargs via shebang
+B:./shebang.js
+
+* noargs via jjs
+S:
+
+* withargs via shebang
+B:./shebang.js,a.js,hello world
+
+* withargs via jjs
+S:a.js,hello world
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/script/nosecurity/os-not-windows.js	Mon Dec 14 14:02:59 2015 +0100
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Test that we're not running on Windows. The test actually checks if the os.not.windows property is set and processed
+ * by runif correctly.
+ *
+ * @test
+ * @runif os.not.windows
+ * @run
+ */
+
+var os = java.lang.System.getProperty("os.name")
+
+if (os.startsWith("Windows")) {
+    throw "This test should not be run on Windows."
+}