jdk/src/solaris/native/java/lang/UNIXProcess_md.c
changeset 19399 e2e5122cd62e
parent 16860 8fecebee12b0
child 22597 7515a991bb37
equal deleted inserted replaced
19398:0cd1d4845b2d 19399:e2e5122cd62e
    41 #include <sys/types.h>
    41 #include <sys/types.h>
    42 #include <ctype.h>
    42 #include <ctype.h>
    43 #include <sys/wait.h>
    43 #include <sys/wait.h>
    44 #include <signal.h>
    44 #include <signal.h>
    45 #include <string.h>
    45 #include <string.h>
    46 #include <errno.h>
    46 
    47 #include <dirent.h>
    47 #if defined(__solaris__) || defined(_ALLBSD_SOURCE)
    48 #include <unistd.h>
    48 #include <spawn.h>
    49 #include <fcntl.h>
    49 #endif
    50 #include <limits.h>
    50 
    51 
    51 #include "childproc.h"
    52 #ifdef __APPLE__
       
    53 #include <crt_externs.h>
       
    54 #define environ (*_NSGetEnviron())
       
    55 #else
       
    56 /* This is one of the rare times it's more portable to declare an
       
    57  * external symbol explicitly, rather than via a system header.
       
    58  * The declaration is standardized as part of UNIX98, but there is
       
    59  * no standard (not even de-facto) header file where the
       
    60  * declaration is to be found.  See:
       
    61  * http://www.opengroup.org/onlinepubs/009695399/functions/environ.html
       
    62  * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_02.html
       
    63  *
       
    64  * "All identifiers in this volume of IEEE Std 1003.1-2001, except
       
    65  * environ, are defined in at least one of the headers" (!)
       
    66  */
       
    67 extern char **environ;
       
    68 #endif
       
    69 
    52 
    70 /*
    53 /*
    71  * There are 3 possible strategies we might use to "fork":
    54  * There are 4 possible strategies we might use to "fork":
    72  *
    55  *
    73  * - fork(2).  Very portable and reliable but subject to
    56  * - fork(2).  Very portable and reliable but subject to
    74  *   failure due to overcommit (see the documentation on
    57  *   failure due to overcommit (see the documentation on
    75  *   /proc/sys/vm/overcommit_memory in Linux proc(5)).
    58  *   /proc/sys/vm/overcommit_memory in Linux proc(5)).
    76  *   This is the ancient problem of spurious failure whenever a large
    59  *   This is the ancient problem of spurious failure whenever a large
   101  *     #  Error: pthread_getattr_np failed with errno = 3 (ESRCH)
    84  *     #  Error: pthread_getattr_np failed with errno = 3 (ESRCH)
   102  *   We believe this is a glibc bug, reported here:
    85  *   We believe this is a glibc bug, reported here:
   103  *     http://sources.redhat.com/bugzilla/show_bug.cgi?id=10311
    86  *     http://sources.redhat.com/bugzilla/show_bug.cgi?id=10311
   104  *   but the glibc maintainers closed it as WONTFIX.
    87  *   but the glibc maintainers closed it as WONTFIX.
   105  *
    88  *
       
    89  * - posix_spawn(). While posix_spawn() is a fairly elaborate and
       
    90  *   complicated system call, it can't quite do everything that the old
       
    91  *   fork()/exec() combination can do, so the only feasible way to do
       
    92  *   this is to use posix_spawn to launch a new helper executable
       
    93  *   "jprochelper", which in turn execs the target (after cleaning
       
    94  *   up file-descriptors etc.) The end result is the same as before,
       
    95  *   a child process linked to the parent in the same way, but it
       
    96  *   avoids the problem of duplicating the parent (VM) process
       
    97  *   address space temporarily, before launching the target command.
       
    98  *
   106  * Based on the above analysis, we are currently using vfork() on
    99  * Based on the above analysis, we are currently using vfork() on
   107  * Linux and fork() on other Unix systems, but the code to use clone()
   100  * Linux and spawn() on other Unix systems, but the code to use clone()
   108  * remains.
   101  * and fork() remains.
   109  */
   102  */
   110 
       
   111 #define START_CHILD_USE_CLONE 0  /* clone() currently disabled; see above. */
       
   112 
       
   113 #ifndef START_CHILD_USE_CLONE
       
   114   #ifdef __linux__
       
   115     #define START_CHILD_USE_CLONE 1
       
   116   #else
       
   117     #define START_CHILD_USE_CLONE 0
       
   118   #endif
       
   119 #endif
       
   120 
       
   121 /* By default, use vfork() on Linux. */
       
   122 #ifndef START_CHILD_USE_VFORK
       
   123   #ifdef __linux__
       
   124     #define START_CHILD_USE_VFORK 1
       
   125   #else
       
   126     #define START_CHILD_USE_VFORK 0
       
   127   #endif
       
   128 #endif
       
   129 
       
   130 #if START_CHILD_USE_CLONE
       
   131 #include <sched.h>
       
   132 #define START_CHILD_SYSTEM_CALL "clone"
       
   133 #elif START_CHILD_USE_VFORK
       
   134 #define START_CHILD_SYSTEM_CALL "vfork"
       
   135 #else
       
   136 #define START_CHILD_SYSTEM_CALL "fork"
       
   137 #endif
       
   138 
       
   139 #ifndef STDIN_FILENO
       
   140 #define STDIN_FILENO 0
       
   141 #endif
       
   142 
       
   143 #ifndef STDOUT_FILENO
       
   144 #define STDOUT_FILENO 1
       
   145 #endif
       
   146 
       
   147 #ifndef STDERR_FILENO
       
   148 #define STDERR_FILENO 2
       
   149 #endif
       
   150 
       
   151 #ifndef SA_NOCLDSTOP
       
   152 #define SA_NOCLDSTOP 0
       
   153 #endif
       
   154 
       
   155 #ifndef SA_RESTART
       
   156 #define SA_RESTART 0
       
   157 #endif
       
   158 
       
   159 #define FAIL_FILENO (STDERR_FILENO + 1)
       
   160 
       
   161 /* TODO: Refactor. */
       
   162 #define RESTARTABLE(_cmd, _result) do { \
       
   163   do { \
       
   164     _result = _cmd; \
       
   165   } while((_result == -1) && (errno == EINTR)); \
       
   166 } while(0)
       
   167 
   103 
   168 
   104 
   169 static void
   105 static void
   170 setSIGCHLDHandler(JNIEnv *env)
   106 setSIGCHLDHandler(JNIEnv *env)
   171 {
   107 {
   264     }
   200     }
   265     pathv[count] = NULL;
   201     pathv[count] = NULL;
   266     return pathv;
   202     return pathv;
   267 }
   203 }
   268 
   204 
   269 /**
       
   270  * The cached and split version of the JDK's effective PATH.
       
   271  * (We don't support putenv("PATH=...") in native code)
       
   272  */
       
   273 static const char * const *parentPathv;
       
   274 
       
   275 JNIEXPORT void JNICALL
   205 JNIEXPORT void JNICALL
   276 Java_java_lang_UNIXProcess_init(JNIEnv *env, jclass clazz)
   206 Java_java_lang_UNIXProcess_init(JNIEnv *env, jclass clazz)
   277 {
   207 {
   278     parentPathv = effectivePathv(env);
   208     parentPathv = effectivePathv(env);
   279 
       
   280     setSIGCHLDHandler(env);
   209     setSIGCHLDHandler(env);
   281 }
   210 }
   282 
   211 
   283 
   212 
   284 #ifndef WIFEXITED
   213 #ifndef WIFEXITED
   341          */
   270          */
   342         return status;
   271         return status;
   343     }
   272     }
   344 }
   273 }
   345 
   274 
   346 static ssize_t
       
   347 restartableWrite(int fd, const void *buf, size_t count)
       
   348 {
       
   349     ssize_t result;
       
   350     RESTARTABLE(write(fd, buf, count), result);
       
   351     return result;
       
   352 }
       
   353 
       
   354 static int
       
   355 restartableDup2(int fd_from, int fd_to)
       
   356 {
       
   357     int err;
       
   358     RESTARTABLE(dup2(fd_from, fd_to), err);
       
   359     return err;
       
   360 }
       
   361 
       
   362 static int
       
   363 restartableClose(int fd)
       
   364 {
       
   365     int err;
       
   366     RESTARTABLE(close(fd), err);
       
   367     return err;
       
   368 }
       
   369 
       
   370 static int
       
   371 closeSafely(int fd)
       
   372 {
       
   373     return (fd == -1) ? 0 : restartableClose(fd);
       
   374 }
       
   375 
       
   376 static int
       
   377 isAsciiDigit(char c)
       
   378 {
       
   379   return c >= '0' && c <= '9';
       
   380 }
       
   381 
       
   382 #ifdef _ALLBSD_SOURCE
       
   383 #define FD_DIR "/dev/fd"
       
   384 #define dirent64 dirent
       
   385 #define readdir64 readdir
       
   386 #else
       
   387 #define FD_DIR "/proc/self/fd"
       
   388 #endif
       
   389 
       
   390 static int
       
   391 closeDescriptors(void)
       
   392 {
       
   393     DIR *dp;
       
   394     struct dirent64 *dirp;
       
   395     int from_fd = FAIL_FILENO + 1;
       
   396 
       
   397     /* We're trying to close all file descriptors, but opendir() might
       
   398      * itself be implemented using a file descriptor, and we certainly
       
   399      * don't want to close that while it's in use.  We assume that if
       
   400      * opendir() is implemented using a file descriptor, then it uses
       
   401      * the lowest numbered file descriptor, just like open().  So we
       
   402      * close a couple explicitly.  */
       
   403 
       
   404     restartableClose(from_fd);          /* for possible use by opendir() */
       
   405     restartableClose(from_fd + 1);      /* another one for good luck */
       
   406 
       
   407     if ((dp = opendir(FD_DIR)) == NULL)
       
   408         return 0;
       
   409 
       
   410     /* We use readdir64 instead of readdir to work around Solaris bug
       
   411      * 6395699: /proc/self/fd fails to report file descriptors >= 1024 on Solaris 9
       
   412      */
       
   413     while ((dirp = readdir64(dp)) != NULL) {
       
   414         int fd;
       
   415         if (isAsciiDigit(dirp->d_name[0]) &&
       
   416             (fd = strtol(dirp->d_name, NULL, 10)) >= from_fd + 2)
       
   417             restartableClose(fd);
       
   418     }
       
   419 
       
   420     closedir(dp);
       
   421 
       
   422     return 1;
       
   423 }
       
   424 
       
   425 static int
       
   426 moveDescriptor(int fd_from, int fd_to)
       
   427 {
       
   428     if (fd_from != fd_to) {
       
   429         if ((restartableDup2(fd_from, fd_to) == -1) ||
       
   430             (restartableClose(fd_from) == -1))
       
   431             return -1;
       
   432     }
       
   433     return 0;
       
   434 }
       
   435 
       
   436 static const char *
   275 static const char *
   437 getBytes(JNIEnv *env, jbyteArray arr)
   276 getBytes(JNIEnv *env, jbyteArray arr)
   438 {
   277 {
   439     return arr == NULL ? NULL :
   278     return arr == NULL ? NULL :
   440         (const char*) (*env)->GetByteArrayElements(env, arr, NULL);
   279         (const char*) (*env)->GetByteArrayElements(env, arr, NULL);
   443 static void
   282 static void
   444 releaseBytes(JNIEnv *env, jbyteArray arr, const char* parr)
   283 releaseBytes(JNIEnv *env, jbyteArray arr, const char* parr)
   445 {
   284 {
   446     if (parr != NULL)
   285     if (parr != NULL)
   447         (*env)->ReleaseByteArrayElements(env, arr, (jbyte*) parr, JNI_ABORT);
   286         (*env)->ReleaseByteArrayElements(env, arr, (jbyte*) parr, JNI_ABORT);
   448 }
       
   449 
       
   450 static void
       
   451 initVectorFromBlock(const char**vector, const char* block, int count)
       
   452 {
       
   453     int i;
       
   454     const char *p;
       
   455     for (i = 0, p = block; i < count; i++) {
       
   456         /* Invariant: p always points to the start of a C string. */
       
   457         vector[i] = p;
       
   458         while (*(p++));
       
   459     }
       
   460     vector[count] = NULL;
       
   461 }
   287 }
   462 
   288 
   463 static void
   289 static void
   464 throwIOException(JNIEnv *env, int errnum, const char *defaultDetail)
   290 throwIOException(JNIEnv *env, int errnum, const char *defaultDetail)
   465 {
   291 {
   501     va_end(ap);
   327     va_end(ap);
   502     fclose(tty);
   328     fclose(tty);
   503 }
   329 }
   504 #endif /* DEBUG_PROCESS */
   330 #endif /* DEBUG_PROCESS */
   505 
   331 
   506 /**
       
   507  * Exec FILE as a traditional Bourne shell script (i.e. one without #!).
       
   508  * If we could do it over again, we would probably not support such an ancient
       
   509  * misfeature, but compatibility wins over sanity.  The original support for
       
   510  * this was imported accidentally from execvp().
       
   511  */
       
   512 static void
       
   513 execve_as_traditional_shell_script(const char *file,
       
   514                                    const char *argv[],
       
   515                                    const char *const envp[])
       
   516 {
       
   517     /* Use the extra word of space provided for us in argv by caller. */
       
   518     const char *argv0 = argv[0];
       
   519     const char *const *end = argv;
       
   520     while (*end != NULL)
       
   521         ++end;
       
   522     memmove(argv+2, argv+1, (end-argv) * sizeof (*end));
       
   523     argv[0] = "/bin/sh";
       
   524     argv[1] = file;
       
   525     execve(argv[0], (char **) argv, (char **) envp);
       
   526     /* Can't even exec /bin/sh?  Big trouble, but let's soldier on... */
       
   527     memmove(argv+1, argv+2, (end-argv) * sizeof (*end));
       
   528     argv[0] = argv0;
       
   529 }
       
   530 
       
   531 /**
       
   532  * Like execve(2), except that in case of ENOEXEC, FILE is assumed to
       
   533  * be a shell script and the system default shell is invoked to run it.
       
   534  */
       
   535 static void
       
   536 execve_with_shell_fallback(const char *file,
       
   537                            const char *argv[],
       
   538                            const char *const envp[])
       
   539 {
       
   540 #if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
       
   541     /* shared address space; be very careful. */
       
   542     execve(file, (char **) argv, (char **) envp);
       
   543     if (errno == ENOEXEC)
       
   544         execve_as_traditional_shell_script(file, argv, envp);
       
   545 #else
       
   546     /* unshared address space; we can mutate environ. */
       
   547     environ = (char **) envp;
       
   548     execvp(file, (char **) argv);
       
   549 #endif
       
   550 }
       
   551 
       
   552 /**
       
   553  * 'execvpe' should have been included in the Unix standards,
       
   554  * and is a GNU extension in glibc 2.10.
       
   555  *
       
   556  * JDK_execvpe is identical to execvp, except that the child environment is
       
   557  * specified via the 3rd argument instead of being inherited from environ.
       
   558  */
       
   559 static void
       
   560 JDK_execvpe(const char *file,
       
   561             const char *argv[],
       
   562             const char *const envp[])
       
   563 {
       
   564     if (envp == NULL || (char **) envp == environ) {
       
   565         execvp(file, (char **) argv);
       
   566         return;
       
   567     }
       
   568 
       
   569     if (*file == '\0') {
       
   570         errno = ENOENT;
       
   571         return;
       
   572     }
       
   573 
       
   574     if (strchr(file, '/') != NULL) {
       
   575         execve_with_shell_fallback(file, argv, envp);
       
   576     } else {
       
   577         /* We must search PATH (parent's, not child's) */
       
   578         char expanded_file[PATH_MAX];
       
   579         int filelen = strlen(file);
       
   580         int sticky_errno = 0;
       
   581         const char * const * dirs;
       
   582         for (dirs = parentPathv; *dirs; dirs++) {
       
   583             const char * dir = *dirs;
       
   584             int dirlen = strlen(dir);
       
   585             if (filelen + dirlen + 2 >= PATH_MAX) {
       
   586                 errno = ENAMETOOLONG;
       
   587                 continue;
       
   588             }
       
   589             memcpy(expanded_file, dir, dirlen);
       
   590             if (expanded_file[dirlen - 1] != '/')
       
   591                 expanded_file[dirlen++] = '/';
       
   592             memcpy(expanded_file + dirlen, file, filelen);
       
   593             expanded_file[dirlen + filelen] = '\0';
       
   594             execve_with_shell_fallback(expanded_file, argv, envp);
       
   595             /* There are 3 responses to various classes of errno:
       
   596              * return immediately, continue (especially for ENOENT),
       
   597              * or continue with "sticky" errno.
       
   598              *
       
   599              * From exec(3):
       
   600              *
       
   601              * If permission is denied for a file (the attempted
       
   602              * execve returned EACCES), these functions will continue
       
   603              * searching the rest of the search path.  If no other
       
   604              * file is found, however, they will return with the
       
   605              * global variable errno set to EACCES.
       
   606              */
       
   607             switch (errno) {
       
   608             case EACCES:
       
   609                 sticky_errno = errno;
       
   610                 /* FALLTHRU */
       
   611             case ENOENT:
       
   612             case ENOTDIR:
       
   613 #ifdef ELOOP
       
   614             case ELOOP:
       
   615 #endif
       
   616 #ifdef ESTALE
       
   617             case ESTALE:
       
   618 #endif
       
   619 #ifdef ENODEV
       
   620             case ENODEV:
       
   621 #endif
       
   622 #ifdef ETIMEDOUT
       
   623             case ETIMEDOUT:
       
   624 #endif
       
   625                 break; /* Try other directories in PATH */
       
   626             default:
       
   627                 return;
       
   628             }
       
   629         }
       
   630         if (sticky_errno != 0)
       
   631             errno = sticky_errno;
       
   632     }
       
   633 }
       
   634 
       
   635 /*
       
   636  * Reads nbyte bytes from file descriptor fd into buf,
       
   637  * The read operation is retried in case of EINTR or partial reads.
       
   638  *
       
   639  * Returns number of bytes read (normally nbyte, but may be less in
       
   640  * case of EOF).  In case of read errors, returns -1 and sets errno.
       
   641  */
       
   642 static ssize_t
       
   643 readFully(int fd, void *buf, size_t nbyte)
       
   644 {
       
   645     ssize_t remaining = nbyte;
       
   646     for (;;) {
       
   647         ssize_t n = read(fd, buf, remaining);
       
   648         if (n == 0) {
       
   649             return nbyte - remaining;
       
   650         } else if (n > 0) {
       
   651             remaining -= n;
       
   652             if (remaining <= 0)
       
   653                 return nbyte;
       
   654             /* We were interrupted in the middle of reading the bytes.
       
   655              * Unlikely, but possible. */
       
   656             buf = (void *) (((char *)buf) + n);
       
   657         } else if (errno == EINTR) {
       
   658             /* Strange signals like SIGJVM1 are possible at any time.
       
   659              * See http://www.dreamsongs.com/WorseIsBetter.html */
       
   660         } else {
       
   661             return -1;
       
   662         }
       
   663     }
       
   664 }
       
   665 
       
   666 typedef struct _ChildStuff
       
   667 {
       
   668     int in[2];
       
   669     int out[2];
       
   670     int err[2];
       
   671     int fail[2];
       
   672     int fds[3];
       
   673     const char **argv;
       
   674     const char **envv;
       
   675     const char *pdir;
       
   676     jboolean redirectErrorStream;
       
   677 #if START_CHILD_USE_CLONE
       
   678     void *clone_stack;
       
   679 #endif
       
   680 } ChildStuff;
       
   681 
       
   /*
    * Copy both descriptor numbers of the pipe pair `from` into `to`.
    */
   static void
   copyPipe(int from[2], int to[2])
   {
       int end;

       for (end = 0; end < 2; end++)
           to[end] = from[end];
   }
   688 
   338 
       
   339 /* arg is an array of pointers to 0 terminated strings. array is terminated
       
   340  * by a null element.
       
   341  *
       
   342  * *nelems and *nbytes receive the number of elements of array (incl 0)
       
   343  * and total number of bytes (incl. 0)
       
   344  * Note. An empty array will have one null element
       
   345  * But if arg is null, then *nelems set to 0, and *nbytes to 0
       
   346  */
       
   347 static void arraysize(const char * const *arg, int *nelems, int *nbytes)
       
   348 {
       
   349     int i, bytes, count;
       
   350     const char * const *a = arg;
       
   351     char *p;
       
   352     int *q;
       
   353     if (arg == 0) {
       
   354         *nelems = 0;
       
   355         *nbytes = 0;
       
   356         return;
       
   357     }
       
   358     /* count the array elements and number of bytes */
       
   359     for (count=0, bytes=0; *a != 0; count++, a++) {
       
   360         bytes += strlen(*a)+1;
       
   361     }
       
   362     *nbytes = bytes;
       
   363     *nelems = count+1;
       
   364 }
       
   365 
       
   366 /* copy the strings from arg[] into buf, starting at given offset
       
   367  * return new offset to next free byte
       
   368  */
       
   369 static int copystrings(char *buf, int offset, const char * const *arg) {
       
   370     char *p;
       
   371     const char * const *a;
       
   372     int count=0;
       
   373 
       
   374     if (arg == 0) {
       
   375         return offset;
       
   376     }
       
   377     for (p=buf+offset, a=arg; *a != 0; a++) {
       
   378         int len = strlen(*a) +1;
       
   379         memcpy(p, *a, len);
       
   380         p += len;
       
   381         count += len;
       
   382     }
       
   383     return offset+count;
       
   384 }
       
   385 
   689 /**
   386 /**
   690  * Child process after a successful fork() or clone().
       
   691  * This function must not return, and must be prepared for either all
       
   692  * of its address space to be shared with its parent, or to be a copy.
       
   693  * It must not modify global variables such as "environ".
       
   694  */
       
   695 static int
       
   696 childProcess(void *arg)
       
   697 {
       
   698     const ChildStuff* p = (const ChildStuff*) arg;
       
   699 
       
   700     /* Close the parent sides of the pipes.
       
   701        Closing pipe fds here is redundant, since closeDescriptors()
       
   702        would do it anyways, but a little paranoia is a good thing. */
       
   703     if ((closeSafely(p->in[1])   == -1) ||
       
   704         (closeSafely(p->out[0])  == -1) ||
       
   705         (closeSafely(p->err[0])  == -1) ||
       
   706         (closeSafely(p->fail[0]) == -1))
       
   707         goto WhyCantJohnnyExec;
       
   708 
       
   709     /* Give the child sides of the pipes the right fileno's. */
       
   710     /* Note: it is possible for in[0] == 0 */
       
   711     if ((moveDescriptor(p->in[0] != -1 ?  p->in[0] : p->fds[0],
       
   712                         STDIN_FILENO) == -1) ||
       
   713         (moveDescriptor(p->out[1]!= -1 ? p->out[1] : p->fds[1],
       
   714                         STDOUT_FILENO) == -1))
       
   715         goto WhyCantJohnnyExec;
       
   716 
       
   717     if (p->redirectErrorStream) {
       
   718         if ((closeSafely(p->err[1]) == -1) ||
       
   719             (restartableDup2(STDOUT_FILENO, STDERR_FILENO) == -1))
       
   720             goto WhyCantJohnnyExec;
       
   721     } else {
       
   722         if (moveDescriptor(p->err[1] != -1 ? p->err[1] : p->fds[2],
       
   723                            STDERR_FILENO) == -1)
       
   724             goto WhyCantJohnnyExec;
       
   725     }
       
   726 
       
   727     if (moveDescriptor(p->fail[1], FAIL_FILENO) == -1)
       
   728         goto WhyCantJohnnyExec;
       
   729 
       
   730     /* close everything */
       
   731     if (closeDescriptors() == 0) { /* failed,  close the old way */
       
   732         int max_fd = (int)sysconf(_SC_OPEN_MAX);
       
   733         int fd;
       
   734         for (fd = FAIL_FILENO + 1; fd < max_fd; fd++)
       
   735             if (restartableClose(fd) == -1 && errno != EBADF)
       
   736                 goto WhyCantJohnnyExec;
       
   737     }
       
   738 
       
   739     /* change to the new working directory */
       
   740     if (p->pdir != NULL && chdir(p->pdir) < 0)
       
   741         goto WhyCantJohnnyExec;
       
   742 
       
   743     if (fcntl(FAIL_FILENO, F_SETFD, FD_CLOEXEC) == -1)
       
   744         goto WhyCantJohnnyExec;
       
   745 
       
   746     JDK_execvpe(p->argv[0], p->argv, p->envv);
       
   747 
       
   748  WhyCantJohnnyExec:
       
   749     /* We used to go to an awful lot of trouble to predict whether the
       
   750      * child would fail, but there is no reliable way to predict the
       
   751      * success of an operation without *trying* it, and there's no way
       
   752      * to try a chdir or exec in the parent.  Instead, all we need is a
       
   753      * way to communicate any failure back to the parent.  Easy; we just
       
   754      * send the errno back to the parent over a pipe in case of failure.
       
   755      * The tricky thing is, how do we communicate the *success* of exec?
       
   756      * We use FD_CLOEXEC together with the fact that a read() on a pipe
       
   757      * yields EOF when the write ends (we have two of them!) are closed.
       
   758      */
       
   759     {
       
   760         int errnum = errno;
       
   761         restartableWrite(FAIL_FILENO, &errnum, sizeof(errnum));
       
   762     }
       
   763     restartableClose(FAIL_FILENO);
       
   764     _exit(-1);
       
   765     return 0;  /* Suppress warning "no return value from function" */
       
   766 }
       
   767 
       
   768 /**
       
   769  * Start a child process running function childProcess.
       
   770  * This function only returns in the parent.
       
   771  * We are unusually paranoid; use of clone/vfork is
   387  * We are unusually paranoid; use of clone/vfork is
   772  * especially likely to tickle gcc/glibc bugs.
   388  * especially likely to tickle gcc/glibc bugs.
   773  */
   389  */
   774 #ifdef __attribute_noinline__  /* See: sys/cdefs.h */
   390 #ifdef __attribute_noinline__  /* See: sys/cdefs.h */
   775 __attribute_noinline__
   391 __attribute_noinline__
   776 #endif
   392 #endif
       
   393 
       
   394 #define START_CHILD_USE_CLONE 0  /* clone() currently disabled; see above. */
       
   395 
       
   396 #ifdef START_CHILD_USE_CLONE
   777 static pid_t
   397 static pid_t
   778 startChild(ChildStuff *c) {
   398 cloneChild(ChildStuff *c) {
   779 #if START_CHILD_USE_CLONE
   399 #ifdef __linux__
   780 #define START_CHILD_CLONE_STACK_SIZE (64 * 1024)
   400 #define START_CHILD_CLONE_STACK_SIZE (64 * 1024)
   781     /*
   401     /*
   782      * See clone(2).
   402      * See clone(2).
   783      * Instead of worrying about which direction the stack grows, just
   403      * Instead of worrying about which direction the stack grows, just
   784      * allocate twice as much and start the stack in the middle.
   404      * allocate twice as much and start the stack in the middle.
   788         return -1;
   408         return -1;
   789     return clone(childProcess,
   409     return clone(childProcess,
   790                  c->clone_stack + START_CHILD_CLONE_STACK_SIZE,
   410                  c->clone_stack + START_CHILD_CLONE_STACK_SIZE,
   791                  CLONE_VFORK | CLONE_VM | SIGCHLD, c);
   411                  CLONE_VFORK | CLONE_VM | SIGCHLD, c);
   792 #else
   412 #else
   793   #if START_CHILD_USE_VFORK
   413 /* not available on Solaris / Mac */
       
   414     assert(0);
       
   415     return -1;
       
   416 #endif
       
   417 }
       
   418 #endif
       
   419 
       
   420 static pid_t
       
   421 vforkChild(ChildStuff *c) {
       
   422     volatile pid_t resultPid;
       
   423 
   794     /*
   424     /*
   795      * We separate the call to vfork into a separate function to make
   425      * We separate the call to vfork into a separate function to make
   796      * very sure to keep stack of child from corrupting stack of parent,
   426      * very sure to keep stack of child from corrupting stack of parent,
   797      * as suggested by the scary gcc warning:
   427      * as suggested by the scary gcc warning:
   798      *  warning: variable 'foo' might be clobbered by 'longjmp' or 'vfork'
   428      *  warning: variable 'foo' might be clobbered by 'longjmp' or 'vfork'
   799      */
   429      */
   800     volatile pid_t resultPid = vfork();
   430     resultPid = vfork();
   801   #else
   431 
       
   432     if (resultPid == 0) {
       
   433         childProcess(c);
       
   434     }
       
   435     assert(resultPid != 0);  /* childProcess never returns */
       
   436     return resultPid;
       
   437 }
       
   438 
       
   439 static pid_t
       
   440 forkChild(ChildStuff *c) {
       
   441     pid_t resultPid;
       
   442 
   802     /*
   443     /*
   803      * From Solaris fork(2): In Solaris 10, a call to fork() is
   444      * From Solaris fork(2): In Solaris 10, a call to fork() is
   804      * identical to a call to fork1(); only the calling thread is
   445      * identical to a call to fork1(); only the calling thread is
   805      * replicated in the child process. This is the POSIX-specified
   446      * replicated in the child process. This is the POSIX-specified
   806      * behavior for fork().
   447      * behavior for fork().
   807      */
   448      */
   808     pid_t resultPid = fork();
   449     resultPid = fork();
   809   #endif
   450 
   810     if (resultPid == 0)
   451     if (resultPid == 0) {
   811         childProcess(c);
   452         childProcess(c);
       
   453     }
   812     assert(resultPid != 0);  /* childProcess never returns */
   454     assert(resultPid != 0);  /* childProcess never returns */
   813     return resultPid;
   455     return resultPid;
   814 #endif /* ! START_CHILD_USE_CLONE */
   456 }
       
   457 
       
   458 #if defined(__solaris__) || defined(_ALLBSD_SOURCE)
       
   459 static pid_t
       
   460 spawnChild(JNIEnv *env, jobject process, ChildStuff *c, const char *helperpath) {
       
   461     pid_t resultPid;
       
   462     jboolean isCopy;
       
   463     int i, offset, rval, bufsize, magic;
       
   464     char *buf, buf1[16];
       
   465     char *hlpargs[2];
       
   466     SpawnInfo sp;
       
   467 
       
   468     /* need to tell helper which fd is for receiving the childstuff
       
   469      * and which fd to send response back on
       
   470      */
       
   471     snprintf(buf1, sizeof(buf1), "%d:%d", c->childenv[0], c->fail[1]);
       
   472     /* put the fd string as argument to the helper cmd */
       
   473     hlpargs[0] = buf1;
       
   474     hlpargs[1] = 0;
       
   475 
       
   476     /* Following items are sent down the pipe to the helper
       
   477      * after it is spawned.
       
   478      * All strings are null terminated. All arrays of strings
       
   479      * have an empty string for termination.
       
   480      * - the ChildStuff struct
       
   481      * - the SpawnInfo struct
       
   482      * - the argv strings array
       
   483      * - the envv strings array
       
   484      * - the home directory string
       
   485      * - the parentPath string
       
   486      * - the parentPathv array
       
   487      */
       
   488     /* First calculate the sizes */
       
   489     arraysize(c->argv, &sp.nargv, &sp.argvBytes);
       
   490     bufsize = sp.argvBytes;
       
   491     arraysize(c->envv, &sp.nenvv, &sp.envvBytes);
       
   492     bufsize += sp.envvBytes;
       
   493     sp.dirlen = c->pdir == 0 ? 0 : strlen(c->pdir)+1;
       
   494     bufsize += sp.dirlen;
       
   495     arraysize(parentPathv, &sp.nparentPathv, &sp.parentPathvBytes);
       
   496     bufsize += sp.parentPathvBytes;
       
   497     /* We need to clear FD_CLOEXEC if set in the fds[].
       
   498      * Files are created FD_CLOEXEC in Java.
       
   499      * Otherwise, they will be closed when the target gets exec'd */
       
   500     for (i=0; i<3; i++) {
       
   501         if (c->fds[i] != -1) {
       
   502             int flags = fcntl(c->fds[i], F_GETFD);
       
   503             if (flags & FD_CLOEXEC) {
       
   504                 fcntl(c->fds[i], F_SETFD, flags & (~1));
       
   505             }
       
   506         }
       
   507     }
       
   508 
       
   509     rval = posix_spawn(&resultPid, helperpath, 0, 0, (char * const *) hlpargs, environ);
       
   510 
       
   511     if (rval != 0) {
       
   512         return -1;
       
   513     }
       
   514 
       
   515     /* now the lengths are known, copy the data */
       
   516     buf = NEW(char, bufsize);
       
   517     if (buf == 0) {
       
   518         return -1;
       
   519     }
       
   520     offset = copystrings(buf, 0, &c->argv[0]);
       
   521     offset = copystrings(buf, offset, &c->envv[0]);
       
   522     memcpy(buf+offset, c->pdir, sp.dirlen);
       
   523     offset += sp.dirlen;
       
   524     offset = copystrings(buf, offset, parentPathv);
       
   525     assert(offset == bufsize);
       
   526 
       
   527     magic = magicNumber();
       
   528 
       
   529     /* write the two structs and the data buffer */
       
   530     write(c->childenv[1], (char *)&magic, sizeof(magic)); // magic number first
       
   531     write(c->childenv[1], (char *)c, sizeof(*c));
       
   532     write(c->childenv[1], (char *)&sp, sizeof(sp));
       
   533     write(c->childenv[1], buf, bufsize);
       
   534     free(buf);
       
   535 
       
   536     /* In this mode an external main() in invoked which calls back into
       
   537      * childProcess() in this file, rather than directly
       
   538      * via the statement below */
       
   539     return resultPid;
       
   540 }
       
   541 #endif
       
   542 
       
   543 /*
       
   544  * Start a child process running function childProcess.
       
   545  * This function only returns in the parent.
       
   546  */
       
   547 static pid_t
       
   548 startChild(JNIEnv *env, jobject process, ChildStuff *c, const char *helperpath) {
       
   549     switch (c->mode) {
       
   550       case MODE_VFORK:
       
   551         return vforkChild(c);
       
   552       case MODE_FORK:
       
   553         return forkChild(c);
       
   554 #if defined(__solaris__) || defined(_ALLBSD_SOURCE)
       
   555       case MODE_POSIX_SPAWN:
       
   556         return spawnChild(env, process, c, helperpath);
       
   557 #endif
       
   558       default:
       
   559         return -1;
       
   560     }
   815 }
   561 }
   816 
   562 
   817 JNIEXPORT jint JNICALL
   563 JNIEXPORT jint JNICALL
   818 Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
   564 Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
   819                                        jobject process,
   565                                        jobject process,
       
   566                                        jint mode,
       
   567                                        jbyteArray helperpath,
   820                                        jbyteArray prog,
   568                                        jbyteArray prog,
   821                                        jbyteArray argBlock, jint argc,
   569                                        jbyteArray argBlock, jint argc,
   822                                        jbyteArray envBlock, jint envc,
   570                                        jbyteArray envBlock, jint envc,
   823                                        jbyteArray dir,
   571                                        jbyteArray dir,
   824                                        jintArray std_fds,
   572                                        jintArray std_fds,
   825                                        jboolean redirectErrorStream)
   573                                        jboolean redirectErrorStream)
   826 {
   574 {
   827     int errnum;
   575     int errnum;
   828     int resultPid = -1;
   576     int resultPid = -1;
   829     int in[2], out[2], err[2], fail[2];
   577     int in[2], out[2], err[2], fail[2], childenv[2];
   830     jint *fds = NULL;
   578     jint *fds = NULL;
       
   579     const char *phelperpath = NULL;
   831     const char *pprog = NULL;
   580     const char *pprog = NULL;
   832     const char *pargBlock = NULL;
   581     const char *pargBlock = NULL;
   833     const char *penvBlock = NULL;
   582     const char *penvBlock = NULL;
   834     ChildStuff *c;
   583     ChildStuff *c;
   835 
   584 
   836     in[0] = in[1] = out[0] = out[1] = err[0] = err[1] = fail[0] = fail[1] = -1;
   585     in[0] = in[1] = out[0] = out[1] = err[0] = err[1] = fail[0] = fail[1] = -1;
       
   586     childenv[0] = childenv[1] = -1;
   837 
   587 
   838     if ((c = NEW(ChildStuff, 1)) == NULL) return -1;
   588     if ((c = NEW(ChildStuff, 1)) == NULL) return -1;
   839     c->argv = NULL;
   589     c->argv = NULL;
   840     c->envv = NULL;
   590     c->envv = NULL;
   841     c->pdir = NULL;
   591     c->pdir = NULL;
   842 #if START_CHILD_USE_CLONE
       
   843     c->clone_stack = NULL;
   592     c->clone_stack = NULL;
   844 #endif
       
   845 
   593 
   846     /* Convert prog + argBlock into a char ** argv.
   594     /* Convert prog + argBlock into a char ** argv.
   847      * Add one word room for expansion of argv for use by
   595      * Add one word room for expansion of argv for use by
   848      * execve_as_traditional_shell_script.
   596      * execve_as_traditional_shell_script.
       
   597      * This word is also used when using spawn mode
   849      */
   598      */
   850     assert(prog != NULL && argBlock != NULL);
   599     assert(prog != NULL && argBlock != NULL);
       
   600     if ((phelperpath = getBytes(env, helperpath))   == NULL) goto Catch;
   851     if ((pprog     = getBytes(env, prog))       == NULL) goto Catch;
   601     if ((pprog     = getBytes(env, prog))       == NULL) goto Catch;
   852     if ((pargBlock = getBytes(env, argBlock))   == NULL) goto Catch;
   602     if ((pargBlock = getBytes(env, argBlock))   == NULL) goto Catch;
   853     if ((c->argv = NEW(const char *, argc + 3)) == NULL) goto Catch;
   603     if ((c->argv = NEW(const char *, argc + 3)) == NULL) goto Catch;
   854     c->argv[0] = pprog;
   604     c->argv[0] = pprog;
       
   605     c->argc = argc + 2;
   855     initVectorFromBlock(c->argv+1, pargBlock, argc);
   606     initVectorFromBlock(c->argv+1, pargBlock, argc);
   856 
   607 
   857     if (envBlock != NULL) {
   608     if (envBlock != NULL) {
   858         /* Convert envBlock into a char ** envv */
   609         /* Convert envBlock into a char ** envv */
   859         if ((penvBlock = getBytes(env, envBlock))   == NULL) goto Catch;
   610         if ((penvBlock = getBytes(env, envBlock))   == NULL) goto Catch;
   870     if (fds == NULL) goto Catch;
   621     if (fds == NULL) goto Catch;
   871 
   622 
   872     if ((fds[0] == -1 && pipe(in)  < 0) ||
   623     if ((fds[0] == -1 && pipe(in)  < 0) ||
   873         (fds[1] == -1 && pipe(out) < 0) ||
   624         (fds[1] == -1 && pipe(out) < 0) ||
   874         (fds[2] == -1 && pipe(err) < 0) ||
   625         (fds[2] == -1 && pipe(err) < 0) ||
       
   626         (pipe(childenv) < 0) ||
   875         (pipe(fail) < 0)) {
   627         (pipe(fail) < 0)) {
   876         throwIOException(env, errno, "Bad file descriptor");
   628         throwIOException(env, errno, "Bad file descriptor");
   877         goto Catch;
   629         goto Catch;
   878     }
   630     }
   879     c->fds[0] = fds[0];
   631     c->fds[0] = fds[0];
   882 
   634 
   883     copyPipe(in,   c->in);
   635     copyPipe(in,   c->in);
   884     copyPipe(out,  c->out);
   636     copyPipe(out,  c->out);
   885     copyPipe(err,  c->err);
   637     copyPipe(err,  c->err);
   886     copyPipe(fail, c->fail);
   638     copyPipe(fail, c->fail);
       
   639     copyPipe(childenv, c->childenv);
   887 
   640 
   888     c->redirectErrorStream = redirectErrorStream;
   641     c->redirectErrorStream = redirectErrorStream;
   889 
   642     c->mode = mode;
   890     resultPid = startChild(c);
   643 
       
   644     resultPid = startChild(env, process, c, phelperpath);
   891     assert(resultPid != 0);
   645     assert(resultPid != 0);
   892 
   646 
   893     if (resultPid < 0) {
   647     if (resultPid < 0) {
   894         throwIOException(env, errno, START_CHILD_SYSTEM_CALL " failed");
   648         switch (c->mode) {
       
   649           case MODE_VFORK:
       
   650             throwIOException(env, errno, "vfork failed");
       
   651             break;
       
   652           case MODE_FORK:
       
   653             throwIOException(env, errno, "fork failed");
       
   654             break;
       
   655           case MODE_POSIX_SPAWN:
       
   656             throwIOException(env, errno, "spawn failed");
       
   657             break;
       
   658         }
   895         goto Catch;
   659         goto Catch;
   896     }
   660     }
   897 
   661     close(fail[1]); fail[1] = -1; /* See: WhyCantJohnnyExec  (childproc.c)  */
   898     restartableClose(fail[1]); fail[1] = -1; /* See: WhyCantJohnnyExec */
       
   899 
   662 
   900     switch (readFully(fail[0], &errnum, sizeof(errnum))) {
   663     switch (readFully(fail[0], &errnum, sizeof(errnum))) {
   901     case 0: break; /* Exec succeeded */
   664     case 0: break; /* Exec succeeded */
   902     case sizeof(errnum):
   665     case sizeof(errnum):
   903         waitpid(resultPid, NULL, 0);
   666         waitpid(resultPid, NULL, 0);
   911     fds[0] = (in [1] != -1) ? in [1] : -1;
   674     fds[0] = (in [1] != -1) ? in [1] : -1;
   912     fds[1] = (out[0] != -1) ? out[0] : -1;
   675     fds[1] = (out[0] != -1) ? out[0] : -1;
   913     fds[2] = (err[0] != -1) ? err[0] : -1;
   676     fds[2] = (err[0] != -1) ? err[0] : -1;
   914 
   677 
   915  Finally:
   678  Finally:
   916 #if START_CHILD_USE_CLONE
       
   917     free(c->clone_stack);
   679     free(c->clone_stack);
   918 #endif
       
   919 
   680 
   920     /* Always clean up the child's side of the pipes */
   681     /* Always clean up the child's side of the pipes */
   921     closeSafely(in [0]);
   682     closeSafely(in [0]);
   922     closeSafely(out[1]);
   683     closeSafely(out[1]);
   923     closeSafely(err[1]);
   684     closeSafely(err[1]);
   924 
   685 
   925     /* Always clean up fail descriptors */
   686     /* Always clean up fail and childEnv descriptors */
   926     closeSafely(fail[0]);
   687     closeSafely(fail[0]);
   927     closeSafely(fail[1]);
   688     closeSafely(fail[1]);
       
   689     closeSafely(childenv[0]);
       
   690     closeSafely(childenv[1]);
   928 
   691 
   929     releaseBytes(env, prog,     pprog);
   692     releaseBytes(env, prog,     pprog);
   930     releaseBytes(env, argBlock, pargBlock);
   693     releaseBytes(env, argBlock, pargBlock);
   931     releaseBytes(env, envBlock, penvBlock);
   694     releaseBytes(env, envBlock, penvBlock);
   932     releaseBytes(env, dir,      c->pdir);
   695     releaseBytes(env, dir,      c->pdir);
   940 
   703 
   941     return resultPid;
   704     return resultPid;
   942 
   705 
   943  Catch:
   706  Catch:
   944     /* Clean up the parent's side of the pipes in case of failure only */
   707     /* Clean up the parent's side of the pipes in case of failure only */
   945     closeSafely(in [1]);
   708     closeSafely(in [1]); in[1] = -1;
   946     closeSafely(out[0]);
   709     closeSafely(out[0]); out[0] = -1;
   947     closeSafely(err[0]);
   710     closeSafely(err[0]); err[0] = -1;
   948     goto Finally;
   711     goto Finally;
   949 }
   712 }
   950 
   713 
   951 JNIEXPORT void JNICALL
   714 JNIEXPORT void JNICALL
   952 Java_java_lang_UNIXProcess_destroyProcess(JNIEnv *env,
   715 Java_java_lang_UNIXProcess_destroyProcess(JNIEnv *env,