8202884: SA: Attach/detach might fail on Linux if debugee application create/destroy threads during attaching
authorjgeorge
Thu, 13 Dec 2018 13:03:26 +0530
changeset 53004 b9d34a97a4be
parent 53003 ff1c86e85d02
child 53005 888592cdb2d0
8202884: SA: Attach/detach might fail on Linux if debugee application create/destroy threads during attaching Summary: While doing a ptrace attach, do not attach to threads which are in the process of exiting or are zombies -- skip these threads. Reviewed-by: jcbeyler, ysuenaga
src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.c
src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.h
src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.c	Thu Dec 13 08:26:07 2018 +0100
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.c	Thu Dec 13 13:03:26 2018 +0530
@@ -274,6 +274,26 @@
    return newthr;
 }
 
+void delete_thread_info(struct ps_prochandle* ph, thread_info* thr_to_be_removed) {
+    thread_info* current_thr = ph->threads;
+
+    if (thr_to_be_removed == ph->threads) {
+      ph->threads = ph->threads->next;
+    } else {
+      thread_info* previous_thr;
+      while (current_thr && current_thr != thr_to_be_removed) {
+        previous_thr = current_thr;
+        current_thr = current_thr->next;
+      }
+      if (current_thr == NULL) {
+        print_error("Could not find the thread to be removed");
+        return;
+      }
+      previous_thr->next = current_thr->next;
+    }
+    ph->num_threads--;
+    free(current_thr);
+}
 
 // struct used for client data from thread_db callback
 struct thread_db_client_data {
@@ -296,6 +316,11 @@
 
   print_debug("thread_db : pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid);
 
+  if (ti.ti_state == TD_THR_UNKNOWN || ti.ti_state == TD_THR_ZOMBIE) {
+    print_debug("Skipping pthread %d (lwp %d)\n", ti.ti_tid, ti.ti_lid);
+    return TD_OK;
+  }
+
   if (ptr->callback(ptr->ph, ti.ti_tid, ti.ti_lid) != true)
     return TD_ERR;
 
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.h	Thu Dec 13 08:26:07 2018 +0100
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc_impl.h	Thu Dec 13 13:03:26 2018 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -113,6 +113,9 @@
 // reads thread info using libthread_db and calls above callback for each thread
 bool read_thread_info(struct ps_prochandle* ph, thread_info_callback cb);
 
+// deletes a thread from the thread list
+void delete_thread_info(struct ps_prochandle* ph, thread_info* thr);
+
 // adds a new shared object to lib list, returns NULL on failure
 lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base);
 
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c	Thu Dec 13 08:26:07 2018 +0100
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c	Thu Dec 13 13:03:26 2018 +0530
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <elf.h>
 #include <dirent.h>
+#include <ctype.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/ptrace.h>
@@ -46,6 +47,12 @@
 // This file has the libproc implementation specific to live process
 // For core files, refer to ps_core.c
 
+typedef enum {
+  ATTACH_SUCCESS,
+  ATTACH_FAIL,
+  ATTACH_THREAD_DEAD
+} attach_state_t;
+
 static inline uintptr_t align(uintptr_t ptr, size_t size) {
   return (ptr & ~(size - 1));
 }
@@ -168,9 +175,10 @@
 
 // waits until the ATTACH has stopped the process
 // by signal SIGSTOP
-static bool ptrace_waitpid(pid_t pid) {
+static attach_state_t ptrace_waitpid(pid_t pid) {
   int ret;
   int status;
+  errno = 0;
   while (true) {
     // Wait for debuggee to stop.
     ret = waitpid(pid, &status, 0);
@@ -185,15 +193,15 @@
         // will go to sleep.
         if (WSTOPSIG(status) == SIGSTOP) {
           // Debuggee stopped by SIGSTOP.
-          return true;
+          return ATTACH_SUCCESS;
         }
         if (!ptrace_continue(pid, WSTOPSIG(status))) {
           print_error("Failed to correctly attach to VM. VM might HANG! [PTRACE_CONT failed, stopped by %d]\n", WSTOPSIG(status));
-          return false;
+          return ATTACH_FAIL;
         }
       } else {
-        print_debug("waitpid(): Child process exited/terminated (status = 0x%x)\n", status);
-        return false;
+        print_debug("waitpid(): Child process %d exited/terminated (status = 0x%x)\n", pid, status);
+        return ATTACH_THREAD_DEAD;
       }
     } else {
       switch (errno) {
@@ -202,29 +210,89 @@
           break;
         case ECHILD:
           print_debug("waitpid() failed. Child process pid (%d) does not exist \n", pid);
-          break;
+          return ATTACH_THREAD_DEAD;
         case EINVAL:
-          print_debug("waitpid() failed. Invalid options argument.\n");
-          break;
+          print_error("waitpid() failed. Invalid options argument.\n");
+          return ATTACH_FAIL;
         default:
-          print_debug("waitpid() failed. Unexpected error %d\n",errno);
-          break;
+          print_error("waitpid() failed. Unexpected error %d\n", errno);
+          return ATTACH_FAIL;
       }
-      return false;
+    } // else
+  } // while
+}
+
+// checks the state of the thread/process specified by "pid", by reading
+// in the 'State:' value from the /proc/<pid>/status file. From the proc
+// man page, "Current state of the process. One of "R (running)",
+// "S (sleeping)", "D (disk sleep)", "T (stopped)", "T (tracing stop)",
+// "Z (zombie)", or "X (dead)"." Assumes that the thread is dead if we
+// don't find the status file or if the status is 'X' or 'Z'.
+static bool process_doesnt_exist(pid_t pid) {
+  char fname[32];
+  char buf[30];
+  FILE *fp = NULL;
+  const char state_string[] = "State:";
+
+  sprintf(fname, "/proc/%d/status", pid);
+  fp = fopen(fname, "r");
+  if (fp == NULL) {
+    print_debug("can't open /proc/%d/status file\n", pid);
+    // Assume the thread does not exist anymore.
+    return true;
+  }
+  bool found_state = false;
+  size_t state_len = strlen(state_string);
+  while (fgets(buf, sizeof(buf), fp) != NULL) {
+    char *state = NULL;
+    if (strncmp (buf, state_string, state_len) == 0) {
+      found_state = true;
+      state = buf + state_len;
+      // Skip the spaces
+      while (isspace(*state)) {
+        state++;
+      }
+      // A state value of 'X' indicates that the thread is dead. 'Z'
+      // indicates that the thread is a zombie.
+      if (*state == 'X' || *state == 'Z') {
+        fclose (fp);
+        return true;
+      }
+      break;
     }
   }
+  // If the state value is not 'X' or 'Z', the thread exists.
+  if (!found_state) {
+    // We haven't found the line beginning with 'State:'.
+    // Assuming the thread exists.
+    print_error("Could not find the 'State:' string in the /proc/%d/status file\n", pid);
+  }
+  fclose (fp);
+  return false;
 }
 
 // attach to a process/thread specified by "pid"
-static bool ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) {
+static attach_state_t ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) {
+  errno = 0;
   if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) {
+    if (errno == EPERM || errno == ESRCH) {
+      // Check if the process/thread is exiting or is a zombie
+      if (process_doesnt_exist(pid)) {
+        print_debug("Thread with pid %d does not exist\n", pid);
+        return ATTACH_THREAD_DEAD;
+      }
+    }
     char buf[200];
     char* msg = strerror_r(errno, buf, sizeof(buf));
     snprintf(err_buf, err_buf_len, "ptrace(PTRACE_ATTACH, ..) failed for %d: %s", pid, msg);
-    print_debug("%s\n", err_buf);
-    return false;
+    print_error("%s\n", err_buf);
+    return ATTACH_FAIL;
   } else {
-    return ptrace_waitpid(pid);
+    attach_state_t wait_ret = ptrace_waitpid(pid);
+    if (wait_ret == ATTACH_THREAD_DEAD) {
+      print_debug("Thread with pid %d does not exist\n", pid);
+    }
+    return wait_ret;
   }
 }
 
@@ -378,16 +446,20 @@
 Pgrab(pid_t pid, char* err_buf, size_t err_buf_len, bool is_in_container) {
   struct ps_prochandle* ph = NULL;
   thread_info* thr = NULL;
+  attach_state_t attach_status = ATTACH_SUCCESS;
 
   if ( (ph = (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle))) == NULL) {
-     snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle");
-     print_debug("%s\n", err_buf);
-     return NULL;
+    snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle");
+    print_debug("%s\n", err_buf);
+    return NULL;
   }
 
-  if (ptrace_attach(pid, err_buf, err_buf_len) != true) {
-     free(ph);
-     return NULL;
+  if ((attach_status = ptrace_attach(pid, err_buf, err_buf_len)) != ATTACH_SUCCESS) {
+    if (attach_status == ATTACH_THREAD_DEAD) {
+       print_error("The process with pid %d does not exist.\n", pid);
+    }
+    free(ph);
+    return NULL;
   }
 
   // initialize ps_prochandle
@@ -431,14 +503,23 @@
 
   // attach to the threads
   thr = ph->threads;
+
   while (thr) {
-     // don't attach to the main thread again
-    if (ph->pid != thr->lwp_id && ptrace_attach(thr->lwp_id, err_buf, err_buf_len) != true) {
-        // even if one attach fails, we get return NULL
-        Prelease(ph);
-        return NULL;
-     }
-     thr = thr->next;
+    thread_info* current_thr = thr;
+    thr = thr->next;
+    // don't attach to the main thread again
+    if (ph->pid != current_thr->lwp_id) {
+      if ((attach_status = ptrace_attach(current_thr->lwp_id, err_buf, err_buf_len)) != ATTACH_SUCCESS) {
+        if (attach_status == ATTACH_THREAD_DEAD) {
+          // Remove this thread from the threads list
+          delete_thread_info(ph, current_thr);
+        }
+        else {
+          Prelease(ph);
+          return NULL;
+        } // ATTACH_THREAD_DEAD
+      } // !ATTACH_SUCCESS
+    }
   }
   return ph;
 }