8216314: SIGILL in CodeHeapState::print_names()
authorlucy
Wed, 16 Jan 2019 09:48:35 +0100
changeset 53353 a6620d37728b
parent 53352 ac431929db51
child 53354 528207d2e73e
8216314: SIGILL in CodeHeapState::print_names() Reviewed-by: thartmann, kvn
src/hotspot/share/code/codeHeapState.cpp
src/hotspot/share/code/codeHeapState.hpp
src/hotspot/share/compiler/compileBroker.cpp
--- a/src/hotspot/share/code/codeHeapState.cpp	Tue Jan 15 22:59:33 2019 -0800
+++ b/src/hotspot/share/code/codeHeapState.cpp	Wed Jan 16 09:48:35 2019 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,15 +40,17 @@
 // contain enough detail to gain initial insight while keeping the
 // internal structure sizes in check.
 //
-// The CodeHeap is a living thing. Therefore, the aggregate is collected
-// under the CodeCache_lock. The subsequent print steps are only locked
-// against concurrent aggregations. That keeps the impact on
-// "normal operation" (JIT compiler and sweeper activity) to a minimum.
-//
 // The second part, which consists of several, independent steps,
 // prints the previously collected information with emphasis on
 // various aspects.
 //
+// The CodeHeap is a living thing. Therefore, protection against concurrent
+// modification (by acquiring the CodeCache_lock) is necessary. It has
+// to be provided by the caller of the analysis functions.
+// If the CodeCache_lock is not held, the analysis functions may print
+// less detailed information or may just do nothing. It is by intention
+// that an unprotected invocation is not abnormally terminated.
+//
 // Data collection and printing is done on an "on request" basis.
 // While no request is being processed, there is no impact on performance.
 // The CodeHeap state analytics do have some memory footprint.
@@ -456,7 +458,7 @@
   bool  done             = false;
   const int min_granules = 256;
   const int max_granules = 512*K; // limits analyzable CodeHeap (with segment_granules) to 32M..128M
-                                  // results in StatArray size of 20M (= max_granules * 40 Bytes per element)
+                                  // results in StatArray size of 24M (= max_granules * 48 Bytes per element)
                                   // For a 1GB CodeHeap, the granule size must be at least 2kB to not violate the max_granles limit.
   const char* heapName   = get_heapName(heap);
   STRINGSTREAM_DECL(ast, out)
@@ -495,6 +497,12 @@
     return;
   }
 
+  if (!CodeCache_lock->owned_by_self()) {
+    printBox(ast, '-', "aggregate function called without holding the CodeCache_lock for ", heapName);
+    STRINGSTREAM_FLUSH("")
+    return;
+  }
+
   // Calculate granularity of analysis (and output).
   //   The CodeHeap is managed (allocated) in segments (units) of CodeCacheSegmentSize.
   //   The CodeHeap can become fairly large, in particular in productive real-life systems.
@@ -1028,6 +1036,7 @@
       STRINGSTREAM_FLUSH("\n")
 
       // This loop is intentionally printing directly to "out".
+      // It should not print anything, anyway.
       out->print("Verifying collected data...");
       size_t granule_segs = granule_size>>log2_seg_size;
       for (unsigned int ix = 0; ix < granules; ix++) {
@@ -1071,6 +1080,7 @@
       }
 
       // This loop is intentionally printing directly to "out".
+      // It should not print anything, anyway.
       if (used_topSizeBlocks > 0) {
         unsigned int j = 0;
         if (TopSizeArray[0].len != currMax) {
@@ -1166,6 +1176,7 @@
   //---<  calculate and fill remaining fields  >---
   if (FreeArray != NULL) {
     // This loop is intentionally printing directly to "out".
+    // It should not print anything, anyway.
     for (unsigned int ix = 0; ix < alloc_freeBlocks-1; ix++) {
       size_t lenSum = 0;
       FreeArray[ix].gap = (unsigned int)((address)FreeArray[ix+1].start - ((address)FreeArray[ix].start + FreeArray[ix].len));
@@ -1223,6 +1234,7 @@
   //----------------------------
   {
     char*     low_bound = heap->low_boundary();
+    bool      have_CodeCache_lock = CodeCache_lock->owned_by_self();
 
     printBox(ast, '-', "Largest Used Blocks in ", heapName);
     print_blobType_legend(ast);
@@ -1241,12 +1253,19 @@
       unsigned int printed_topSizeBlocks = 0;
       for (unsigned int i = 0; i != tsbStopper; i = TopSizeArray[i].index) {
         printed_topSizeBlocks++;
-        CodeBlob*   this_blob = (CodeBlob*)(heap->find_start(TopSizeArray[i].start));
         nmethod*           nm = NULL;
-        const char* blob_name = "unnamed blob";
-        if (this_blob != NULL) {
-          blob_name = this_blob->name();
-          nm        = this_blob->as_nmethod_or_null();
+        const char* blob_name = "unnamed blob or blob name unavailable";
+        // heap->find_start() is safe. Only works on _segmap.
+        // Returns NULL or void*. Returned CodeBlob may be uninitialized.
+        HeapBlock* heapBlock = TopSizeArray[i].start;
+        CodeBlob*  this_blob = (CodeBlob*)(heap->find_start(heapBlock));
+        bool    blob_is_safe = blob_access_is_safe(this_blob, NULL);
+        if (blob_is_safe) {
+          //---<  access these fields only if we own the CodeCache_lock  >---
+          if (have_CodeCache_lock) {
+            blob_name = this_blob->name();
+            nm        = this_blob->as_nmethod_or_null();
+          }
           //---<  blob address  >---
           ast->print(INTPTR_FORMAT, p2i(this_blob));
           ast->fill_to(19);
@@ -1262,10 +1281,18 @@
           ast->fill_to(33);
         }
 
-
         //---<  print size, name, and signature (for nMethods)  >---
-        if ((nm != NULL) && (nm->method() != NULL)) {
+        // access nmethod and Method fields only if we own the CodeCache_lock.
+        // This fact is implicitly transported via nm != NULL.
+        if (CompiledMethod::nmethod_access_is_safe(nm)) {
           ResourceMark rm;
+          Method* method = nm->method();
+          if (nm->is_in_use()) {
+            blob_name = method->name_and_sig_as_C_string();
+          }
+          if (nm->is_not_entrant()) {
+            blob_name = method->name_and_sig_as_C_string();
+          }
           //---<  nMethod size in hex  >---
           unsigned int total_size = nm->total_size();
           ast->print(PTR32_FORMAT, total_size);
@@ -1280,10 +1307,12 @@
           ast->print("%5d", nm->hotness_counter());
           //---<  name and signature  >---
           ast->fill_to(67+6);
-          if (nm->is_in_use())        {blob_name = nm->method()->name_and_sig_as_C_string(); }
-          if (nm->is_not_entrant())   {blob_name = nm->method()->name_and_sig_as_C_string(); }
-          if (nm->is_not_installed()) {ast->print("%s", " not (yet) installed method "); }
-          if (nm->is_zombie())        {ast->print("%s", " zombie method "); }
+          if (nm->is_not_installed()) {
+            ast->print(" not (yet) installed method ");
+          }
+          if (nm->is_zombie()) {
+            ast->print(" zombie method ");
+          }
           ast->print("%s", blob_name);
         } else {
           //---<  block size in hex  >---
@@ -2083,10 +2112,11 @@
   }
   STRINGSTREAM_DECL(ast, out)
 
-  unsigned int granules_per_line  = 128;
-  char*        low_bound          = heap->low_boundary();
-  CodeBlob*    last_blob          = NULL;
-  bool         name_in_addr_range = true;
+  unsigned int granules_per_line   = 128;
+  char*        low_bound           = heap->low_boundary();
+  CodeBlob*    last_blob           = NULL;
+  bool         name_in_addr_range  = true;
+  bool         have_CodeCache_lock = CodeCache_lock->owned_by_self();
 
   //---<  print at least 128K per block (i.e. between headers)  >---
   if (granules_per_line*granule_size < 128*K) {
@@ -2121,16 +2151,14 @@
                            StatArray[ix].stub_count + StatArray[ix].dead_count;
     if (nBlobs > 0 ) {
     for (unsigned int is = 0; is < granule_size; is+=(unsigned int)seg_size) {
-      // heap->find_start() is safe. Only working with _segmap. Returns NULL or void*. Returned CodeBlob may be uninitialized.
-      CodeBlob* this_blob = (CodeBlob *)(heap->find_start(low_bound+ix*granule_size+is));
-      bool blob_initialized = (this_blob != NULL) && (this_blob->header_size() >= 0) && (this_blob->relocation_size() >= 0) &&
-                              ((address)this_blob + this_blob->header_size() == (address)(this_blob->relocation_begin())) &&
-                              ((address)this_blob + CodeBlob::align_code_offset(this_blob->header_size() + this_blob->relocation_size()) == (address)(this_blob->content_begin())) &&
-                              os::is_readable_pointer((address)(this_blob->relocation_begin())) &&
-                              os::is_readable_pointer(this_blob->content_begin());
+      // heap->find_start() is safe. Only works on _segmap.
+      // Returns NULL or void*. Returned CodeBlob may be uninitialized.
+      char*     this_seg  = low_bound + ix*granule_size + is;
+      CodeBlob* this_blob = (CodeBlob*)(heap->find_start(this_seg));
+      bool   blob_is_safe = blob_access_is_safe(this_blob, NULL);
       // blob could have been flushed, freed, and merged.
       // this_blob < last_blob is an indicator for that.
-      if (blob_initialized && (this_blob > last_blob)) {
+      if (blob_is_safe && (this_blob > last_blob)) {
         last_blob          = this_blob;
 
         //---<  get type and name  >---
@@ -2138,12 +2166,22 @@
         if (segment_granules) {
           cbType = (blobType)StatArray[ix].type;
         } else {
-          cbType = get_cbType(this_blob);
+          //---<  access these fields only if we own the CodeCache_lock  >---
+          if (have_CodeCache_lock) {
+            cbType = get_cbType(this_blob);
+          }
         }
-        // this_blob->name() could return NULL if no name was given to CTOR. Inlined, maybe invisible on stack
-        const char* blob_name = this_blob->name();
-        if ((blob_name == NULL) || !os::is_readable_pointer(blob_name)) {
-          blob_name = "<unavailable>";
+
+        //---<  access these fields only if we own the CodeCache_lock  >---
+        const char* blob_name = "<unavailable>";
+        nmethod*           nm = NULL;
+        if (have_CodeCache_lock) {
+          blob_name = this_blob->name();
+          nm        = this_blob->as_nmethod_or_null();
+          // this_blob->name() could return NULL if no name was given to CTOR. Inlined, maybe invisible on stack
+          if ((blob_name == NULL) || !os::is_readable_pointer(blob_name)) {
+            blob_name = "<unavailable>";
+          }
         }
 
         //---<  print table header for new print range  >---
@@ -2163,8 +2201,8 @@
         ast->print("(+" PTR32_FORMAT ")", (unsigned int)((char*)this_blob-low_bound));
         ast->fill_to(33);
 
-        // this_blob->as_nmethod_or_null() is safe. Inlined, maybe invisible on stack.
-        nmethod*    nm     = this_blob->as_nmethod_or_null();
+        // access nmethod and Method fields only if we own the CodeCache_lock.
+        // This fact is implicitly transported via nm != NULL.
         if (CompiledMethod::nmethod_access_is_safe(nm)) {
           Method* method = nm->method();
           ResourceMark rm;
@@ -2201,14 +2239,17 @@
           } else {
             ast->print("%s", blob_name);
           }
-        } else {
+        } else if (blob_is_safe) {
           ast->fill_to(62+6);
           ast->print("%s", blobTypeName[cbType]);
           ast->fill_to(82+6);
           ast->print("%s", blob_name);
+        } else {
+          ast->fill_to(62+6);
+          ast->print("<stale blob>");
         }
         STRINGSTREAM_FLUSH_LOCKED("\n")
-      } else if (!blob_initialized && (this_blob != last_blob) && (this_blob != NULL)) {
+      } else if (!blob_is_safe && (this_blob != last_blob) && (this_blob != NULL)) {
         last_blob          = this_blob;
         STRINGSTREAM_FLUSH_LOCKED("\n")
       }
@@ -2375,16 +2416,31 @@
     if (cb->is_method_handles_adapter_blob()) return mh_adapterBlob;
     if (cb->is_buffer_blob())                 return bufferBlob;
 
-    nmethod*  nm = cb->as_nmethod_or_null();
-    if (nm != NULL) { // no is_readable check required, nm = (nmethod*)cb.
-      if (nm->is_not_installed()) return nMethod_inconstruction;
-      if (nm->is_zombie())        return nMethod_dead;
-      if (nm->is_unloaded())      return nMethod_unloaded;
-      if (nm->is_in_use())        return nMethod_inuse;
-      if (nm->is_alive() && !(nm->is_not_entrant()))   return nMethod_notused;
-      if (nm->is_alive())         return nMethod_alive;
-      return nMethod_dead;
+    //---<  access these fields only if we own the CodeCache_lock  >---
+    // Should be ensured by caller. aggregate() amd print_names() do that.
+    if (CodeCache_lock->owned_by_self()) {
+      nmethod*  nm = cb->as_nmethod_or_null();
+      if (nm != NULL) { // no is_readable check required, nm = (nmethod*)cb.
+        if (nm->is_not_installed()) return nMethod_inconstruction;
+        if (nm->is_zombie())        return nMethod_dead;
+        if (nm->is_unloaded())      return nMethod_unloaded;
+        if (nm->is_in_use())        return nMethod_inuse;
+        if (nm->is_alive() && !(nm->is_not_entrant()))   return nMethod_notused;
+        if (nm->is_alive())         return nMethod_alive;
+        return nMethod_dead;
+      }
     }
   }
   return noType;
 }
+
+bool CodeHeapState::blob_access_is_safe(CodeBlob* this_blob, CodeBlob* prev_blob) {
+  return (this_blob != NULL) && // a blob must have been found, obviously
+         ((this_blob == prev_blob) || (prev_blob == NULL)) &&  // when re-checking, the same blob must have been found
+         (this_blob->header_size() >= 0) &&
+         (this_blob->relocation_size() >= 0) &&
+         ((address)this_blob + this_blob->header_size() == (address)(this_blob->relocation_begin())) &&
+         ((address)this_blob + CodeBlob::align_code_offset(this_blob->header_size() + this_blob->relocation_size()) == (address)(this_blob->content_begin())) &&
+         os::is_readable_pointer((address)(this_blob->relocation_begin())) &&
+         os::is_readable_pointer(this_blob->content_begin());
+}
--- a/src/hotspot/share/code/codeHeapState.hpp	Tue Jan 15 22:59:33 2019 -0800
+++ b/src/hotspot/share/code/codeHeapState.hpp	Wed Jan 16 09:48:35 2019 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018 SAP SE. All rights reserved.
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2019 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -95,6 +95,7 @@
   static void print_line_delim(outputStream* out, bufferedStream *sst, char* low_bound, unsigned int ix, unsigned int gpl);
   static void print_line_delim(outputStream* out, outputStream *sst, char* low_bound, unsigned int ix, unsigned int gpl);
   static blobType get_cbType(CodeBlob* cb);
+  static bool blob_access_is_safe(CodeBlob* this_blob, CodeBlob* prev_blob);
 
  public:
   static void discard(outputStream* out, CodeHeap* heap);
--- a/src/hotspot/share/compiler/compileBroker.cpp	Tue Jan 15 22:59:33 2019 -0800
+++ b/src/hotspot/share/compiler/compileBroker.cpp	Wed Jan 16 09:48:35 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2672,6 +2672,7 @@
 //       preventing concurrently printing threads from stalling a long time.
 void CompileBroker::print_heapinfo(outputStream* out, const char* function, const char* granularity) {
   TimeStamp ts_total;
+  TimeStamp ts_global;
   TimeStamp ts;
 
   bool allFun = !strcmp(function, "all");
@@ -2701,35 +2702,43 @@
   }
 
   // We hold the CodeHeapStateAnalytics_lock all the time, from here until we leave this function.
-  // That helps us getting a consistent view on the CodeHeap, at least for the "all" function.
+  // That prevents another thread from destroying our view on the CodeHeap.
   // When we request individual parts of the analysis via the jcmd interface, it is possible
   // that in between another thread (another jcmd user or the vm running into CodeCache OOM)
   // updated the aggregated data. That's a tolerable tradeoff because we can't hold a lock
   // across user interaction.
+  // Acquire this lock before acquiring the CodeCache_lock.
+  // CodeHeapStateAnalytics_lock could be held by a concurrent thread for a long time,
+  // leading to an unnecessarily long hold time of the CodeCache_lock.
   ts.update(); // record starting point
   MutexLockerEx mu1(CodeHeapStateAnalytics_lock, Mutex::_no_safepoint_check_flag);
-  out->cr();
-  out->print_cr("__ CodeHeapStateAnalytics lock wait took %10.3f seconds _________", ts.seconds());
-  out->cr();
+  out->print_cr("\n__ CodeHeapStateAnalytics lock wait took %10.3f seconds _________\n", ts.seconds());
+
+  // If we serve an "allFun" call, it is beneficial to hold the CodeCache_lock
+  // for the entire duration of aggregation and printing. That makes sure
+  // we see a consistent picture and do not run into issues caused by
+  // the CodeHeap being altered concurrently.
+  Monitor* global_lock   = allFun ? CodeCache_lock : NULL;
+  Monitor* function_lock = allFun ? NULL : CodeCache_lock;
+  ts_global.update(); // record starting point
+  MutexLockerEx mu2(global_lock, Mutex::_no_safepoint_check_flag);
+  if (global_lock != NULL) {
+    out->print_cr("\n__ CodeCache (global) lock wait took %10.3f seconds _________\n", ts_global.seconds());
+    ts_global.update(); // record starting point
+  }
 
   if (aggregate) {
-    // It is sufficient to hold the CodeCache_lock only for the aggregate step.
-    // All other functions operate on aggregated data - except MethodNames, but that should be safe.
-    // The separate CodeHeapStateAnalytics_lock protects the printing functions against
-    // concurrent aggregate steps. Acquire this lock before acquiring the CodeCache_lock.
-    // CodeHeapStateAnalytics_lock could be held by a concurrent thread for a long time,
-    // leading to an unnecessarily long hold time of the CodeCache_lock.
     ts.update(); // record starting point
-    MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    out->cr();
-    out->print_cr("__ CodeCache lock wait took %10.3f seconds _________", ts.seconds());
-    out->cr();
+    MutexLockerEx mu3(function_lock, Mutex::_no_safepoint_check_flag);
+    if (function_lock != NULL) {
+      out->print_cr("\n__ CodeCache (function) lock wait took %10.3f seconds _________\n", ts.seconds());
+    }
 
     ts.update(); // record starting point
     CodeCache::aggregate(out, granularity);
-    out->cr();
-    out->print_cr("__ CodeCache lock hold took %10.3f seconds _________", ts.seconds());
-    out->cr();
+    if (function_lock != NULL) {
+      out->print_cr("\n__ CodeCache (function) lock hold took %10.3f seconds _________\n", ts.seconds());
+    }
   }
 
   if (usedSpace) CodeCache::print_usedSpace(out);
@@ -2737,10 +2746,16 @@
   if (methodCount) CodeCache::print_count(out);
   if (methodSpace) CodeCache::print_space(out);
   if (methodAge) CodeCache::print_age(out);
-  if (methodNames) CodeCache::print_names(out);
+  if (methodNames) {
+    // print_names() has shown to be sensitive to concurrent CodeHeap modifications.
+    // Therefore, request  the CodeCache_lock before calling...
+    MutexLockerEx mu3(function_lock, Mutex::_no_safepoint_check_flag);
+    CodeCache::print_names(out);
+  }
   if (discard) CodeCache::discard(out);
 
-  out->cr();
-  out->print_cr("__ CodeHeapStateAnalytics total duration %10.3f seconds _________", ts_total.seconds());
-  out->cr();
+  if (global_lock != NULL) {
+    out->print_cr("\n__ CodeCache (global) lock hold took %10.3f seconds _________\n", ts_global.seconds());
+  }
+  out->print_cr("\n__ CodeHeapStateAnalytics total duration %10.3f seconds _________\n", ts_total.seconds());
 }