8012547: Code cache flushing can get stuck reclaming of memory
authorneliasso
Mon, 29 Apr 2013 13:20:19 +0200
changeset 17132 dffd513b2a8c
parent 17131 37e5d5c5cc5a
child 17133 ef4dd2f8fcb1
8012547: Code cache flushing can get stuck reclaming of memory Summary: Keep sweeping regardless of if we are flushing Reviewed-by: kvn, twisti
hotspot/src/share/vm/code/codeCache.cpp
hotspot/src/share/vm/code/codeCache.hpp
hotspot/src/share/vm/compiler/compileBroker.cpp
hotspot/src/share/vm/oops/method.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/sweeper.cpp
hotspot/src/share/vm/runtime/sweeper.hpp
--- a/hotspot/src/share/vm/code/codeCache.cpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/code/codeCache.cpp	Mon Apr 29 13:20:19 2013 +0200
@@ -463,8 +463,10 @@
 }
 #endif //PRODUCT
 
-
-nmethod* CodeCache::find_and_remove_saved_code(Method* m) {
+/**
+ * Remove and return nmethod from the saved code list in order to reanimate it.
+ */
+nmethod* CodeCache::reanimate_saved_code(Method* m) {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   nmethod* saved = _saved_nmethods;
   nmethod* prev = NULL;
@@ -479,7 +481,7 @@
       saved->set_speculatively_disconnected(false);
       saved->set_saved_nmethod_link(NULL);
       if (PrintMethodFlushing) {
-        saved->print_on(tty, " ### nmethod is reconnected\n");
+        saved->print_on(tty, " ### nmethod is reconnected");
       }
       if (LogCompilation && (xtty != NULL)) {
         ttyLocker ttyl;
@@ -496,6 +498,9 @@
   return NULL;
 }
 
+/**
+ * Remove nmethod from the saved code list in order to discard it permanently
+ */
 void CodeCache::remove_saved_code(nmethod* nm) {
   // For conc swpr this will be called with CodeCache_lock taken by caller
   assert_locked_or_safepoint(CodeCache_lock);
@@ -529,7 +534,7 @@
   nm->set_saved_nmethod_link(_saved_nmethods);
   _saved_nmethods = nm;
   if (PrintMethodFlushing) {
-    nm->print_on(tty, " ### nmethod is speculatively disconnected\n");
+    nm->print_on(tty, " ### nmethod is speculatively disconnected");
   }
   if (LogCompilation && (xtty != NULL)) {
     ttyLocker ttyl;
--- a/hotspot/src/share/vm/code/codeCache.hpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/code/codeCache.hpp	Mon Apr 29 13:20:19 2013 +0200
@@ -57,7 +57,7 @@
   static int _number_of_nmethods_with_dependencies;
   static bool _needs_cache_clean;
   static nmethod* _scavenge_root_nmethods;  // linked via nm->scavenge_root_link()
-  static nmethod* _saved_nmethods;          // linked via nm->saved_nmethod_look()
+  static nmethod* _saved_nmethods;          // Linked list of speculatively disconnected nmethods.
 
   static void verify_if_often() PRODUCT_RETURN;
 
@@ -168,7 +168,7 @@
   static void set_needs_cache_clean(bool v)      { _needs_cache_clean = v;    }
   static void clear_inline_caches();             // clear all inline caches
 
-  static nmethod* find_and_remove_saved_code(Method* m);
+  static nmethod* reanimate_saved_code(Method* m);
   static void remove_saved_code(nmethod* nm);
   static void speculatively_disconnect(nmethod* nm);
 
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Mon Apr 29 13:20:19 2013 +0200
@@ -1229,7 +1229,7 @@
     if (method->is_not_compilable(comp_level)) return NULL;
 
     if (UseCodeCacheFlushing) {
-      nmethod* saved = CodeCache::find_and_remove_saved_code(method());
+      nmethod* saved = CodeCache::reanimate_saved_code(method());
       if (saved != NULL) {
         method->set_code(method, saved);
         return saved;
@@ -1288,9 +1288,9 @@
     method->jmethod_id();
   }
 
-  // If the compiler is shut off due to code cache flushing or otherwise,
+  // If the compiler is shut off due to code cache getting full
   // fail out now so blocking compiles dont hang the java thread
-  if (!should_compile_new_jobs() || (UseCodeCacheFlushing && CodeCache::needs_flushing())) {
+  if (!should_compile_new_jobs()) {
     CompilationPolicy::policy()->delay_compilation(method());
     return NULL;
   }
--- a/hotspot/src/share/vm/oops/method.cpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/method.cpp	Mon Apr 29 13:20:19 2013 +0200
@@ -877,7 +877,7 @@
   debug_only(No_Safepoint_Verifier nsv;)
   nmethod *code = (nmethod *)OrderAccess::load_ptr_acquire(&_code);
   if (code == NULL && UseCodeCacheFlushing) {
-    nmethod *saved_code = CodeCache::find_and_remove_saved_code(this);
+    nmethod *saved_code = CodeCache::reanimate_saved_code(this);
     if (saved_code != NULL) {
       methodHandle method(this);
       assert( ! saved_code->is_osr_method(), "should not get here for osr" );
--- a/hotspot/src/share/vm/runtime/globals.hpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Mon Apr 29 13:20:19 2013 +0200
@@ -3179,6 +3179,9 @@
   product(uintx,  CodeCacheFlushingMinimumFreeSpace, 1500*K,                \
           "When less than X space left, start code cache cleaning")         \
                                                                             \
+  product(uintx, CodeCacheFlushingFraction, 2,                              \
+          "Fraction of the code cache that is flushed when full")           \
+                                                                            \
   /* interpreter debugging */                                               \
   develop(intx, BinarySwitchThreshold, 5,                                   \
           "Minimal number of lookupswitch entries for rewriting to binary " \
--- a/hotspot/src/share/vm/runtime/sweeper.cpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/runtime/sweeper.cpp	Mon Apr 29 13:20:19 2013 +0200
@@ -136,13 +136,12 @@
 
 jint      NMethodSweeper::_locked_seen = 0;
 jint      NMethodSweeper::_not_entrant_seen_on_stack = 0;
-bool      NMethodSweeper::_rescan = false;
-bool      NMethodSweeper::_do_sweep = false;
-bool      NMethodSweeper::_was_full = false;
-jint      NMethodSweeper::_advise_to_sweep = 0;
-jlong     NMethodSweeper::_last_was_full = 0;
-uint      NMethodSweeper::_highest_marked = 0;
-long      NMethodSweeper::_was_full_traversal = 0;
+bool      NMethodSweeper::_resweep = false;
+jint      NMethodSweeper::_flush_token = 0;
+jlong     NMethodSweeper::_last_full_flush_time = 0;
+int       NMethodSweeper::_highest_marked = 0;
+int       NMethodSweeper::_dead_compile_ids = 0;
+long      NMethodSweeper::_last_flush_traversal_id = 0;
 
 class MarkActivationClosure: public CodeBlobClosure {
 public:
@@ -155,20 +154,16 @@
 };
 static MarkActivationClosure mark_activation_closure;
 
+bool NMethodSweeper::sweep_in_progress() {
+  return (_current != NULL);
+}
+
 void NMethodSweeper::scan_stacks() {
   assert(SafepointSynchronize::is_at_safepoint(), "must be executed at a safepoint");
   if (!MethodFlushing) return;
-  _do_sweep = true;
 
   // No need to synchronize access, since this is always executed at a
-  // safepoint.  If we aren't in the middle of scan and a rescan
-  // hasn't been requested then just return. If UseCodeCacheFlushing is on and
-  // code cache flushing is in progress, don't skip sweeping to help make progress
-  // clearing space in the code cache.
-  if ((_current == NULL && !_rescan) && !(UseCodeCacheFlushing && !CompileBroker::should_compile_new_jobs())) {
-    _do_sweep = false;
-    return;
-  }
+  // safepoint.
 
   // Make sure CompiledIC_lock in unlocked, since we might update some
   // inline caches. If it is, we just bail-out and try later.
@@ -176,7 +171,7 @@
 
   // Check for restart
   assert(CodeCache::find_blob_unsafe(_current) == _current, "Sweeper nmethod cached state invalid");
-  if (_current == NULL) {
+  if (!sweep_in_progress() && _resweep) {
     _seen        = 0;
     _invocations = NmethodSweepFraction;
     _current     = CodeCache::first_nmethod();
@@ -187,39 +182,30 @@
     Threads::nmethods_do(&mark_activation_closure);
 
     // reset the flags since we started a scan from the beginning.
-    _rescan = false;
+    _resweep = false;
     _locked_seen = 0;
     _not_entrant_seen_on_stack = 0;
   }
 
   if (UseCodeCacheFlushing) {
-    if (!CodeCache::needs_flushing()) {
-      // scan_stacks() runs during a safepoint, no race with setters
-      _advise_to_sweep = 0;
+    // only allow new flushes after the interval is complete.
+    jlong now           = os::javaTimeMillis();
+    jlong max_interval  = (jlong)MinCodeCacheFlushingInterval * (jlong)1000;
+    jlong curr_interval = now - _last_full_flush_time;
+    if (curr_interval > max_interval) {
+      _flush_token = 0;
     }
 
-    if (was_full()) {
-      // There was some progress so attempt to restart the compiler
-      jlong now           = os::javaTimeMillis();
-      jlong max_interval  = (jlong)MinCodeCacheFlushingInterval * (jlong)1000;
-      jlong curr_interval = now - _last_was_full;
-      if ((!CodeCache::needs_flushing()) && (curr_interval > max_interval)) {
-        CompileBroker::set_should_compile_new_jobs(CompileBroker::run_compilation);
-        set_was_full(false);
-
-        // Update the _last_was_full time so we can tell how fast the
-        // code cache is filling up
-        _last_was_full = os::javaTimeMillis();
-
-        log_sweep("restart_compiler");
-      }
+    if (!CodeCache::needs_flushing() && !CompileBroker::should_compile_new_jobs()) {
+      CompileBroker::set_should_compile_new_jobs(CompileBroker::run_compilation);
+      log_sweep("restart_compiler");
     }
   }
 }
 
 void NMethodSweeper::possibly_sweep() {
   assert(JavaThread::current()->thread_state() == _thread_in_vm, "must run in vm mode");
-  if ((!MethodFlushing) || (!_do_sweep)) return;
+  if (!MethodFlushing || !sweep_in_progress()) return;
 
   if (_invocations > 0) {
     // Only one thread at a time will sweep
@@ -253,6 +239,14 @@
     tty->print_cr("### Sweep at %d out of %d. Invocations left: %d", _seen, CodeCache::nof_nmethods(), _invocations);
   }
 
+  if (!CompileBroker::should_compile_new_jobs()) {
+    // If we have turned off compilations we might as well do full sweeps
+    // in order to reach the clean state faster. Otherwise the sleeping compiler
+    // threads will slow down sweeping. After a few iterations the cache
+    // will be clean and sweeping stops (_resweep will not be set)
+    _invocations = 1;
+  }
+
   // We want to visit all nmethods after NmethodSweepFraction
   // invocations so divide the remaining number of nmethods by the
   // remaining number of invocations.  This is only an estimate since
@@ -296,7 +290,7 @@
 
   assert(_invocations > 1 || _current == NULL, "must have scanned the whole cache");
 
-  if (_current == NULL && !_rescan && (_locked_seen || _not_entrant_seen_on_stack)) {
+  if (!sweep_in_progress() && !_resweep && (_locked_seen || _not_entrant_seen_on_stack)) {
     // we've completed a scan without making progress but there were
     // nmethods we were unable to process either because they were
     // locked or were still on stack.  We don't have to aggresively
@@ -318,6 +312,13 @@
   if (_invocations == 1) {
     log_sweep("finished");
   }
+
+  // Sweeper is the only case where memory is released,
+  // check here if it is time to restart the compiler.
+  if (UseCodeCacheFlushing && !CompileBroker::should_compile_new_jobs() && !CodeCache::needs_flushing()) {
+    CompileBroker::set_should_compile_new_jobs(CompileBroker::run_compilation);
+    log_sweep("restart_compiler");
+  }
 }
 
 class NMethodMarker: public StackObj {
@@ -392,7 +393,7 @@
         tty->print_cr("### Nmethod %3d/" PTR_FORMAT " (zombie) being marked for reclamation", nm->compile_id(), nm);
       }
       nm->mark_for_reclamation();
-      _rescan = true;
+      _resweep = true;
       SWEEP(nm);
     }
   } else if (nm->is_not_entrant()) {
@@ -403,7 +404,7 @@
         tty->print_cr("### Nmethod %3d/" PTR_FORMAT " (not entrant) being made zombie", nm->compile_id(), nm);
       }
       nm->make_zombie();
-      _rescan = true;
+      _resweep = true;
       SWEEP(nm);
     } else {
       // Still alive, clean up its inline caches
@@ -425,16 +426,15 @@
       release_nmethod(nm);
     } else {
       nm->make_zombie();
-      _rescan = true;
+      _resweep = true;
       SWEEP(nm);
     }
   } else {
     assert(nm->is_alive(), "should be alive");
 
     if (UseCodeCacheFlushing) {
-      if ((nm->method()->code() != nm) && !(nm->is_locked_by_vm()) && !(nm->is_osr_method()) &&
-          (_traversals > _was_full_traversal+2) && (((uint)nm->compile_id()) < _highest_marked) &&
-          CodeCache::needs_flushing()) {
+      if (nm->is_speculatively_disconnected() && !nm->is_locked_by_vm() && !nm->is_osr_method() &&
+          (_traversals > _last_flush_traversal_id + 2) && (nm->compile_id() < _highest_marked)) {
         // This method has not been called since the forced cleanup happened
         nm->make_not_entrant();
       }
@@ -457,41 +457,27 @@
 // _code field is restored and the Method*/nmethod
 // go back to their normal state.
 void NMethodSweeper::handle_full_code_cache(bool is_full) {
-  // Only the first one to notice can advise us to start early cleaning
-  if (!is_full){
-    jint old = Atomic::cmpxchg( 1, &_advise_to_sweep, 0 );
-    if (old != 0) {
-      return;
-    }
-  }
 
   if (is_full) {
     // Since code cache is full, immediately stop new compiles
-    bool did_set = CompileBroker::set_should_compile_new_jobs(CompileBroker::stop_compilation);
-    if (!did_set) {
-      // only the first to notice can start the cleaning,
-      // others will go back and block
-      return;
+    if (CompileBroker::set_should_compile_new_jobs(CompileBroker::stop_compilation)) {
+      log_sweep("disable_compiler");
     }
-    set_was_full(true);
+  }
 
-    // If we run out within MinCodeCacheFlushingInterval of the last unload time, give up
-    jlong now = os::javaTimeMillis();
-    jlong max_interval = (jlong)MinCodeCacheFlushingInterval * (jlong)1000;
-    jlong curr_interval = now - _last_was_full;
-    if (curr_interval < max_interval) {
-      _rescan = true;
-      log_sweep("disable_compiler", "flushing_interval='" UINT64_FORMAT "'",
-                           curr_interval/1000);
-      return;
-    }
+  // Make sure only one thread can flush
+  // The token is reset after CodeCacheMinimumFlushInterval in scan stacks,
+  // no need to check the timeout here.
+  jint old = Atomic::cmpxchg( 1, &_flush_token, 0 );
+  if (old != 0) {
+    return;
   }
 
   VM_HandleFullCodeCache op(is_full);
   VMThread::execute(&op);
 
-  // rescan again as soon as possible
-  _rescan = true;
+  // resweep again as soon as possible
+  _resweep = true;
 }
 
 void NMethodSweeper::speculative_disconnect_nmethods(bool is_full) {
@@ -500,62 +486,64 @@
 
   debug_only(jlong start = os::javaTimeMillis();)
 
-  if ((!was_full()) && (is_full)) {
-    if (!CodeCache::needs_flushing()) {
-      log_sweep("restart_compiler");
-      CompileBroker::set_should_compile_new_jobs(CompileBroker::run_compilation);
-      return;
-    }
-  }
+  // Traverse the code cache trying to dump the oldest nmethods
+  int curr_max_comp_id = CompileBroker::get_compilation_id();
+  int flush_target = ((curr_max_comp_id - _dead_compile_ids) / CodeCacheFlushingFraction) + _dead_compile_ids;
 
-  // Traverse the code cache trying to dump the oldest nmethods
-  uint curr_max_comp_id = CompileBroker::get_compilation_id();
-  uint flush_target = ((curr_max_comp_id - _highest_marked) >> 1) + _highest_marked;
   log_sweep("start_cleaning");
 
   nmethod* nm = CodeCache::alive_nmethod(CodeCache::first());
   jint disconnected = 0;
   jint made_not_entrant  = 0;
+  jint nmethod_count = 0;
+
   while ((nm != NULL)){
-    uint curr_comp_id = nm->compile_id();
+    int curr_comp_id = nm->compile_id();
 
     // OSR methods cannot be flushed like this. Also, don't flush native methods
     // since they are part of the JDK in most cases
-    if (nm->is_in_use() && (!nm->is_osr_method()) && (!nm->is_locked_by_vm()) &&
-        (!nm->is_native_method()) && ((curr_comp_id < flush_target))) {
+    if (!nm->is_osr_method() && !nm->is_locked_by_vm() && !nm->is_native_method()) {
+
+      // only count methods that can be speculatively disconnected
+      nmethod_count++;
 
-      if ((nm->method()->code() == nm)) {
-        // This method has not been previously considered for
-        // unloading or it was restored already
-        CodeCache::speculatively_disconnect(nm);
-        disconnected++;
-      } else if (nm->is_speculatively_disconnected()) {
-        // This method was previously considered for preemptive unloading and was not called since then
-        CompilationPolicy::policy()->delay_compilation(nm->method());
-        nm->make_not_entrant();
-        made_not_entrant++;
-      }
+      if (nm->is_in_use() && (curr_comp_id < flush_target)) {
+        if ((nm->method()->code() == nm)) {
+          // This method has not been previously considered for
+          // unloading or it was restored already
+          CodeCache::speculatively_disconnect(nm);
+          disconnected++;
+        } else if (nm->is_speculatively_disconnected()) {
+          // This method was previously considered for preemptive unloading and was not called since then
+          CompilationPolicy::policy()->delay_compilation(nm->method());
+          nm->make_not_entrant();
+          made_not_entrant++;
+        }
 
-      if (curr_comp_id > _highest_marked) {
-        _highest_marked = curr_comp_id;
+        if (curr_comp_id > _highest_marked) {
+          _highest_marked = curr_comp_id;
+        }
       }
     }
     nm = CodeCache::alive_nmethod(CodeCache::next(nm));
   }
 
+  // remember how many compile_ids wheren't seen last flush.
+  _dead_compile_ids = curr_max_comp_id - nmethod_count;
+
   log_sweep("stop_cleaning",
                        "disconnected='" UINT32_FORMAT "' made_not_entrant='" UINT32_FORMAT "'",
                        disconnected, made_not_entrant);
 
   // Shut off compiler. Sweeper will start over with a new stack scan and
   // traversal cycle and turn it back on if it clears enough space.
-  if (was_full()) {
-    _last_was_full = os::javaTimeMillis();
-    CompileBroker::set_should_compile_new_jobs(CompileBroker::stop_compilation);
+  if (is_full) {
+    _last_full_flush_time = os::javaTimeMillis();
   }
 
   // After two more traversals the sweeper will get rid of unrestored nmethods
-  _was_full_traversal = _traversals;
+  _last_flush_traversal_id = _traversals;
+  _resweep = true;
 #ifdef ASSERT
   jlong end = os::javaTimeMillis();
   if(PrintMethodFlushing && Verbose) {
--- a/hotspot/src/share/vm/runtime/sweeper.hpp	Fri Apr 26 14:41:10 2013 -0700
+++ b/hotspot/src/share/vm/runtime/sweeper.hpp	Mon Apr 29 13:20:19 2013 +0200
@@ -35,26 +35,29 @@
   static nmethod*  _current;      // Current nmethod
   static int       _seen;         // Nof. nmethod we have currently processed in current pass of CodeCache
 
-  static volatile int      _invocations;   // No. of invocations left until we are completed with this pass
-  static volatile int      _sweep_started; // Flag to control conc sweeper
+  static volatile int  _invocations;   // No. of invocations left until we are completed with this pass
+  static volatile int  _sweep_started; // Flag to control conc sweeper
 
-  static bool      _rescan;          // Indicates that we should do a full rescan of the
-                                     // of the code cache looking for work to do.
-  static bool      _do_sweep;        // Flag to skip the conc sweep if no stack scan happened
-  static int       _locked_seen;     // Number of locked nmethods encountered during the scan
+  //The following are reset in scan_stacks and synchronized by the safepoint
+  static bool      _resweep;           // Indicates that a change has happend and we want another sweep,
+                                       // always checked and reset at a safepoint so memory will be in sync.
+  static int       _locked_seen;       // Number of locked nmethods encountered during the scan
   static int       _not_entrant_seen_on_stack; // Number of not entrant nmethod were are still on stack
+  static jint      _flush_token;       // token that guards method flushing, making sure it is executed only once.
 
-  static bool      _was_full;        // remember if we did emergency unloading
-  static jint      _advise_to_sweep; // flag to indicate code cache getting full
-  static jlong     _last_was_full;   // timestamp of last emergency unloading
-  static uint      _highest_marked;   // highest compile id dumped at last emergency unloading
-  static long      _was_full_traversal;   // trav number at last emergency unloading
+  // These are set during a flush, a VM-operation
+  static long      _last_flush_traversal_id; // trav number at last flush unloading
+  static jlong     _last_full_flush_time;    // timestamp of last emergency unloading
+
+  // These are synchronized by the _sweep_started token
+  static int       _highest_marked;   // highest compile id dumped at last emergency unloading
+  static int       _dead_compile_ids; // number of compile ids that where not in the cache last flush
 
   static void process_nmethod(nmethod *nm);
-
   static void release_nmethod(nmethod* nm);
 
   static void log_sweep(const char* msg, const char* format = NULL, ...);
+  static bool sweep_in_progress();
 
  public:
   static long traversal_count() { return _traversals; }
@@ -71,17 +74,14 @@
   static void possibly_sweep();   // Compiler threads call this to sweep
 
   static void notify(nmethod* nm) {
-    // Perform a full scan of the code cache from the beginning.  No
+    // Request a new sweep of the code cache from the beginning. No
     // need to synchronize the setting of this flag since it only
     // changes to false at safepoint so we can never overwrite it with false.
-     _rescan = true;
+     _resweep = true;
   }
 
   static void handle_full_code_cache(bool is_full); // Called by compilers who fail to allocate
   static void speculative_disconnect_nmethods(bool was_full);   // Called by vm op to deal with alloc failure
-
-  static void set_was_full(bool state) { _was_full = state; }
-  static bool was_full() { return _was_full; }
 };
 
 #endif // SHARE_VM_RUNTIME_SWEEPER_HPP