8069273: Decrease Hot Card Cache Lock contention
authorredestad
Thu, 29 Jan 2015 15:05:25 +0100
changeset 28831 454224c7e3ba
parent 28830 a252e278c3d9
child 28832 4c2704386850
8069273: Decrease Hot Card Cache Lock contention Reviewed-by: tschatzl, mgerdin
hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp
hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp
hotspot/src/share/vm/runtime/mutexLocker.cpp
hotspot/src/share/vm/runtime/mutexLocker.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Tue Jan 27 13:50:31 2015 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Thu Jan 29 15:05:25 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,11 +36,10 @@
   if (default_use_cache()) {
     _use_cache = true;
 
-    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
+    _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize;
     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
 
-    _n_hot = 0;
-    _hot_cache_idx = 0;
+    reset_hot_cache_internal();
 
     // For refining the cards in the hot cache in parallel
     _hot_cache_par_chunk_size = ClaimChunkSize;
@@ -64,26 +63,21 @@
     // return it for immediate refining.
     return card_ptr;
   }
-
   // Otherwise, the card is hot.
-  jbyte* res = NULL;
-  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
-  if (_n_hot == _hot_cache_size) {
-    res = _hot_cache[_hot_cache_idx];
-    _n_hot--;
-  }
+  size_t index = Atomic::add(1, &_hot_cache_idx) - 1;
+  size_t masked_index = index & (_hot_cache_size - 1);
+  jbyte* current_ptr = _hot_cache[masked_index];
 
-  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
-  _hot_cache[_hot_cache_idx] = card_ptr;
-  _hot_cache_idx++;
-
-  if (_hot_cache_idx == _hot_cache_size) {
-    // Wrap around
-    _hot_cache_idx = 0;
-  }
-  _n_hot++;
-
-  return res;
+  // Try to store the new card pointer into the cache. Compare-and-swap to guard
+  // against the unlikely event of a race resulting in another card pointer to
+  // have already been written to the cache. In this case we will return
+  // card_ptr in favor of the other option, which would be starting over. This
+  // should be OK since card_ptr will likely be the older card already when/if
+  // this ever happens.
+  jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
+                                                    &_hot_cache[masked_index],
+                                                    current_ptr);
+  return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
 }
 
 void G1HotCardCache::drain(uint worker_i,
@@ -96,38 +90,38 @@
 
   assert(_hot_cache != NULL, "Logic");
   assert(!use_cache(), "cache should be disabled");
-  int start_idx;
-
-  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
-    int end_idx = start_idx + _hot_cache_par_chunk_size;
 
-    if (start_idx ==
-        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
-      // The current worker has successfully claimed the chunk [start_idx..end_idx)
-      end_idx = MIN2(end_idx, _n_hot);
-      for (int i = start_idx; i < end_idx; i++) {
-        jbyte* card_ptr = _hot_cache[i];
-        if (card_ptr != NULL) {
-          if (g1rs->refine_card(card_ptr, worker_i, true)) {
-            // The part of the heap spanned by the card contains references
-            // that point into the current collection set.
-            // We need to record the card pointer in the DirtyCardQueueSet
-            // that we use for such cards.
-            //
-            // The only time we care about recording cards that contain
-            // references that point into the collection set is during
-            // RSet updating while within an evacuation pause.
-            // In this case worker_i should be the id of a GC worker thread
-            assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
-            assert(worker_i < ParallelGCThreads,
-                   err_msg("incorrect worker id: %u", worker_i));
+  while (_hot_cache_par_claimed_idx < _hot_cache_size) {
+    size_t end_idx = Atomic::add(_hot_cache_par_chunk_size,
+                                 &_hot_cache_par_claimed_idx);
+    size_t start_idx = end_idx - _hot_cache_par_chunk_size;
+    // The current worker has successfully claimed the chunk [start_idx..end_idx)
+    end_idx = MIN2(end_idx, _hot_cache_size);
+    for (size_t i = start_idx; i < end_idx; i++) {
+      jbyte* card_ptr = _hot_cache[i];
+      if (card_ptr != NULL) {
+        if (g1rs->refine_card(card_ptr, worker_i, true)) {
+          // The part of the heap spanned by the card contains references
+          // that point into the current collection set.
+          // We need to record the card pointer in the DirtyCardQueueSet
+          // that we use for such cards.
+          //
+          // The only time we care about recording cards that contain
+          // references that point into the collection set is during
+          // RSet updating while within an evacuation pause.
+          // In this case worker_i should be the id of a GC worker thread
+          assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
+          assert(worker_i < ParallelGCThreads,
+                 err_msg("incorrect worker id: %u", worker_i));
 
-            into_cset_dcq->enqueue(card_ptr);
-          }
+          into_cset_dcq->enqueue(card_ptr);
         }
+      } else {
+        break;
       }
     }
   }
+
   // The existing entries in the hot card cache, which were just refined
   // above, are discarded prior to re-enabling the cache near the end of the GC.
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Tue Jan 27 13:50:31 2015 -0500
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Thu Jan 29 15:05:25 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,21 +54,30 @@
 // code, increasing throughput.
 
 class G1HotCardCache: public CHeapObj<mtGC> {
-  G1CollectedHeap*   _g1h;
+
+  G1CollectedHeap*  _g1h;
+
+  bool              _use_cache;
+
+  G1CardCounts      _card_counts;
 
   // The card cache table
-  jbyte**      _hot_cache;
+  jbyte**           _hot_cache;
 
-  int          _hot_cache_size;
-  int          _n_hot;
-  int          _hot_cache_idx;
+  size_t            _hot_cache_size;
+
+  int               _hot_cache_par_chunk_size;
 
-  int          _hot_cache_par_chunk_size;
-  volatile int _hot_cache_par_claimed_idx;
+  // Avoids false sharing when concurrently updating _hot_cache_idx or
+  // _hot_cache_par_claimed_idx. These are never updated at the same time
+  // thus it's not necessary to separate them as well
+  char _pad_before[DEFAULT_CACHE_LINE_SIZE];
 
-  bool         _use_cache;
+  volatile size_t _hot_cache_idx;
 
-  G1CardCounts _card_counts;
+  volatile size_t _hot_cache_par_claimed_idx;
+
+  char _pad_after[DEFAULT_CACHE_LINE_SIZE];
 
   // The number of cached cards a thread claims when flushing the cache
   static const int ClaimChunkSize = 32;
@@ -113,16 +122,25 @@
   void reset_hot_cache() {
     assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
     assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
-    _hot_cache_idx = 0; _n_hot = 0;
+    if (default_use_cache()) {
+        reset_hot_cache_internal();
+    }
   }
 
-  bool hot_cache_is_empty() { return _n_hot == 0; }
-
   // Zeros the values in the card counts table for entire committed heap
   void reset_card_counts();
 
   // Zeros the values in the card counts table for the given region
   void reset_card_counts(HeapRegion* hr);
+
+ private:
+  void reset_hot_cache_internal() {
+    assert(_hot_cache != NULL, "Logic");
+    _hot_cache_idx = 0;
+    for (size_t i = 0; i < _hot_cache_size; i++) {
+      _hot_cache[i] = NULL;
+    }
+  }
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
--- a/hotspot/src/share/vm/runtime/mutexLocker.cpp	Tue Jan 27 13:50:31 2015 -0500
+++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp	Thu Jan 29 15:05:25 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -120,7 +120,6 @@
 Mutex*   OldSets_lock                 = NULL;
 Monitor* RootRegionScan_lock          = NULL;
 Mutex*   MMUTracker_lock              = NULL;
-Mutex*   HotCardCache_lock            = NULL;
 
 Monitor* GCTaskManager_lock           = NULL;
 
@@ -199,7 +198,6 @@
     def(OldSets_lock               , Mutex  , leaf     ,   true,  Monitor::_safepoint_check_never);
     def(RootRegionScan_lock        , Monitor, leaf     ,   true,  Monitor::_safepoint_check_never);
     def(MMUTracker_lock            , Mutex  , leaf     ,   true,  Monitor::_safepoint_check_never);
-    def(HotCardCache_lock          , Mutex  , special  ,   true,  Monitor::_safepoint_check_never);
     def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true,  Monitor::_safepoint_check_never);
 
     def(StringDedupQueue_lock      , Monitor, leaf,        true,  Monitor::_safepoint_check_never);
--- a/hotspot/src/share/vm/runtime/mutexLocker.hpp	Tue Jan 27 13:50:31 2015 -0500
+++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp	Thu Jan 29 15:05:25 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -122,7 +122,6 @@
 extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;                 // protects the MMU
                                                  // tracker data structures
-extern Mutex*   HotCardCache_lock;               // protects the hot card cache
 
 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* Service_lock;                    // a lock used for service thread operation