6690928: Use spinning in combination with yields for workstealing termination.
Summary: Substitute a spin loop for most calls to yield() to reduce the stress on the system.
Reviewed-by: tonyp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -949,6 +949,10 @@
GCOverheadReporter::recordSTWEnd(end);
g1_policy()->record_full_collection_end();
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
+
gc_epilogue(true);
// Abandon concurrent refinement. This must happen last: in the
@@ -2647,8 +2651,13 @@
}
}
- if (mark_in_progress())
+ if (mark_in_progress()) {
concurrent_mark()->update_g1_committed();
+ }
+
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
gc_epilogue(false);
}
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -362,6 +362,10 @@
if (PrintHeapAtGC) {
Universe::print_heap_after_gc();
}
+
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
}
bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -2203,6 +2203,10 @@
collection_exit.ticks());
gc_task_manager()->print_task_time_stamps();
}
+
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
}
bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -615,6 +615,10 @@
gc_task_manager()->print_task_time_stamps();
}
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
+
return !promotion_failure_occurred;
}
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -610,6 +610,10 @@
Universe::print_heap_after_gc();
}
+#ifdef TRACESPINNING
+ ParallelTaskTerminator::print_termination_counts();
+#endif
+
if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
vm_exit(-1);
--- a/hotspot/src/share/vm/oops/cpCacheKlass.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/oops/cpCacheKlass.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -161,7 +161,7 @@
}
bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const {
- assert(obj->is_constantPoolCache(), "must be constMethod oop");
+ assert(obj->is_constantPoolCache(), "should be constant pool");
return constantPoolCacheOop(obj)->is_conc_safe();
}
--- a/hotspot/src/share/vm/runtime/globals.hpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/runtime/globals.hpp Sun Feb 08 13:18:01 2009 -0800
@@ -1655,6 +1655,13 @@
develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \
"Number of yields before a sleep is done during workstealing") \
\
+ develop(uintx, WorkStealingHardSpins, 4096, \
+ "Number of iterations in a spin loop between checks on " \
+ "time out of hard spin") \
+ \
+ develop(uintx, WorkStealingSpinToYieldRatio, 10, \
+ "Ratio of hard spins to calls to yield") \
+ \
product(uintx, PreserveMarkStackSize, 1024, \
"Size for stack used in promotion failure handling") \
\
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp Sun Feb 08 13:18:01 2009 -0800
@@ -25,6 +25,12 @@
# include "incls/_precompiled.incl"
# include "incls/_taskqueue.cpp.incl"
+#ifdef TRACESPINNING
+uint ParallelTaskTerminator::_total_yields = 0;
+uint ParallelTaskTerminator::_total_spins = 0;
+uint ParallelTaskTerminator::_total_peeks = 0;
+#endif
+
bool TaskQueueSuper::peek() {
return _bottom != _age.top();
}
@@ -70,14 +76,61 @@
Atomic::inc(&_offered_termination);
uint yield_count = 0;
+ // Number of hard spin loops done since last yield
+ uint hard_spin_count = 0;
+ // Number of iterations in the hard spin loop.
+ uint hard_spin_limit = WorkStealingHardSpins;
+
+ // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
+ // If it is greater than 0, then start with a small number
+ // of spins and increase number with each turn at spinning until
+ // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
+ // Then do a yield() call and start spinning afresh.
+ if (WorkStealingSpinToYieldRatio > 0) {
+ hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+ hard_spin_limit = MAX2(hard_spin_limit, 1U);
+ }
+ // Remember the initial spin limit.
+ uint hard_spin_start = hard_spin_limit;
+
+ // Loop waiting for all threads to offer termination or
+ // more work.
while (true) {
+ // Are all threads offering termination?
if (_offered_termination == _n_threads) {
- //inner_termination_loop();
return true;
} else {
+ // Look for more work.
+ // Periodically sleep() instead of yield() to give threads
+ // waiting on the cores the chance to grab this code
if (yield_count <= WorkStealingYieldsBeforeSleep) {
+ // Do a yield or hardspin. For purposes of deciding whether
+ // to sleep, count this as a yield.
yield_count++;
- yield();
+
+ // Periodically call yield() instead of spinning
+ // After WorkStealingSpinToYieldRatio spins, do a yield() call
+ // and reset the counts and starting limit.
+ if (hard_spin_count > WorkStealingSpinToYieldRatio) {
+ yield();
+ hard_spin_count = 0;
+ hard_spin_limit = hard_spin_start;
+#ifdef TRACESPINNING
+ _total_yields++;
+#endif
+ } else {
+ // Hard spin this time
+ // Increase the hard spinning period but only up to a limit.
+ hard_spin_limit = MIN2(2*hard_spin_limit,
+ (uint) WorkStealingHardSpins);
+ for (uint j = 0; j < hard_spin_limit; j++) {
+ SpinPause();
+ }
+ hard_spin_count++;
+#ifdef TRACESPINNING
+ _total_spins++;
+#endif
+ }
} else {
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() "
@@ -92,6 +145,9 @@
sleep(WorkStealingSleepMillis);
}
+#ifdef TRACESPINNING
+ _total_peeks++;
+#endif
if (peek_in_queue_set() ||
(terminator != NULL && terminator->should_exit_termination())) {
Atomic::dec(&_offered_termination);
@@ -101,6 +157,16 @@
}
}
+#ifdef TRACESPINNING
+// Log the cumulative yield, spin and peek counters to gclog_or_tty.
+// The counters are uint, so they are printed with %u.
+void ParallelTaskTerminator::print_termination_counts() {
+  gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %u "
+                         "Total spins: %u Total peeks: %u",
+                         total_yields(), total_spins(), total_peeks());
+}
+#endif
+
void ParallelTaskTerminator::reset_for_reuse() {
if (_offered_termination != 0) {
assert(_offered_termination == _n_threads,
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp Fri Feb 06 01:38:50 2009 +0300
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp Sun Feb 08 13:18:01 2009 -0800
@@ -426,12 +426,20 @@
// A class to aid in the termination of a set of parallel tasks using
// TaskQueueSet's for work stealing.
+#undef TRACESPINNING
+
class ParallelTaskTerminator: public StackObj {
private:
int _n_threads;
TaskQueueSetSuper* _queue_set;
int _offered_termination;
+#ifdef TRACESPINNING
+ static uint _total_yields;
+ static uint _total_spins;
+ static uint _total_peeks;
+#endif
+
bool peek_in_queue_set();
protected:
virtual void yield();
@@ -462,6 +470,12 @@
// the terminator is finished.
void reset_for_reuse();
+#ifdef TRACESPINNING
+ static uint total_yields() { return _total_yields; }
+ static uint total_spins() { return _total_spins; }
+ static uint total_peeks() { return _total_peeks; }
+ static void print_termination_counts();
+#endif
};
#define SIMPLE_STACK 0