8014555: G1: Memory ordering problem with Conc refinement and card marking
Summary: Add a StoreLoad barrier in the G1 post-barrier to fix a race with concurrent refinement. Also-reviewed-by: martin.doerr@sap.com
Reviewed-by: iveresov, tschatzl, brutisso, roland, kvn
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -37,6 +37,9 @@
#include "runtime/vframeArray.hpp"
#include "utilities/macros.hpp"
#include "vmreg_sparc.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
// Implementation of StubAssembler
@@ -912,7 +915,7 @@
Register tmp2 = G3_scratch;
jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
- Label not_already_dirty, restart, refill;
+ Label not_already_dirty, restart, refill, young_card;
#ifdef _LP64
__ srlx(addr, CardTableModRefBS::card_shift, addr);
@@ -924,9 +927,15 @@
__ set(rs, cardtable); // cardtable := <card table base>
__ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
+ __ cmp_and_br_short(tmp, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
+
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+ __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
+
assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
__ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
+ __ bind(young_card);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
__ retl();
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -3752,7 +3752,7 @@
#define __ masm.
address start = __ pc();
- Label not_already_dirty, restart, refill;
+ Label not_already_dirty, restart, refill, young_card;
#ifdef _LP64
__ srlx(O0, CardTableModRefBS::card_shift, O0);
@@ -3763,9 +3763,15 @@
__ set(addrlit, O1); // O1 := <card table base>
__ ldub(O0, O1, O2); // O2 := [O0 + O1]
+ __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
+
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+ __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
__ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
+ __ bind(young_card);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
__ retl();
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -38,6 +38,9 @@
#include "runtime/vframeArray.hpp"
#include "utilities/macros.hpp"
#include "vmreg_x86.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
// Implementation of StubAssembler
@@ -1753,13 +1756,17 @@
__ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
#endif
- __ cmpb(Address(card_addr, 0), 0);
+ __ cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
+ __ jcc(Assembler::equal, done);
+
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+ __ cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
__ jcc(Assembler::equal, done);
// storing region crossing non-NULL, card is clean.
// dirty card and log.
- __ movb(Address(card_addr, 0), 0);
+ __ movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
__ cmpl(queue_index, 0);
__ jcc(Assembler::equal, runtime);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -3389,13 +3389,18 @@
const Register card_addr = tmp;
lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
- cmpb(Address(card_addr, 0), 0);
+ cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
jcc(Assembler::equal, done);
+ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+ cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
+ jcc(Assembler::equal, done);
+
+
// storing a region crossing, non-NULL oop, card is clean.
// dirty card and log.
- movb(Address(card_addr, 0), 0);
+ movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
cmpl(queue_index, 0);
jcc(Assembler::equal, runtime);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -6035,7 +6035,11 @@
// is dirty.
G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
MemRegion mr(hr->bottom(), hr->pre_dummy_top());
- ct_bs->verify_dirty_region(mr);
+ if (hr->is_young()) {
+ ct_bs->verify_g1_young_region(mr);
+ } else {
+ ct_bs->verify_dirty_region(mr);
+ }
}
void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Tue Oct 08 17:35:51 2013 +0200
@@ -29,6 +29,7 @@
#include "gc_implementation/g1/g1CollectedHeap.hpp"
#include "gc_implementation/g1/g1AllocRegion.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "utilities/taskqueue.hpp"
@@ -134,7 +135,7 @@
assert(containing_hr->is_in(end - 1), "it should also contain end - 1");
MemRegion mr(start, end);
- g1_barrier_set()->dirty(mr);
+ g1_barrier_set()->g1_mark_as_young(mr);
}
inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -70,6 +70,12 @@
if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
return false;
}
+
+ if (val == g1_young_gen) {
+ // the card is for a young gen region. We don't need to keep track of all pointers into young
+ return false;
+ }
+
// Cached bit can be installed either on a clean card or on a claimed card.
jbyte new_val = val;
if (val == clean_card_val()) {
@@ -85,6 +91,19 @@
return true;
}
+void G1SATBCardTableModRefBS::g1_mark_as_young(const MemRegion& mr) {
+ jbyte *const first = byte_for(mr.start());
+ jbyte *const last = byte_after(mr.last());
+
+ memset(first, g1_young_gen, last - first);
+}
+
+#ifndef PRODUCT
+void G1SATBCardTableModRefBS::verify_g1_young_region(MemRegion mr) {
+ verify_region(mr, g1_young_gen, true);
+}
+#endif
+
G1SATBCardTableLoggingModRefBS::
G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
int max_covered_regions) :
@@ -97,7 +116,11 @@
void
G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
oop new_val) {
- jbyte* byte = byte_for(field);
+ volatile jbyte* byte = byte_for(field);
+ if (*byte == g1_young_gen) {
+ return;
+ }
+ OrderAccess::storeload();
if (*byte != dirty_card) {
*byte = dirty_card;
Thread* thr = Thread::current();
@@ -129,7 +152,7 @@
void
G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
- jbyte* byte = byte_for(mr.start());
+ volatile jbyte* byte = byte_for(mr.start());
jbyte* last_byte = byte_for(mr.last());
Thread* thr = Thread::current();
if (whole_heap) {
@@ -138,25 +161,35 @@
byte++;
}
} else {
- // Enqueue if necessary.
- if (thr->is_Java_thread()) {
- JavaThread* jt = (JavaThread*)thr;
- while (byte <= last_byte) {
- if (*byte != dirty_card) {
- *byte = dirty_card;
- jt->dirty_card_queue().enqueue(byte);
+ // skip all consecutive young cards
+ for (; byte <= last_byte && *byte == g1_young_gen; byte++);
+
+ if (byte <= last_byte) {
+ OrderAccess::storeload();
+ // Enqueue if necessary.
+ if (thr->is_Java_thread()) {
+ JavaThread* jt = (JavaThread*)thr;
+ for (; byte <= last_byte; byte++) {
+ if (*byte == g1_young_gen) {
+ continue;
+ }
+ if (*byte != dirty_card) {
+ *byte = dirty_card;
+ jt->dirty_card_queue().enqueue(byte);
+ }
}
- byte++;
- }
- } else {
- MutexLockerEx x(Shared_DirtyCardQ_lock,
- Mutex::_no_safepoint_check_flag);
- while (byte <= last_byte) {
- if (*byte != dirty_card) {
- *byte = dirty_card;
- _dcqs.shared_dirty_card_queue()->enqueue(byte);
+ } else {
+ MutexLockerEx x(Shared_DirtyCardQ_lock,
+ Mutex::_no_safepoint_check_flag);
+ for (; byte <= last_byte; byte++) {
+ if (*byte == g1_young_gen) {
+ continue;
+ }
+ if (*byte != dirty_card) {
+ *byte = dirty_card;
+ _dcqs.shared_dirty_card_queue()->enqueue(byte);
+ }
}
- byte++;
}
}
}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Tue Oct 08 17:35:51 2013 +0200
@@ -38,7 +38,14 @@
// snapshot-at-the-beginning marking.
class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
+protected:
+ enum G1CardValues {
+ g1_young_gen = CT_MR_BS_last_reserved << 1
+ };
+
public:
+ static int g1_young_card_val() { return g1_young_gen; }
+
// Add "pre_val" to a set of objects that may have been disconnected from the
// pre-marking object graph.
static void enqueue(oop pre_val);
@@ -118,6 +125,9 @@
_byte_map[card_index] = val;
}
+ void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
+ void g1_mark_as_young(const MemRegion& mr);
+
bool mark_card_deferred(size_t card_index);
bool is_card_deferred(size_t card_index) {
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Oct 08 17:35:51 2013 +0200
@@ -80,6 +80,10 @@
void reset() { if (_buf != NULL) _index = _sz; }
+ void enqueue(volatile void* ptr) {
+ enqueue((void*)(ptr));
+ }
+
// Enqueues the given "obj".
void enqueue(void* ptr) {
if (!_active) return;
--- a/hotspot/src/share/vm/opto/graphKit.cpp Fri Oct 04 13:33:02 2013 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.cpp Tue Oct 08 17:35:51 2013 +0200
@@ -3713,7 +3713,8 @@
Node* no_base = __ top();
float likely = PROB_LIKELY(0.999);
float unlikely = PROB_UNLIKELY(0.999);
- Node* zero = __ ConI(0);
+ Node* young_card = __ ConI((jint)G1SATBCardTableModRefBS::g1_young_card_val());
+ Node* dirty_card = __ ConI((jint)CardTableModRefBS::dirty_card_val());
Node* zeroX = __ ConX(0);
// Get the alias_index for raw card-mark memory
@@ -3769,8 +3770,16 @@
// load the original value of the card
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
- __ if_then(card_val, BoolTest::ne, zero); {
- g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
+ __ if_then(card_val, BoolTest::ne, young_card); {
+ sync_kit(ideal);
+ // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
+ insert_mem_bar(Op_MemBarVolatile, oop_store);
+ __ sync_kit(this);
+
+ Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+ __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
+ g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
+ } __ end_if();
} __ end_if();
} __ end_if();
} __ end_if();