8222766: Shenandoah: streamline post-LRB CAS barrier (x86)
author shade
Mon, 30 Sep 2019 22:39:11 +0200
changeset 58788 6a147ac7a68f
parent 58787 32d39d9525f9
child 58789 a2dfaae89445
8222766: Shenandoah: streamline post-LRB CAS barrier (x86)
Reviewed-by: rkennke
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Thu Oct 24 17:24:58 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Sep 30 22:39:11 2019 +0200
@@ -247,54 +247,6 @@
   __ bind(done);
 }
 
-void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
-  assert(ShenandoahCASBarrier, "should be enabled");
-  Label is_null;
-  __ testptr(dst, dst);
-  __ jcc(Assembler::zero, is_null);
-  resolve_forward_pointer_not_null(masm, dst, tmp);
-  __ bind(is_null);
-}
-
-void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
-  assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
-  // The below loads the mark word, checks if the lowest two bits are
-  // set, and if so, clear the lowest two bits and copy the result
-  // to dst. Otherwise it leaves dst alone.
-  // Implementing this is surprisingly awkward. I do it here by:
-  // - Inverting the mark word
-  // - Test lowest two bits == 0
-  // - If so, set the lowest two bits
-  // - Invert the result back, and copy to dst
-
-  bool borrow_reg = (tmp == noreg);
-  if (borrow_reg) {
-    // No free registers available. Make one useful.
-    tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx);
-    if (tmp == dst) {
-      tmp = LP64_ONLY(rscratch2) NOT_LP64(rcx);
-    }
-    __ push(tmp);
-  }
-
-  assert_different_registers(dst, tmp);
-
-  Label done;
-  __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
-  __ notptr(tmp);
-  __ testb(tmp, markWord::marked_value);
-  __ jccb(Assembler::notZero, done);
-  __ orptr(tmp, markWord::marked_value);
-  __ notptr(tmp);
-  __ mov(dst, tmp);
-  __ bind(done);
-
-  if (borrow_reg) {
-    __ pop(tmp);
-  }
-}
-
-
 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 
@@ -605,8 +557,9 @@
                                                 bool exchange, Register tmp1, Register tmp2) {
   assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
   assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
+  assert_different_registers(oldval, newval, tmp1, tmp2);
 
-  Label retry, done;
+  Label L_success, L_failure;
 
   // Remember oldval for retry logic below
 #ifdef _LP64
@@ -618,8 +571,10 @@
     __ movptr(tmp1, oldval);
   }
 
-  // Step 1. Try to CAS with given arguments. If successful, then we are done,
-  // and can safely return.
+  // Step 1. Fast-path.
+  //
+  // Try to CAS with given arguments. If successful, then we are done.
+
   if (os::is_MP()) __ lock();
 #ifdef _LP64
   if (UseCompressedOops) {
@@ -629,21 +584,32 @@
   {
     __ cmpxchgptr(newval, addr);
   }
-  __ jcc(Assembler::equal, done, true);
+  __ jcc(Assembler::equal, L_success);
 
   // Step 2. CAS had failed. This may be a false negative.
   //
   // The trouble comes when we compare the to-space pointer with the from-space
-  // pointer to the same object. To resolve this, it will suffice to resolve both
-  // oldval and the value from memory -- this will give both to-space pointers.
+  // pointer to the same object. To resolve this, it will suffice to resolve
+  // the value from memory -- this will give both to-space pointers.
   // If they mismatch, then it was a legitimate failure.
   //
+  // Before reaching the resolve sequence, see if we can avoid the whole shebang
+  // with filters.
+
+  // Filter: when offending in-memory value is NULL, the failure is definitely legitimate
+  __ testptr(oldval, oldval);
+  __ jcc(Assembler::zero, L_failure);
+
+  // Filter: when heap is stable, the failure is definitely legitimate
 #ifdef _LP64
-  if (UseCompressedOops) {
-    __ decode_heap_oop(tmp1);
-  }
+  const Register thread = r15_thread;
+#else
+  const Register thread = tmp2;
+  __ get_thread(thread);
 #endif
-  resolve_forward_pointer(masm, tmp1);
+  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+  __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
+  __ jcc(Assembler::zero, L_failure);
 
 #ifdef _LP64
   if (UseCompressedOops) {
@@ -654,18 +620,70 @@
   {
     __ movptr(tmp2, oldval);
   }
-  resolve_forward_pointer(masm, tmp2);
+
+  // Decode offending in-memory value.
+  // Test if-forwarded
+  __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);
+  __ jcc(Assembler::noParity, L_failure);  // When odd number of bits, then not forwarded
+  __ jcc(Assembler::zero, L_failure);      // When it is 00, then also not forwarded
+
+  // Load and mask forwarding pointer
+  __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));
+  __ shrptr(tmp2, 2);
+  __ shlptr(tmp2, 2);
 
+#ifdef _LP64
+  if (UseCompressedOops) {
+    __ decode_heap_oop(tmp1); // decode for comparison
+  }
+#endif
+
+  // Now we have the forwarded offender in tmp2.
+  // Compare and if they don't match, we have legitimate failure
   __ cmpptr(tmp1, tmp2);
-  __ jcc(Assembler::notEqual, done, true);
+  __ jcc(Assembler::notEqual, L_failure);
+
+  // Step 3. Need to fix the memory ptr before continuing.
+  //
+  // At this point, we have from-space oldval in the register, and its to-space
+  // address is in tmp2. Let's try to update it into memory. We don't care if it
+  // succeeds or not. If it does, then the retrying CAS would see it and succeed.
+  // If this fixup fails, this means somebody else beat us to it, and necessarily
+  // with to-space ptr store. We still have to do the retry, because the GC might
+  // have updated the reference for us.
 
-  // Step 3. Try to CAS again with resolved to-space pointers.
+#ifdef _LP64
+  if (UseCompressedOops) {
+    __ encode_heap_oop(tmp2); // previously decoded at step 2.
+  }
+#endif
+
+  if (os::is_MP()) __ lock();
+#ifdef _LP64
+  if (UseCompressedOops) {
+    __ cmpxchgl(tmp2, addr);
+  } else
+#endif
+  {
+    __ cmpxchgptr(tmp2, addr);
+  }
+
+  // Step 4. Try to CAS again.
   //
-  // Corner case: it may happen that somebody stored the from-space pointer
-  // to memory while we were preparing for retry. Therefore, we can fail again
-  // on retry, and so need to do this in loop, always resolving the failure
-  // witness.
-  __ bind(retry);
+  // This is guaranteed not to have false negatives, because oldval is definitely
+  // to-space, and memory pointer is to-space as well. Nothing is able to store
+  // from-space ptr into memory anymore. Make sure oldval is restored, after being
+  // garbled during retries.
+  //
+#ifdef _LP64
+  if (UseCompressedOops) {
+    __ movl(oldval, tmp2);
+  } else
+#endif
+  {
+    __ movptr(oldval, tmp2);
+  }
+
   if (os::is_MP()) __ lock();
 #ifdef _LP64
   if (UseCompressedOops) {
@@ -675,41 +693,28 @@
   {
     __ cmpxchgptr(newval, addr);
   }
-  __ jcc(Assembler::equal, done, true);
+  if (!exchange) {
+    __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump
+  }
 
-#ifdef _LP64
-  if (UseCompressedOops) {
-    __ movl(tmp2, oldval);
-    __ decode_heap_oop(tmp2);
-  } else
-#endif
-  {
-    __ movptr(tmp2, oldval);
-  }
-  resolve_forward_pointer(masm, tmp2);
-
-  __ cmpptr(tmp1, tmp2);
-  __ jcc(Assembler::equal, retry, true);
+  // Step 5. If we need a boolean result out of CAS, set the flag appropriately,
+  // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.
+  // Otherwise, failure witness for CAE is in oldval on all paths, and we can return.
 
-  // Step 4. If we need a boolean result out of CAS, check the flag again,
-  // and promote the result. Note that we handle the flag from both the CAS
-  // itself and from the retry loop.
-  __ bind(done);
-  if (!exchange) {
+  if (exchange) {
+    __ bind(L_failure);
+    __ bind(L_success);
+  } else {
     assert(res != NULL, "need result register");
-#ifdef _LP64
-    __ setb(Assembler::equal, res);
-    __ movzbl(res, res);
-#else
-    // Need something else to clean the result, because some registers
-    // do not have byte encoding that movzbl wants. Cannot do the xor first,
-    // because it modifies the flags.
-    Label res_non_zero;
+
+    Label exit;
+    __ bind(L_failure);
+    __ xorptr(res, res);
+    __ jmpb(exit);
+
+    __ bind(L_success);
     __ movptr(res, 1);
-    __ jcc(Assembler::equal, res_non_zero, true);
-    __ xorptr(res, res);
-    __ bind(res_non_zero);
-#endif
+    __ bind(exit);
   }
 }
 
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Thu Oct 24 17:24:58 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Sep 30 22:39:11 2019 +0200
@@ -55,9 +55,6 @@
                                     bool tosca_live,
                                     bool expand_call);
 
-  void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
-  void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
-
   void load_reference_barrier_not_null(MacroAssembler* masm, Register dst);
 
   void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);