8210466: Modularize allocations in assembler
author avoitylov
Mon, 24 Sep 2018 16:44:24 +0300
changeset 51846 cc1a4a267798
parent 51845 f5daffd7ec7a
child 51847 34e2180a6d51
8210466: Modularize allocations in assembler
Reviewed-by: rkennke, dsamersoff
src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp
src/hotspot/cpu/arm/c1_Runtime1_arm.cpp
src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp
src/hotspot/cpu/arm/macroAssembler_arm.cpp
src/hotspot/cpu/arm/macroAssembler_arm.hpp
src/hotspot/cpu/arm/templateTable_arm.cpp
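
The change moves the inline-allocation fast paths (eden_allocate, tlab_allocate) and the per-thread allocated-bytes accounting (incr_allocated_bytes) out of MacroAssembler and into the ARM BarrierSetAssembler, so a collector-specific barrier set can supply its own allocation code. MacroAssembler keeps the old entry points but now only delegates, which is also why the C1 and template-interpreter callers below drop their explicit incr_allocated_bytes calls: the counter update now happens inside BarrierSetAssembler::eden_allocate. A minimal sketch of the delegation pattern, taken from the macroAssembler_arm.cpp hunk further down:

  void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                     RegisterOrConstant size_expression, Label& slow_case) {
    // Look up the assembler of the active barrier set and forward the request to it.
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case);
  }
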
--- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp	Mon Sep 24 16:44:24 2018 +0300
@@ -90,7 +90,6 @@
     tlab_allocate(obj, obj_end, tmp1, size_expression, slow_case);
   } else {
     eden_allocate(obj, obj_end, tmp1, tmp2, size_expression, slow_case);
-    incr_allocated_bytes(size_expression, tmp1);
   }
 }
 
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Mon Sep 24 16:44:24 2018 +0300
@@ -569,7 +569,6 @@
 
           __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
           __ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case);        // initializes result and obj_end
-          __ incr_allocated_bytes(obj_size, tmp2);
           __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
                                instanceOopDesc::header_size() * HeapWordSize, -1,
                                /* is_tlab_allocated */ false);
@@ -658,7 +657,6 @@
           // eden_allocate destroys tmp2, so reload header_size after allocation
           // eden_allocate initializes result and obj_end
           __ eden_allocate(result, obj_end, tmp1, tmp2, arr_size, slow_case);
-          __ incr_allocated_bytes(arr_size, tmp2);
           __ ldrb(tmp2, Address(klass, in_bytes(Klass::layout_helper_offset()) +
                                        Klass::_lh_header_size_shift / BitsPerByte));
           __ initialize_object(result, obj_end, klass, length, tmp1, tmp2, tmp2, -1, /* is_tlab_allocated */ false);
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Mon Sep 24 16:44:24 2018 +0300
@@ -24,6 +24,8 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "runtime/thread.hpp"
 
 #define __ masm->
 
@@ -166,3 +168,118 @@
                                      Register obj1, Register obj2) {
   __ cmp(obj1, obj2);
 }
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
+                                 RegisterOrConstant size_expression, Label& slow_case) {
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    __ b(slow_case);
+    return;
+  }
+
+  CollectedHeap* ch = Universe::heap();
+
+  const Register top_addr = tmp1;
+  const Register heap_end = tmp2;
+
+  if (size_expression.is_register()) {
+    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+  } else {
+    assert_different_registers(obj, obj_end, top_addr, heap_end);
+  }
+
+  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
+  if (load_const) {
+    __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+  } else {
+    __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+  }
+  // Calculate new heap_top by adding the size of the object
+  Label retry;
+  __ bind(retry);
+
+#ifdef AARCH64
+  __ ldxr(obj, top_addr);
+#else
+  __ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+  __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+  __ add_rc(obj_end, obj, size_expression);
+  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+  __ cmp(obj_end, obj);
+  __ b(slow_case, lo);
+  // Update heap_top if allocation succeeded
+  __ cmp(obj_end, heap_end);
+  __ b(slow_case, hi);
+
+#ifdef AARCH64
+  __ stxr(heap_end/*scratched*/, obj_end, top_addr);
+  __ cbnz_w(heap_end, retry);
+#else
+  __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+  __ b(retry, ne);
+#endif // AARCH64
+
+  incr_allocated_bytes(masm, size_expression, tmp1);
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
+                                 RegisterOrConstant size_expression, Label& slow_case) {
+  const Register tlab_end = tmp1;
+  assert_different_registers(obj, obj_end, tlab_end);
+
+  __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+  __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+  __ add_rc(obj_end, obj, size_expression);
+  __ cmp(obj_end, tlab_end);
+  __ b(slow_case, hi);
+  __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+  __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  __ add_rc(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+  // Bump total bytes allocated by this thread
+  Label done;
+
+  // Borrow the Rthread for alloc counter
+  Register Ralloc = Rthread;
+  __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+  __ ldr(tmp, Address(Ralloc));
+  __ adds(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Ralloc), cc);
+  __ b(done, cc);
+
+  // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
+  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a request could be difficult to satisfy by
+  // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
+  Register low, high;
+  // Select either R0/R1 or R2/R3
+
+  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+    low = R2;
+    high  = R3;
+  } else {
+    low = R0;
+    high  = R1;
+  }
+  __ push(RegisterSet(low, high));
+
+  __ ldrd(low, Address(Ralloc));
+  __ adds(low, low, size_in_bytes);
+  __ adc(high, high, 0);
+  __ strd(low, Address(Ralloc));
+
+  __ pop(RegisterSet(low, high));
+
+  __ bind(done);
+
+  // Unborrow the Rthread
+  __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+#endif // AARCH64
+}
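
On 32-bit ARM the allocated-bytes counter is a 64-bit value kept as two 32-bit words. incr_allocated_bytes above adds to the low word first (adds sets the carry flag) and stores only that word when no carry occurs; only on carry does it fall back to a ldrd/strd pair that folds the carry into the high word. A plain C++ model of that arithmetic, for illustration only (not part of the patch; it assumes the low word is stored first, matching little-endian ldrd):

  #include <cstdint>

  // counter[0] = low word, counter[1] = high word of the 64-bit allocated-bytes count.
  static void incr_allocated_bytes_model(uint32_t counter[2], uint32_t size_in_bytes) {
    uint64_t sum = (uint64_t)counter[0] + size_in_bytes;  // "adds": may carry out of 32 bits
    counter[0] = (uint32_t)sum;                           // low word is always updated
    if (sum > 0xFFFFFFFFu) {                              // carry set: the unlikely slow path
      counter[1] += 1;                                    // "adc": propagate into the high word
    }
  }
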
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Mon Sep 24 16:44:24 2018 +0300
@@ -30,6 +30,12 @@
 #include "oops/access.hpp"
 
 class BarrierSetAssembler: public CHeapObj<mtGC> {
+private:
+  void incr_allocated_bytes(MacroAssembler* masm,
+    RegisterOrConstant size_in_bytes,
+    Register           tmp
+);
+
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register addr, Register count, int callee_saved_regs) {}
@@ -44,6 +50,23 @@
   virtual void obj_equals(MacroAssembler* masm,
                           Register obj1, Register obj2);
 
+  virtual void eden_allocate(MacroAssembler* masm,
+    Register           obj,              // result: pointer to object after successful allocation
+    Register           obj_end,          // result: pointer to end of object after successful allocation
+    Register           tmp1,             // temp register
+    Register           tmp2,             // temp register
+    RegisterOrConstant size_expression,  // size of object
+    Label&             slow_case         // continuation point if fast allocation fails
+  );
+
+  virtual void tlab_allocate(MacroAssembler* masm,
+    Register           obj,              // result: pointer to object after successful allocation
+    Register           obj_end,          // result: pointer to end of object after successful allocation
+    Register           tmp1,             // temp register
+    RegisterOrConstant size_expression,  // size of object
+    Label&             slow_case         // continuation point if fast allocation fails
+  );
+
   virtual void barrier_stubs_init() {}
 };
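+
Declaring eden_allocate and tlab_allocate as virtuals is what makes the allocation code modular: a collector-specific BarrierSetAssembler can override the fast path instead of patching MacroAssembler. A hypothetical sketch of such an override (the class name is illustrative and not part of this patch); it simply forces every allocation onto the runtime slow path:

  #include "gc/shared/barrierSetAssembler.hpp"  // include path simplified for the sketch

  // Illustrative only: a GC-specific assembler that disables inline eden allocation.
  class NoInlineAllocBarrierSetAssembler : public BarrierSetAssembler {
  public:
    virtual void eden_allocate(MacroAssembler* masm,
                               Register obj, Register obj_end,
                               Register tmp1, Register tmp2,
                               RegisterOrConstant size_expression,
                               Label& slow_case) {
      masm->b(slow_case);  // always take the slow case; the runtime allocates instead
    }
  };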
 
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Mon Sep 24 16:44:24 2018 +0300
@@ -1256,68 +1256,15 @@
 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                  RegisterOrConstant size_expression, Label& slow_case) {
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    b(slow_case);
-    return;
-  }
-
-  CollectedHeap* ch = Universe::heap();
-
-  const Register top_addr = tmp1;
-  const Register heap_end = tmp2;
-
-  if (size_expression.is_register()) {
-    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
-  } else {
-    assert_different_registers(obj, obj_end, top_addr, heap_end);
-  }
-
-  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
-  if (load_const) {
-    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
-  } else {
-    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
-  }
-  // Calculate new heap_top by adding the size of the object
-  Label retry;
-  bind(retry);
-
-#ifdef AARCH64
-  ldxr(obj, top_addr);
-#else
-  ldr(obj, Address(top_addr));
-#endif // AARCH64
-
-  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
-  add_rc(obj_end, obj, size_expression);
-  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
-  cmp(obj_end, obj);
-  b(slow_case, lo);
-  // Update heap_top if allocation succeeded
-  cmp(obj_end, heap_end);
-  b(slow_case, hi);
-
-#ifdef AARCH64
-  stxr(heap_end/*scratched*/, obj_end, top_addr);
-  cbnz_w(heap_end, retry);
-#else
-  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
-  b(retry, ne);
-#endif // AARCH64
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case);
 }
 
 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                  RegisterOrConstant size_expression, Label& slow_case) {
-  const Register tlab_end = tmp1;
-  assert_different_registers(obj, obj_end, tlab_end);
-
-  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
-  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
-  add_rc(obj_end, obj, size_expression);
-  cmp(obj_end, tlab_end);
-  b(slow_case, hi);
-  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);
 }
 
 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
@@ -1363,52 +1310,6 @@
 #endif // AARCH64
 }
 
-void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
-#ifdef AARCH64
-  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-  add_rc(tmp, tmp, size_in_bytes);
-  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-#else
-  // Bump total bytes allocated by this thread
-  Label done;
-
-  // Borrow the Rthread for alloc counter
-  Register Ralloc = Rthread;
-  add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-  ldr(tmp, Address(Ralloc));
-  adds(tmp, tmp, size_in_bytes);
-  str(tmp, Address(Ralloc), cc);
-  b(done, cc);
-
-  // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
-  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a request could be difficult to satisfy by
-  // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
-  Register low, high;
-  // Select either R0/R1 or R2/R3
-
-  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
-    low = R2;
-    high  = R3;
-  } else {
-    low = R0;
-    high  = R1;
-  }
-  push(RegisterSet(low, high));
-
-  ldrd(low, Address(Ralloc));
-  adds(low, low, size_in_bytes);
-  adc(high, high, 0);
-  strd(low, Address(Ralloc));
-
-  pop(RegisterSet(low, high));
-
-  bind(done);
-
-  // Unborrow the Rthread
-  sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-#endif // AARCH64
-}
-
 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
   if (UseStackBanging) {
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Mon Sep 24 16:44:24 2018 +0300
@@ -361,8 +361,6 @@
 
   void zero_memory(Register start, Register end, Register tmp);
 
-  void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp);
-
   static bool needs_explicit_null_check(intptr_t offset);
 
   void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp);
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp	Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp	Mon Sep 24 16:44:24 2018 +0300
@@ -4502,12 +4502,7 @@
     const Register Rtlab_end = R2_tmp;
     assert_different_registers(Robj, Rsize, Rklass, Rtlab_top, Rtlab_end);
 
-    __ ldr(Robj, Address(Rthread, JavaThread::tlab_top_offset()));
-    __ ldr(Rtlab_end, Address(Rthread, in_bytes(JavaThread::tlab_end_offset())));
-    __ add(Rtlab_top, Robj, Rsize);
-    __ cmp(Rtlab_top, Rtlab_end);
-    __ b(slow_case, hi);
-    __ str(Rtlab_top, Address(Rthread, JavaThread::tlab_top_offset()));
+    __ tlab_allocate(Robj, Rtlab_top, Rtlab_end, Rsize, slow_case);
     if (ZeroTLAB) {
       // the fields have been already cleared
       __ b(initialize_header);
@@ -4523,34 +4518,7 @@
       const Register Rheap_end = Rtemp;
       assert_different_registers(Robj, Rklass, Rsize, Rheap_top_addr, Rheap_top, Rheap_end, LR);
 
-      // heap_end now (re)loaded in the loop since also used as a scratch register in the CAS
-      __ ldr_literal(Rheap_top_addr, Lheap_top_addr);
-
-      Label retry;
-      __ bind(retry);
-
-#ifdef AARCH64
-      __ ldxr(Robj, Rheap_top_addr);
-#else
-      __ ldr(Robj, Address(Rheap_top_addr));
-#endif // AARCH64
-
-      __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr()));
-      __ add(Rheap_top, Robj, Rsize);
-      __ cmp(Rheap_top, Rheap_end);
-      __ b(slow_case, hi);
-
-      // Update heap top atomically.
-      // If someone beats us on the allocation, try again, otherwise continue.
-#ifdef AARCH64
-      __ stxr(Rtemp2, Rheap_top, Rheap_top_addr);
-      __ cbnz_w(Rtemp2, retry);
-#else
-      __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/);
-      __ b(retry, ne);
-#endif // AARCH64
-
-      __ incr_allocated_bytes(Rsize, Rtemp);
+      __ eden_allocate(Robj, Rheap_top, Rheap_top_addr, Rheap_end, Rsize, slow_case);
     }
   }
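
For reference, the register mapping of the rewritten _new fast path onto the new helper signatures (drawn from the hunks above; the comments are editorial):

  // TLAB path:
  __ tlab_allocate(Robj,        // obj:      start of the new object
                   Rtlab_top,   // obj_end:  end of the new object
                   Rtlab_end,   // tmp1:     scratch, holds the TLAB end
                   Rsize,       // size_expression
                   slow_case);  // taken when the TLAB cannot satisfy the request

  // Shared-eden path:
  __ eden_allocate(Robj,            // obj
                   Rheap_top,       // obj_end
                   Rheap_top_addr,  // tmp1: scratch, address of the heap top
                   Rheap_end,       // tmp2: scratch, heap end / CAS scratch
                   Rsize,           // size_expression
                   slow_case);      // taken when inline contiguous allocation is not possible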