--- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp Mon Sep 24 16:44:24 2018 +0300
@@ -90,7 +90,6 @@
tlab_allocate(obj, obj_end, tmp1, size_expression, slow_case);
} else {
eden_allocate(obj, obj_end, tmp1, tmp2, size_expression, slow_case);
- incr_allocated_bytes(size_expression, tmp1);
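+    // eden_allocate updates the thread's allocated-bytes counter internally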
}
}
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp Mon Sep 24 16:44:24 2018 +0300
@@ -569,7 +569,6 @@
__ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
__ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case); // initializes result and obj_end
- __ incr_allocated_bytes(obj_size, tmp2);
__ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
instanceOopDesc::header_size() * HeapWordSize, -1,
/* is_tlab_allocated */ false);
@@ -658,7 +657,6 @@
// eden_allocate destroys tmp2, so reload header_size after allocation
// eden_allocate initializes result and obj_end
__ eden_allocate(result, obj_end, tmp1, tmp2, arr_size, slow_case);
- __ incr_allocated_bytes(arr_size, tmp2);
__ ldrb(tmp2, Address(klass, in_bytes(Klass::layout_helper_offset()) +
Klass::_lh_header_size_shift / BitsPerByte));
__ initialize_object(result, obj_end, klass, length, tmp1, tmp2, tmp2, -1, /* is_tlab_allocated */ false);
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp Mon Sep 24 16:44:24 2018 +0300
@@ -24,6 +24,8 @@
#include "precompiled.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "runtime/thread.hpp"
#define __ masm->
@@ -166,3 +168,118 @@
Register obj1, Register obj2) {
__ cmp(obj1, obj2);
}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
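+// Clobbers `tmp1` and `tmp2`, and branches to `slow_case` if the allocation cannot be done inline.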
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
+ __ b(slow_case);
+ return;
+ }
+
+ CollectedHeap* ch = Universe::heap();
+
+ const Register top_addr = tmp1;
+ const Register heap_end = tmp2;
+
+ if (size_expression.is_register()) {
+ assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+ } else {
+ assert_different_registers(obj, obj_end, top_addr, heap_end);
+ }
+
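+  // Materialize the address of the heap top either as a patchable immediate or from a per-thread copy of the pointer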
+  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
+ if (load_const) {
+ __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+ } else {
+ __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+ }
+ // Calculate new heap_top by adding the size of the object
+ Label retry;
+ __ bind(retry);
+
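+  // Load the current heap top: exclusive load on AArch64 (paired with the stxr below), plain load on 32-bit ARM (the CAS below detects races)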
+#ifdef AARCH64
+ __ ldxr(obj, top_addr);
+#else
+ __ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
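+  // heap_end is loaded via its displacement from top_addr, so one base register covers both heap fields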
+ __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+ __ add_rc(obj_end, obj, size_expression);
+ // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+ __ cmp(obj_end, obj);
+ __ b(slow_case, lo);
+  // Take the slow path if the allocation does not fit below the current heap end
+  __ cmp(obj_end, heap_end);
+  __ b(slow_case, hi);
+
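+  // Publish the new heap top; if another thread beat us to it, retry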
+#ifdef AARCH64
+ __ stxr(heap_end/*scratched*/, obj_end, top_addr);
+ __ cbnz_w(heap_end, retry);
+#else
+ __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+ __ b(retry, ne);
+#endif // AARCH64
+
+ incr_allocated_bytes(masm, size_expression, tmp1);
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
+ RegisterOrConstant size_expression, Label& slow_case) {
+ const Register tlab_end = tmp1;
+ assert_different_registers(obj, obj_end, tlab_end);
+
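+  // The TLAB is thread-local, so plain loads and stores suffice; no atomic operations are needed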
+ __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+ __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+ __ add_rc(obj_end, obj, size_expression);
+ __ cmp(obj_end, tlab_end);
+ __ b(slow_case, hi);
+ __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+ __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+ __ add_rc(tmp, tmp, size_in_bytes);
+ __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+ // Bump total bytes allocated by this thread
+ Label done;
+
+  // Borrow Rthread as a base pointer to the allocated-bytes counter
+ Register Ralloc = Rthread;
+ __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+ __ ldr(tmp, Address(Ralloc));
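+  // adds sets the carry flag; if the low word does not overflow (carry clear), store it and skip the 64-bit update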
+ __ adds(tmp, tmp, size_in_bytes);
+ __ str(tmp, Address(Ralloc), cc);
+ __ b(done, cc);
+
+  // Increment the high word and store the whole counter single-copy atomically; this is an unlikely
+  // scenario on typical embedded systems, as it means more than 4GB has been allocated.
+  // The ldrd/strd instructions used here require an even-odd pair of registers. Such a pair could be
+  // difficult to reserve at a higher level, so the routine allocates one itself.
+ Register low, high;
+  // Select either R0/R1 or R2/R3, avoiding the register that holds size_in_bytes
+ if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+ low = R2;
+ high = R3;
+ } else {
+ low = R0;
+ high = R1;
+ }
+ __ push(RegisterSet(low, high));
+
+ __ ldrd(low, Address(Ralloc));
+ __ adds(low, low, size_in_bytes);
+ __ adc(high, high, 0);
+ __ strd(low, Address(Ralloc));
+
+ __ pop(RegisterSet(low, high));
+
+ __ bind(done);
+
+  // Unborrow Rthread by subtracting the offset added above
+ __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+#endif // AARCH64
+}
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp Mon Sep 24 16:44:24 2018 +0300
@@ -30,6 +30,12 @@
#include "oops/access.hpp"
class BarrierSetAssembler: public CHeapObj<mtGC> {
+private:
+ void incr_allocated_bytes(MacroAssembler* masm,
+ RegisterOrConstant size_in_bytes,
+                            Register tmp);
+
public:
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register addr, Register count, int callee_saved_regs) {}
@@ -44,6 +50,23 @@
virtual void obj_equals(MacroAssembler* masm,
Register obj1, Register obj2);
+ virtual void eden_allocate(MacroAssembler* masm,
+ Register obj, // result: pointer to object after successful allocation
+ Register obj_end, // result: pointer to end of object after successful allocation
+ Register tmp1, // temp register
+ Register tmp2, // temp register
+ RegisterOrConstant size_expression, // size of object
+ Label& slow_case // continuation point if fast allocation fails
+ );
+
+ virtual void tlab_allocate(MacroAssembler* masm,
+ Register obj, // result: pointer to object after successful allocation
+ Register obj_end, // result: pointer to end of object after successful allocation
+ Register tmp1, // temp register
+ RegisterOrConstant size_expression, // size of object
+ Label& slow_case // continuation point if fast allocation fails
+ );
+
virtual void barrier_stubs_init() {}
};
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp Mon Sep 24 16:44:24 2018 +0300
@@ -1256,68 +1256,15 @@
// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
RegisterOrConstant size_expression, Label& slow_case) {
- if (!Universe::heap()->supports_inline_contig_alloc()) {
- b(slow_case);
- return;
- }
-
- CollectedHeap* ch = Universe::heap();
-
- const Register top_addr = tmp1;
- const Register heap_end = tmp2;
-
- if (size_expression.is_register()) {
- assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
- } else {
- assert_different_registers(obj, obj_end, top_addr, heap_end);
- }
-
- bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
- if (load_const) {
- mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
- } else {
- ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
- }
- // Calculate new heap_top by adding the size of the object
- Label retry;
- bind(retry);
-
-#ifdef AARCH64
- ldxr(obj, top_addr);
-#else
- ldr(obj, Address(top_addr));
-#endif // AARCH64
-
- ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
- add_rc(obj_end, obj, size_expression);
- // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
- cmp(obj_end, obj);
- b(slow_case, lo);
- // Update heap_top if allocation succeeded
- cmp(obj_end, heap_end);
- b(slow_case, hi);
-
-#ifdef AARCH64
- stxr(heap_end/*scratched*/, obj_end, top_addr);
- cbnz_w(heap_end, retry);
-#else
- atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
- b(retry, ne);
-#endif // AARCH64
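+  // Delegate to the BarrierSetAssembler of the active GC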
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case);
}
// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
RegisterOrConstant size_expression, Label& slow_case) {
- const Register tlab_end = tmp1;
- assert_different_registers(obj, obj_end, tlab_end);
-
- ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
- ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
- add_rc(obj_end, obj, size_expression);
- cmp(obj_end, tlab_end);
- b(slow_case, hi);
- str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);
}
// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
@@ -1363,52 +1310,6 @@
#endif // AARCH64
}
-void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
-#ifdef AARCH64
- ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
- add_rc(tmp, tmp, size_in_bytes);
- str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-#else
- // Bump total bytes allocated by this thread
- Label done;
-
- // Borrow the Rthread for alloc counter
- Register Ralloc = Rthread;
- add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
- ldr(tmp, Address(Ralloc));
- adds(tmp, tmp, size_in_bytes);
- str(tmp, Address(Ralloc), cc);
- b(done, cc);
-
- // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
- // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by
- // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
- Register low, high;
- // Select ether R0/R1 or R2/R3
-
- if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
- low = R2;
- high = R3;
- } else {
- low = R0;
- high = R1;
- }
- push(RegisterSet(low, high));
-
- ldrd(low, Address(Ralloc));
- adds(low, low, size_in_bytes);
- adc(high, high, 0);
- strd(low, Address(Ralloc));
-
- pop(RegisterSet(low, high));
-
- bind(done);
-
- // Unborrow the Rthread
- sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-#endif // AARCH64
-}
-
void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
// Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
if (UseStackBanging) {
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp Mon Sep 24 16:44:24 2018 +0300
@@ -361,8 +361,6 @@
void zero_memory(Register start, Register end, Register tmp);
- void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp);
-
static bool needs_explicit_null_check(intptr_t offset);
void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp);
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp Mon Sep 24 16:39:02 2018 +0300
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp Mon Sep 24 16:44:24 2018 +0300
@@ -4502,12 +4502,7 @@
const Register Rtlab_end = R2_tmp;
assert_different_registers(Robj, Rsize, Rklass, Rtlab_top, Rtlab_end);
- __ ldr(Robj, Address(Rthread, JavaThread::tlab_top_offset()));
- __ ldr(Rtlab_end, Address(Rthread, in_bytes(JavaThread::tlab_end_offset())));
- __ add(Rtlab_top, Robj, Rsize);
- __ cmp(Rtlab_top, Rtlab_end);
- __ b(slow_case, hi);
- __ str(Rtlab_top, Address(Rthread, JavaThread::tlab_top_offset()));
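+    // tlab_allocate returns the new object in Robj and its end in Rtlab_top (Rtlab_end is a temp); branches to slow_case if the TLAB is exhausted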
+ __ tlab_allocate(Robj, Rtlab_top, Rtlab_end, Rsize, slow_case);
if (ZeroTLAB) {
// the fields have been already cleared
__ b(initialize_header);
@@ -4523,34 +4518,7 @@
const Register Rheap_end = Rtemp;
assert_different_registers(Robj, Rklass, Rsize, Rheap_top_addr, Rheap_top, Rheap_end, LR);
- // heap_end now (re)loaded in the loop since also used as a scratch register in the CAS
- __ ldr_literal(Rheap_top_addr, Lheap_top_addr);
-
- Label retry;
- __ bind(retry);
-
-#ifdef AARCH64
- __ ldxr(Robj, Rheap_top_addr);
-#else
- __ ldr(Robj, Address(Rheap_top_addr));
-#endif // AARCH64
-
- __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr()));
- __ add(Rheap_top, Robj, Rsize);
- __ cmp(Rheap_top, Rheap_end);
- __ b(slow_case, hi);
-
- // Update heap top atomically.
- // If someone beats us on the allocation, try again, otherwise continue.
-#ifdef AARCH64
- __ stxr(Rtemp2, Rheap_top, Rheap_top_addr);
- __ cbnz_w(Rtemp2, retry);
-#else
- __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/);
- __ b(retry, ne);
-#endif // AARCH64
-
- __ incr_allocated_bytes(Rsize, Rtemp);
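+    // eden_allocate implements the atomic retry loop and the allocated-bytes accounting; Rheap_top_addr and Rheap_end serve as temps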
+ __ eden_allocate(Robj, Rheap_top, Rheap_top_addr, Rheap_end, Rsize, slow_case);
}
}