src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp
changeset 51846 cc1a4a267798
parent 51845 f5daffd7ec7a
child 52351 0ecb4e520110
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "runtime/thread.hpp"

#define __ masm->

void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
[...]

void BarrierSetAssembler::obj_equals(MacroAssembler* masm,
                                     Register obj1, Register obj2) {
  __ cmp(obj1, obj2);
}
       
// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
                                 RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    __ b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Allocation retry loop: compute the new heap top by adding the object size
  Label retry;
  __ bind(retry);

#ifdef AARCH64
  __ ldxr(obj, top_addr);
#else
  __ ldr(obj, Address(top_addr));
#endif // AARCH64

  __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  __ add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  __ cmp(obj_end, obj);
  __ b(slow_case, lo);
  // If the new top is still within the heap, publish it below as the new heap top
  __ cmp(obj_end, heap_end);
  __ b(slow_case, hi);

#ifdef AARCH64
  __ stxr(heap_end/*scratched*/, obj_end, top_addr);
  __ cbnz_w(heap_end, retry);
#else
  __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  __ b(retry, ne);
#endif // AARCH64

  incr_allocated_bytes(masm, size_expression, tmp1);
}
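
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this changeset. The eden fast path above
// is lock-free bump-pointer allocation: load the shared heap top, compute
// the new top, and publish it with ldxr/stxr (AArch64) or atomic_cas_bool
// (ARM32), retrying if another thread raced ahead. Below is a minimal C++
// analogue of that retry loop; the atomic top word, heap_end and slow_path
// callback are assumptions of the sketch, not HotSpot API.

#include <atomic>
#include <cstddef>
#include <cstdint>

uintptr_t eden_allocate_sketch(std::atomic<uintptr_t>& top, uintptr_t heap_end,
                               size_t size, uintptr_t (*slow_path)(size_t)) {
  uintptr_t obj = top.load(std::memory_order_relaxed);
  for (;;) {
    uintptr_t obj_end = obj + size;
    // Wrap-around or exhausted eden: take the slow path, mirroring the
    // "b slow_case, lo" and "b slow_case, hi" branches above.
    if (obj_end < obj || obj_end > heap_end) {
      return slow_path(size);
    }
    // compare_exchange_weak stands in for ldxr/stxr and atomic_cas_bool;
    // on failure it reloads 'obj' and the loop retries, like "b retry, ne".
    if (top.compare_exchange_weak(obj, obj_end, std::memory_order_relaxed)) {
      return obj;
    }
  }
}
// ---------------------------------------------------------------------------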
       
// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
                                 RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  __ add_rc(obj_end, obj, size_expression);
  __ cmp(obj_end, tlab_end);
  __ b(slow_case, hi);
  __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}
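
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this changeset. tlab_allocate needs no
// atomics: the TLAB is private to the allocating thread, so a plain load,
// bounds check and store suffice. A rough C++ equivalent; the Tlab struct
// and slow_path callback are invented for illustration.

#include <cstddef>
#include <cstdint>

struct Tlab {       // hypothetical stand-in for the thread's TLAB fields
  uintptr_t top;    // cf. JavaThread::tlab_top_offset()
  uintptr_t end;    // cf. JavaThread::tlab_end_offset()
};

uintptr_t tlab_allocate_sketch(Tlab& tlab, size_t size,
                               uintptr_t (*slow_path)(size_t)) {
  uintptr_t obj = tlab.top;
  uintptr_t obj_end = obj + size;
  if (obj_end > tlab.end) {   // "cmp obj_end, tlab_end; b slow_case, hi"
    return slow_path(size);
  }
  tlab.top = obj_end;         // "str obj_end, [Rthread, #tlab_top_offset]"
  return obj;
}
// ---------------------------------------------------------------------------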
       
void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  __ add_rc(tmp, tmp, size_in_bytes);
  __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump the total bytes allocated by this thread
  Label done;

  // Borrow Rthread as the base register for the allocation counter
  Register Ralloc = Rthread;
  __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
  __ ldr(tmp, Address(Ralloc));
  __ adds(tmp, tmp, size_in_bytes);
  __ str(tmp, Address(Ralloc), cc);
  __ b(done, cc);

  // The low word overflowed, so increment the high word and store the counter
  // single-copy atomically. This is an unlikely scenario on typical embedded
  // systems, as it means more than 4GB has been allocated by this thread.
  // The ldrd/strd instructions used for the store require an even-odd pair of
  // registers; such a pair can be hard to reserve at a higher level, so this
  // routine picks one itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low  = R2;
    high = R3;
  } else {
    low  = R0;
    high = R1;
  }
  __ push(RegisterSet(low, high));

  __ ldrd(low, Address(Ralloc));
  __ adds(low, low, size_in_bytes);
  __ adc(high, high, 0);
  __ strd(low, Address(Ralloc));

  __ pop(RegisterSet(low, high));

  __ bind(done);

  // Unborrow Rthread
  __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
#endif // AARCH64
}
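
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this changeset. On 32-bit ARM the 64-bit
// per-thread counter is kept as two 32-bit words: the fast path above adds
// to the low word and stores it only when 'adds' produced no carry; on a
// carry-out it falls back to the ldrd/strd pair so a reader never sees a
// torn 64-bit value. The carry logic in plain C++, with the word pair passed
// in by reference purely for illustration:

#include <cstdint>

void incr_allocated_bytes_sketch(uint32_t& low, uint32_t& high, uint32_t size) {
  uint32_t old_low = low;
  low += size;              // "adds tmp, tmp, size_in_bytes"
  if (low < old_low) {      // carry out of the low word
    high += 1;              // "adc high, high, 0"
  }
}
// ---------------------------------------------------------------------------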