hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
changeset 360 21d113ecbf6a
parent 1 489c9b5090e2
child 371 1aacedc9db7c
equal deleted inserted replaced
357:f4edb0d9f109 360:21d113ecbf6a
  1777     delayed()->nop();
  1777     delayed()->nop();
  1778   }
  1778   }
  1779 
  1779 
  1780   // Check the klassOop of this object for being in the right area of memory.
  1780   // Check the klassOop of this object for being in the right area of memory.
  1781   // Cannot do the load in the delay slot above in case O0 is null
  1781   // Cannot do the load in the delay slot above in case O0 is null
  1782   ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj);
  1782   load_klass(O0_obj, O0_obj);
  1783   // assert((klass & klass_mask) == klass_bits);
  1783   // assert((klass & klass_mask) == klass_bits);
  1784   if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
  1784   if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
  1785     set(Universe::verify_klass_mask(), O2_mask);
  1785     set(Universe::verify_klass_mask(), O2_mask);
  1786   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
  1786   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
  1787     set(Universe::verify_klass_bits(), O3_bits);
  1787     set(Universe::verify_klass_bits(), O3_bits);
  1788   and3(O0_obj, O2_mask, O4_temp);
  1788   and3(O0_obj, O2_mask, O4_temp);
  1789   cmp(O4_temp, O3_bits);
  1789   cmp(O4_temp, O3_bits);
  1790   brx(notEqual, false, pn, fail);
  1790   brx(notEqual, false, pn, fail);
       
  1791   delayed()->nop();
  1791   // Check the klass's klass
  1792   // Check the klass's klass
  1792   delayed()->ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj);
  1793   load_klass(O0_obj, O0_obj);
  1793   and3(O0_obj, O2_mask, O4_temp);
  1794   and3(O0_obj, O2_mask, O4_temp);
  1794   cmp(O4_temp, O3_bits);
  1795   cmp(O4_temp, O3_bits);
  1795   brx(notEqual, false, pn, fail);
  1796   brx(notEqual, false, pn, fail);
  1796   delayed()->wrccr( O5_save_flags ); // Restore CCR's
  1797   delayed()->wrccr( O5_save_flags ); // Restore CCR's
  1797 
  1798 
  2586   // pointers to allow age to be placed into low bits
  2587   // pointers to allow age to be placed into low bits
  2587   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  2588   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  2588   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
  2589   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
  2589   cmp(temp_reg, markOopDesc::biased_lock_pattern);
  2590   cmp(temp_reg, markOopDesc::biased_lock_pattern);
  2590   brx(Assembler::notEqual, false, Assembler::pn, cas_label);
  2591   brx(Assembler::notEqual, false, Assembler::pn, cas_label);
  2591 
  2592   delayed()->nop();
  2592   delayed()->ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
  2593 
       
  2594   load_klass(obj_reg, temp_reg);
  2593   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2595   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2594   or3(G2_thread, temp_reg, temp_reg);
  2596   or3(G2_thread, temp_reg, temp_reg);
  2595   xor3(mark_reg, temp_reg, temp_reg);
  2597   xor3(mark_reg, temp_reg, temp_reg);
  2596   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
  2598   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
  2597   if (counters != NULL) {
  2599   if (counters != NULL) {
  2666   // bias in the current epoch. In other words, we allow transfer of
  2668   // bias in the current epoch. In other words, we allow transfer of
  2667   // the bias from one thread to another directly in this situation.
  2669   // the bias from one thread to another directly in this situation.
  2668   //
  2670   //
  2669   // FIXME: due to a lack of registers we currently blow away the age
  2671   // FIXME: due to a lack of registers we currently blow away the age
  2670   // bits in this situation. Should attempt to preserve them.
  2672   // bits in this situation. Should attempt to preserve them.
  2671   ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
  2673   load_klass(obj_reg, temp_reg);
  2672   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2674   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2673   or3(G2_thread, temp_reg, temp_reg);
  2675   or3(G2_thread, temp_reg, temp_reg);
  2674   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
  2676   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
  2675                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  2677                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  2676   // If the biasing toward our thread failed, this means that
  2678   // If the biasing toward our thread failed, this means that
  2698   // bias of this particular object, so it's okay to continue in the
  2700   // bias of this particular object, so it's okay to continue in the
  2699   // normal locking code.
  2701   // normal locking code.
  2700   //
  2702   //
  2701   // FIXME: due to a lack of registers we currently blow away the age
  2703   // FIXME: due to a lack of registers we currently blow away the age
  2702   // bits in this situation. Should attempt to preserve them.
  2704   // bits in this situation. Should attempt to preserve them.
  2703   ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
  2705   load_klass(obj_reg, temp_reg);
  2704   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2706   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
  2705   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
  2707   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
  2706                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  2708                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  2707   // Fall through to the normal CAS-based lock, because no matter what
  2709   // Fall through to the normal CAS-based lock, because no matter what
  2708   // the result of the above CAS, some thread must have succeeded in
  2710   // the result of the above CAS, some thread must have succeeded in
  3404   set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
  3406   set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
  3405   st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
  3407   st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
  3406   // set klass to intArrayKlass
  3408   // set klass to intArrayKlass
  3407   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
  3409   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
  3408   ld_ptr(t2, 0, t2);
  3410   ld_ptr(t2, 0, t2);
  3409   st_ptr(t2, top, oopDesc::klass_offset_in_bytes());
  3411   store_klass(t2, top);
  3410   sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
  3412   sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
  3411   add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
  3413   add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
  3412   sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
  3414   sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
  3413   st(t1, top, arrayOopDesc::length_offset_in_bytes());
  3415   st(t1, top, arrayOopDesc::length_offset_in_bytes());
  3414   verify_oop(top);
  3416   verify_oop(top);
  3532   for (int i = 0; i< StackShadowPages-1; i++) {
  3534   for (int i = 0; i< StackShadowPages-1; i++) {
  3533     set((-i*offset)+STACK_BIAS, Rscratch);
  3535     set((-i*offset)+STACK_BIAS, Rscratch);
  3534     st(G0, Rtsp, Rscratch);
  3536     st(G0, Rtsp, Rscratch);
  3535   }
  3537   }
  3536 }
  3538 }
       
  3539 
       
  3540 void MacroAssembler::load_klass(Register s, Register d) {
       
  3541   // The number of bytes in this code is used by
       
  3542   // MachCallDynamicJavaNode::ret_addr_offset()
       
  3543   // if this changes, change that.
       
  3544   if (UseCompressedOops) {
       
  3545     lduw(s, oopDesc::klass_offset_in_bytes(), d);
       
  3546     decode_heap_oop_not_null(d);
       
  3547   } else {
       
  3548     ld_ptr(s, oopDesc::klass_offset_in_bytes(), d);
       
  3549   }
       
  3550 }
       
  3551 
       
  3552 // ??? figure out src vs. dst!
       
  3553 void MacroAssembler::store_klass(Register d, Register s1) {
       
  3554   if (UseCompressedOops) {
       
  3555     assert(s1 != d, "not enough registers");
       
  3556     encode_heap_oop_not_null(d);
       
  3557     // Zero out entire klass field first.
       
  3558     st_ptr(G0, s1, oopDesc::klass_offset_in_bytes());
       
  3559     st(d, s1, oopDesc::klass_offset_in_bytes());
       
  3560   } else {
       
  3561     st_ptr(d, s1, oopDesc::klass_offset_in_bytes());
       
  3562   }
       
  3563 }
       
  3564 
       
  3565 void MacroAssembler::load_heap_oop(const Address& s, Register d, int offset) {
       
  3566   if (UseCompressedOops) {
       
  3567     lduw(s, d, offset);
       
  3568     decode_heap_oop(d);
       
  3569   } else {
       
  3570     ld_ptr(s, d, offset);
       
  3571   }
       
  3572 }
       
  3573 
       
  3574 void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
       
  3575    if (UseCompressedOops) {
       
  3576     lduw(s1, s2, d);
       
  3577     decode_heap_oop(d, d);
       
  3578   } else {
       
  3579     ld_ptr(s1, s2, d);
       
  3580   }
       
  3581 }
       
  3582 
       
  3583 void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
       
  3584    if (UseCompressedOops) {
       
  3585     lduw(s1, simm13a, d);
       
  3586     decode_heap_oop(d, d);
       
  3587   } else {
       
  3588     ld_ptr(s1, simm13a, d);
       
  3589   }
       
  3590 }
       
  3591 
       
  3592 void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
       
  3593   if (UseCompressedOops) {
       
  3594     assert(s1 != d && s2 != d, "not enough registers");
       
  3595     encode_heap_oop(d);
       
  3596     st(d, s1, s2);
       
  3597   } else {
       
  3598     st_ptr(d, s1, s2);
       
  3599   }
       
  3600 }
       
  3601 
       
  3602 void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
       
  3603   if (UseCompressedOops) {
       
  3604     assert(s1 != d, "not enough registers");
       
  3605     encode_heap_oop(d);
       
  3606     st(d, s1, simm13a);
       
  3607   } else {
       
  3608     st_ptr(d, s1, simm13a);
       
  3609   }
       
  3610 }
       
  3611 
       
  3612 void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
       
  3613   if (UseCompressedOops) {
       
  3614     assert(a.base() != d, "not enough registers");
       
  3615     encode_heap_oop(d);
       
  3616     st(d, a, offset);
       
  3617   } else {
       
  3618     st_ptr(d, a, offset);
       
  3619   }
       
  3620 }
       
  3621 
       
  3622 
       
  3623 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
         // Emits code that compresses the oop in src into dst: G6_heapbase is
         // subtracted and the result is shifted right (srlx) by
         // LogMinObjAlignmentInBytes. A zero src is not rebased and ends up as a
         // zero dst on both paths below. Must only be used with UseCompressedOops.
       
  3624   assert (UseCompressedOops, "must be compressed");
       
  3625   Label done;
       
  3626   if (src == dst) {
       
  3627     // optimize for frequent case src == dst
       
  3628     bpr(rc_nz, true, Assembler::pt, src, done);
       
  3629     delayed() -> sub(src, G6_heapbase, dst); // annulled if not taken
       
  3630     bind(done);
           // Reached for zero and non-zero src alike; the shift is applied in
           // either case (src and dst are the same register here).
       
  3631     srlx(src, LogMinObjAlignmentInBytes, dst);
       
  3632   } else {
           // Zero src: branch straight to done with dst set from G0 in the delay
           // slot. NOTE(review): the annul argument is false here, so the mov
           // presumably also runs on the fall-through path, where the sub below
           // then overwrites dst -- confirm against bpr's signature.
       
  3633     bpr(rc_z, false, Assembler::pn, src, done);
       
  3634     delayed() -> mov(G0, dst);
       
  3635     // could be moved before the branch, annulling the delay slot,
       
  3636     // but may add some unneeded work decoding null
       
  3637     sub(src, G6_heapbase, dst);
       
  3638     srlx(dst, LogMinObjAlignmentInBytes, dst);
       
  3639     bind(done);
       
  3640   }
       
  3641 }
       
  3642 
       
  3643 
       
  3644 void MacroAssembler::encode_heap_oop_not_null(Register r) {
         // Emits an unconditional, in-place encode of r: subtract G6_heapbase,
         // then srlx by LogMinObjAlignmentInBytes. No zero check is emitted, so
         // (as the name says) the caller is expected to pass a non-null oop.
       
  3645   assert (UseCompressedOops, "must be compressed");
       
  3646   sub(r, G6_heapbase, r);
       
  3647   srlx(r, LogMinObjAlignmentInBytes, r);
       
  3648 }
       
  3649 
       
  3650 // Same algorithm as oops.inline.hpp decode_heap_oop.
       // Emits code that expands the compressed oop in src into dst: src is
       // shifted left (sllx) by LogMinObjAlignmentInBytes, and G6_heapbase is
       // added only when the shifted value is non-zero, so a zero input stays zero.
       
  3651 void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
       
  3652   assert (UseCompressedOops, "must be compressed");
       
  3653   Label done;
       
  3654   sllx(src, LogMinObjAlignmentInBytes, dst);
         // The add sits in the delay slot of a branch to the very next
         // instruction; it executes only when the branch (dst != 0) is taken.
       
  3655   bpr(rc_nz, true, Assembler::pt, dst, done);
       
  3656   delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
       
  3657   bind(done);
       
  3658 }
       
  3659 
       
  3660 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
         // Emits an unconditional, in-place decode of r: sllx by
         // LogMinObjAlignmentInBytes, then add G6_heapbase. No zero check is
         // emitted, so the caller is expected to pass a non-null compressed oop.
       
  3661   // Do not add assert code to this unless you change vtableStubs_sparc.cpp
       
  3662   // pd_code_size_limit.
       
  3663   assert (UseCompressedOops, "must be compressed");
       
  3664   sllx(r, LogMinObjAlignmentInBytes, r);
       
  3665   add(r, G6_heapbase, r);
       
  3666 }
       
  3667 
       
  3668 void MacroAssembler::reinit_heapbase() {
       
  3669   if (UseCompressedOops) {
       
  3670     // call indirectly to solve generation ordering problem
       
  3671     Address base(G6_heapbase, (address)Universe::heap_base_addr());
       
  3672     load_ptr_contents(base, G6_heapbase);
       
  3673   }
       
  3674 }