src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
changeset 57716 bfcdcd00e4fb
parent 57542 db740ced41c4
child 57748 9bddbd69351c
equal deleted inserted replaced
57715:6390e7056d12 57716:bfcdcd00e4fb
   750     __ bind(res_non_zero);
   750     __ bind(res_non_zero);
   751 #endif
   751 #endif
   752   }
   752   }
   753 }
   753 }
   754 
   754 
   755 void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
       
         // Emits code that spills the vector registers to the stack below rsp.
         // restore_vector_registers() emits the matching reload sequence and must
         // stay in sync with the layout produced here.
         //
         // Blocks are pushed in the order: upper halves of ZMM (32 bytes each),
         // upper halves of YMM (16 bytes each), then the 128-bit XMM part
         // (16 bytes each). The XMM block therefore ends up at the lowest address.
         //
         // NOTE(review): the definition of the `__` emit macro is outside this
         // excerpt; presumably it expands to `masm->` -- confirm.
         //
         // Register count: 16 on 64-bit builds and 8 on 32-bit builds, raised to
         // 32 on 64-bit builds when UseAVX > 2.
   756   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
       
   757   if (UseAVX > 2) {
       
   758     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
       
   759   }
       
   760 
       
   761   if (UseSSE == 1)  {
       
           // SSE1 only: reserve eight jdouble-sized slots and store xmm0..xmm7
           // into them with movflt.
   762     __ subptr(rsp, sizeof(jdouble)*8);
       
   763     for (int n = 0; n < 8; n++) {
       
   764       __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
       
   765     }
       
   766   } else if (UseSSE >= 2)  {
       
   767     if (UseAVX > 2) {
       
             // Load 0xffff into k1 by way of rbx; rbx itself is preserved with
             // push/pop. NOTE(review): presumably k1 has to be an all-ones mask
             // for the EVEX-encoded moves emitted below -- confirm.
   768       __ push(rbx);
       
   769       __ movl(rbx, 0xffff);
       
   770       __ kmovwl(k1, rbx);
       
   771       __ pop(rbx);
       
   772     }
       
   773 #ifdef COMPILER2
       
           // The upper register halves are only saved in COMPILER2 builds and
           // only when MaxVectorSize exceeds 16 bytes.
   774     if (MaxVectorSize > 16) {
       
   775       if(UseAVX > 2) {
       
   776         // Save upper half of ZMM registers
       
   777         __ subptr(rsp, 32*num_xmm_regs);
       
   778         for (int n = 0; n < num_xmm_regs; n++) {
       
   779           __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
       
   780         }
       
   781       }
       
   782       assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
       
   783       // Save upper half of YMM registers
       
   784       __ subptr(rsp, 16*num_xmm_regs);
       
   785       for (int n = 0; n < num_xmm_regs; n++) {
       
   786         __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
       
   787       }
       
   788     }
       
   789 #endif
       
   790     // Save whole 128bit (16 bytes) XMM registers
       
   791     __ subptr(rsp, 16*num_xmm_regs);
       
   792 #ifdef _LP64
       
           // On 64-bit builds with EVEX support the low 128 bits are stored via
           // vextractf32x4 with index 0; otherwise a plain movdqu is used.
   793     if (VM_Version::supports_evex()) {
       
   794       for (int n = 0; n < num_xmm_regs; n++) {
       
   795         __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
       
   796       }
       
   797     } else {
       
   798       for (int n = 0; n < num_xmm_regs; n++) {
       
   799         __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
       
   800       }
       
   801     }
       
   802 #else
       
   803     for (int n = 0; n < num_xmm_regs; n++) {
       
   804       __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
       
   805     }
       
   806 #endif
       
   807   }
       
   808 }
       
   809 
       
   810 void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
       
         // Emits code that reloads the vector registers spilled by
         // save_vector_registers() and releases the stack space again.
         //
         // The saved blocks are consumed from the lowest address upward: the
         // 128-bit XMM part first, then the upper halves of YMM, then the upper
         // halves of ZMM. rsp is advanced past each block once it has been read.
         //
         // num_xmm_regs is computed with the same expression as in
         // save_vector_registers(); the block sizes only match if both agree.
   811   int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
       
   812   if (UseAVX > 2) {
       
   813     num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
       
   814   }
       
   815   if (UseSSE == 1)  {
       
           // SSE1 only: reload xmm0..xmm7 from eight jdouble-sized slots with
           // movflt, then pop the slots.
   816     for (int n = 0; n < 8; n++) {
       
   817       __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
       
   818     }
       
   819     __ addptr(rsp, sizeof(jdouble)*8);
       
   820   } else if (UseSSE >= 2)  {
       
   821     // Restore whole 128bit (16 bytes) XMM registers
       
   822 #ifdef _LP64
       
           // On 64-bit builds with EVEX support the low 128 bits are reloaded via
           // vinsertf32x4 with index 0; otherwise a plain movdqu is used.
   823     if (VM_Version::supports_evex()) {
       
   824       for (int n = 0; n < num_xmm_regs; n++) {
       
   825         __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
       
   826       }
       
   827     } else {
       
   828       for (int n = 0; n < num_xmm_regs; n++) {
       
   829         __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
       
   830       }
       
   831     }
       
   832 #else
       
   833     for (int n = 0; n < num_xmm_regs; n++) {
       
   834       __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
       
   835     }
       
   836 #endif
       
           // Pop the XMM block.
   837     __ addptr(rsp, 16*num_xmm_regs);
       
   838 
       
   839 #ifdef COMPILER2
       
           // Same conditions as on the save side: the upper halves exist on the
           // stack only in COMPILER2 builds with MaxVectorSize > 16.
   840     if (MaxVectorSize > 16) {
       
   841       // Restore upper half of YMM registers.
       
   842       for (int n = 0; n < num_xmm_regs; n++) {
       
   843         __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
       
   844       }
       
   845       __ addptr(rsp, 16*num_xmm_regs);
       
   846       if (UseAVX > 2) {
       
               // Restore upper half of ZMM registers (32 bytes per register).
   847         for (int n = 0; n < num_xmm_regs; n++) {
       
   848           __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
       
   849         }
       
   850         __ addptr(rsp, 32*num_xmm_regs);
       
   851       }
       
   852     }
       
   853 #endif
       
   854   }
       
   855 }
       
   856 
       
   857 #undef __
   755 #undef __
   858 
   756 
   859 #ifdef COMPILER1
   757 #ifdef COMPILER1
   860 
   758 
   861 #define __ ce->masm()->
   759 #define __ ce->masm()->
   883   __ jmp(*stub->continuation());
   781   __ jmp(*stub->continuation());
   884 
   782 
   885 }
   783 }
   886 
   784 
   887 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
   785 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
       
         // Emits the out-of-line C1 code for a load-reference-barrier stub.
         //
         // NOTE(review): this listing interleaves two revisions of the body. Lines
         // numbered 887..906 appear to be the earlier revision and lines numbered
         // 785..834 the later one -- confirm against the changeset.
         //
         // Earlier revision: bind the stub entry, copy obj into res if they are
         // different registers, skip to `done` on null, otherwise delegate to
         // load_reference_barrier_not_null(ce->masm(), res), then jump to the
         // stub's continuation.
         //
         // Later revision, in emission order:
         //   1. bind the stub entry; res is asserted to be rax;
         //   2. copy obj into res if they are different registers;
         //   3. null -> jump straight to the continuation;
         //   4. index the table at ShenandoahHeap::in_cset_fast_test_addr() with
         //      (res >> region_size_bytes_shift); a zero byte -> continuation;
         //   5. load the mark word; if both low bits (markOopDesc::marked_value)
         //      are set, strip them and use the result as the new res, then jump
         //      to the continuation;
         //   6. otherwise (slow_path) pass res as parameter 0 and call the code
         //      blob returned by bs->load_reference_barrier_rt_code_blob(), then
         //      jump to the continuation.
         // tmp1 and tmp2 are scratch registers supplied by the stub.
   786   ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
   888   __ bind(*stub->entry());
   787   __ bind(*stub->entry());
   889 
   788 
   890   Label done;
       
   891   Register obj = stub->obj()->as_register();
   789   Register obj = stub->obj()->as_register();
   892   Register res = stub->result()->as_register();
   790   Register res = stub->result()->as_register();
       
   791   Register tmp1 = stub->tmp1()->as_register();
       
   792   Register tmp2 = stub->tmp2()->as_register();
       
   793 
       
   794   Label slow_path;
       
   795 
       
   796   assert(res == rax, "result must arrive in rax");
   893 
   797 
   894   if (res != obj) {
   798   if (res != obj) {
   895     __ mov(res, obj);
   799     __ mov(res, obj);
   896   }
   800   }
   897 
   801 
   898   // Check for null.
   802   // Check for null.
   899   __ testptr(res, res);
   803   __ testptr(res, res);
   900   __ jcc(Assembler::zero, done);
   804   __ jcc(Assembler::zero, *stub->continuation());
   901 
   805 
   902   load_reference_barrier_not_null(ce->masm(), res);
   806   // Check for object being in the collection set.
   903 
   807   __ mov(tmp1, res);
   904   __ bind(done);
   808   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
       
   809   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
       
   810   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
       
   811   __ testbool(tmp2);
       
   812   __ jcc(Assembler::zero, *stub->continuation());
       
   813 
       
   814   // Test if object is resolved.
       
   815   __ movptr(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
       
   816   // Test if both lowest bits are set. We trick it by negating the bits
       
   817   // then test for both bits clear.
       
   818   __ notptr(tmp1);
       
   819   __ testb(tmp1, markOopDesc::marked_value);
       
   820   __ jccb(Assembler::notZero, slow_path);
       
   821   // Clear both lower bits. It's still inverted, so set them, and then invert back.
       
   822   __ orptr(tmp1, markOopDesc::marked_value);
       
   823   __ notptr(tmp1);
       
   824   // At this point, tmp1 contains the decoded forwarding pointer.
       
   825   __ mov(res, tmp1);
       
   826 
       
   827   __ jmp(*stub->continuation());
       
   828 
       
   829   __ bind(slow_path);
       
         // Slow path: hand res to the runtime code blob through parameter slot 0.
   830   ce->store_parameter(res, 0);
       
   831   __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
       
   832 
   905   __ jmp(*stub->continuation());
   833   __ jmp(*stub->continuation());
   906 }
   834 }
   907 
   835 
   908 #undef __
   836 #undef __
   909 
   837 
   963   __ pop(rax);
   891   __ pop(rax);
   964 
   892 
   965   __ epilogue();
   893   __ epilogue();
   966 }
   894 }
   967 
   895 
       
   896 void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
       
         // Emits the C1 runtime stub named "shenandoah_load_reference_barrier".
         // The stub receives the object in parameter slot 0, calls
         // ShenandoahRuntime::load_reference_barrier as a VM leaf call, and
         // returns.
         //
         // NOTE(review): the definition of the `__` emit macro for this section
         // is outside this excerpt; presumably it expands to `sasm->` -- confirm.
   897   __ prologue("shenandoah_load_reference_barrier", false);
       
   898   // arg0 : object to be resolved
       
   899 
       
         // Save the live registers around the call, then move parameter 0 into the
         // first C argument register.
   900   __ save_live_registers_no_oop_map(true);
       
   901   __ load_parameter(0, c_rarg0);
       
   902   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0);
       
         // Everything except rax is restored. NOTE(review): presumably rax is left
         // alone because it carries the call's result back to the caller -- confirm.
   903   __ restore_live_registers_except_rax(true);
       
   904 
       
   905   __ epilogue();
       
   906 }
       
   907 
   968 #undef __
   908 #undef __
   969 
   909 
   970 #endif // COMPILER1
   910 #endif // COMPILER1
   971 
   911 
   972 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
   912 address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
   979 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
   919 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
   980   __ align(CodeEntryAlignment);
   920   __ align(CodeEntryAlignment);
   981   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
   921   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
   982   address start = __ pc();
   922   address start = __ pc();
   983 
   923 
   984   Label resolve_oop, slow_path;
   924   Label resolve_oop, slow_path, done;
   985 
   925 
   986   // We use RDI, which also serves as argument register for slow call.
   926   // We use RDI, which also serves as argument register for slow call.
   987   // RAX always holds the src object ptr, except after the slow call and
   927   // RAX always holds the src object ptr, except after the slow call,
   988   // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.
   928   // then it holds the result. R8/RBX is used as temporary register.
   989 
   929 
   990   Register tmp1 = rdi;
   930   Register tmp1 = rdi;
   991   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
   931   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
   992 
   932 
   993   __ push(tmp1);
   933   __ push(tmp1);
   994   __ push(tmp2);
   934   __ push(tmp2);
   995 
   935 
   996   // Check for object being in the collection set.
   936   // Check for object being in the collection set.
   997   // TODO: Can we use only 1 register here?
       
   998   // The source object arrives here in rax.
       
   999   // live: rax
       
  1000   // live: tmp1
       
  1001   __ mov(tmp1, rax);
   937   __ mov(tmp1, rax);
  1002   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
   938   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
  1003   // live: tmp2
       
  1004   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
   939   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  1005   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
   940   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  1006   // unlive: tmp1
       
  1007   __ testbool(tmp2);
   941   __ testbool(tmp2);
  1008   // unlive: tmp2
       
  1009   __ jccb(Assembler::notZero, resolve_oop);
   942   __ jccb(Assembler::notZero, resolve_oop);
  1010 
       
  1011   __ pop(tmp2);
   943   __ pop(tmp2);
  1012   __ pop(tmp1);
   944   __ pop(tmp1);
  1013   __ ret(0);
   945   __ ret(0);
  1014 
   946 
       
   947   // Test if object is already resolved.
  1015   __ bind(resolve_oop);
   948   __ bind(resolve_oop);
  1016 
       
  1017   __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
   949   __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
  1018   // Test if both lowest bits are set. We trick it by negating the bits
   950   // Test if both lowest bits are set. We trick it by negating the bits
  1019   // then test for both bits clear.
   951   // then test for both bits clear.
  1020   __ notptr(tmp2);
   952   __ notptr(tmp2);
  1021   __ testb(tmp2, markOopDesc::marked_value);
   953   __ testb(tmp2, markOopDesc::marked_value);
  1024   __ orptr(tmp2, markOopDesc::marked_value);
   956   __ orptr(tmp2, markOopDesc::marked_value);
  1025   __ notptr(tmp2);
   957   __ notptr(tmp2);
  1026   // At this point, tmp2 contains the decoded forwarding pointer.
   958   // At this point, tmp2 contains the decoded forwarding pointer.
  1027   __ mov(rax, tmp2);
   959   __ mov(rax, tmp2);
  1028 
   960 
       
   961   __ bind(done);
  1029   __ pop(tmp2);
   962   __ pop(tmp2);
  1030   __ pop(tmp1);
   963   __ pop(tmp1);
  1031   __ ret(0);
   964   __ ret(0);
  1032 
   965 
  1033   __ bind(slow_path);
   966   __ bind(slow_path);
  1045   __ push(r13);
   978   __ push(r13);
  1046   __ push(r14);
   979   __ push(r14);
  1047   __ push(r15);
   980   __ push(r15);
  1048 #endif
   981 #endif
  1049 
   982 
  1050   save_vector_registers(cgen->assembler());
   983   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
  1051   __ movptr(rdi, rax);
       
  1052   __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rdi);
       
  1053   restore_vector_registers(cgen->assembler());
       
  1054 
   984 
  1055 #ifdef _LP64
   985 #ifdef _LP64
  1056   __ pop(r15);
   986   __ pop(r15);
  1057   __ pop(r14);
   987   __ pop(r14);
  1058   __ pop(r13);
   988   __ pop(r13);