```diff
@@ -750,59 +750,5 @@
     __ bind(res_non_zero);
 #endif
   }
 }
 
-void ShenandoahBarrierSetAssembler::save_vector_registers(MacroAssembler* masm) {
-  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
-  if (UseAVX > 2) {
-    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
-  }
-
-  if (UseSSE == 1) {
-    __ subptr(rsp, sizeof(jdouble)*8);
-    for (int n = 0; n < 8; n++) {
-      __ movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
-    }
-  } else if (UseSSE >= 2) {
-    if (UseAVX > 2) {
-      __ push(rbx);
-      __ movl(rbx, 0xffff);
-      __ kmovwl(k1, rbx);
-      __ pop(rbx);
-    }
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      if(UseAVX > 2) {
-        // Save upper half of ZMM registers
-        __ subptr(rsp, 32*num_xmm_regs);
-        for (int n = 0; n < num_xmm_regs; n++) {
-          __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
-        }
-      }
-      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
-      // Save upper half of YMM registers
-      __ subptr(rsp, 16*num_xmm_regs);
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#endif
-    // Save whole 128bit (16 bytes) XMM registers
-    __ subptr(rsp, 16*num_xmm_regs);
-#ifdef _LP64
-    if (VM_Version::supports_evex()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movdqu(Address(rsp, n*16), as_XMMRegister(n));
-    }
-#endif
-  }
-}
```
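The removed save path reserves stack space in up to three tiers: ZMM upper halves, then YMM upper halves, then the full 128-bit XMM registers. A minimal sketch of that arithmetic for the largest configuration (LP64 with UseAVX > 2); the program is illustrative, not part of the patch:

```cpp
#include <cstdio>

int main() {
  // Mirrors the constants in the removed code for LP64 with UseAVX > 2.
  const int num_xmm_regs = 32;
  const int zmm_high = 32 * num_xmm_regs;  // upper 256 bits of each ZMM
  const int ymm_high = 16 * num_xmm_regs;  // upper 128 bits of each YMM
  const int xmm_full = 16 * num_xmm_regs;  // whole 128-bit XMM registers
  printf("bytes reserved: %d\n", zmm_high + ymm_high + xmm_full);  // 2048
  return 0;
}
```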
|
```diff
@@ -809,53 +755,5 @@
-
-void ShenandoahBarrierSetAssembler::restore_vector_registers(MacroAssembler* masm) {
-  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
-  if (UseAVX > 2) {
-    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
-  }
-  if (UseSSE == 1) {
-    for (int n = 0; n < 8; n++) {
-      __ movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
-    }
-    __ addptr(rsp, sizeof(jdouble)*8);
-  } else if (UseSSE >= 2) {
-    // Restore whole 128bit (16 bytes) XMM registers
-#ifdef _LP64
-    if (VM_Version::supports_evex()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
-      }
-    }
-#else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movdqu(as_XMMRegister(n), Address(rsp, n*16));
-    }
-#endif
-    __ addptr(rsp, 16*num_xmm_regs);
-
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registers.
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
-      }
-      __ addptr(rsp, 16*num_xmm_regs);
-      if (UseAVX > 2) {
-        for (int n = 0; n < num_xmm_regs; n++) {
-          __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
-        }
-        __ addptr(rsp, 32*num_xmm_regs);
-      }
-    }
-#endif
-  }
-}
-
 #undef __
 
 #ifdef COMPILER1
 
 #define __ ce->masm()->
```
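The two removed helpers only work as a strictly matched pair: every subptr tier on the save side must be undone by an addptr on the restore side in reverse order, or rsp is wrong at the ret. A standalone model of that discipline, with tier sizes mirroring the LP64/AVX-512 configuration above (the stack-pointer value is made up):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const int num_xmm_regs = 32;
  std::vector<int> tiers = {32 * num_xmm_regs,  // ZMM upper halves
                            16 * num_xmm_regs,  // YMM upper halves
                            16 * num_xmm_regs}; // full XMM registers
  intptr_t rsp = 0x8000;                        // made-up stack pointer
  const intptr_t rsp_on_entry = rsp;
  for (int bytes : tiers) rsp -= bytes;         // save side: subptr per tier
  for (auto it = tiers.rbegin(); it != tiers.rend(); ++it)
    rsp += *it;                                 // restore side: addptr, reversed
  assert(rsp == rsp_on_entry);                  // stack balanced again
  return 0;
}
```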
```diff
@@ -883,27 +781,57 @@
   __ jmp(*stub->continuation());
 
 }
 
 void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {
+  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
   __ bind(*stub->entry());
 
-  Label done;
   Register obj = stub->obj()->as_register();
   Register res = stub->result()->as_register();
+  Register tmp1 = stub->tmp1()->as_register();
+  Register tmp2 = stub->tmp2()->as_register();
+
+  Label slow_path;
+
+  assert(res == rax, "result must arrive in rax");
 
   if (res != obj) {
     __ mov(res, obj);
   }
 
   // Check for null.
   __ testptr(res, res);
-  __ jcc(Assembler::zero, done);
+  __ jcc(Assembler::zero, *stub->continuation());
 
-  load_reference_barrier_not_null(ce->masm(), res);
-
-  __ bind(done);
+  // Check for object being in the collection set.
+  __ mov(tmp1, res);
+  __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+  __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
+  __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
+  __ testbool(tmp2);
+  __ jcc(Assembler::zero, *stub->continuation());
+
+  // Test if object is resolved.
+  __ movptr(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
+  // Test if both lowest bits are set. We trick it by negating the bits
+  // then test for both bits clear.
+  __ notptr(tmp1);
+  __ testb(tmp1, markOopDesc::marked_value);
+  __ jccb(Assembler::notZero, slow_path);
+  // Clear both lower bits. It's still inverted, so set them, and then invert back.
+  __ orptr(tmp1, markOopDesc::marked_value);
+  __ notptr(tmp1);
+  // At this point, tmp1 contains the decoded forwarding pointer.
+  __ mov(res, tmp1);
+
+  __ jmp(*stub->continuation());
+
+  __ bind(slow_path);
+  ce->store_parameter(res, 0);
+  __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+
   __ jmp(*stub->continuation());
 }
 
 #undef __
 
```
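The midpath added above decodes an already-forwarded object straight from its mark word with only not/test/or/not, exactly as the in-code comments describe. A minimal C++ model of the bit trick, assuming markOopDesc::marked_value == 3, i.e. both low mark bits set (the sample address is invented):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t marked_value = 3;     // both lowest mark bits set
  uintptr_t fwd  = 0xbeef40;            // invented, 8-byte-aligned forwardee
  uintptr_t mark = fwd | marked_value;  // mark word of a forwarded object

  uintptr_t t = ~mark;                  // notptr
  assert((t & marked_value) == 0);      // testb: zero iff both bits were set
  t |= marked_value;                    // orptr: set the (inverted) low bits...
  t = ~t;                               // notptr: ...so they come back cleared
  assert(t == fwd);                     // decoded forwarding pointer
  return 0;
}
```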
```diff
@@ -979,43 +919,35 @@
 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
   __ align(CodeEntryAlignment);
   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
   address start = __ pc();
 
-  Label resolve_oop, slow_path;
+  Label resolve_oop, slow_path, done;
 
   // We use RDI, which also serves as argument register for slow call.
-  // RAX always holds the src object ptr, except after the slow call and
-  // the cmpxchg, then it holds the result. R8/RBX is used as temporary register.
+  // RAX always holds the src object ptr, except after the slow call,
+  // then it holds the result. R8/RBX is used as temporary register.
 
   Register tmp1 = rdi;
   Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
 
   __ push(tmp1);
   __ push(tmp2);
 
   // Check for object being in the collection set.
-  // TODO: Can we use only 1 register here?
-  // The source object arrives here in rax.
-  // live: rax
-  // live: tmp1
   __ mov(tmp1, rax);
   __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
-  // live: tmp2
   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
-  // unlive: tmp1
   __ testbool(tmp2);
-  // unlive: tmp2
   __ jccb(Assembler::notZero, resolve_oop);
-
   __ pop(tmp2);
   __ pop(tmp1);
   __ ret(0);
 
+  // Test if object is already resolved.
   __ bind(resolve_oop);
-
   __ movptr(tmp2, Address(rax, oopDesc::mark_offset_in_bytes()));
   // Test if both lowest bits are set. We trick it by negating the bits
   // then test for both bits clear.
   __ notptr(tmp2);
   __ testb(tmp2, markOopDesc::marked_value);
```
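Both the C1 stub and this runtime stub open with the same collection-set fast test: one byte per heap region, indexed by the object address shifted right by the region-size shift. A compact model of what the mov/shrptr/movptr/movbool/testbool sequence computes (names here are illustrative; in the VM the table base comes from ShenandoahHeap::in_cset_fast_test_addr()):

```cpp
#include <cstdint>

// Stand-in for the region-indexed byte map whose base the stubs load
// from ShenandoahHeap::in_cset_fast_test_addr().
static bool in_collection_set(uintptr_t obj, const uint8_t* cset_map,
                              int region_size_bytes_shift) {
  // tmp1 = obj >> shift (region index); tmp2 = cset_map[tmp1]; testbool
  return cset_map[obj >> region_size_bytes_shift] != 0;
}

int main() {
  const int shift = 20;                   // pretend 1 MB heap regions
  uint8_t map[4] = {0, 1, 0, 0};          // only region 1 is in the cset
  uintptr_t obj = (uintptr_t)1 << shift;  // an address inside region 1
  return in_collection_set(obj, map, shift) ? 0 : 1;
}
```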