578 int rex_prefix_and_encode(int dst_enc, int src_enc, |
623 int rex_prefix_and_encode(int dst_enc, int src_enc, |
579 VexSimdPrefix pre, VexOpcode opc, bool rex_w); |
624 VexSimdPrefix pre, VexOpcode opc, bool rex_w); |
580 |
625 |
581 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, |
626 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, |
582 int nds_enc, VexSimdPrefix pre, VexOpcode opc, |
627 int nds_enc, VexSimdPrefix pre, VexOpcode opc, |
583 bool vector256); |
628 int vector_len); |
|
629 |
|
630 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, |
|
631 int nds_enc, VexSimdPrefix pre, VexOpcode opc, |
|
632 bool is_extended_context, bool is_merge_context, |
|
633 int vector_len, bool no_mask_reg ); |
584 |
634 |
585 void vex_prefix(Address adr, int nds_enc, int xreg_enc, |
635 void vex_prefix(Address adr, int nds_enc, int xreg_enc, |
586 VexSimdPrefix pre, VexOpcode opc, |
636 VexSimdPrefix pre, VexOpcode opc, |
587 bool vex_w, bool vector256); |
637 bool vex_w, int vector_len, |
|
638 bool legacy_mode = false, bool no_mask_reg = false); |
588 |
639 |
589 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, |
640 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, |
590 VexSimdPrefix pre, bool vector256 = false) { |
641 VexSimdPrefix pre, int vector_len = AVX_128bit, |
|
642 bool no_mask_reg = false, bool legacy_mode = false) { |
591 int dst_enc = dst->encoding(); |
643 int dst_enc = dst->encoding(); |
592 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
644 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
593 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); |
645 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg); |
594 } |
646 } |
595 |
647 |
596 void vex_prefix_0F38(Register dst, Register nds, Address src) { |
648 void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src, |
|
649 VexSimdPrefix pre, int vector_len = AVX_128bit, |
|
650 bool no_mask_reg = false) { |
|
651 int dst_enc = dst->encoding(); |
|
652 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
|
653 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); |
|
654 } |
|
655 |
|
656 void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) { |
597 bool vex_w = false; |
657 bool vex_w = false; |
598 bool vector256 = false; |
658 int vector_len = AVX_128bit; |
599 vex_prefix(src, nds->encoding(), dst->encoding(), |
659 vex_prefix(src, nds->encoding(), dst->encoding(), |
600 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); |
660 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, |
601 } |
661 vector_len, no_mask_reg); |
602 |
662 } |
603 void vex_prefix_0F38_q(Register dst, Register nds, Address src) { |
663 |
|
664 void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) { |
604 bool vex_w = true; |
665 bool vex_w = true; |
605 bool vector256 = false; |
666 int vector_len = AVX_128bit; |
606 vex_prefix(src, nds->encoding(), dst->encoding(), |
667 vex_prefix(src, nds->encoding(), dst->encoding(), |
607 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); |
668 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, |
|
669 vector_len, no_mask_reg); |
608 } |
670 } |
609 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, |
671 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, |
610 VexSimdPrefix pre, VexOpcode opc, |
672 VexSimdPrefix pre, VexOpcode opc, |
611 bool vex_w, bool vector256); |
673 bool vex_w, int vector_len, |
612 |
674 bool legacy_mode, bool no_mask_reg); |
613 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) { |
675 |
|
676 int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) { |
614 bool vex_w = false; |
677 bool vex_w = false; |
615 bool vector256 = false; |
678 int vector_len = AVX_128bit; |
616 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), |
679 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), |
617 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); |
680 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, |
618 } |
681 false, no_mask_reg); |
619 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) { |
682 } |
|
683 int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) { |
620 bool vex_w = true; |
684 bool vex_w = true; |
621 bool vector256 = false; |
685 int vector_len = AVX_128bit; |
622 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), |
686 return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), |
623 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); |
687 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, |
|
688 false, no_mask_reg); |
624 } |
689 } |
625 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, |
690 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, |
626 VexSimdPrefix pre, bool vector256 = false, |
691 VexSimdPrefix pre, int vector_len = AVX_128bit, |
627 VexOpcode opc = VEX_OPCODE_0F) { |
692 VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false, |
|
693 bool no_mask_reg = false) { |
628 int src_enc = src->encoding(); |
694 int src_enc = src->encoding(); |
629 int dst_enc = dst->encoding(); |
695 int dst_enc = dst->encoding(); |
630 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
696 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
631 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256); |
697 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg); |
632 } |
698 } |
633 |
699 |
634 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, |
700 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, |
635 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, |
701 VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F, |
636 bool rex_w = false, bool vector256 = false); |
702 bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false); |
637 |
703 |
638 void simd_prefix(XMMRegister dst, Address src, |
704 void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre, |
639 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { |
705 bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) { |
640 simd_prefix(dst, xnoreg, src, pre, opc); |
706 simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc); |
641 } |
707 } |
642 |
708 |
643 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { |
709 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { |
644 simd_prefix(src, dst, pre); |
710 simd_prefix(src, dst, pre, no_mask_reg); |
645 } |
711 } |
646 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, |
712 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, |
647 VexSimdPrefix pre) { |
713 VexSimdPrefix pre, bool no_mask_reg = false) { |
648 bool rex_w = true; |
714 bool rex_w = true; |
649 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); |
715 simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w); |
650 } |
716 } |
651 |
717 |
652 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, |
718 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, |
653 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, |
719 VexSimdPrefix pre, bool no_mask_reg, |
654 bool rex_w = false, bool vector256 = false); |
720 VexOpcode opc = VEX_OPCODE_0F, |
|
721 bool rex_w = false, int vector_len = AVX_128bit, |
|
722 bool legacy_mode = false); |
|
723 |
|
724 int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, |
|
725 VexSimdPrefix pre, bool no_mask_reg, |
|
726 VexOpcode opc = VEX_OPCODE_0F, |
|
727 bool rex_w = false, int vector_len = AVX_128bit); |
|
728 |
|
729 int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, |
|
730 VexSimdPrefix pre, bool no_mask_reg, |
|
731 VexOpcode opc = VEX_OPCODE_0F, |
|
732 bool rex_w = false, int vector_len = AVX_128bit); |
655 |
733 |
656 // Move/convert 32-bit integer value. |
734 // Move/convert 32-bit integer value. |
657 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, |
735 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, |
658 VexSimdPrefix pre) { |
736 VexSimdPrefix pre, bool no_mask_reg) { |
659 // It is OK to cast from Register to XMMRegister to pass argument here |
737 // It is OK to cast from Register to XMMRegister to pass argument here |
660 // since only encoding is used in simd_prefix_and_encode() and number of |
738 // since only encoding is used in simd_prefix_and_encode() and number of |
661 // Gen and Xmm registers are the same. |
739 // Gen and Xmm registers are the same. |
662 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); |
740 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F); |
663 } |
741 } |
664 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { |
742 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { |
665 return simd_prefix_and_encode(dst, xnoreg, src, pre); |
743 return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg); |
666 } |
744 } |
667 int simd_prefix_and_encode(Register dst, XMMRegister src, |
745 int simd_prefix_and_encode(Register dst, XMMRegister src, |
668 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { |
746 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, |
669 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); |
747 bool no_mask_reg = false) { |
|
748 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc); |
670 } |
749 } |
671 |
750 |
672 // Move/convert 64-bit integer value. |
751 // Move/convert 64-bit integer value. |
673 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, |
752 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, |
674 VexSimdPrefix pre) { |
753 VexSimdPrefix pre, bool no_mask_reg = false) { |
675 bool rex_w = true; |
754 bool rex_w = true; |
676 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); |
755 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w); |
677 } |
756 } |
678 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { |
757 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) { |
679 return simd_prefix_and_encode_q(dst, xnoreg, src, pre); |
758 return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg); |
680 } |
759 } |
681 int simd_prefix_and_encode_q(Register dst, XMMRegister src, |
760 int simd_prefix_and_encode_q(Register dst, XMMRegister src, |
682 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { |
761 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, |
|
762 bool no_mask_reg = false) { |
683 bool rex_w = true; |
763 bool rex_w = true; |
684 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); |
764 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w); |
685 } |
765 } |
686 |
766 |
687 // Helper functions for groups of instructions |
767 // Helper functions for groups of instructions |
688 void emit_arith_b(int op1, int op2, Register dst, int imm8); |
768 void emit_arith_b(int op1, int op2, Register dst, int imm8); |
689 |
769 |
690 void emit_arith(int op1, int op2, Register dst, int32_t imm32); |
770 void emit_arith(int op1, int op2, Register dst, int32_t imm32); |
691 // Force generation of a 4 byte immediate value even if it fits into 8bit |
771 // Force generation of a 4 byte immediate value even if it fits into 8bit |
692 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); |
772 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); |
693 void emit_arith(int op1, int op2, Register dst, Register src); |
773 void emit_arith(int op1, int op2, Register dst, Register src); |
694 |
774 |
695 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); |
775 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); |
696 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); |
776 void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); |
697 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); |
777 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); |
698 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); |
778 void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); |
|
779 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); |
|
780 void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false); |
|
781 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false); |
|
782 void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false); |
699 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
783 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
700 Address src, VexSimdPrefix pre, bool vector256); |
784 Address src, VexSimdPrefix pre, int vector_len, |
|
785 bool no_mask_reg = false, bool legacy_mode = false); |
|
786 void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, |
|
787 Address src, VexSimdPrefix pre, int vector_len, |
|
788 bool no_mask_reg = false); |
701 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
789 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
702 XMMRegister src, VexSimdPrefix pre, bool vector256); |
790 XMMRegister src, VexSimdPrefix pre, int vector_len, |
|
791 bool no_mask_reg = false, bool legacy_mode = false); |
|
792 void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, |
|
793 XMMRegister src, VexSimdPrefix pre, int vector_len, |
|
794 bool no_mask_reg = false); |
|
795 |
|
796 bool emit_compressed_disp_byte(int &disp); |
703 |
797 |
704 void emit_operand(Register reg, |
798 void emit_operand(Register reg, |
705 Register base, Register index, Address::ScaleFactor scale, |
799 Register base, Register index, Address::ScaleFactor scale, |
706 int disp, |
800 int disp, |
707 RelocationHolder const& rspec, |
801 RelocationHolder const& rspec, |
1732 //====================VECTOR ARITHMETIC===================================== |
1849 //====================VECTOR ARITHMETIC===================================== |
1733 |
1850 |
1734 // Add Packed Floating-Point Values |
1851 // Add Packed Floating-Point Values |
1735 void addpd(XMMRegister dst, XMMRegister src); |
1852 void addpd(XMMRegister dst, XMMRegister src); |
1736 void addps(XMMRegister dst, XMMRegister src); |
1853 void addps(XMMRegister dst, XMMRegister src); |
1737 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1854 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1738 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1855 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1739 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1856 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1740 void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1857 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1741 |
1858 |
1742 // Subtract Packed Floating-Point Values |
1859 // Subtract Packed Floating-Point Values |
1743 void subpd(XMMRegister dst, XMMRegister src); |
1860 void subpd(XMMRegister dst, XMMRegister src); |
1744 void subps(XMMRegister dst, XMMRegister src); |
1861 void subps(XMMRegister dst, XMMRegister src); |
1745 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1862 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1746 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1863 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1747 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1864 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1748 void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1865 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1749 |
1866 |
1750 // Multiply Packed Floating-Point Values |
1867 // Multiply Packed Floating-Point Values |
1751 void mulpd(XMMRegister dst, XMMRegister src); |
1868 void mulpd(XMMRegister dst, XMMRegister src); |
1752 void mulps(XMMRegister dst, XMMRegister src); |
1869 void mulps(XMMRegister dst, XMMRegister src); |
1753 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1870 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1754 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1871 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1755 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1872 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1756 void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1873 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1757 |
1874 |
1758 // Divide Packed Floating-Point Values |
1875 // Divide Packed Floating-Point Values |
1759 void divpd(XMMRegister dst, XMMRegister src); |
1876 void divpd(XMMRegister dst, XMMRegister src); |
1760 void divps(XMMRegister dst, XMMRegister src); |
1877 void divps(XMMRegister dst, XMMRegister src); |
1761 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1878 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1762 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1879 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1763 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1880 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1764 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1881 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1765 |
1882 |
1766 // Bitwise Logical AND of Packed Floating-Point Values |
1883 // Bitwise Logical AND of Packed Floating-Point Values |
1767 void andpd(XMMRegister dst, XMMRegister src); |
1884 void andpd(XMMRegister dst, XMMRegister src); |
1768 void andps(XMMRegister dst, XMMRegister src); |
1885 void andps(XMMRegister dst, XMMRegister src); |
1769 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1886 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1770 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1887 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1771 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1888 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1772 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1889 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1773 |
1890 |
1774 // Bitwise Logical XOR of Packed Floating-Point Values |
1891 // Bitwise Logical XOR of Packed Floating-Point Values |
1775 void xorpd(XMMRegister dst, XMMRegister src); |
1892 void xorpd(XMMRegister dst, XMMRegister src); |
1776 void xorps(XMMRegister dst, XMMRegister src); |
1893 void xorps(XMMRegister dst, XMMRegister src); |
1777 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1894 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1778 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1895 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1779 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1896 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1780 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1897 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1781 |
1898 |
1782 // Add horizontal packed integers |
1899 // Add horizontal packed integers |
1783 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1900 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1784 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1901 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1785 void phaddw(XMMRegister dst, XMMRegister src); |
1902 void phaddw(XMMRegister dst, XMMRegister src); |
1786 void phaddd(XMMRegister dst, XMMRegister src); |
1903 void phaddd(XMMRegister dst, XMMRegister src); |
1787 |
1904 |
1788 // Add packed integers |
1905 // Add packed integers |
1789 void paddb(XMMRegister dst, XMMRegister src); |
1906 void paddb(XMMRegister dst, XMMRegister src); |
1790 void paddw(XMMRegister dst, XMMRegister src); |
1907 void paddw(XMMRegister dst, XMMRegister src); |
1791 void paddd(XMMRegister dst, XMMRegister src); |
1908 void paddd(XMMRegister dst, XMMRegister src); |
1792 void paddq(XMMRegister dst, XMMRegister src); |
1909 void paddq(XMMRegister dst, XMMRegister src); |
1793 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1910 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1794 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1911 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1795 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1912 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1796 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1913 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1797 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1914 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1798 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1915 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1799 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1916 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1800 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1917 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1801 |
1918 |
1802 // Sub packed integers |
1919 // Sub packed integers |
1803 void psubb(XMMRegister dst, XMMRegister src); |
1920 void psubb(XMMRegister dst, XMMRegister src); |
1804 void psubw(XMMRegister dst, XMMRegister src); |
1921 void psubw(XMMRegister dst, XMMRegister src); |
1805 void psubd(XMMRegister dst, XMMRegister src); |
1922 void psubd(XMMRegister dst, XMMRegister src); |
1806 void psubq(XMMRegister dst, XMMRegister src); |
1923 void psubq(XMMRegister dst, XMMRegister src); |
1807 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1924 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1808 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1925 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1809 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1926 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1810 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1927 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1811 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1928 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1812 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1929 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1813 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1930 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1814 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1931 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1815 |
1932 |
1816 // Multiply packed integers (only shorts and ints) |
1933 // Multiply packed integers (only shorts and ints) |
1817 void pmullw(XMMRegister dst, XMMRegister src); |
1934 void pmullw(XMMRegister dst, XMMRegister src); |
1818 void pmulld(XMMRegister dst, XMMRegister src); |
1935 void pmulld(XMMRegister dst, XMMRegister src); |
1819 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1936 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1820 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1937 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1821 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1938 void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1822 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1939 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
|
1940 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
|
1941 void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1823 |
1942 |
1824 // Shift left packed integers |
1943 // Shift left packed integers |
1825 void psllw(XMMRegister dst, int shift); |
1944 void psllw(XMMRegister dst, int shift); |
1826 void pslld(XMMRegister dst, int shift); |
1945 void pslld(XMMRegister dst, int shift); |
1827 void psllq(XMMRegister dst, int shift); |
1946 void psllq(XMMRegister dst, int shift); |
1828 void psllw(XMMRegister dst, XMMRegister shift); |
1947 void psllw(XMMRegister dst, XMMRegister shift); |
1829 void pslld(XMMRegister dst, XMMRegister shift); |
1948 void pslld(XMMRegister dst, XMMRegister shift); |
1830 void psllq(XMMRegister dst, XMMRegister shift); |
1949 void psllq(XMMRegister dst, XMMRegister shift); |
1831 void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1950 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1832 void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1951 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1833 void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1952 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1834 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1953 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1835 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1954 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1836 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1955 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1837 |
1956 |
1838 // Logical shift right packed integers |
1957 // Logical shift right packed integers |
1839 void psrlw(XMMRegister dst, int shift); |
1958 void psrlw(XMMRegister dst, int shift); |
1840 void psrld(XMMRegister dst, int shift); |
1959 void psrld(XMMRegister dst, int shift); |
1841 void psrlq(XMMRegister dst, int shift); |
1960 void psrlq(XMMRegister dst, int shift); |
1842 void psrlw(XMMRegister dst, XMMRegister shift); |
1961 void psrlw(XMMRegister dst, XMMRegister shift); |
1843 void psrld(XMMRegister dst, XMMRegister shift); |
1962 void psrld(XMMRegister dst, XMMRegister shift); |
1844 void psrlq(XMMRegister dst, XMMRegister shift); |
1963 void psrlq(XMMRegister dst, XMMRegister shift); |
1845 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1964 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1846 void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1965 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1847 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1966 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1848 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1967 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1849 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1968 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1850 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1969 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1851 |
1970 |
1852 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) |
1971 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) |
1853 void psraw(XMMRegister dst, int shift); |
1972 void psraw(XMMRegister dst, int shift); |
1854 void psrad(XMMRegister dst, int shift); |
1973 void psrad(XMMRegister dst, int shift); |
1855 void psraw(XMMRegister dst, XMMRegister shift); |
1974 void psraw(XMMRegister dst, XMMRegister shift); |
1856 void psrad(XMMRegister dst, XMMRegister shift); |
1975 void psrad(XMMRegister dst, XMMRegister shift); |
1857 void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1976 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1858 void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
1977 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len); |
1859 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1978 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1860 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
1979 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); |
1861 |
1980 |
1862 // And packed integers |
1981 // And packed integers |
1863 void pand(XMMRegister dst, XMMRegister src); |
1982 void pand(XMMRegister dst, XMMRegister src); |
1864 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1983 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1865 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1984 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1866 |
1985 |
1867 // Or packed integers |
1986 // Or packed integers |
1868 void por(XMMRegister dst, XMMRegister src); |
1987 void por(XMMRegister dst, XMMRegister src); |
1869 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1988 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1870 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1989 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1871 |
1990 |
1872 // Xor packed integers |
1991 // Xor packed integers |
1873 void pxor(XMMRegister dst, XMMRegister src); |
1992 void pxor(XMMRegister dst, XMMRegister src); |
1874 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
1993 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1875 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
1994 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1876 |
1995 |
1877 // Copy low 128bit into high 128bit of YMM registers. |
1996 // Copy low 128bit into high 128bit of YMM registers. |
1878 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
1997 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
1879 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
1998 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
1880 void vextractf128h(XMMRegister dst, XMMRegister src); |
1999 void vextractf128h(XMMRegister dst, XMMRegister src); |
|
2000 void vextracti128h(XMMRegister dst, XMMRegister src); |
1881 |
2001 |
1882 // Load/store high 128bit of YMM registers which does not destroy other half. |
2002 // Load/store high 128bit of YMM registers which does not destroy other half. |
1883 void vinsertf128h(XMMRegister dst, Address src); |
2003 void vinsertf128h(XMMRegister dst, Address src); |
1884 void vinserti128h(XMMRegister dst, Address src); |
2004 void vinserti128h(XMMRegister dst, Address src); |
1885 void vextractf128h(Address dst, XMMRegister src); |
2005 void vextractf128h(Address dst, XMMRegister src); |
1886 void vextracti128h(Address dst, XMMRegister src); |
2006 void vextracti128h(Address dst, XMMRegister src); |
1887 |
2007 |
|
2008 // Copy low 256bit into high 256bit of ZMM registers. |
|
2009 void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
|
2010 void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
|
2011 void vextracti64x4h(XMMRegister dst, XMMRegister src); |
|
2012 void vextractf64x4h(XMMRegister dst, XMMRegister src); |
|
2013 void vextractf64x4h(Address dst, XMMRegister src); |
|
2014 void vinsertf64x4h(XMMRegister dst, Address src); |
|
2015 |
|
2016 // Copy targeted 128bit segments of the ZMM registers |
|
2017 void vextracti64x2h(XMMRegister dst, XMMRegister src, int value); |
|
2018 void vextractf64x2h(XMMRegister dst, XMMRegister src, int value); |
|
2019 void vextractf32x4h(XMMRegister dst, XMMRegister src, int value); |
|
2020 |
1888 // duplicate 4-bytes integer data from src into 8 locations in dest |
2021 // duplicate 4-bytes integer data from src into 8 locations in dest |
1889 void vpbroadcastd(XMMRegister dst, XMMRegister src); |
2022 void vpbroadcastd(XMMRegister dst, XMMRegister src); |
|
2023 |
|
2024 // duplicate 4-bytes integer data from src into vector_len locations in dest |
|
2025 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len); |
1890 |
2026 |
1891 // Carry-Less Multiplication Quadword |
2027 // Carry-Less Multiplication Quadword |
1892 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); |
2028 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); |
1893 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); |
2029 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); |
1894 |
2030 |