hotspot/src/cpu/x86/vm/assembler_x86.hpp
changeset 30624 2e1803c8a26d
parent 30305 b92a97e1e9cb
child 31129 02ee7609f0e1
equal deleted inserted replaced
30596:0322b394e7fd 30624:2e1803c8a26d
   436   AddressLiteral base() { return _base; }
   436   AddressLiteral base() { return _base; }
   437   Address index() { return _index; }
   437   Address index() { return _index; }
   438 
   438 
   439 };
   439 };
   440 
   440 
   441 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
   441 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512*2 / wordSize);
   442 
   442 
   443 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
   443 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
   444 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
   444 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
   445 // is what you get. The Assembler is generating code into a CodeBuffer.
   445 // is what you get. The Assembler is generating code into a CodeBuffer.
   446 
   446 
   501     REX_WRB    = 0x4D,
   501     REX_WRB    = 0x4D,
   502     REX_WRX    = 0x4E,
   502     REX_WRX    = 0x4E,
   503     REX_WRXB   = 0x4F,
   503     REX_WRXB   = 0x4F,
   504 
   504 
   505     VEX_3bytes = 0xC4,
   505     VEX_3bytes = 0xC4,
   506     VEX_2bytes = 0xC5
   506     VEX_2bytes = 0xC5,
       
   507     EVEX_4bytes = 0x62
   507   };
   508   };
   508 
   509 
   509   enum VexPrefix {
   510   enum VexPrefix {
   510     VEX_B = 0x20,
   511     VEX_B = 0x20,
   511     VEX_X = 0x40,
   512     VEX_X = 0x40,
   512     VEX_R = 0x80,
   513     VEX_R = 0x80,
   513     VEX_W = 0x80
   514     VEX_W = 0x80
   514   };
   515   };
   515 
   516 
       
   517   enum ExexPrefix {
       
   518     EVEX_F  = 0x04,
       
   519     EVEX_V  = 0x08,
       
   520     EVEX_Rb = 0x10,
       
   521     EVEX_X  = 0x40,
       
   522     EVEX_Z  = 0x80
       
   523   };
       
   524 
   516   enum VexSimdPrefix {
   525   enum VexSimdPrefix {
   517     VEX_SIMD_NONE = 0x0,
   526     VEX_SIMD_NONE = 0x0,
   518     VEX_SIMD_66   = 0x1,
   527     VEX_SIMD_66   = 0x1,
   519     VEX_SIMD_F3   = 0x2,
   528     VEX_SIMD_F3   = 0x2,
   520     VEX_SIMD_F2   = 0x3
   529     VEX_SIMD_F2   = 0x3
   523   enum VexOpcode {
   532   enum VexOpcode {
   524     VEX_OPCODE_NONE  = 0x0,
   533     VEX_OPCODE_NONE  = 0x0,
   525     VEX_OPCODE_0F    = 0x1,
   534     VEX_OPCODE_0F    = 0x1,
   526     VEX_OPCODE_0F_38 = 0x2,
   535     VEX_OPCODE_0F_38 = 0x2,
   527     VEX_OPCODE_0F_3A = 0x3
   536     VEX_OPCODE_0F_3A = 0x3
       
   537   };
       
   538 
       
   539   enum AvxVectorLen {
       
   540     AVX_128bit = 0x0,
       
   541     AVX_256bit = 0x1,
       
   542     AVX_512bit = 0x2,
       
   543     AVX_NoVec  = 0x4
       
   544   };
       
   545 
       
   546   enum EvexTupleType {
       
   547     EVEX_FV   = 0,
       
   548     EVEX_HV   = 4,
       
   549     EVEX_FVM  = 6,
       
   550     EVEX_T1S  = 7,
       
   551     EVEX_T1F  = 11,
       
   552     EVEX_T2   = 13,
       
   553     EVEX_T4   = 15,
       
   554     EVEX_T8   = 17,
       
   555     EVEX_HVM  = 18,
       
   556     EVEX_QVM  = 19,
       
   557     EVEX_OVM  = 20,
       
   558     EVEX_M128 = 21,
       
   559     EVEX_DUP  = 22,
       
   560     EVEX_ETUP = 23
       
   561   };
       
   562 
       
   563   enum EvexInputSizeInBits {
       
   564     EVEX_8bit  = 0,
       
   565     EVEX_16bit = 1,
       
   566     EVEX_32bit = 2,
       
   567     EVEX_64bit = 3
   528   };
   568   };
   529 
   569 
   530   enum WhichOperand {
   570   enum WhichOperand {
   531     // input to locate_operand, and format code for relocations
   571     // input to locate_operand, and format code for relocations
   532     imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
   572     imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
   552   // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
   592   // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
   553   // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
   593   // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
   554 
   594 
   555 private:
   595 private:
   556 
   596 
       
   597   int evex_encoding;
       
   598   int input_size_in_bits;
       
   599   int avx_vector_len;
       
   600   int tuple_type;
       
   601   bool is_evex_instruction;
   557 
   602 
   558   // 64bit prefixes
   603   // 64bit prefixes
   559   int prefix_and_encode(int reg_enc, bool byteinst = false);
   604   int prefix_and_encode(int reg_enc, bool byteinst = false);
   560   int prefixq_and_encode(int reg_enc);
   605   int prefixq_and_encode(int reg_enc);
   561 
   606 
   578   int  rex_prefix_and_encode(int dst_enc, int src_enc,
   623   int  rex_prefix_and_encode(int dst_enc, int src_enc,
   579                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
   624                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
   580 
   625 
   581   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
   626   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
   582                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
   627                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
   583                   bool vector256);
   628                   int vector_len);
       
   629 
       
   630   void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
       
   631                    int nds_enc, VexSimdPrefix pre, VexOpcode opc,
       
   632                    bool is_extended_context, bool is_merge_context,
       
   633                    int vector_len, bool no_mask_reg );
   584 
   634 
   585   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
   635   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
   586                   VexSimdPrefix pre, VexOpcode opc,
   636                   VexSimdPrefix pre, VexOpcode opc,
   587                   bool vex_w, bool vector256);
   637                   bool vex_w, int vector_len,
       
   638                   bool legacy_mode = false, bool no_mask_reg = false);
   588 
   639 
   589   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
   640   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
   590                   VexSimdPrefix pre, bool vector256 = false) {
   641                   VexSimdPrefix pre, int vector_len = AVX_128bit,
       
   642                   bool no_mask_reg = false, bool legacy_mode = false) {
   591     int dst_enc = dst->encoding();
   643     int dst_enc = dst->encoding();
   592     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   644     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   593     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   645     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
   594   }
   646   }
   595 
   647 
   596   void vex_prefix_0F38(Register dst, Register nds, Address src) {
   648   void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
       
   649                     VexSimdPrefix pre, int vector_len = AVX_128bit,
       
   650                     bool no_mask_reg = false) {
       
   651     int dst_enc = dst->encoding();
       
   652     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
       
   653     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
       
   654   }
       
   655 
       
   656   void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
   597     bool vex_w = false;
   657     bool vex_w = false;
   598     bool vector256 = false;
   658     int vector_len = AVX_128bit;
   599     vex_prefix(src, nds->encoding(), dst->encoding(),
   659     vex_prefix(src, nds->encoding(), dst->encoding(),
   600                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
   660                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
   601   }
   661                vector_len, no_mask_reg);
   602 
   662   }
   603   void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
   663 
       
   664   void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
   604     bool vex_w = true;
   665     bool vex_w = true;
   605     bool vector256 = false;
   666     int vector_len = AVX_128bit;
   606     vex_prefix(src, nds->encoding(), dst->encoding(),
   667     vex_prefix(src, nds->encoding(), dst->encoding(),
   607                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
   668                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
       
   669                vector_len, no_mask_reg);
   608   }
   670   }
   609   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
   671   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
   610                              VexSimdPrefix pre, VexOpcode opc,
   672                              VexSimdPrefix pre, VexOpcode opc,
   611                              bool vex_w, bool vector256);
   673                              bool vex_w, int vector_len,
   612 
   674                              bool legacy_mode, bool no_mask_reg);
   613   int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
   675 
       
   676   int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
   614     bool vex_w = false;
   677     bool vex_w = false;
   615     bool vector256 = false;
   678     int vector_len = AVX_128bit;
   616     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
   679     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
   617                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
   680                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
   618   }
   681                                  false, no_mask_reg);
   619   int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
   682   }
       
   683   int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
   620     bool vex_w = true;
   684     bool vex_w = true;
   621     bool vector256 = false;
   685     int vector_len = AVX_128bit;
   622     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
   686     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
   623                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
   687                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
       
   688                                  false, no_mask_reg);
   624   }
   689   }
   625   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
   690   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
   626                              VexSimdPrefix pre, bool vector256 = false,
   691                              VexSimdPrefix pre, int vector_len = AVX_128bit,
   627                              VexOpcode opc = VEX_OPCODE_0F) {
   692                              VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
       
   693                              bool no_mask_reg = false) {
   628     int src_enc = src->encoding();
   694     int src_enc = src->encoding();
   629     int dst_enc = dst->encoding();
   695     int dst_enc = dst->encoding();
   630     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   696     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   631     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
   697     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
   632   }
   698   }
   633 
   699 
   634   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
   700   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
   635                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
   701                    VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
   636                    bool rex_w = false, bool vector256 = false);
   702                    bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
   637 
   703 
   638   void simd_prefix(XMMRegister dst, Address src,
   704   void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
   639                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
   705                    bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
   640     simd_prefix(dst, xnoreg, src, pre, opc);
   706     simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
   641   }
   707   }
   642 
   708 
   643   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
   709   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
   644     simd_prefix(src, dst, pre);
   710     simd_prefix(src, dst, pre, no_mask_reg);
   645   }
   711   }
   646   void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
   712   void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
   647                      VexSimdPrefix pre) {
   713                      VexSimdPrefix pre, bool no_mask_reg = false) {
   648     bool rex_w = true;
   714     bool rex_w = true;
   649     simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
   715     simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
   650   }
   716   }
   651 
   717 
   652   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
   718   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
   653                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
   719                              VexSimdPrefix pre, bool no_mask_reg,
   654                              bool rex_w = false, bool vector256 = false);
   720                              VexOpcode opc = VEX_OPCODE_0F,
       
   721                              bool rex_w = false, int vector_len = AVX_128bit,
       
   722                              bool legacy_mode = false);
       
   723 
       
   724   int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
       
   725                              VexSimdPrefix pre, bool no_mask_reg,
       
   726                              VexOpcode opc = VEX_OPCODE_0F,
       
   727                              bool rex_w = false, int vector_len = AVX_128bit);
       
   728 
       
   729   int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
       
   730                              VexSimdPrefix pre, bool no_mask_reg,
       
   731                              VexOpcode opc = VEX_OPCODE_0F,
       
   732                              bool rex_w = false, int vector_len = AVX_128bit);
   655 
   733 
   656   // Move/convert 32-bit integer value.
   734   // Move/convert 32-bit integer value.
   657   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
   735   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
   658                              VexSimdPrefix pre) {
   736                              VexSimdPrefix pre, bool no_mask_reg) {
   659     // It is OK to cast from Register to XMMRegister to pass the argument here
   737     // It is OK to cast from Register to XMMRegister to pass the argument here
   660     // since only the encoding is used in simd_prefix_and_encode() and the number of
   738     // since only the encoding is used in simd_prefix_and_encode() and the number of
   661     // Gen and Xmm registers is the same.
   739     // Gen and Xmm registers is the same.
   662     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
   740     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
   663   }
   741   }
   664   int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
   742   int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
   665     return simd_prefix_and_encode(dst, xnoreg, src, pre);
   743     return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
   666   }
   744   }
   667   int simd_prefix_and_encode(Register dst, XMMRegister src,
   745   int simd_prefix_and_encode(Register dst, XMMRegister src,
   668                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
   746                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
   669     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
   747                              bool no_mask_reg = false) {
       
   748     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
   670   }
   749   }
   671 
   750 
   672   // Move/convert 64-bit integer value.
   751   // Move/convert 64-bit integer value.
   673   int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
   752   int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
   674                                VexSimdPrefix pre) {
   753                                VexSimdPrefix pre, bool no_mask_reg = false) {
   675     bool rex_w = true;
   754     bool rex_w = true;
   676     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
   755     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
   677   }
   756   }
   678   int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
   757   int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
   679     return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
   758     return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
   680   }
   759   }
   681   int simd_prefix_and_encode_q(Register dst, XMMRegister src,
   760   int simd_prefix_and_encode_q(Register dst, XMMRegister src,
   682                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
   761                                VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
       
   762                                bool no_mask_reg = false) {
   683     bool rex_w = true;
   763     bool rex_w = true;
   684     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
   764     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
   685   }
   765   }
   686 
   766 
   687   // Helper functions for groups of instructions
   767   // Helper functions for groups of instructions
   688   void emit_arith_b(int op1, int op2, Register dst, int imm8);
   768   void emit_arith_b(int op1, int op2, Register dst, int imm8);
   689 
   769 
   690   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
   770   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
   691   // Force generation of a 4 byte immediate value even if it fits into 8bit
   771   // Force generation of a 4 byte immediate value even if it fits into 8bit
   692   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   772   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   693   void emit_arith(int op1, int op2, Register dst, Register src);
   773   void emit_arith(int op1, int op2, Register dst, Register src);
   694 
   774 
   695   void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
   775   void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
   696   void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
   776   void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
   697   void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
   777   void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
   698   void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
   778   void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
       
   779   void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
       
   780   void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
       
   781   void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
       
   782   void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
   699   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
   783   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
   700                       Address src, VexSimdPrefix pre, bool vector256);
   784                       Address src, VexSimdPrefix pre, int vector_len,
       
   785                       bool no_mask_reg = false, bool legacy_mode = false);
       
   786   void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
       
   787                         Address src, VexSimdPrefix pre, int vector_len,
       
   788                         bool no_mask_reg = false);
   701   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
   789   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
   702                       XMMRegister src, VexSimdPrefix pre, bool vector256);
   790                       XMMRegister src, VexSimdPrefix pre, int vector_len,
       
   791                       bool no_mask_reg = false, bool legacy_mode = false);
       
   792   void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
       
   793                         XMMRegister src, VexSimdPrefix pre, int vector_len,
       
   794                         bool no_mask_reg = false);
       
   795 
       
   796   bool emit_compressed_disp_byte(int &disp);
   703 
   797 
   704   void emit_operand(Register reg,
   798   void emit_operand(Register reg,
   705                     Register base, Register index, Address::ScaleFactor scale,
   799                     Register base, Register index, Address::ScaleFactor scale,
   706                     int disp,
   800                     int disp,
   707                     RelocationHolder const& rspec,
   801                     RelocationHolder const& rspec,
   823   void prefix(Prefix p);
   917   void prefix(Prefix p);
   824 
   918 
   825   public:
   919   public:
   826 
   920 
   827   // Creation
   921   // Creation
   828   Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
   922   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
       
   923     init_attributes();
       
   924   }
   829 
   925 
   830   // Decoding
   926   // Decoding
   831   static address locate_operand(address inst, WhichOperand which);
   927   static address locate_operand(address inst, WhichOperand which);
   832   static address locate_next_instruction(address inst);
   928   static address locate_next_instruction(address inst);
   833 
   929 
   834   // Utilities
   930   // Utilities
   835   static bool is_polling_page_far() NOT_LP64({ return false;});
   931   static bool is_polling_page_far() NOT_LP64({ return false;});
       
   932   static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
       
   933                                          int cur_tuple_type, int in_size_in_bits, int cur_encoding);
   836 
   934 
   837   // Generic instructions
   935   // Generic instructions
   838   // Does 32bit or 64bit as needed for the platform. In some sense these
   936   // Does 32bit or 64bit as needed for the platform. In some sense these
   839   // belong in macro assembler but there is no need for both varieties to exist
   937   // belong in macro assembler but there is no need for both varieties to exist
       
   938 
       
   939   void init_attributes(void) {
       
   940     evex_encoding = 0;
       
   941     input_size_in_bits = 0;
       
   942     avx_vector_len = AVX_NoVec;
       
   943     tuple_type = EVEX_ETUP;
       
   944     is_evex_instruction = false;
       
   945   }
   840 
   946 
   841   void lea(Register dst, Address src);
   947   void lea(Register dst, Address src);
   842 
   948 
   843   void mov(Register dst, Register src);
   949   void mov(Register dst, Register src);
   844 
   950 
  1336 
  1442 
  1337   void movb(Address dst, Register src);
  1443   void movb(Address dst, Register src);
  1338   void movb(Address dst, int imm8);
  1444   void movb(Address dst, int imm8);
  1339   void movb(Register dst, Address src);
  1445   void movb(Register dst, Address src);
  1340 
  1446 
       
  1447   void kmovq(KRegister dst, KRegister src);
       
  1448   void kmovql(KRegister dst, Register src);
       
  1449   void kmovdl(KRegister dst, Register src);
       
  1450   void kmovq(Address dst, KRegister src);
       
  1451   void kmovq(KRegister dst, Address src);
       
  1452 
  1341   void movdl(XMMRegister dst, Register src);
  1453   void movdl(XMMRegister dst, Register src);
  1342   void movdl(Register dst, XMMRegister src);
  1454   void movdl(Register dst, XMMRegister src);
  1343   void movdl(XMMRegister dst, Address src);
  1455   void movdl(XMMRegister dst, Address src);
  1344   void movdl(Address dst, XMMRegister src);
  1456   void movdl(Address dst, XMMRegister src);
  1345 
  1457 
  1358 
  1470 
  1359   // Move Unaligned 256bit Vector
  1471   // Move Unaligned 256bit Vector
  1360   void vmovdqu(Address dst, XMMRegister src);
  1472   void vmovdqu(Address dst, XMMRegister src);
  1361   void vmovdqu(XMMRegister dst, Address src);
  1473   void vmovdqu(XMMRegister dst, Address src);
  1362   void vmovdqu(XMMRegister dst, XMMRegister src);
  1474   void vmovdqu(XMMRegister dst, XMMRegister src);
       
  1475 
       
  1476    // Move Unaligned 512bit Vector
       
  1477   void evmovdqu(Address dst, XMMRegister src, int vector_len);
       
  1478   void evmovdqu(XMMRegister dst, Address src, int vector_len);
       
  1479   void evmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
  1363 
  1480 
  1364   // Move lower 64bit to high 64bit in 128bit register
  1481   // Move lower 64bit to high 64bit in 128bit register
  1365   void movlhps(XMMRegister dst, XMMRegister src);
  1482   void movlhps(XMMRegister dst, XMMRegister src);
  1366 
  1483 
  1367   void movl(Register dst, int32_t imm32);
  1484   void movl(Register dst, int32_t imm32);
  1484   void orq(Register dst, Register src);
  1601   void orq(Register dst, Register src);
  1485 
  1602 
  1486   // Pack with unsigned saturation
  1603   // Pack with unsigned saturation
  1487   void packuswb(XMMRegister dst, XMMRegister src);
  1604   void packuswb(XMMRegister dst, XMMRegister src);
  1488   void packuswb(XMMRegister dst, Address src);
  1605   void packuswb(XMMRegister dst, Address src);
  1489   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1606   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1490 
  1607 
  1491   // Permutation of 64bit words
  1608   // Permutation of 64bit words
  1492   void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
  1609   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  1493 
  1610 
  1494   void pause();
  1611   void pause();
  1495 
  1612 
  1496   // SSE4.2 string instructions
  1613   // SSE4.2 string instructions
  1497   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  1614   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  1732   //====================VECTOR ARITHMETIC=====================================
  1849   //====================VECTOR ARITHMETIC=====================================
  1733 
  1850 
  1734   // Add Packed Floating-Point Values
  1851   // Add Packed Floating-Point Values
  1735   void addpd(XMMRegister dst, XMMRegister src);
  1852   void addpd(XMMRegister dst, XMMRegister src);
  1736   void addps(XMMRegister dst, XMMRegister src);
  1853   void addps(XMMRegister dst, XMMRegister src);
  1737   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1854   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1738   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1855   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1739   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1856   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1740   void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1857   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1741 
  1858 
  1742   // Subtract Packed Floating-Point Values
  1859   // Subtract Packed Floating-Point Values
  1743   void subpd(XMMRegister dst, XMMRegister src);
  1860   void subpd(XMMRegister dst, XMMRegister src);
  1744   void subps(XMMRegister dst, XMMRegister src);
  1861   void subps(XMMRegister dst, XMMRegister src);
  1745   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1862   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1746   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1863   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1747   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1864   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1748   void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1865   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1749 
  1866 
  1750   // Multiply Packed Floating-Point Values
  1867   // Multiply Packed Floating-Point Values
  1751   void mulpd(XMMRegister dst, XMMRegister src);
  1868   void mulpd(XMMRegister dst, XMMRegister src);
  1752   void mulps(XMMRegister dst, XMMRegister src);
  1869   void mulps(XMMRegister dst, XMMRegister src);
  1753   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1870   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1754   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1871   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1755   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1872   void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1756   void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1873   void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1757 
  1874 
  1758   // Divide Packed Floating-Point Values
  1875   // Divide Packed Floating-Point Values
  1759   void divpd(XMMRegister dst, XMMRegister src);
  1876   void divpd(XMMRegister dst, XMMRegister src);
  1760   void divps(XMMRegister dst, XMMRegister src);
  1877   void divps(XMMRegister dst, XMMRegister src);
  1761   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1878   void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1762   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1879   void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1763   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1880   void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1764   void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1881   void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1765 
  1882 
  1766   // Bitwise Logical AND of Packed Floating-Point Values
  1883   // Bitwise Logical AND of Packed Floating-Point Values
  1767   void andpd(XMMRegister dst, XMMRegister src);
  1884   void andpd(XMMRegister dst, XMMRegister src);
  1768   void andps(XMMRegister dst, XMMRegister src);
  1885   void andps(XMMRegister dst, XMMRegister src);
  1769   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1886   void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1770   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1887   void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1771   void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1888   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1772   void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1889   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1773 
  1890 
  1774   // Bitwise Logical XOR of Packed Floating-Point Values
  1891   // Bitwise Logical XOR of Packed Floating-Point Values
  1775   void xorpd(XMMRegister dst, XMMRegister src);
  1892   void xorpd(XMMRegister dst, XMMRegister src);
  1776   void xorps(XMMRegister dst, XMMRegister src);
  1893   void xorps(XMMRegister dst, XMMRegister src);
  1777   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1894   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1778   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1895   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1779   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1896   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1780   void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1897   void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1781 
  1898 
  1782   // Add horizontal packed integers
  1899   // Add horizontal packed integers
  1783   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1900   void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1784   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1901   void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1785   void phaddw(XMMRegister dst, XMMRegister src);
  1902   void phaddw(XMMRegister dst, XMMRegister src);
  1786   void phaddd(XMMRegister dst, XMMRegister src);
  1903   void phaddd(XMMRegister dst, XMMRegister src);
  1787 
  1904 
  1788   // Add packed integers
  1905   // Add packed integers
  1789   void paddb(XMMRegister dst, XMMRegister src);
  1906   void paddb(XMMRegister dst, XMMRegister src);
  1790   void paddw(XMMRegister dst, XMMRegister src);
  1907   void paddw(XMMRegister dst, XMMRegister src);
  1791   void paddd(XMMRegister dst, XMMRegister src);
  1908   void paddd(XMMRegister dst, XMMRegister src);
  1792   void paddq(XMMRegister dst, XMMRegister src);
  1909   void paddq(XMMRegister dst, XMMRegister src);
  1793   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1910   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1794   void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1911   void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1795   void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1912   void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1796   void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1913   void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1797   void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1914   void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1798   void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1915   void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1799   void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1916   void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1800   void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1917   void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1801 
  1918 
  1802   // Sub packed integers
  1919   // Sub packed integers
  1803   void psubb(XMMRegister dst, XMMRegister src);
  1920   void psubb(XMMRegister dst, XMMRegister src);
  1804   void psubw(XMMRegister dst, XMMRegister src);
  1921   void psubw(XMMRegister dst, XMMRegister src);
  1805   void psubd(XMMRegister dst, XMMRegister src);
  1922   void psubd(XMMRegister dst, XMMRegister src);
  1806   void psubq(XMMRegister dst, XMMRegister src);
  1923   void psubq(XMMRegister dst, XMMRegister src);
  1807   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1924   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1808   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1925   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1809   void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1926   void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1810   void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1927   void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1811   void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1928   void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1812   void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1929   void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1813   void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1930   void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1814   void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1931   void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1815 
  1932 
  1816   // Multiply packed integers (only shorts and ints)
  1933   // Multiply packed integers (shorts and ints; vpmullq additionally handles longs)
  1817   void pmullw(XMMRegister dst, XMMRegister src);
  1934   void pmullw(XMMRegister dst, XMMRegister src);
  1818   void pmulld(XMMRegister dst, XMMRegister src);
  1935   void pmulld(XMMRegister dst, XMMRegister src);
  1819   void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1936   void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1820   void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1937   void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1821   void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1938   void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1822   void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1939   void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
       
  1940   void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
       
  1941   void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1823 
  1942 
  1824   // Shift left packed integers
  1943   // Shift left packed integers
  1825   void psllw(XMMRegister dst, int shift);
  1944   void psllw(XMMRegister dst, int shift);
  1826   void pslld(XMMRegister dst, int shift);
  1945   void pslld(XMMRegister dst, int shift);
  1827   void psllq(XMMRegister dst, int shift);
  1946   void psllq(XMMRegister dst, int shift);
  1828   void psllw(XMMRegister dst, XMMRegister shift);
  1947   void psllw(XMMRegister dst, XMMRegister shift);
  1829   void pslld(XMMRegister dst, XMMRegister shift);
  1948   void pslld(XMMRegister dst, XMMRegister shift);
  1830   void psllq(XMMRegister dst, XMMRegister shift);
  1949   void psllq(XMMRegister dst, XMMRegister shift);
  1831   void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1950   void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1832   void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1951   void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1833   void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1952   void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1834   void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1953   void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1835   void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1954   void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1836   void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1955   void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1837 
  1956 
  1838   // Logical shift right packed integers
  1957   // Logical shift right packed integers
  1839   void psrlw(XMMRegister dst, int shift);
  1958   void psrlw(XMMRegister dst, int shift);
  1840   void psrld(XMMRegister dst, int shift);
  1959   void psrld(XMMRegister dst, int shift);
  1841   void psrlq(XMMRegister dst, int shift);
  1960   void psrlq(XMMRegister dst, int shift);
  1842   void psrlw(XMMRegister dst, XMMRegister shift);
  1961   void psrlw(XMMRegister dst, XMMRegister shift);
  1843   void psrld(XMMRegister dst, XMMRegister shift);
  1962   void psrld(XMMRegister dst, XMMRegister shift);
  1844   void psrlq(XMMRegister dst, XMMRegister shift);
  1963   void psrlq(XMMRegister dst, XMMRegister shift);
  1845   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1964   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1846   void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1965   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1847   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1966   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1848   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1967   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1849   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1968   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1850   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1969   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1851 
  1970 
  1852   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
  1971   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
  1853   void psraw(XMMRegister dst, int shift);
  1972   void psraw(XMMRegister dst, int shift);
  1854   void psrad(XMMRegister dst, int shift);
  1973   void psrad(XMMRegister dst, int shift);
  1855   void psraw(XMMRegister dst, XMMRegister shift);
  1974   void psraw(XMMRegister dst, XMMRegister shift);
  1856   void psrad(XMMRegister dst, XMMRegister shift);
  1975   void psrad(XMMRegister dst, XMMRegister shift);
  1857   void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1976   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1858   void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  1977   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  1859   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1978   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1860   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  1979   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  1861 
  1980 
  1862   // And packed integers
  1981   // And packed integers
  1863   void pand(XMMRegister dst, XMMRegister src);
  1982   void pand(XMMRegister dst, XMMRegister src);
  1864   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1983   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1865   void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1984   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1866 
  1985 
  1867   // Or packed integers
  1986   // Or packed integers
  1868   void por(XMMRegister dst, XMMRegister src);
  1987   void por(XMMRegister dst, XMMRegister src);
  1869   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1988   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1870   void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1989   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1871 
  1990 
  1872   // Xor packed integers
  1991   // Xor packed integers
  1873   void pxor(XMMRegister dst, XMMRegister src);
  1992   void pxor(XMMRegister dst, XMMRegister src);
  1874   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  1993   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  1875   void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
  1994   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  1876 
  1995 
  1877   // Copy low 128bit into high 128bit of YMM registers.
  1996   // Copy low 128bit into high 128bit of YMM registers.
  1878   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  1997   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  1879   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  1998   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  1880   void vextractf128h(XMMRegister dst, XMMRegister src);
  1999   void vextractf128h(XMMRegister dst, XMMRegister src);
       
  2000   void vextracti128h(XMMRegister dst, XMMRegister src);
  1881 
  2001 
  1882   // Load/store high 128bit of YMM registers which does not destroy other half.
  2002   // Load/store high 128bit of YMM registers which does not destroy other half.
  1883   void vinsertf128h(XMMRegister dst, Address src);
  2003   void vinsertf128h(XMMRegister dst, Address src);
  1884   void vinserti128h(XMMRegister dst, Address src);
  2004   void vinserti128h(XMMRegister dst, Address src);
  1885   void vextractf128h(Address dst, XMMRegister src);
  2005   void vextractf128h(Address dst, XMMRegister src);
  1886   void vextracti128h(Address dst, XMMRegister src);
  2006   void vextracti128h(Address dst, XMMRegister src);
  1887 
  2007 
       
  2008   // Copy low 256bit into high 256bit of ZMM registers.
       
  2009   void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
       
  2010   void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
       
  2011   void vextracti64x4h(XMMRegister dst, XMMRegister src);
       
  2012   void vextractf64x4h(XMMRegister dst, XMMRegister src);
       
  2013   void vextractf64x4h(Address dst, XMMRegister src);
       
  2014   void vinsertf64x4h(XMMRegister dst, Address src);
       
  2015 
       
  2016   // Copy targeted 128bit segments of the ZMM registers
       
  2017   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
       
  2018   void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
       
  2019   void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
       
  2020 
  1888   // duplicate 4-byte integer data from src into 8 locations in dest
  2021   // duplicate 4-byte integer data from src into 8 locations in dest
  1889   void vpbroadcastd(XMMRegister dst, XMMRegister src);
  2022   void vpbroadcastd(XMMRegister dst, XMMRegister src);
       
  2023 
       
  2024   // duplicate 4-byte integer data from src into vector_len locations in dest
       
  2025   void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
  1890 
  2026 
  1891   // Carry-Less Multiplication Quadword
  2027   // Carry-Less Multiplication Quadword
  1892   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  2028   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  1893   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
  2029   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
  1894 
  2030