diff -r 7dd181af6a26 -r bf248009cbbe hotspot/src/cpu/x86/vm/assembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 13 17:10:52 2011 -0800 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Dec 14 14:54:38 2011 -0800 @@ -533,6 +533,19 @@ case 0x0F: // movx..., etc. switch (0xFF & *ip++) { + case 0x3A: // pcmpestri + tail_size = 1; + case 0x38: // ptest, pmovzxbw + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; + + case 0x70: // pshufd r, r/a, #8 + debug_only(has_disp32 = true); // has both kinds of operands! + case 0x73: // psrldq r, #8 + tail_size = 1; + break; + case 0x12: // movlps case 0x28: // movaps case 0x2E: // ucomiss @@ -543,9 +556,7 @@ case 0x57: // xorps case 0x6E: // movd case 0x7E: // movd - case 0xAE: // ldmxcsr a - // 64bit side says it these have both operands but that doesn't - // appear to be true + case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush debug_only(has_disp32 = true); break; @@ -565,6 +576,12 @@ // fall out of the switch to decode the address break; + case 0xC4: // pinsrw r, a, #8 + debug_only(has_disp32 = true); + case 0xC5: // pextrw r, r, #8 + tail_size = 1; // the imm8 + break; + case 0xAC: // shrd r, a, #8 debug_only(has_disp32 = true); tail_size = 1; // the imm8 @@ -625,11 +642,44 @@ tail_size = 1; // the imm8 break; - case 0xE8: // call rdisp32 - case 0xE9: // jmp rdisp32 - if (which == end_pc_operand) return ip + 4; - assert(which == call32_operand, "call has no disp32 or imm"); - return ip; + case 0xC4: // VEX_3bytes + case 0xC5: // VEX_2bytes + assert((UseAVX > 0), "shouldn't have VEX prefix"); + assert(ip == inst+1, "no prefixes allowed"); + // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions + // but they have prefix 0x0F and processed when 0x0F processed above. + // + // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES + // instructions (these instructions are not supported in 64-bit mode). + // To distinguish them bits [7:6] are set in the VEX second byte since + // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set + // those VEX bits REX and vvvv bits are inverted. + // + // Fortunately C2 doesn't generate these instructions so we don't need + // to check for them in product version. + + // Check second byte + NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); + + // First byte + if ((0xFF & *inst) == VEX_3bytes) { + ip++; // third byte + is_64bit = ((VEX_W & *ip) == VEX_W); + } + ip++; // opcode + // To find the end of instruction (which == end_pc_operand). + switch (0xFF & *ip) { + case 0x61: // pcmpestri r, r/a, #8 + case 0x70: // pshufd r, r/a, #8 + case 0x73: // psrldq r, #8 + tail_size = 1; // the imm8 + break; + default: + break; + } + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl @@ -643,6 +693,12 @@ debug_only(has_disp32 = true); break; + case 0xE8: // call rdisp32 + case 0xE9: // jmp rdisp32 + if (which == end_pc_operand) return ip + 4; + assert(which == call32_operand, "call has no disp32 or imm"); + return ip; + case 0xF0: // Lock assert(os::is_MP(), "only on MP"); goto again_after_prefix; @@ -918,9 +974,7 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -928,18 +982,14 @@ void Assembler::addsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_operand(dst, src); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -947,13 +997,19 @@ void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_operand(dst, src); } +void Assembler::andl(Address dst, int32_t imm32) { + InstructionMark im(this); + prefix(dst); + emit_byte(0x81); + emit_operand(rsp, dst, 4); + emit_long(imm32); +} + void Assembler::andl(Register dst, int32_t imm32) { prefix(dst); emit_arith(0x81, 0xE0, dst, imm32); @@ -974,13 +1030,33 @@ void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x54); emit_operand(dst, src); } +void Assembler::andpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + +void Assembler::andps(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::andps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_byte(0x0F); @@ -1025,19 +1101,7 @@ } void Assembler::call(Register dst) { - // This was originally using a 32bit register encoding - // and surely we want 64bit! - // this is a 32bit encoding but in 64bit mode the default - // operand size is 64bit so there is no need for the - // wide prefix. So prefix only happens if we use the - // new registers. Much like push/pop. - int x = offset(); - // this may be true but dbx disassembles it as if it - // were 32bits... - // int encode = prefix_and_encode(dst->encoding()); - // if (offset() != x) assert(dst->encoding() >= 8, "what?"); - int encode = prefixq_and_encode(dst->encoding()); - + int encode = prefix_and_encode(dst->encoding()); emit_byte(0xFF); emit_byte(0xD0 | encode); } @@ -1157,87 +1221,119 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - comiss(dst, src); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x2F); + emit_operand(dst, src); +} + +void Assembler::comisd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2F); + emit_byte(0xC0 | encode); } void Assembler::comiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_NONE); emit_byte(0x2F); emit_operand(dst, src); } +void Assembler::comiss(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); + emit_byte(0x2F); + emit_byte(0xC0 | encode); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0xE6); emit_byte(0xC0 | encode); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x5B); emit_byte(0xC0 | encode); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtsd2ss(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x5A); + emit_operand(dst, src); +} + void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtss2sd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x5A); + emit_operand(dst, src); +} + + void Assembler::cvttsd2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -1253,18 +1349,14 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1272,18 +1364,14 @@ void Assembler::divss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1509,49 +1597,16 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - emit_byte(0x66); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movb(Register dst, Address src) { @@ -1582,19 +1637,15 @@ void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); // swap src/dst to get correct prefix - int encode = prefix_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -1602,58 +1653,29 @@ void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_operand(dst, src); } - -void Assembler::movdqa(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x6F); - emit_operand(dst, src); -} - void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6F); emit_byte(0xC0 | encode); } -void Assembler::movdqa(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); - emit_byte(0x7F); - emit_operand(src, dst); -} - void Assembler::movdqu(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_byte(0xC0 | encode); } @@ -1661,9 +1683,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7F); emit_operand(src, dst); } @@ -1710,9 +1730,7 @@ void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x12); emit_operand(dst, src); } @@ -1740,9 +1758,7 @@ void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7E); emit_operand(dst, src); } @@ -1750,9 +1766,7 @@ void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0xD6); emit_operand(src, dst); } @@ -1775,9 +1789,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1785,9 +1797,7 @@ void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_operand(dst, src); } @@ -1795,18 +1805,14 @@ void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1814,9 +1820,7 @@ void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_operand(dst, src); } @@ -1824,9 +1828,7 @@ void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x11); emit_operand(src, dst); } @@ -1919,18 +1921,14 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -1938,18 +1936,14 @@ void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -2237,14 +2231,26 @@ emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::packuswb(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_operand(dst, src); +} + +void Assembler::packuswb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_byte(0xC0 | encode); +} + void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x3A); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_operand(dst, src); emit_byte(imm8); @@ -2252,16 +2258,27 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x3A); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_byte(0xC0 | encode); emit_byte(imm8); } +void Assembler::pmovzxbw(XMMRegister dst, Address src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_operand(dst, src); +} + +void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_byte(0xC0 | encode); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -2360,22 +2377,24 @@ void Assembler::por(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEB); emit_byte(0xC0 | encode); } +void Assembler::por(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0xEB); + emit_operand(dst, src); +} + void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2385,11 +2404,9 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2398,10 +2415,7 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2410,11 +2424,9 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); // QQ new - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2425,11 +2437,8 @@ // HMM Table D-1 says sse2 or mmx. // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. - NOT_LP64(assert(VM_Version::supports_sse(), "")); - - int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2438,10 +2447,7 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2449,36 +2455,52 @@ void Assembler::ptest(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); - - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x38); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x38); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_byte(0xC0 | encode); } +void Assembler::punpcklbw(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x60); + emit_operand(dst, src); +} + void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0x60); emit_byte(0xC0 | encode); } +void Assembler::punpckldq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_operand(dst, src); +} + +void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_byte(0xC0 | encode); +} + void Assembler::push(int32_t imm32) { // in 64bits we push 64bits onto the stack but only // take a 32bit immediate @@ -2508,20 +2530,16 @@ void Assembler::pxor(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_operand(dst, src); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_byte(0xC0 | encode); } @@ -2683,12 +2701,8 @@ } void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_byte(0xC0 | encode); } @@ -2696,30 +2710,22 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_operand(dst, src); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_byte(0xC0 | encode); } void Assembler::sqrtss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_operand(dst, src); } @@ -2765,9 +2771,7 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2775,18 +2779,14 @@ void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5C); emit_operand(dst, src); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2794,9 +2794,7 @@ void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_operand(dst, src); } @@ -2836,30 +2834,30 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_operand(dst, src); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_byte(0xC0 | encode); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_operand(dst, src); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_byte(0xC0 | encode); } @@ -2905,16 +2903,15 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - xorps(dst, src); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x57); + emit_byte(0xC0 | encode); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x57); emit_operand(dst, src); } @@ -2922,8 +2919,7 @@ void Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_byte(0xC0 | encode); } @@ -2931,8 +2927,7 @@ void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_operand(dst, src); } @@ -3394,12 +3389,114 @@ emit_byte(0xF1); } +// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. +static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; +// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. +static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; + +// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. +void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + if (rex_w) { + prefixq(adr, xreg); + } else { + prefix(adr, xreg); + } + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } +} + +int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : + prefix_and_encode(dst_enc, src_enc); + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } + return encode; +} + + +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { + if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { + prefix(VEX_3bytes); + + int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); + byte1 = (~byte1) & 0xE0; + byte1 |= opc; + a_byte(byte1); + + int byte2 = ((~nds_enc) & 0xf) << 3; + byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; + emit_byte(byte2); + } else { + prefix(VEX_2bytes); + + int byte1 = vex_r ? VEX_R : 0; + byte1 = (~byte1) & 0x80; + byte1 |= ((~nds_enc) & 0xf) << 3; + byte1 |= (vector256 ? 4 : 0) | pre; + emit_byte(byte1); + } +} + +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ + bool vex_r = (xreg_enc >= 8); + bool vex_b = adr.base_needs_rex(); + bool vex_x = adr.index_needs_rex(); + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); +} + +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { + bool vex_r = (dst_enc >= 8); + bool vex_b = (src_enc >= 8); + bool vex_x = false; + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); + return (((dst_enc & 7) << 3) | (src_enc & 7)); +} + + +void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + if (UseAVX > 0) { + int xreg_enc = xreg->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); + rex_prefix(adr, xreg, pre, opc, rex_w); + } +} + +int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (UseAVX > 0) { + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); + } +} #ifndef _LP64 void Assembler::incl(Register dst) { // Don't use it directly. Use MacroAssembler::incrementl() instead. - emit_byte(0x40 | dst->encoding()); + emit_byte(0x40 | dst->encoding()); } void Assembler::lea(Register dst, Address src) { @@ -3756,6 +3853,38 @@ } } +void Assembler::prefixq(Address adr, XMMRegister src) { + if (src->encoding() < 8) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WXB); + } else { + prefix(REX_WB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WX); + } else { + prefix(REX_W); + } + } + } else { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WRXB); + } else { + prefix(REX_WRB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WRX); + } else { + prefix(REX_WR); + } + } + } +} + void Assembler::adcq(Register dst, int32_t imm32) { (void) prefixq_and_encode(dst->encoding()); emit_arith(0x81, 0xD0, dst, imm32); @@ -3918,36 +4047,44 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix_q(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix_q(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -4107,21 +4244,17 @@ void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix - int encode = prefixq_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -5680,6 +5813,24 @@ LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } +void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::addsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::addsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + addss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + addss(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::align(int modulus) { if (offset() % modulus != 0) { nop(modulus - (offset() % modulus)); @@ -5687,11 +5838,24 @@ } void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { - andpd(dst, as_Address(src)); + Assembler::andpd(dst, as_Address(src)); } else { lea(rscratch1, src); - andpd(dst, Address(rscratch1, 0)); + Assembler::andpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::andps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::andps(dst, Address(rscratch1, 0)); } } @@ -6268,19 +6432,19 @@ void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comisd(dst, as_Address(src)); + Assembler::comisd(dst, as_Address(src)); } else { lea(rscratch1, src); - comisd(dst, Address(rscratch1, 0)); + Assembler::comisd(dst, Address(rscratch1, 0)); } } void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comiss(dst, as_Address(src)); + Assembler::comiss(dst, as_Address(src)); } else { lea(rscratch1, src); - comiss(dst, Address(rscratch1, 0)); + Assembler::comiss(dst, Address(rscratch1, 0)); } } @@ -6364,6 +6528,24 @@ sarl(reg, shift_value); } +void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divss(dst, Address(rscratch1, 0)); + } +} + // !defined(COMPILER2) is because of stupid core builds #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) void MacroAssembler::empty_FPU_stack() { @@ -6803,12 +6985,39 @@ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movsd(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - movss(dst, as_Address(src)); + Assembler::movss(dst, as_Address(src)); } else { lea(rscratch1, src); - movss(dst, Address(rscratch1, 0)); + Assembler::movss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulss(dst, Address(rscratch1, 0)); } } @@ -6990,6 +7199,42 @@ testl(dst, as_Address(src)); } +void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subss(dst, Address(rscratch1, 0)); + } +} + ////////////////////////////////////////////////////////////////////////////////// #ifndef SERIALGC @@ -7875,28 +8120,42 @@ void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { - ucomisd(dst, as_Address(src)); + if (reachable(src)) { + Assembler::ucomisd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomisd(dst, Address(rscratch1, 0)); + } } void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { - ucomiss(dst, as_Address(src)); + if (reachable(src)) { + Assembler::ucomiss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomiss(dst, Address(rscratch1, 0)); + } } void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { - xorpd(dst, as_Address(src)); + Assembler::xorpd(dst, as_Address(src)); } else { lea(rscratch1, src); - xorpd(dst, Address(rscratch1, 0)); + Assembler::xorpd(dst, Address(rscratch1, 0)); } } void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { - xorps(dst, as_Address(src)); + Assembler::xorps(dst, as_Address(src)); } else { lea(rscratch1, src); - xorps(dst, Address(rscratch1, 0)); + Assembler::xorps(dst, Address(rscratch1, 0)); } }