6662967: Optimize I2D conversion on new x86
Summary: Use CVTDQ2PS and CVTDQ2PD to convert integer values to float and double values on new AMD CPUs.
Reviewed-by: sgoldman, never
--- a/hotspot/src/cpu/x86/vm/assembler_x86_32.cpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_32.cpp Wed Mar 19 15:33:25 2008 -0700
@@ -2672,6 +2672,22 @@
emit_sse_operand(dst, src);
}
+void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ // CVTDQ2PD dst, src (register-register form): opcode bytes F3 0F E6, then the operand bytes written by emit_sse_operand().
+ emit_byte(0xF3);
+ emit_byte(0x0F);
+ emit_byte(0xE6);
+ emit_sse_operand(dst, src);
+}
+
+void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ // CVTDQ2PS dst, src (register-register form): opcode bytes 0F 5B with no leading prefix byte, then the operand bytes written by emit_sse_operand().
+ emit_byte(0x0F);
+ emit_byte(0x5B);
+ emit_sse_operand(dst, src);
+}
emit_sse_instruction(andps, sse, 0, 0x54, XMMRegister, XMMRegister);
emit_sse_instruction(andpd, sse2, 0x66, 0x54, XMMRegister, XMMRegister);
--- a/hotspot/src/cpu/x86/vm/assembler_x86_32.hpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_32.hpp Wed Mar 19 15:33:25 2008 -0700
@@ -901,6 +901,8 @@
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src); // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src);
+ void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void cvtdq2ps(XMMRegister dst, XMMRegister src);
void cvtsi2ss(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
void cvtsi2ss(XMMRegister dst, Register src);
--- a/hotspot/src/cpu/x86/vm/assembler_x86_64.cpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_64.cpp Wed Mar 19 15:33:25 2008 -0700
@@ -3372,6 +3372,21 @@
emit_byte(0xC0 | encode);
}
+void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { // emits CVTDQ2PD dst, src (register-register form)
+ emit_byte(0xF3); // written ahead of anything prefix_and_encode() emits
+ int encode = prefix_and_encode(dst->encoding(), src->encoding()); // NOTE(review): presumably emits a REX prefix when needed and returns the packed reg/rm bits - confirm
+ emit_byte(0x0F);
+ emit_byte(0xE6); // F3 0F E6 is the CVTDQ2PD opcode
+ emit_byte(0xC0 | encode); // final byte: the two top bits are always set, low bits come from encode
+}
+
+void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { // emits CVTDQ2PS dst, src (register-register form)
+ int encode = prefix_and_encode(dst->encoding(), src->encoding()); // NOTE(review): presumably emits a REX prefix when needed and returns the packed reg/rm bits - confirm
+ emit_byte(0x0F);
+ emit_byte(0x5B); // 0F 5B is the CVTDQ2PS opcode; no F3/F2/66 byte precedes it
+ emit_byte(0xC0 | encode); // final byte: the two top bits are always set, low bits come from encode
+}
+
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
emit_byte(0xF2);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
--- a/hotspot/src/cpu/x86/vm/assembler_x86_64.hpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_64.hpp Wed Mar 19 15:33:25 2008 -0700
@@ -922,6 +922,8 @@
void cvttsd2siq(Register dst, XMMRegister src); // truncates
void cvtss2sd(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, XMMRegister src);
+ void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void cvtdq2ps(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, Address src); // Xor Packed Byte Integer Values
void pxor(XMMRegister dst, XMMRegister src); // Xor Packed Byte Integer Values
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp Wed Mar 19 15:33:25 2008 -0700
@@ -321,6 +321,20 @@
UseXmmRegToRegMoveAll = false;
}
}
+ if( FLAG_IS_DEFAULT(UseXmmI2F) ) { // NOTE(review): presumably true only when the flag was not set explicitly - confirm
+ if( supports_sse4a() ) { // SSE4a support decides the value chosen for UseXmmI2F here
+ UseXmmI2F = true;
+ } else {
+ UseXmmI2F = false;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmI2D) ) { // NOTE(review): presumably true only when the flag was not set explicitly - confirm
+ if( supports_sse4a() ) { // SSE4a support decides the value chosen for UseXmmI2D here
+ UseXmmI2D = true;
+ } else {
+ UseXmmI2D = false;
+ }
+ }
}
if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Mar 19 15:33:25 2008 -0700
@@ -265,6 +265,20 @@
UseXmmRegToRegMoveAll = false;
}
}
+ if( FLAG_IS_DEFAULT(UseXmmI2F) ) { // NOTE(review): presumably true only when the flag was not set explicitly - confirm
+ if( supports_sse4a() ) { // SSE4a support decides the value chosen for UseXmmI2F here
+ UseXmmI2F = true;
+ } else {
+ UseXmmI2F = false;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmI2D) ) { // NOTE(review): presumably true only when the flag was not set explicitly - confirm
+ if( supports_sse4a() ) { // SSE4a support decides the value chosen for UseXmmI2D here
+ UseXmmI2D = true;
+ } else {
+ UseXmmI2D = false;
+ }
+ }
}
if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Wed Mar 19 15:33:25 2008 -0700
@@ -10970,7 +10970,7 @@
%}
instruct convI2XD_reg(regXD dst, eRegI src) %{
- predicate( UseSSE>=2 );
+ predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %}
opcode(0xF2, 0x0F, 0x2A);
@@ -10987,6 +10987,20 @@
ins_pipe( pipe_slow );
%}
+instruct convXI2XD_reg(regXD dst, eRegI src)
+%{
+ predicate( UseSSE>=2 && UseXmmI2D );
+ match(Set dst (ConvI2D src));
+ // Int-to-double rule used when UseXmmI2D is set: MOVD copies the int into the XMM destination, then CVTDQ2PD converts it in place.
+ format %{ "MOVD $dst,$src\n\t"
+ "CVTDQ2PD $dst,$dst\t# i2d" %}
+ ins_encode %{
+ __ movd($dst$$XMMRegister, $src$$Register);
+ __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
instruct convI2D_mem(regD dst, memory mem) %{
predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2D (LoadI mem)));
@@ -11062,7 +11076,7 @@
// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
- predicate(UseSSE>=1);
+ predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %}
@@ -11071,6 +11085,20 @@
ins_pipe( pipe_slow );
%}
+ instruct convXI2X_reg(regX dst, eRegI src)
+%{
+ predicate( UseSSE>=2 && UseXmmI2F );
+ match(Set dst (ConvI2F src));
+ // Int-to-float rule used when UseXmmI2F is set: MOVD copies the int into the XMM destination, then CVTDQ2PS converts it in place.
+ format %{ "MOVD $dst,$src\n\t"
+ "CVTDQ2PS $dst,$dst\t# i2f" %}
+ ins_encode %{
+ __ movd($dst$$XMMRegister, $src$$Register);
+ __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
match(Set dst (ConvI2L src));
effect(KILL cr);
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Wed Mar 19 15:33:25 2008 -0700
@@ -10098,6 +10098,7 @@
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
+ predicate(!UseXmmI2F);
match(Set dst (ConvI2F src));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
@@ -10118,6 +10119,7 @@
instruct convI2D_reg_reg(regD dst, rRegI src)
%{
+ predicate(!UseXmmI2D);
match(Set dst (ConvI2D src));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
@@ -10136,6 +10138,34 @@
ins_pipe(pipe_slow); // XXX
%}
+instruct convXI2F_reg(regF dst, rRegI src)
+%{
+ predicate(UseXmmI2F);
+ match(Set dst (ConvI2F src));
+ // Int-to-float rule used when UseXmmI2F is set: movdl copies the int into the XMM destination, then cvtdq2ps converts it in place.
+ format %{ "movdl $dst, $src\n\t"
+ "cvtdq2psl $dst, $dst\t# i2f" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
+instruct convXI2D_reg(regD dst, rRegI src)
+%{
+ predicate(UseXmmI2D);
+ match(Set dst (ConvI2D src));
+ // Int-to-double rule used when UseXmmI2D is set: movdl copies the int into the XMM destination, then cvtdq2pd converts it in place.
+ format %{ "movdl $dst, $src\n\t"
+ "cvtdq2pdl $dst, $dst\t# i2d" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe(pipe_slow); // XXX
+%}
+
instruct convL2F_reg_reg(regF dst, rRegL src)
%{
match(Set dst (ConvL2F src));
--- a/hotspot/src/share/vm/runtime/globals.hpp Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Wed Mar 19 15:33:25 2008 -0700
@@ -949,6 +949,12 @@
product(bool, UseXmmRegToRegMoveAll, false, \
"Copy all XMM register bits when moving value between registers") \
\
+ product(bool, UseXmmI2D, false, \
+ "Use SSE2 CVTDQ2PD instruction to convert Integer to Double") \
+ \
+ product(bool, UseXmmI2F, false, \
+ "Use SSE2 CVTDQ2PS instruction to convert Integer to Float") \
+ \
product(intx, FieldsAllocationStyle, 1, \
"0 - type based with oops first, 1 - with oops last") \
\