6662967: Optimize I2D conversion on new x86
authorkvn
Wed, 19 Mar 2008 15:33:25 -0700
changeset 244 c8ad6f221400
parent 243 79b67a7a584a
child 245 b9df534a2faa
6662967: Optimize I2D conversion on new x86 Summary: Use CVTDQ2PS and CVTDQ2PD for integer values conversions to float and double values on new AMD cpu. Reviewed-by: sgoldman, never
hotspot/src/cpu/x86/vm/assembler_x86_32.cpp
hotspot/src/cpu/x86/vm/assembler_x86_32.hpp
hotspot/src/cpu/x86/vm/assembler_x86_64.cpp
hotspot/src/cpu/x86/vm/assembler_x86_64.hpp
hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp
hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/cpu/x86/vm/assembler_x86_32.cpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_32.cpp	Wed Mar 19 15:33:25 2008 -0700
@@ -2672,6 +2672,22 @@
   emit_sse_operand(dst, src);
 }
 
+void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse2(), "");
+
+  emit_byte(0xF3);
+  emit_byte(0x0F);
+  emit_byte(0xE6);
+  emit_sse_operand(dst, src);
+}
+
+void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse2(), "");
+
+  emit_byte(0x0F);
+  emit_byte(0x5B);
+  emit_sse_operand(dst, src);
+}
 
 emit_sse_instruction(andps,  sse,  0,    0x54, XMMRegister, XMMRegister);
 emit_sse_instruction(andpd,  sse2, 0x66, 0x54, XMMRegister, XMMRegister);
--- a/hotspot/src/cpu/x86/vm/assembler_x86_32.hpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_32.hpp	Wed Mar 19 15:33:25 2008 -0700
@@ -901,6 +901,8 @@
   void cvtss2sd(XMMRegister dst, XMMRegister src);
   void cvtsd2ss(XMMRegister dst, Address src);   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
+  void cvtdq2pd(XMMRegister dst, XMMRegister src);
+  void cvtdq2ps(XMMRegister dst, XMMRegister src);
 
   void cvtsi2ss(XMMRegister dst, Address src);   // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
   void cvtsi2ss(XMMRegister dst, Register src);
--- a/hotspot/src/cpu/x86/vm/assembler_x86_64.cpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_64.cpp	Wed Mar 19 15:33:25 2008 -0700
@@ -3372,6 +3372,21 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
+  emit_byte(0xF3);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0xE6);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0x5B);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   emit_byte(0xF2);
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
--- a/hotspot/src/cpu/x86/vm/assembler_x86_64.hpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86_64.hpp	Wed Mar 19 15:33:25 2008 -0700
@@ -922,6 +922,8 @@
   void cvttsd2siq(Register dst, XMMRegister src); // truncates
   void cvtss2sd(XMMRegister dst, XMMRegister src);
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
+  void cvtdq2pd(XMMRegister dst, XMMRegister src);
+  void cvtdq2ps(XMMRegister dst, XMMRegister src);
 
   void pxor(XMMRegister dst, Address src);       // Xor Packed Byte Integer Values
   void pxor(XMMRegister dst, XMMRegister src);   // Xor Packed Byte Integer Values
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Mar 19 15:33:25 2008 -0700
@@ -321,6 +321,20 @@
         UseXmmRegToRegMoveAll = false;
       }
     }
+    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2F = true;
+      } else {
+        UseXmmI2F = false;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2D = true;
+      } else {
+        UseXmmI2D = false;
+      }
+    }
   }
 
   if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Mar 19 15:33:25 2008 -0700
@@ -265,6 +265,20 @@
         UseXmmRegToRegMoveAll = false;
       }
     }
+    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2F = true;
+      } else {
+        UseXmmI2F = false;
+      }
+    }
+    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
+      if( supports_sse4a() ) {
+        UseXmmI2D = true;
+      } else {
+        UseXmmI2D = false;
+      }
+    }
   }
 
   if( is_intel() ) { // Intel cpus specific settings
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Mar 19 15:33:25 2008 -0700
@@ -10970,7 +10970,7 @@
 %}
 
 instruct convI2XD_reg(regXD dst, eRegI src) %{
-  predicate( UseSSE>=2 );
+  predicate( UseSSE>=2 && !UseXmmI2D );
   match(Set dst (ConvI2D src));
   format %{ "CVTSI2SD $dst,$src" %}
   opcode(0xF2, 0x0F, 0x2A);
@@ -10987,6 +10987,20 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct convXI2XD_reg(regXD dst, eRegI src)
+%{
+  predicate( UseSSE>=2 && UseXmmI2D );
+  match(Set dst (ConvI2D src));
+
+  format %{ "MOVD  $dst,$src\n\t"
+            "CVTDQ2PD $dst,$dst\t# i2d" %}
+  ins_encode %{
+    __ movd($dst$$XMMRegister, $src$$Register);
+    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow); // XXX
+%}
+
 instruct convI2D_mem(regD dst, memory mem) %{
   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2D (LoadI mem)));
@@ -11062,7 +11076,7 @@
 
 // Convert an int to a float in xmm; no rounding step needed.
 instruct convI2X_reg(regX dst, eRegI src) %{
-  predicate(UseSSE>=1);
+  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
   match(Set dst (ConvI2F src));
   format %{ "CVTSI2SS $dst, $src" %}
 
@@ -11071,6 +11085,20 @@
   ins_pipe( pipe_slow );
 %}
 
+ instruct convXI2X_reg(regX dst, eRegI src)
+%{
+  predicate( UseSSE>=2 && UseXmmI2F );
+  match(Set dst (ConvI2F src));
+
+  format %{ "MOVD  $dst,$src\n\t"
+            "CVTDQ2PS $dst,$dst\t# i2f" %}
+  ins_encode %{
+    __ movd($dst$$XMMRegister, $src$$Register);
+    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow); // XXX
+%}
+
 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
   match(Set dst (ConvI2L src));
   effect(KILL cr);
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Mar 19 15:33:25 2008 -0700
@@ -10098,6 +10098,7 @@
 
 instruct convI2F_reg_reg(regF dst, rRegI src)
 %{
+  predicate(!UseXmmI2F);
   match(Set dst (ConvI2F src));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
@@ -10118,6 +10119,7 @@
 
 instruct convI2D_reg_reg(regD dst, rRegI src)
 %{
+  predicate(!UseXmmI2D);
   match(Set dst (ConvI2D src));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
@@ -10136,6 +10138,34 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
+instruct convXI2F_reg(regF dst, rRegI src)
+%{
+  predicate(UseXmmI2F);
+  match(Set dst (ConvI2F src));
+
+  format %{ "movdl $dst, $src\n\t"
+            "cvtdq2psl $dst, $dst\t# i2f" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow); // XXX
+%}
+
+instruct convXI2D_reg(regD dst, rRegI src)
+%{
+  predicate(UseXmmI2D);
+  match(Set dst (ConvI2D src));
+
+  format %{ "movdl $dst, $src\n\t"
+            "cvtdq2pdl $dst, $dst\t# i2d" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow); // XXX
+%}
+
 instruct convL2F_reg_reg(regF dst, rRegL src)
 %{
   match(Set dst (ConvL2F src));
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Mar 19 15:14:36 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Mar 19 15:33:25 2008 -0700
@@ -949,6 +949,12 @@
   product(bool, UseXmmRegToRegMoveAll, false,                               \
           "Copy all XMM register bits when moving value between registers") \
                                                                             \
+  product(bool, UseXmmI2D, false,                                           \
+          "Use SSE2 CVTDQ2PD instruction to convert Integer to Double")     \
+                                                                            \
+  product(bool, UseXmmI2F, false,                                           \
+          "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
+                                                                            \
   product(intx, FieldsAllocationStyle, 1,                                   \
           "0 - type based with oops first, 1 - with oops last")             \
                                                                             \