6708714: Optimize long LShift on 32-bits x86
Summary: For small (1-3 bit) long left shifts in the 32-bit VM, use sequences of add+adc instructions instead of shld+shl on new AMD CPUs.
Reviewed-by: never
Contributed-by: shrinivas.joshi@amd.com
--- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp Sat Jun 21 10:03:31 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp Mon Jun 23 14:11:12 2008 -0700
@@ -307,6 +307,10 @@
// Use it on new AMD cpus starting from Opteron.
UseAddressNop = true;
}
+ if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
+ // Use it on new AMD cpus starting from Opteron.
+ UseNewLongLShift = true;
+ }
if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
if( supports_sse4a() ) {
UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Sat Jun 21 10:03:31 2008 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Mon Jun 23 14:11:12 2008 -0700
@@ -4754,6 +4754,33 @@
interface(CONST_INTER);
%}
+operand immI_1() %{ // Integer immediate operand restricted to the constant 1
+ predicate( n->get_int() == 1 ); // accept the ConI node only when its value is exactly 1
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_2() %{ // Integer immediate operand restricted to the constant 2
+ predicate( n->get_int() == 2 ); // accept the ConI node only when its value is exactly 2
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_3() %{ // Integer immediate operand restricted to the constant 3
+ predicate( n->get_int() == 3 ); // accept the ConI node only when its value is exactly 3
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// Pointer Immediate
operand immP() %{
match(ConP);
@@ -8943,6 +8970,63 @@
ins_pipe( ialu_reg_long_mem );
%}
+// Shift Left Long by 1 (only when UseNewLongLShift is set): one ADD/ADC pair doubles the 64-bit value in dst
+instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift); // rule is eligible only while the UseNewLongLShift flag is true
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr); // ADD and ADC both write the flags register
+ ins_cost(100);
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // lo += lo; the bit shifted out of lo is left in the carry flag
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // hi += hi + carry, so the order of the pair matters
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 2 (only when UseNewLongLShift is set): two ADD/ADC pairs, each doubling the 64-bit value in dst
+instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift); // rule is eligible only while the UseNewLongLShift flag is true
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr); // ADD and ADC both write the flags register
+ ins_cost(100);
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // first 1-bit shift: lo += lo, carry receives the bit shifted out of lo
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // hi += hi + carry
+ __ addl($dst$$Register,$dst$$Register); // second 1-bit shift, same pattern
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 3 (only when UseNewLongLShift is set): three ADD/ADC pairs, each doubling the 64-bit value in dst
+instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
+ predicate(UseNewLongLShift); // rule is eligible only while the UseNewLongLShift flag is true
+ match(Set dst (LShiftL dst cnt));
+ effect(KILL cr); // ADD and ADC both write the flags register
+ ins_cost(100);
+ format %{ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi\n\t"
+ "ADD $dst.lo,$dst.lo\n\t"
+ "ADC $dst.hi,$dst.hi" %}
+ ins_encode %{
+ __ addl($dst$$Register,$dst$$Register); // first 1-bit shift: lo += lo, carry receives the bit shifted out of lo
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // hi += hi + carry
+ __ addl($dst$$Register,$dst$$Register); // second 1-bit shift, same pattern
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ __ addl($dst$$Register,$dst$$Register); // third 1-bit shift, same pattern
+ __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
+ %}
+ ins_pipe( ialu_reg_long );
+%}
+
// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
match(Set dst (LShiftL dst cnt));
--- a/hotspot/src/share/vm/runtime/globals.hpp Sat Jun 21 10:03:31 2008 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Mon Jun 23 14:11:12 2008 -0700
@@ -946,6 +946,9 @@
diagnostic(bool, UseIncDec, true, \
"Use INC, DEC instructions on x86") \
\
+ product(bool, UseNewLongLShift, false, \
+ "Use optimized bitwise shift left") \
+ \
product(bool, UseStoreImmI16, true, \
"Use store immediate 16-bits value instruction on x86") \
\