7006044: materialize cheap non-oop pointers on 64-bit SPARC
Summary: After 6961690 we load non-oop pointers from the constant table, even though some of them could easily be materialized in a few instructions.
Reviewed-by: never, kvn
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Mon Dec 13 22:41:03 2010 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Tue Dec 14 12:44:30 2010 -0800
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
@@ -1327,37 +1328,38 @@
}
-int MacroAssembler::size_of_sethi(address a, bool worst_case) {
+int MacroAssembler::insts_for_sethi(address a, bool worst_case) {
#ifdef _LP64
- if (worst_case) return 7;
- intptr_t iaddr = (intptr_t)a;
- int hi32 = (int)(iaddr >> 32);
- int lo32 = (int)(iaddr);
- int inst_count;
- if (hi32 == 0 && lo32 >= 0)
- inst_count = 1;
- else if (hi32 == -1)
- inst_count = 2;
+ if (worst_case) return 7;
+ intptr_t iaddr = (intptr_t) a;
+ int msb32 = (int) (iaddr >> 32);
+ int lsb32 = (int) (iaddr);
+ int count;
+ if (msb32 == 0 && lsb32 >= 0)
+ count = 1;
+ else if (msb32 == -1)
+ count = 2;
else {
- inst_count = 2;
- if ( hi32 & 0x3ff )
- inst_count++;
- if ( lo32 & 0xFFFFFC00 ) {
- if( (lo32 >> 20) & 0xfff ) inst_count += 2;
- if( (lo32 >> 10) & 0x3ff ) inst_count += 2;
+ count = 2;
+ if (msb32 & 0x3ff)
+ count++;
+ if (lsb32 & 0xFFFFFC00 ) {
+ if ((lsb32 >> 20) & 0xfff) count += 2;
+ if ((lsb32 >> 10) & 0x3ff) count += 2;
}
}
- return BytesPerInstWord * inst_count;
+ return count;
#else
- return BytesPerInstWord;
+ return 1;
#endif
}
-int MacroAssembler::worst_case_size_of_set() {
- return size_of_sethi(NULL, true) + 1;
+int MacroAssembler::worst_case_insts_for_set() {
+ return insts_for_sethi(NULL, true) + 1;
}
+// Keep in sync with MacroAssembler::insts_for_internal_set
void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
intptr_t value = addrlit.value();
@@ -1379,6 +1381,23 @@
}
}
+// Keep in sync with MacroAssembler::internal_set
+int MacroAssembler::insts_for_internal_set(intptr_t value) {
+ // can optimize: value fits in a signed 13-bit immediate, one instruction
+ if (-4096 <= value && value <= 4095) {
+ return 1;
+ }
+ if (inv_hi22(hi22(value)) == value) {
+ return insts_for_sethi((address) value);
+ }
+ int count = insts_for_sethi((address) value);
+ AddressLiteral al(value);
+ if (al.low10() != 0) {
+ count++;
+ }
+ return count;
+}
+
void MacroAssembler::set(const AddressLiteral& al, Register d) {
internal_set(al, d, false);
}
@@ -1443,11 +1462,11 @@
}
}
-int MacroAssembler::size_of_set64(jlong value) {
+int MacroAssembler::insts_for_set64(jlong value) {
v9_dep();
- int hi = (int)(value >> 32);
- int lo = (int)(value & ~0);
+ int hi = (int) (value >> 32);
+ int lo = (int) (value & ~0);
int count = 0;
// (Matcher::isSimpleConstant64 knows about the following optimizations.)
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Mon Dec 13 22:41:03 2010 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Tue Dec 14 12:44:30 2010 -0800
@@ -1884,23 +1884,24 @@
void sethi(const AddressLiteral& addrlit, Register d);
void patchable_sethi(const AddressLiteral& addrlit, Register d);
- // compute the size of a sethi/set
- static int size_of_sethi( address a, bool worst_case = false );
- static int worst_case_size_of_set();
+ // compute the number of instructions for a sethi/set
+ static int insts_for_sethi( address a, bool worst_case = false );
+ static int worst_case_insts_for_set();
// set may be either setsw or setuw (high 32 bits may be zero or sign)
private:
void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable);
+ static int insts_for_internal_set(intptr_t value);
public:
void set(const AddressLiteral& addrlit, Register d);
void set(intptr_t value, Register d);
void set(address addr, Register d, RelocationHolder const& rspec);
+ static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); }
+
void patchable_set(const AddressLiteral& addrlit, Register d);
void patchable_set(intptr_t value, Register d);
void set64(jlong value, Register d, Register tmp);
-
- // Compute size of set64.
- static int size_of_set64(jlong value);
+ static int insts_for_set64(jlong value);
// sign-extend 32 to 64
inline void signx( Register s, Register d ) { sra( s, G0, d); }
--- a/hotspot/src/cpu/sparc/vm/sparc.ad Mon Dec 13 22:41:03 2010 -0800
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad Tue Dec 14 12:44:30 2010 -0800
@@ -1086,9 +1086,9 @@
uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
if (UseRDPCForConstantTableBase) {
// This is really the worst case but generally it's only 1 instruction.
- return 4 /*rdpc*/ + 4 /*sub*/ + MacroAssembler::worst_case_size_of_set();
+ return (1 /*rdpc*/ + 1 /*sub*/ + MacroAssembler::worst_case_insts_for_set()) * BytesPerInstWord;
} else {
- return MacroAssembler::worst_case_size_of_set();
+ return MacroAssembler::worst_case_insts_for_set() * BytesPerInstWord;
}
}
@@ -1240,7 +1240,7 @@
int MachEpilogNode::safepoint_offset() const {
assert( do_polling(), "no return for this epilog node");
- return MacroAssembler::size_of_sethi(os::get_polling_page());
+ return MacroAssembler::insts_for_sethi(os::get_polling_page()) * BytesPerInstWord;
}
//=============================================================================
@@ -3553,7 +3553,8 @@
interface(CONST_INTER);
%}
-// Pointer Immediate: 32 or 64-bit
+#ifdef _LP64
+// Pointer Immediate: 64-bit
operand immP_set() %{
predicate(!VM_Version::is_niagara1_plus());
match(ConP);
@@ -3564,10 +3565,10 @@
interface(CONST_INTER);
%}
-// Pointer Immediate: 32 or 64-bit
+// Pointer Immediate: 64-bit, oop or expensive (materialize in > 3 instructions)
// From Niagara2 processors on a load should be better than materializing.
operand immP_load() %{
- predicate(VM_Version::is_niagara1_plus());
+ predicate(VM_Version::is_niagara1_plus() && (n->bottom_type()->isa_oop_ptr() || (MacroAssembler::insts_for_set(n->get_ptr()) > 3)));
match(ConP);
op_cost(5);
@@ -3576,6 +3577,18 @@
interface(CONST_INTER);
%}
+// Pointer Immediate: 64-bit, non-oop and cheap (materialize in <= 3 instructions)
+operand immP_no_oop_cheap() %{
+ predicate(VM_Version::is_niagara1_plus() && !n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set(n->get_ptr()) <= 3));
+ match(ConP);
+
+ op_cost(5);
+ // formats are generated automatically for constants and base registers
+ format %{ %}
+ interface(CONST_INTER);
+%}
+#endif
+
operand immP13() %{
predicate((-4096 < n->get_ptr()) && (n->get_ptr() <= 4095));
match(ConP);
@@ -3673,7 +3686,7 @@
// Long Immediate: cheap (materialize in <= 3 instructions)
operand immL_cheap() %{
- predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::size_of_set64(n->get_long()) <= 3);
+ predicate(!VM_Version::is_niagara1_plus() || MacroAssembler::insts_for_set64(n->get_long()) <= 3);
match(ConL);
op_cost(0);
@@ -3683,7 +3696,7 @@
// Long Immediate: expensive (materialize in > 3 instructions)
operand immL_expensive() %{
- predicate(VM_Version::is_niagara1_plus() && MacroAssembler::size_of_set64(n->get_long()) > 3);
+ predicate(VM_Version::is_niagara1_plus() && MacroAssembler::insts_for_set64(n->get_long()) > 3);
match(ConL);
op_cost(0);
@@ -6094,8 +6107,18 @@
ins_cost(MEMORY_REF_COST);
format %{ "LD [$constanttablebase + $constantoffset],$dst\t! load from constant table: ptr=$con" %}
ins_encode %{
- RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
- __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
+ RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
+ __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
+ %}
+ ins_pipe(loadConP);
+%}
+
+instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
+ match(Set dst con);
+ ins_cost(DEFAULT_COST * 3/2);
+ format %{ "SET $con,$dst\t! non-oop ptr" %}
+ ins_encode %{
+ __ set($con$$constant, $dst$$Register);
%}
ins_pipe(loadConP);
%}