8154826: AArch64: take advantage better of base + shifted offset addressing mode
Summary: reshape address subtree to fit aarch64 addressing mode
Reviewed-by: kvn, aph
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon May 09 11:34:09 2016 +0200
@@ -996,6 +996,7 @@
source_hpp %{
#include "gc/shared/cardTableModRefBS.hpp"
+#include "opto/addnode.hpp"
class CallStubImpl {
@@ -1061,6 +1062,9 @@
// predicate controlling translation of StoreCM
bool unnecessary_storestore(const Node *storecm);
+
+ // predicate controlling addressing modes
+ bool size_fits_all_mem_uses(AddPNode* addp, int shift);
%}
source %{
@@ -3449,11 +3453,6 @@
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
-// Should the Matcher clone shifts on addressing modes, expecting them
-// to be subsumed into complex addressing expressions or compute them
-// into registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = false;
-
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
@@ -3572,8 +3571,119 @@
return FP_REG_mask();
}
+// Returns true iff every memory access that directly uses addp accesses
+// exactly (1 << shift) bytes; users of addp that are not Mem nodes are ignored.
+bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
+  for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+    Node* u = addp->fast_out(i);
+    if (u->is_Mem()) {
+      int opsize = u->as_Mem()->memory_size();
+      assert(opsize > 0, "unexpected memory operand size");
+      if (opsize != (1<<shift)) { return false; }
+    }
+  }
+  return true;
+}
+
const bool Matcher::convi2l_type_required = false;
+// Decide whether the address expression rooted at AddP m is cloned into the
+// addressing mode of its users rather than computed into a register.
+// Returns true if m's inputs were pushed on mstack here (caller skips m).
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ if (clone_base_plus_offset_address(m, mstack, address_visited)) {
+ return true;
+ }
+ // Otherwise try base + (index << con) and base + ConvI2L(index).
+ Node *off = m->in(AddPNode::Offset);
+ if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && // every Mem user of m must access (1 << con) bytes
+ // Are there other uses besides address expressions?
+ !is_visited(off)) {
+ address_visited.set(off->_idx); // Flag as address_visited
+ mstack.push(off->in(2), Visit);
+ Node *conv = off->in(1);
+ if (conv->Opcode() == Op_ConvI2L &&
+ // Are there other uses besides address expressions?
+ !is_visited(conv)) {
+ address_visited.set(conv->_idx); // Flag as address_visited
+ mstack.push(conv->in(1), Pre_Visit); // skip the ConvI2L, continue with its input
+ } else {
+ mstack.push(conv, Pre_Visit);
+ }
+ address_visited.test_set(m->_idx); // Flag as address_visited
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ return true;
+ } else if (off->Opcode() == Op_ConvI2L &&
+ // Are there other uses besides address expressions?
+ !is_visited(off)) {
+ address_visited.test_set(m->_idx); // Flag as address_visited
+ address_visited.set(off->_idx); // Flag as address_visited
+ mstack.push(off->in(1), Pre_Visit); // skip the ConvI2L, continue with its input
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ return true;
+ }
+ return false;
+}
+
+// Transform:
+// (AddP base (AddP base address (LShiftL index con)) offset)
+// into:
+// (AddP base (AddP base address offset) (LShiftL index con))
+// to take full advantage of ARM's addressing modes
+void Compile::reshape_address(AddPNode* addp) {
+ Node *addr = addp->in(AddPNode::Address);
+ if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
+ const AddPNode *addp2 = addr->as_AddP();
+ if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
+ addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
+ size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
+ addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {
+ // Inner offset is a constant shift sized for all memory users of addp, or a ConvI2L.
+ // Any use that can't embed the address computation?
+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+ Node* u = addp->fast_out(i);
+ if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+ return; // leave the graph unchanged
+ }
+ }
+ // Swap the two offsets: addp's offset goes to a new inner AddP, addp2's offset becomes addp's.
+ Node* off = addp->in(AddPNode::Offset);
+ Node* addr2 = addp2->in(AddPNode::Address);
+ Node* base = addp->in(AddPNode::Base);
+
+ Node* new_addr = NULL;
+ // Check whether the graph already has the new AddP we need
+ // before we create one (no GVN available here).
+ for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
+ Node* u = addr2->fast_out(i);
+ if (u->is_AddP() &&
+ u->in(AddPNode::Base) == base &&
+ u->in(AddPNode::Address) == addr2 &&
+ u->in(AddPNode::Offset) == off) {
+ new_addr = u;
+ break;
+ }
+ }
+
+ if (new_addr == NULL) {
+ new_addr = new AddPNode(base, addr2, off);
+ }
+ Node* new_off = addp2->in(AddPNode::Offset);
+ addp->set_req(AddPNode::Address, new_addr);
+ if (addr->outcnt() == 0) { // old inner AddP has no users left
+ addr->disconnect_inputs(NULL, this);
+ }
+ addp->set_req(AddPNode::Offset, new_off);
+ if (off->outcnt() == 0) { // old offset has no users left
+ off->disconnect_inputs(NULL, this);
+ }
+ }
+ }
+}
+
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
@@ -3643,12 +3753,10 @@
// encoder that the index needs to be sign extended, so we have to
// enumerate all the cases.
switch (opcode) {
- case INDINDEXSCALEDOFFSETI2L:
case INDINDEXSCALEDI2L:
- case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
- case INDINDEXOFFSETI2L:
- case INDINDEXOFFSETI2LN:
+ case INDINDEXI2L:
+ case INDINDEXI2LN:
scale = Address::sxtw(size);
break;
default:
@@ -3658,12 +3766,8 @@
if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
} else {
- if (disp == 0) {
- (masm.*insn)(reg, Address(base, as_Register(index), scale));
- } else {
- masm.lea(rscratch1, Address(base, disp));
- (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
- }
+ assert(disp == 0, "unsupported address mode: disp = %d", disp);
+ (masm.*insn)(reg, Address(base, as_Register(index), scale));
}
}
@@ -3674,9 +3778,7 @@
Address::extend scale;
switch (opcode) {
- case INDINDEXSCALEDOFFSETI2L:
case INDINDEXSCALEDI2L:
- case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
scale = Address::sxtw(size);
break;
@@ -3687,12 +3789,8 @@
if (index == -1) {
(masm.*insn)(reg, Address(base, disp));
} else {
- if (disp == 0) {
- (masm.*insn)(reg, Address(base, as_Register(index), scale));
- } else {
- masm.lea(rscratch1, Address(base, disp));
- (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
- }
+ assert(disp == 0, "unsupported address mode: disp = %d", disp);
+ (masm.*insn)(reg, Address(base, as_Register(index), scale));
}
}
@@ -6106,65 +6204,10 @@
%}
%}
-operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off)
-%{
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP reg (LShiftL lreg scale)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $lreg lsl($scale), $off" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($lreg);
- scale($scale);
- disp($off);
- %}
-%}
-
-operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off)
-%{
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP reg (LShiftL lreg scale)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $lreg lsl($scale), $off" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($lreg);
- scale($scale);
- disp($off);
- %}
-%}
-
-operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
-%{
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP reg (ConvI2L ireg)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $ireg, $off I2L" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale(0x0);
- disp($off);
- %}
-%}
-
-operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
-%{
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $ireg sxtw($scale), $off I2L" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale($scale);
- disp($off);
- %}
-%}
-
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
+ predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
match(AddP reg (LShiftL (ConvI2L ireg) scale));
op_cost(0);
format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
@@ -6179,6 +6222,7 @@
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
+ predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
match(AddP reg (LShiftL lreg scale));
op_cost(0);
format %{ "$reg, $lreg lsl($scale)" %}
@@ -6190,6 +6234,20 @@
%}
%}
+operand indIndexI2L(iRegP reg, iRegI ireg)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg (ConvI2L ireg));
+ op_cost(0);
+ format %{ "$reg, $ireg, 0, I2L" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index($ireg); // int index register, matched beneath a ConvI2L
+ scale(0x0); // index is used unshifted
+ disp(0x0); // no displacement
+ %}
+%}
+
operand indIndex(iRegP reg, iRegL lreg)
%{
constraint(ALLOC_IN_RC(ptr_reg));
@@ -6331,69 +6389,9 @@
%}
%}
-operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off)
-%{
- predicate(Universe::narrow_oop_shift() == 0);
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
- op_cost(0);
- format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($lreg);
- scale($scale);
- disp($off);
- %}
-%}
-
-operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off)
-%{
- predicate(Universe::narrow_oop_shift() == 0);
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($lreg);
- scale($scale);
- disp($off);
- %}
-%}
-
-operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
-%{
- predicate(Universe::narrow_oop_shift() == 0);
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $ireg, $off I2L\t# narrow" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale(0x0);
- disp($off);
- %}
-%}
-
-operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
-%{
- predicate(Universe::narrow_oop_shift() == 0);
- constraint(ALLOC_IN_RC(ptr_reg));
- match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
- op_cost(INSN_COST);
- format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale($scale);
- disp($off);
- %}
-%}
-
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
- predicate(Universe::narrow_oop_shift() == 0);
+ predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
op_cost(0);
@@ -6408,7 +6406,7 @@
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
- predicate(Universe::narrow_oop_shift() == 0);
+ predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (LShiftL lreg scale));
op_cost(0);
@@ -6421,6 +6419,21 @@
%}
%}
+operand indIndexI2LN(iRegN reg, iRegI ireg)
+%{
+ predicate(Universe::narrow_oop_shift() == 0);
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP (DecodeN reg) (ConvI2L ireg));
+ op_cost(0);
+ format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
+ interface(MEMORY_INTER) %{
+ base($reg); // narrow oop register, matched beneath a DecodeN
+ index($ireg); // int index register, matched beneath a ConvI2L
+ scale(0x0); // index is used unshifted
+ disp(0x0); // no displacement
+ %}
+%}
+
operand indIndexN(iRegN reg, iRegL lreg)
%{
predicate(Universe::narrow_oop_shift() == 0);
@@ -6641,9 +6654,8 @@
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
-opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
- indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
-
+opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
+ indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
--- a/hotspot/src/cpu/ppc/vm/ppc.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad Mon May 09 11:34:09 2016 +0200
@@ -817,6 +817,16 @@
source %{
+// Called by the shared Matcher to decide whether an AddP address expression
+// is cloned into its users. PPC only clones the (base + constant offset)
+// form handled by clone_base_plus_offset_address().
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+void Compile::reshape_address(AddPNode* addp) { // no address reshaping on PPC: addp is left untouched
+}
+
// Optimize load-acquire.
//
// Check if acquire is unnecessary due to following operation that does
@@ -2157,11 +2167,6 @@
// Power6 requires postalloc expand (see block.cpp for description of postalloc expand).
const bool Matcher::require_postalloc_expand = true;
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs.
-const bool Matcher::clone_shift_expressions = false;
-
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// PowerPC requires masked shift counts.
--- a/hotspot/src/cpu/sparc/vm/sparc.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad Mon May 09 11:34:09 2016 +0200
@@ -1995,11 +1995,6 @@
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = false;
-
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
@@ -2133,8 +2128,19 @@
return L7_REGP_mask();
}
+
const bool Matcher::convi2l_type_required = true;
+// Called by the shared Matcher to decide whether an AddP address expression
+// is cloned into its users. SPARC only clones the (base + constant offset)
+// form handled by clone_base_plus_offset_address().
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+void Compile::reshape_address(AddPNode* addp) { // no address reshaping on SPARC: addp is left untouched
+}
+
%}
--- a/hotspot/src/cpu/x86/vm/x86.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/x86.ad Mon May 09 11:34:09 2016 +0200
@@ -1586,6 +1586,8 @@
source %{
+#include "opto/addnode.hpp"
+
// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
@@ -1861,8 +1863,79 @@
return false;
}
+
const bool Matcher::convi2l_type_required = true;
+// If shift is an unvisited left shift by a constant <= 3, push its inputs on mstack and return true; else return false.
+static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
+ if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
+ shift->in(2)->get_int() <= 3 &&
+ // Are there other uses besides address expressions?
+ !matcher->is_visited(shift)) {
+ address_visited.set(shift->_idx); // Flag as address_visited
+ mstack.push(shift->in(2), Matcher::Visit);
+ Node *conv = shift->in(1);
+#ifdef _LP64
+ // Allow the Matcher to match the rule which bypasses the
+ // ConvI2L operation for an array index on LP64
+ // if the index value is known to be non-negative.
+ if (conv->Opcode() == Op_ConvI2L &&
+ conv->as_Type()->type()->is_long()->_lo >= 0 &&
+ // Are there other uses besides address expressions?
+ !matcher->is_visited(conv)) {
+ address_visited.set(conv->_idx); // Flag as address_visited
+ mstack.push(conv->in(1), Matcher::Pre_Visit);
+ } else
+#endif
+ mstack.push(conv, Matcher::Pre_Visit); // else-branch under _LP64, unconditional otherwise
+ return true;
+ }
+ return false;
+}
+
+// Decide whether the address expression rooted at AddP m is cloned into the
+// addressing mode of its users rather than computed into a register.
+// Returns true if m's inputs were pushed on mstack here (caller skips m).
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ Node *off = m->in(AddPNode::Offset);
+ if (off->is_Con()) {
+ address_visited.test_set(m->_idx); // Flag as address_visited
+ Node *adr = m->in(AddPNode::Address);
+
+ // Intel can handle 2 adds in addressing mode
+ // AtomicAdd is not an addressing expression.
+ // Cheap to find it by looking for screwy base.
+ if (adr->is_AddP() &&
+ !adr->in(AddPNode::Base)->is_top() &&
+ // Are there other uses besides address expressions?
+ !is_visited(adr)) {
+ address_visited.set(adr->_idx); // Flag as address_visited
+ Node *shift = adr->in(AddPNode::Offset);
+ if (!clone_shift(shift, this, mstack, address_visited)) {
+ mstack.push(shift, Pre_Visit);
+ }
+ mstack.push(adr->in(AddPNode::Address), Pre_Visit);
+ mstack.push(adr->in(AddPNode::Base), Pre_Visit);
+ } else {
+ mstack.push(adr, Pre_Visit);
+ }
+
+ // Clone X+offset as it also folds into most addressing expressions
+ mstack.push(off, Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ return true;
+ } else if (clone_shift(off, this, mstack, address_visited)) { // non-constant offset: base + (index << small constant)
+ address_visited.test_set(m->_idx); // Flag as address_visited
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ return true;
+ }
+ return false;
+}
+
+void Compile::reshape_address(AddPNode* addp) { // no address reshaping on x86: addp is left untouched
+}
+
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Mon May 09 11:34:09 2016 +0200
@@ -1438,11 +1438,6 @@
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
-// Should the Matcher clone shifts on addressing modes, expecting them to
-// be subsumed into complex addressing expressions or compute them into
-// registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = true;
-
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Mon May 09 11:34:09 2016 +0200
@@ -1646,11 +1646,6 @@
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;
-// Should the Matcher clone shifts on addressing modes, expecting them
-// to be subsumed into complex addressing expressions or compute them
-// into registers? True for Intel but false for most RISCs
-const bool Matcher::clone_shift_expressions = true;
-
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
--- a/hotspot/src/share/vm/opto/compile.cpp Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp Mon May 09 11:34:09 2016 +0200
@@ -2905,6 +2905,8 @@
}
}
#endif
+ // platform dependent reshaping of the address expression
+ reshape_address(n->as_AddP());
break;
}
--- a/hotspot/src/share/vm/opto/compile.hpp Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/share/vm/opto/compile.hpp Mon May 09 11:34:09 2016 +0200
@@ -44,6 +44,7 @@
#include "trace/tracing.hpp"
#include "utilities/ticks.hpp"
+class AddPNode;
class Block;
class Bundle;
class C2Compiler;
@@ -579,6 +580,8 @@
int _scratch_const_size; // For temporary code buffers.
bool _in_scratch_emit_size; // true when in scratch_emit_size.
+ void reshape_address(AddPNode* n);
+
public:
// Accessors
--- a/hotspot/src/share/vm/opto/matcher.cpp Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp Mon May 09 11:34:09 2016 +0200
@@ -963,44 +963,6 @@
}
#endif
-
-//------------------------------MStack-----------------------------------------
-// State and MStack class used in xform() and find_shared() iterative methods.
-enum Node_State { Pre_Visit, // node has to be pre-visited
- Visit, // visit node
- Post_Visit, // post-visit node
- Alt_Post_Visit // alternative post-visit path
- };
-
-class MStack: public Node_Stack {
- public:
- MStack(int size) : Node_Stack(size) { }
-
- void push(Node *n, Node_State ns) {
- Node_Stack::push(n, (uint)ns);
- }
- void push(Node *n, Node_State ns, Node *parent, int indx) {
- ++_inode_top;
- if ((_inode_top + 1) >= _inode_max) grow();
- _inode_top->node = parent;
- _inode_top->indx = (uint)indx;
- ++_inode_top;
- _inode_top->node = n;
- _inode_top->indx = (uint)ns;
- }
- Node *parent() {
- pop();
- return node();
- }
- Node_State state() const {
- return (Node_State)index();
- }
- void set_state(Node_State ns) {
- set_index((uint)ns);
- }
-};
-
-
//------------------------------xform------------------------------------------
// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
// Node in new-space. Given a new-space Node, recursively walk his children.
@@ -2046,37 +2008,22 @@
}
#endif // X86
-// A method-klass-holder may be passed in the inline_cache_reg
-// and then expanded into the inline_cache_reg and a method_oop register
-// defined in ad_<arch>.cpp
-
-// Check for shift by small constant as well
-static bool clone_shift(Node* shift, Matcher* matcher, MStack& mstack, VectorSet& address_visited) {
- if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
- shift->in(2)->get_int() <= 3 &&
- // Are there other uses besides address expressions?
- !matcher->is_visited(shift)) {
- address_visited.set(shift->_idx); // Flag as address_visited
- mstack.push(shift->in(2), Visit);
- Node *conv = shift->in(1);
-#ifdef _LP64
- // Allow Matcher to match the rule which bypass
- // ConvI2L operation for an array index on LP64
- // if the index value is positive.
- if (conv->Opcode() == Op_ConvI2L &&
- conv->as_Type()->type()->is_long()->_lo >= 0 &&
- // Are there other uses besides address expressions?
- !matcher->is_visited(conv)) {
- address_visited.set(conv->_idx); // Flag as address_visited
- mstack.push(conv->in(1), Pre_Visit);
- } else
-#endif
- mstack.push(conv, Pre_Visit);
+bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+ Node *off = m->in(AddPNode::Offset);
+ if (off->is_Con()) { // only an AddP with a constant offset is handled here
+ address_visited.test_set(m->_idx); // Flag as address_visited
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
+ // Clone X+offset as it also folds into most addressing expressions
+ mstack.push(off, Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
return true;
}
return false;
}
+// A method-klass-holder may be passed in the inline_cache_reg
+// and then expanded into the inline_cache_reg and a method_oop register
+// defined in ad_<arch>.cpp
//------------------------------find_shared------------------------------------
// Set bits if Node is shared or otherwise a root
@@ -2251,40 +2198,9 @@
// But they should be marked as shared if there are other uses
// besides address expressions.
- Node *off = m->in(AddPNode::Offset);
- if (off->is_Con()) {
- address_visited.test_set(m->_idx); // Flag as address_visited
- Node *adr = m->in(AddPNode::Address);
-
- // Intel, ARM and friends can handle 2 adds in addressing mode
- if( clone_shift_expressions && adr->is_AddP() &&
- // AtomicAdd is not an addressing expression.
- // Cheap to find it by looking for screwy base.
- !adr->in(AddPNode::Base)->is_top() &&
- // Are there other uses besides address expressions?
- !is_visited(adr) ) {
- address_visited.set(adr->_idx); // Flag as address_visited
- Node *shift = adr->in(AddPNode::Offset);
- if (!clone_shift(shift, this, mstack, address_visited)) {
- mstack.push(shift, Pre_Visit);
- }
- mstack.push(adr->in(AddPNode::Address), Pre_Visit);
- mstack.push(adr->in(AddPNode::Base), Pre_Visit);
- } else { // Sparc, Alpha, PPC and friends
- mstack.push(adr, Pre_Visit);
- }
-
- // Clone X+offset as it also folds into most addressing expressions
- mstack.push(off, Visit);
- mstack.push(m->in(AddPNode::Base), Pre_Visit);
- continue; // for(int i = ...)
- } else if (clone_shift_expressions &&
- clone_shift(off, this, mstack, address_visited)) {
- address_visited.test_set(m->_idx); // Flag as address_visited
- mstack.push(m->in(AddPNode::Address), Pre_Visit);
- mstack.push(m->in(AddPNode::Base), Pre_Visit);
- continue;
- } // if( off->is_Con() )
+ if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) {
+ continue;
+ }
} // if( mem_op &&
mstack.push(m, Pre_Visit);
} // for(int i = ...)
--- a/hotspot/src/share/vm/opto/matcher.hpp Mon May 09 01:21:55 2016 -0700
+++ b/hotspot/src/share/vm/opto/matcher.hpp Mon May 09 11:34:09 2016 +0200
@@ -40,6 +40,45 @@
//---------------------------Matcher-------------------------------------------
class Matcher : public PhaseTransform {
friend class VMStructs;
+
+public:
+
+ // Traversal state of a node on an MStack; used by the iterative xform() and find_shared() methods.
+ enum Node_State { Pre_Visit, // node has to be pre-visited
+ Visit, // visit node
+ Post_Visit, // post-visit node
+ Alt_Post_Visit // alternative post-visit path
+ };
+
+ class MStack: public Node_Stack {
+ public:
+ MStack(int size) : Node_Stack(size) { }
+ // Each entry's Node_State is passed to and read back from Node_Stack as its uint index value.
+ void push(Node *n, Node_State ns) {
+ Node_Stack::push(n, (uint)ns);
+ }
+ void push(Node *n, Node_State ns, Node *parent, int indx) { // pushes two entries: (parent, indx), then (n, ns)
+ ++_inode_top;
+ if ((_inode_top + 1) >= _inode_max) grow();
+ _inode_top->node = parent;
+ _inode_top->indx = (uint)indx;
+ ++_inode_top;
+ _inode_top->node = n;
+ _inode_top->indx = (uint)ns;
+ }
+ Node *parent() { // pops the top entry, then returns node()
+ pop();
+ return node();
+ }
+ Node_State state() const {
+ return (Node_State)index();
+ }
+ void set_state(Node_State ns) {
+ set_index((uint)ns);
+ }
+ };
+
+private:
// Private arena of State objects
ResourceArea _states_arena;
@@ -411,7 +450,9 @@
// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
- static const bool clone_shift_expressions;
+ bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited);
+ // Clone base + offset address expression
+ bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited);
static bool narrow_oop_use_complex_address();
static bool narrow_klass_use_complex_address();