# HG changeset patch # User roland # Date 1462786449 -7200 # Node ID 0ddb6f84e138b9eed11539059128f88bebfd3b34 # Parent 20b85a0ba7966bcb558db90cc80f4efd1fd9083c 8154826: AArch64: take advantage better of base + shifted offset addressing mode Summary: reshape address subtree to fit aarch64 addressing mode Reviewed-by: kvn, aph diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/aarch64/vm/aarch64.ad --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon May 09 11:34:09 2016 +0200 @@ -996,6 +996,7 @@ source_hpp %{ #include "gc/shared/cardTableModRefBS.hpp" +#include "opto/addnode.hpp" class CallStubImpl { @@ -1061,6 +1062,9 @@ // predicate controlling translation of StoreCM bool unnecessary_storestore(const Node *storecm); + + // predicate controlling addressing modes + bool size_fits_all_mem_uses(AddPNode* addp, int shift); %} source %{ @@ -3449,11 +3453,6 @@ // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them -// to be subsumed into complex addressing expressions or compute them -// into registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; @@ -3572,8 +3571,119 @@ return FP_REG_mask(); } +bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { + Node* u = addp->fast_out(i); + if (u->is_Mem()) { + int opsize = u->as_Mem()->memory_size(); + assert(opsize > 0, "unexpected memory operand size"); + if (u->as_Mem()->memory_size() != (1<in(AddPNode::Offset); + if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && + size_fits_all_mem_uses(m, off->in(2)->get_int()) && + // Are there other uses besides address expressions? + !is_visited(off)) { + address_visited.set(off->_idx); // Flag as address_visited + mstack.push(off->in(2), Visit); + Node *conv = off->in(1); + if (conv->Opcode() == Op_ConvI2L && + // Are there other uses besides address expressions? + !is_visited(conv)) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Pre_Visit); + } else { + mstack.push(conv, Pre_Visit); + } + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } else if (off->Opcode() == Op_ConvI2L && + // Are there other uses besides address expressions? + !is_visited(off)) { + address_visited.test_set(m->_idx); // Flag as address_visited + address_visited.set(off->_idx); // Flag as address_visited + mstack.push(off->in(1), Pre_Visit); + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } + return false; +} + +// Transform: +// (AddP base (AddP base address (LShiftL index con)) offset) +// into: +// (AddP base (AddP base offset) (LShiftL index con)) +// to take full advantage of ARM's addressing modes +void Compile::reshape_address(AddPNode* addp) { + Node *addr = addp->in(AddPNode::Address); + if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) { + const AddPNode *addp2 = addr->as_AddP(); + if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL && + addp2->in(AddPNode::Offset)->in(2)->is_Con() && + size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) || + addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) { + + // Any use that can't embed the address computation? + for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { + Node* u = addp->fast_out(i); + if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) { + return; + } + } + + Node* off = addp->in(AddPNode::Offset); + Node* addr2 = addp2->in(AddPNode::Address); + Node* base = addp->in(AddPNode::Base); + + Node* new_addr = NULL; + // Check whether the graph already has the new AddP we need + // before we create one (no GVN available here). + for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) { + Node* u = addr2->fast_out(i); + if (u->is_AddP() && + u->in(AddPNode::Base) == base && + u->in(AddPNode::Address) == addr2 && + u->in(AddPNode::Offset) == off) { + new_addr = u; + break; + } + } + + if (new_addr == NULL) { + new_addr = new AddPNode(base, addr2, off); + } + Node* new_off = addp2->in(AddPNode::Offset); + addp->set_req(AddPNode::Address, new_addr); + if (addr->outcnt() == 0) { + addr->disconnect_inputs(NULL, this); + } + addp->set_req(AddPNode::Offset, new_off); + if (off->outcnt() == 0) { + off->disconnect_inputs(NULL, this); + } + } + } +} + // helper for encoding java_to_runtime calls on sim // // this is needed to compute the extra arguments required when @@ -3643,12 +3753,10 @@ // encoder that the index needs to be sign extended, so we have to // enumerate all the cases. switch (opcode) { - case INDINDEXSCALEDOFFSETI2L: case INDINDEXSCALEDI2L: - case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: - case INDINDEXOFFSETI2L: - case INDINDEXOFFSETI2LN: + case INDINDEXI2L: + case INDINDEXI2LN: scale = Address::sxtw(size); break; default: @@ -3658,12 +3766,8 @@ if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { - if (disp == 0) { - (masm.*insn)(reg, Address(base, as_Register(index), scale)); - } else { - masm.lea(rscratch1, Address(base, disp)); - (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); - } + assert(disp == 0, "unsupported address mode: disp = %d", disp); + (masm.*insn)(reg, Address(base, as_Register(index), scale)); } } @@ -3674,9 +3778,7 @@ Address::extend scale; switch (opcode) { - case INDINDEXSCALEDOFFSETI2L: case INDINDEXSCALEDI2L: - case INDINDEXSCALEDOFFSETI2LN: case INDINDEXSCALEDI2LN: scale = Address::sxtw(size); break; @@ -3687,12 +3789,8 @@ if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { - if (disp == 0) { - (masm.*insn)(reg, Address(base, as_Register(index), scale)); - } else { - masm.lea(rscratch1, Address(base, disp)); - (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); - } + assert(disp == 0, "unsupported address mode: disp = %d", disp); + (masm.*insn)(reg, Address(base, as_Register(index), scale)); } } @@ -6106,65 +6204,10 @@ %} %} -operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (ConvI2L ireg)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg, $off I2L" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg sxtw($scale), $off I2L" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale) %{ constraint(ALLOC_IN_RC(ptr_reg)); + predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); match(AddP reg (LShiftL (ConvI2L ireg) scale)); op_cost(0); format %{ "$reg, $ireg sxtw($scale), 0, I2L" %} @@ -6179,6 +6222,7 @@ operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale) %{ constraint(ALLOC_IN_RC(ptr_reg)); + predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); match(AddP reg (LShiftL lreg scale)); op_cost(0); format %{ "$reg, $lreg lsl($scale)" %} @@ -6190,6 +6234,20 @@ %} %} +operand indIndexI2L(iRegP reg, iRegI ireg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (ConvI2L ireg)); + op_cost(0); + format %{ "$reg, $ireg, 0, I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + operand indIndex(iRegP reg, iRegL lreg) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -6331,69 +6389,9 @@ %} %} -operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); - op_cost(0); - format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg, $off I2L\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) -%{ - predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off); - op_cost(INSN_COST); - format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale) %{ - predicate(Universe::narrow_oop_shift() == 0); + predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); constraint(ALLOC_IN_RC(ptr_reg)); match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)); op_cost(0); @@ -6408,7 +6406,7 @@ operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale) %{ - predicate(Universe::narrow_oop_shift() == 0); + predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int())); constraint(ALLOC_IN_RC(ptr_reg)); match(AddP (DecodeN reg) (LShiftL lreg scale)); op_cost(0); @@ -6421,6 +6419,21 @@ %} %} +operand indIndexI2LN(iRegN reg, iRegI ireg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) (ConvI2L ireg)); + op_cost(0); + format %{ "$reg, $ireg, 0, I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + operand indIndexN(iRegN reg, iRegL lreg) %{ predicate(Universe::narrow_oop_shift() == 0); @@ -6641,9 +6654,8 @@ // memory is used to define read/write location for load/store // instruction defs. we can turn a memory op into an Address -opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, - indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); - +opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL, + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN); // iRegIorL2I is used for src inputs in rules for 32 bit int (I) // operations. it allows the src to be either an iRegI or a (ConvL2I diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/ppc/vm/ppc.ad --- a/hotspot/src/cpu/ppc/vm/ppc.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/ppc/vm/ppc.ad Mon May 09 11:34:09 2016 +0200 @@ -817,6 +817,16 @@ source %{ +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + // Optimize load-acquire. // // Check if acquire is unnecessary due to following operation that does @@ -2157,11 +2167,6 @@ // Power6 requires postalloc expand (see block.cpp for description of postalloc expand). const bool Matcher::require_postalloc_expand = true; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs. -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? // PowerPC requires masked shift counts. diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/sparc/vm/sparc.ad --- a/hotspot/src/cpu/sparc/vm/sparc.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/sparc/vm/sparc.ad Mon May 09 11:34:09 2016 +0200 @@ -1995,11 +1995,6 @@ // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = false; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; @@ -2133,8 +2128,19 @@ return L7_REGP_mask(); } + const bool Matcher::convi2l_type_required = true; +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + %} diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/x86/vm/x86.ad --- a/hotspot/src/cpu/x86/vm/x86.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/x86/vm/x86.ad Mon May 09 11:34:09 2016 +0200 @@ -1586,6 +1586,8 @@ source %{ +#include "opto/addnode.hpp" + // Emit exception handler code. // Stuff framesize into a register and call a VM stub routine. int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { @@ -1861,8 +1863,79 @@ return false; } + const bool Matcher::convi2l_type_required = true; +// Check for shift by small constant as well +static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { + if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && + shift->in(2)->get_int() <= 3 && + // Are there other uses besides address expressions? + !matcher->is_visited(shift)) { + address_visited.set(shift->_idx); // Flag as address_visited + mstack.push(shift->in(2), Matcher::Visit); + Node *conv = shift->in(1); +#ifdef _LP64 + // Allow Matcher to match the rule which bypass + // ConvI2L operation for an array index on LP64 + // if the index value is positive. + if (conv->Opcode() == Op_ConvI2L && + conv->as_Type()->type()->is_long()->_lo >= 0 && + // Are there other uses besides address expressions? + !matcher->is_visited(conv)) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Matcher::Pre_Visit); + } else +#endif + mstack.push(conv, Matcher::Pre_Visit); + return true; + } + return false; +} + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + Node *off = m->in(AddPNode::Offset); + if (off->is_Con()) { + address_visited.test_set(m->_idx); // Flag as address_visited + Node *adr = m->in(AddPNode::Address); + + // Intel can handle 2 adds in addressing mode + // AtomicAdd is not an addressing expression. + // Cheap to find it by looking for screwy base. + if (adr->is_AddP() && + !adr->in(AddPNode::Base)->is_top() && + // Are there other uses besides address expressions? + !is_visited(adr)) { + address_visited.set(adr->_idx); // Flag as address_visited + Node *shift = adr->in(AddPNode::Offset); + if (!clone_shift(shift, this, mstack, address_visited)) { + mstack.push(shift, Pre_Visit); + } + mstack.push(adr->in(AddPNode::Address), Pre_Visit); + mstack.push(adr->in(AddPNode::Base), Pre_Visit); + } else { + mstack.push(adr, Pre_Visit); + } + + // Clone X+offset as it also folds into most addressing expressions + mstack.push(off, Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } else if (clone_shift(off, this, mstack, address_visited)) { + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); + return true; + } + return false; +} + +void Compile::reshape_address(AddPNode* addp) { +} + // Helper methods for MachSpillCopyNode::implementation(). static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st) { diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/x86/vm/x86_32.ad --- a/hotspot/src/cpu/x86/vm/x86_32.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/x86/vm/x86_32.ad Mon May 09 11:34:09 2016 +0200 @@ -1438,11 +1438,6 @@ // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them to -// be subsumed into complex addressing expressions or compute them into -// registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = true; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/cpu/x86/vm/x86_64.ad --- a/hotspot/src/cpu/x86/vm/x86_64.ad Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/cpu/x86/vm/x86_64.ad Mon May 09 11:34:09 2016 +0200 @@ -1646,11 +1646,6 @@ // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; -// Should the Matcher clone shifts on addressing modes, expecting them -// to be subsumed into complex addressing expressions or compute them -// into registers? True for Intel but false for most RISCs -const bool Matcher::clone_shift_expressions = true; - // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/share/vm/opto/compile.cpp --- a/hotspot/src/share/vm/opto/compile.cpp Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/share/vm/opto/compile.cpp Mon May 09 11:34:09 2016 +0200 @@ -2905,6 +2905,8 @@ } } #endif + // platform dependent reshaping of the address expression + reshape_address(n->as_AddP()); break; } diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/share/vm/opto/compile.hpp --- a/hotspot/src/share/vm/opto/compile.hpp Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/share/vm/opto/compile.hpp Mon May 09 11:34:09 2016 +0200 @@ -44,6 +44,7 @@ #include "trace/tracing.hpp" #include "utilities/ticks.hpp" +class AddPNode; class Block; class Bundle; class C2Compiler; @@ -579,6 +580,8 @@ int _scratch_const_size; // For temporary code buffers. bool _in_scratch_emit_size; // true when in scratch_emit_size. + void reshape_address(AddPNode* n); + public: // Accessors diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/share/vm/opto/matcher.cpp --- a/hotspot/src/share/vm/opto/matcher.cpp Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/share/vm/opto/matcher.cpp Mon May 09 11:34:09 2016 +0200 @@ -963,44 +963,6 @@ } #endif - -//------------------------------MStack----------------------------------------- -// State and MStack class used in xform() and find_shared() iterative methods. -enum Node_State { Pre_Visit, // node has to be pre-visited - Visit, // visit node - Post_Visit, // post-visit node - Alt_Post_Visit // alternative post-visit path - }; - -class MStack: public Node_Stack { - public: - MStack(int size) : Node_Stack(size) { } - - void push(Node *n, Node_State ns) { - Node_Stack::push(n, (uint)ns); - } - void push(Node *n, Node_State ns, Node *parent, int indx) { - ++_inode_top; - if ((_inode_top + 1) >= _inode_max) grow(); - _inode_top->node = parent; - _inode_top->indx = (uint)indx; - ++_inode_top; - _inode_top->node = n; - _inode_top->indx = (uint)ns; - } - Node *parent() { - pop(); - return node(); - } - Node_State state() const { - return (Node_State)index(); - } - void set_state(Node_State ns) { - set_index((uint)ns); - } -}; - - //------------------------------xform------------------------------------------ // Given a Node in old-space, Match him (Label/Reduce) to produce a machine // Node in new-space. Given a new-space Node, recursively walk his children. @@ -2046,37 +2008,22 @@ } #endif // X86 -// A method-klass-holder may be passed in the inline_cache_reg -// and then expanded into the inline_cache_reg and a method_oop register -// defined in ad_.cpp - -// Check for shift by small constant as well -static bool clone_shift(Node* shift, Matcher* matcher, MStack& mstack, VectorSet& address_visited) { - if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && - shift->in(2)->get_int() <= 3 && - // Are there other uses besides address expressions? - !matcher->is_visited(shift)) { - address_visited.set(shift->_idx); // Flag as address_visited - mstack.push(shift->in(2), Visit); - Node *conv = shift->in(1); -#ifdef _LP64 - // Allow Matcher to match the rule which bypass - // ConvI2L operation for an array index on LP64 - // if the index value is positive. - if (conv->Opcode() == Op_ConvI2L && - conv->as_Type()->type()->is_long()->_lo >= 0 && - // Are there other uses besides address expressions? - !matcher->is_visited(conv)) { - address_visited.set(conv->_idx); // Flag as address_visited - mstack.push(conv->in(1), Pre_Visit); - } else -#endif - mstack.push(conv, Pre_Visit); +bool Matcher::clone_base_plus_offset_address(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + Node *off = m->in(AddPNode::Offset); + if (off->is_Con()) { + address_visited.test_set(m->_idx); // Flag as address_visited + mstack.push(m->in(AddPNode::Address), Pre_Visit); + // Clone X+offset as it also folds into most addressing expressions + mstack.push(off, Visit); + mstack.push(m->in(AddPNode::Base), Pre_Visit); return true; } return false; } +// A method-klass-holder may be passed in the inline_cache_reg +// and then expanded into the inline_cache_reg and a method_oop register +// defined in ad_.cpp //------------------------------find_shared------------------------------------ // Set bits if Node is shared or otherwise a root @@ -2251,40 +2198,9 @@ // But they should be marked as shared if there are other uses // besides address expressions. - Node *off = m->in(AddPNode::Offset); - if (off->is_Con()) { - address_visited.test_set(m->_idx); // Flag as address_visited - Node *adr = m->in(AddPNode::Address); - - // Intel, ARM and friends can handle 2 adds in addressing mode - if( clone_shift_expressions && adr->is_AddP() && - // AtomicAdd is not an addressing expression. - // Cheap to find it by looking for screwy base. - !adr->in(AddPNode::Base)->is_top() && - // Are there other uses besides address expressions? - !is_visited(adr) ) { - address_visited.set(adr->_idx); // Flag as address_visited - Node *shift = adr->in(AddPNode::Offset); - if (!clone_shift(shift, this, mstack, address_visited)) { - mstack.push(shift, Pre_Visit); - } - mstack.push(adr->in(AddPNode::Address), Pre_Visit); - mstack.push(adr->in(AddPNode::Base), Pre_Visit); - } else { // Sparc, Alpha, PPC and friends - mstack.push(adr, Pre_Visit); - } - - // Clone X+offset as it also folds into most addressing expressions - mstack.push(off, Visit); - mstack.push(m->in(AddPNode::Base), Pre_Visit); - continue; // for(int i = ...) - } else if (clone_shift_expressions && - clone_shift(off, this, mstack, address_visited)) { - address_visited.test_set(m->_idx); // Flag as address_visited - mstack.push(m->in(AddPNode::Address), Pre_Visit); - mstack.push(m->in(AddPNode::Base), Pre_Visit); - continue; - } // if( off->is_Con() ) + if (clone_address_expressions(m->as_AddP(), mstack, address_visited)) { + continue; + } } // if( mem_op && mstack.push(m, Pre_Visit); } // for(int i = ...) diff -r 20b85a0ba796 -r 0ddb6f84e138 hotspot/src/share/vm/opto/matcher.hpp --- a/hotspot/src/share/vm/opto/matcher.hpp Mon May 09 01:21:55 2016 -0700 +++ b/hotspot/src/share/vm/opto/matcher.hpp Mon May 09 11:34:09 2016 +0200 @@ -40,6 +40,45 @@ //---------------------------Matcher------------------------------------------- class Matcher : public PhaseTransform { friend class VMStructs; + +public: + + // State and MStack class used in xform() and find_shared() iterative methods. + enum Node_State { Pre_Visit, // node has to be pre-visited + Visit, // visit node + Post_Visit, // post-visit node + Alt_Post_Visit // alternative post-visit path + }; + + class MStack: public Node_Stack { + public: + MStack(int size) : Node_Stack(size) { } + + void push(Node *n, Node_State ns) { + Node_Stack::push(n, (uint)ns); + } + void push(Node *n, Node_State ns, Node *parent, int indx) { + ++_inode_top; + if ((_inode_top + 1) >= _inode_max) grow(); + _inode_top->node = parent; + _inode_top->indx = (uint)indx; + ++_inode_top; + _inode_top->node = n; + _inode_top->indx = (uint)ns; + } + Node *parent() { + pop(); + return node(); + } + Node_State state() const { + return (Node_State)index(); + } + void set_state(Node_State ns) { + set_index((uint)ns); + } + }; + +private: // Private arena of State objects ResourceArea _states_arena; @@ -411,7 +450,9 @@ // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs - static const bool clone_shift_expressions; + bool clone_address_expressions(AddPNode* m, MStack& mstack, VectorSet& address_visited); + // Clone base + offset address expression + bool clone_base_plus_offset_address(AddPNode* m, MStack& mstack, VectorSet& address_visited); static bool narrow_oop_use_complex_address(); static bool narrow_klass_use_complex_address();