--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -675,8 +675,8 @@
AbstractAssembler::flush();
}
- inline void emit_long(int); // shadows AbstractAssembler::emit_long
- inline void emit_data(int x) { emit_long(x); }
+ inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
+ inline void emit_data(int x) { emit_int32(x); }
inline void emit_data(int, RelocationHolder const&);
inline void emit_data(int, relocInfo::relocType rtype);
// helper for above fcns
@@ -691,12 +691,12 @@
inline void add(Register s1, Register s2, Register d );
inline void add(Register s1, int simm13a, Register d );
- void addcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void addcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void addc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 ) | rs1(s1) | rs2(s2) ); }
- void addc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void addccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void addccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void addcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void addcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void addc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 ) | rs1(s1) | rs2(s2) ); }
+ void addc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void addccc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 136
@@ -749,76 +749,76 @@
// at address s1 is swapped with the data in d. If the values are not equal,
// the the contents of memory at s1 is loaded into d, without the swap.
- void casa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(casa_op3 ) | rs1(s1) | (ia == -1 ? immed(true) : imm_asi(ia)) | rs2(s2)); }
- void casxa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(casxa_op3) | rs1(s1) | (ia == -1 ? immed(true) : imm_asi(ia)) | rs2(s2)); }
+ void casa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(casa_op3 ) | rs1(s1) | (ia == -1 ? immed(true) : imm_asi(ia)) | rs2(s2)); }
+ void casxa( Register s1, Register s2, Register d, int ia = -1 ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(casxa_op3) | rs1(s1) | (ia == -1 ? immed(true) : imm_asi(ia)) | rs2(s2)); }
// pp 152
- void udiv( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 ) | rs1(s1) | rs2(s2)); }
- void udiv( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void sdiv( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 ) | rs1(s1) | rs2(s2)); }
- void sdiv( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void udivcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
- void udivcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void sdivcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
- void sdivcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void udiv( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 ) | rs1(s1) | rs2(s2)); }
+ void udiv( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void sdiv( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 ) | rs1(s1) | rs2(s2)); }
+ void sdiv( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void udivcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
+ void udivcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(udiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void sdivcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | rs2(s2)); }
+ void sdivcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sdiv_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 155
- void done() { v9_only(); cti(); emit_long( op(arith_op) | fcn(0) | op3(done_op3) ); }
- void retry() { v9_only(); cti(); emit_long( op(arith_op) | fcn(1) | op3(retry_op3) ); }
+ void done() { v9_only(); cti(); emit_int32( op(arith_op) | fcn(0) | op3(done_op3) ); }
+ void retry() { v9_only(); cti(); emit_int32( op(arith_op) | fcn(1) | op3(retry_op3) ); }
// pp 156
- void fadd( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x40 + w) | fs2(s2, w)); }
- void fsub( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x44 + w) | fs2(s2, w)); }
+ void fadd( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x40 + w) | fs2(s2, w)); }
+ void fsub( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x44 + w) | fs2(s2, w)); }
// pp 157
- void fcmp( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc); emit_long( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x50 + w) | fs2(s2, w)); }
- void fcmpe( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc); emit_long( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x54 + w) | fs2(s2, w)); }
+ void fcmp( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc); emit_int32( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x50 + w) | fs2(s2, w)); }
+ void fcmpe( FloatRegisterImpl::Width w, CC cc, FloatRegister s1, FloatRegister s2) { v8_no_cc(cc); emit_int32( op(arith_op) | cmpcc(cc) | op3(fpop2_op3) | fs1(s1, w) | opf(0x54 + w) | fs2(s2, w)); }
// pp 159
- void ftox( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only(); emit_long( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(fpop1_op3) | opf(0x80 + w) | fs2(s, w)); }
- void ftoi( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(fpop1_op3) | opf(0xd0 + w) | fs2(s, w)); }
+ void ftox( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(fpop1_op3) | opf(0x80 + w) | fs2(s, w)); }
+ void ftoi( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(fpop1_op3) | opf(0xd0 + w) | fs2(s, w)); }
// pp 160
- void ftof( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | opf(0xc0 + sw + dw*4) | fs2(s, sw)); }
+ void ftof( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | opf(0xc0 + sw + dw*4) | fs2(s, sw)); }
// pp 161
- void fxtof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only(); emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x80 + w*4) | fs2(s, FloatRegisterImpl::D)); }
- void fitof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0xc0 + w*4) | fs2(s, FloatRegisterImpl::S)); }
+ void fxtof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v9_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x80 + w*4) | fs2(s, FloatRegisterImpl::D)); }
+ void fitof( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0xc0 + w*4) | fs2(s, FloatRegisterImpl::S)); }
// pp 162
- void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x00 + w) | fs2(s, w)); }
+ void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x00 + w) | fs2(s, w)); }
- void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(s, w)); }
+ void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(s, w)); }
// page 144 sparc v8 architecture (double prec works on v8 if the source and destination registers are the same). fnegs is the only instruction available
// on v8 to do negation of single, double and quad precision floats.
- void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(sd, w)); else emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x05) | fs2(sd, w)); }
+ void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x04 + w) | fs2(sd, w)); else emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x05) | fs2(sd, w)); }
- void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(s, w)); }
+ void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { v8_s_only(w); emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(s, w)); }
// page 144 sparc v8 architecture (double prec works on v8 if the source and destination registers are the same). fabss is the only instruction available
// on v8 to do abs operation on single/double/quad precision floats.
- void fabs( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(sd, w)); else emit_long( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x09) | fs2(sd, w)); }
+ void fabs( FloatRegisterImpl::Width w, FloatRegister sd ) { if (VM_Version::v9_instructions_work()) emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x08 + w) | fs2(sd, w)); else emit_int32( op(arith_op) | fd(sd, w) | op3(fpop1_op3) | opf(0x09) | fs2(sd, w)); }
// pp 163
- void fmul( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x48 + w) | fs2(s2, w)); }
- void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
- void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); }
+ void fmul( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x48 + w) | fs2(s2, w)); }
+ void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); }
+ void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); }
// pp 164
- void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_long( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
+ void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); }
// pp 165
@@ -827,22 +827,22 @@
// pp 167
- void flushw() { v9_only(); emit_long( op(arith_op) | op3(flushw_op3) ); }
+ void flushw() { v9_only(); emit_int32( op(arith_op) | op3(flushw_op3) ); }
// pp 168
- void illtrap( int const22a) { if (const22a != 0) v9_only(); emit_long( op(branch_op) | u_field(const22a, 21, 0) ); }
+ void illtrap( int const22a) { if (const22a != 0) v9_only(); emit_int32( op(branch_op) | u_field(const22a, 21, 0) ); }
// v8 unimp == illtrap(0)
// pp 169
- void impdep1( int id1, int const19a ) { v9_only(); emit_long( op(arith_op) | fcn(id1) | op3(impdep1_op3) | u_field(const19a, 18, 0)); }
- void impdep2( int id1, int const19a ) { v9_only(); emit_long( op(arith_op) | fcn(id1) | op3(impdep2_op3) | u_field(const19a, 18, 0)); }
+ void impdep1( int id1, int const19a ) { v9_only(); emit_int32( op(arith_op) | fcn(id1) | op3(impdep1_op3) | u_field(const19a, 18, 0)); }
+ void impdep2( int id1, int const19a ) { v9_only(); emit_int32( op(arith_op) | fcn(id1) | op3(impdep2_op3) | u_field(const19a, 18, 0)); }
// pp 149 (v8)
- void cpop1( int opc, int cr1, int cr2, int crd ) { v8_only(); emit_long( op(arith_op) | fcn(crd) | op3(impdep1_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
- void cpop2( int opc, int cr1, int cr2, int crd ) { v8_only(); emit_long( op(arith_op) | fcn(crd) | op3(impdep2_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
+ void cpop1( int opc, int cr1, int cr2, int crd ) { v8_only(); emit_int32( op(arith_op) | fcn(crd) | op3(impdep1_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
+ void cpop2( int opc, int cr1, int cr2, int crd ) { v8_only(); emit_int32( op(arith_op) | fcn(crd) | op3(impdep2_op3) | u_field(cr1, 18, 14) | opf(opc) | u_field(cr2, 4, 0)); }
// pp 170
@@ -872,8 +872,8 @@
// 173
- void ldfa( FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d ) { v9_only(); emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldfa( FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d ) { v9_only(); emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldfa( FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d ) { v9_only(); emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldfa( FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d ) { v9_only(); emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 175, lduw is ld on v8
@@ -896,22 +896,22 @@
// pp 177
- void ldsba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldsba( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void ldsha( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldsha( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void ldswa( Register s1, Register s2, int ia, Register d ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldswa( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void lduba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void lduba( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void lduha( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void lduha( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void lduwa( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void lduwa( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void ldxa( Register s1, Register s2, int ia, Register d ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldxa( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void ldda( Register s1, Register s2, int ia, Register d ) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(ldd_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldda( Register s1, int simm13a, Register d ) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(ldd_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldsba( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldsba( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldsha( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldsha( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldswa( Register s1, Register s2, int ia, Register d ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldswa( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void lduba( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void lduba( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void lduha( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void lduha( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void lduwa( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void lduwa( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldxa( Register s1, Register s2, int ia, Register d ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldxa( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldda( Register s1, Register s2, int ia, Register d ) { v9_dep(); emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldda( Register s1, int simm13a, Register d ) { v9_dep(); emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 179
@@ -920,111 +920,111 @@
// pp 180
- void ldstuba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void ldstuba( Register s1, int simm13a, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void ldstuba( Register s1, Register s2, int ia, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void ldstuba( Register s1, int simm13a, Register d ) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 181
- void and3( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3 ) | rs1(s1) | rs2(s2) ); }
- void and3( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void andcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void andcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void andn( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | rs2(s2) ); }
- void andn( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void andncc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void andncc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void or3( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3 ) | rs1(s1) | rs2(s2) ); }
- void or3( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void orcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void orcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void orn( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | rs2(s2) ); }
- void orn( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void orncc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void orncc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void xor3( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3 ) | rs1(s1) | rs2(s2) ); }
- void xor3( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void xorcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void xorcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void xnor( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 ) | rs1(s1) | rs2(s2) ); }
- void xnor( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void xnorcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void xnorcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void and3( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3 ) | rs1(s1) | rs2(s2) ); }
+ void and3( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void andcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void andcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void andn( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | rs2(s2) ); }
+ void andn( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void andncc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void andncc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void or3( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3 ) | rs1(s1) | rs2(s2) ); }
+ void or3( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void orcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void orcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(or_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void orn( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | rs2(s2) ); }
+ void orn( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void orncc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void orncc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(orn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void xor3( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3 ) | rs1(s1) | rs2(s2) ); }
+ void xor3( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void xorcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void xorcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void xnor( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 ) | rs1(s1) | rs2(s2) ); }
+ void xnor( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void xnorcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void xnorcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(xnor_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 183
- void membar( Membar_mask_bits const7a ) { v9_only(); emit_long( op(arith_op) | op3(membar_op3) | rs1(O7) | immed(true) | u_field( int(const7a), 6, 0)); }
+ void membar( Membar_mask_bits const7a ) { v9_only(); emit_int32( op(arith_op) | op3(membar_op3) | rs1(O7) | immed(true) | u_field( int(const7a), 6, 0)); }
// pp 185
- void fmov( FloatRegisterImpl::Width w, Condition c, bool floatCC, CC cca, FloatRegister s2, FloatRegister d ) { v9_only(); emit_long( op(arith_op) | fd(d, w) | op3(fpop2_op3) | cond_mov(c) | opf_cc(cca, floatCC) | opf_low6(w) | fs2(s2, w)); }
+ void fmov( FloatRegisterImpl::Width w, Condition c, bool floatCC, CC cca, FloatRegister s2, FloatRegister d ) { v9_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fpop2_op3) | cond_mov(c) | opf_cc(cca, floatCC) | opf_low6(w) | fs2(s2, w)); }
// pp 189
- void fmov( FloatRegisterImpl::Width w, RCondition c, Register s1, FloatRegister s2, FloatRegister d ) { v9_only(); emit_long( op(arith_op) | fd(d, w) | op3(fpop2_op3) | rs1(s1) | rcond(c) | opf_low5(4 + w) | fs2(s2, w)); }
+ void fmov( FloatRegisterImpl::Width w, RCondition c, Register s1, FloatRegister s2, FloatRegister d ) { v9_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fpop2_op3) | rs1(s1) | rcond(c) | opf_low5(4 + w) | fs2(s2, w)); }
// pp 191
- void movcc( Condition c, bool floatCC, CC cca, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | rs2(s2) ); }
- void movcc( Condition c, bool floatCC, CC cca, int simm11a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | immed(true) | simm(simm11a, 11) ); }
+ void movcc( Condition c, bool floatCC, CC cca, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | rs2(s2) ); }
+ void movcc( Condition c, bool floatCC, CC cca, int simm11a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(movcc_op3) | mov_cc(cca, floatCC) | cond_mov(c) | immed(true) | simm(simm11a, 11) ); }
// pp 195
- void movr( RCondition c, Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | rs2(s2) ); }
- void movr( RCondition c, Register s1, int simm10a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | immed(true) | simm(simm10a, 10) ); }
+ void movr( RCondition c, Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | rs2(s2) ); }
+ void movr( RCondition c, Register s1, int simm10a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(movr_op3) | rs1(s1) | rcond(c) | immed(true) | simm(simm10a, 10) ); }
// pp 196
- void mulx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | rs2(s2) ); }
- void mulx( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void sdivx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | rs2(s2) ); }
- void sdivx( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void udivx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | rs2(s2) ); }
- void udivx( Register s1, int simm13a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void mulx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | rs2(s2) ); }
+ void mulx( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(mulx_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void sdivx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | rs2(s2) ); }
+ void sdivx( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sdivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void udivx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | rs2(s2) ); }
+ void udivx( Register s1, int simm13a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(udivx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 197
- void umul( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 ) | rs1(s1) | rs2(s2) ); }
- void umul( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void smul( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 ) | rs1(s1) | rs2(s2) ); }
- void smul( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void umulcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void umulcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void smulcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void smulcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void umul( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 ) | rs1(s1) | rs2(s2) ); }
+ void umul( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void smul( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 ) | rs1(s1) | rs2(s2) ); }
+ void smul( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void umulcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void umulcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(umul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void smulcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void smulcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(smul_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 199
- void mulscc( Register s1, Register s2, Register d ) { v9_dep(); emit_long( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | rs2(s2) ); }
- void mulscc( Register s1, int simm13a, Register d ) { v9_dep(); emit_long( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void mulscc( Register s1, Register s2, Register d ) { v9_dep(); emit_int32( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | rs2(s2) ); }
+ void mulscc( Register s1, int simm13a, Register d ) { v9_dep(); emit_int32( op(arith_op) | rd(d) | op3(mulscc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 201
- void nop() { emit_long( op(branch_op) | op2(sethi_op2) ); }
+ void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); }
// pp 202
- void popc( Register s, Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(popc_op3) | rs2(s)); }
- void popc( int simm13a, Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(popc_op3) | immed(true) | simm(simm13a, 13)); }
+ void popc( Register s, Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(popc_op3) | rs2(s)); }
+ void popc( int simm13a, Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(popc_op3) | immed(true) | simm(simm13a, 13)); }
// pp 203
- void prefetch( Register s1, Register s2, PrefetchFcn f) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); }
+ void prefetch( Register s1, Register s2, PrefetchFcn f) { v9_only(); emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); }
void prefetch( Register s1, int simm13a, PrefetchFcn f) { v9_only(); emit_data( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
- void prefetcha( Register s1, Register s2, int ia, PrefetchFcn f ) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void prefetcha( Register s1, int simm13a, PrefetchFcn f ) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void prefetcha( Register s1, Register s2, int ia, PrefetchFcn f ) { v9_only(); emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void prefetcha( Register s1, int simm13a, PrefetchFcn f ) { v9_only(); emit_int32( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 208
// not implementing read privileged register
- inline void rdy( Register d) { v9_dep(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(0, 18, 14)); }
- inline void rdccr( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(2, 18, 14)); }
- inline void rdasi( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(3, 18, 14)); }
- inline void rdtick( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(4, 18, 14)); } // Spoon!
- inline void rdpc( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(5, 18, 14)); }
- inline void rdfprs( Register d) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(6, 18, 14)); }
+ inline void rdy( Register d) { v9_dep(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(0, 18, 14)); }
+ inline void rdccr( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(2, 18, 14)); }
+ inline void rdasi( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(3, 18, 14)); }
+ inline void rdtick( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(4, 18, 14)); } // Spoon!
+ inline void rdpc( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(5, 18, 14)); }
+ inline void rdfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(rdreg_op3) | u_field(6, 18, 14)); }
// pp 213
@@ -1033,47 +1033,47 @@
// pp 214
- void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
+ void save( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
void save( Register s1, int simm13a, Register d ) {
// make sure frame is at least large enough for the register save area
assert(-simm13a >= 16 * wordSize, "frame too small");
- emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
+ emit_int32( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
}
- void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
- void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_int32( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
+ void restore( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 216
- void saved() { v9_only(); emit_long( op(arith_op) | fcn(0) | op3(saved_op3)); }
- void restored() { v9_only(); emit_long( op(arith_op) | fcn(1) | op3(saved_op3)); }
+ void saved() { v9_only(); emit_int32( op(arith_op) | fcn(0) | op3(saved_op3)); }
+ void restored() { v9_only(); emit_int32( op(arith_op) | fcn(1) | op3(saved_op3)); }
// pp 217
inline void sethi( int imm22a, Register d, RelocationHolder const& rspec = RelocationHolder() );
// pp 218
- void sll( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
- void sll( Register s1, int imm5a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
- void srl( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
- void srl( Register s1, int imm5a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
- void sra( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
- void sra( Register s1, int imm5a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+ void sll( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+ void sll( Register s1, int imm5a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+ void srl( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+ void srl( Register s1, int imm5a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
+ void sra( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | rs2(s2) ); }
+ void sra( Register s1, int imm5a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(0) | immed(true) | u_field(imm5a, 4, 0) ); }
- void sllx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
- void sllx( Register s1, int imm6a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
- void srlx( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
- void srlx( Register s1, int imm6a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
- void srax( Register s1, Register s2, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
- void srax( Register s1, int imm6a, Register d ) { v9_only(); emit_long( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+ void sllx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+ void sllx( Register s1, int imm6a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sll_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+ void srlx( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+ void srlx( Register s1, int imm6a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(srl_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
+ void srax( Register s1, Register s2, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | rs2(s2) ); }
+ void srax( Register s1, int imm6a, Register d ) { v9_only(); emit_int32( op(arith_op) | rd(d) | op3(sra_op3) | rs1(s1) | sx(1) | immed(true) | u_field(imm6a, 5, 0) ); }
// pp 220
- void sir( int simm13a ) { emit_long( op(arith_op) | fcn(15) | op3(sir_op3) | immed(true) | simm(simm13a, 13)); }
+ void sir( int simm13a ) { emit_int32( op(arith_op) | fcn(15) | op3(sir_op3) | immed(true) | simm(simm13a, 13)); }
// pp 221
- void stbar() { emit_long( op(arith_op) | op3(membar_op3) | u_field(15, 18, 14)); }
+ void stbar() { emit_int32( op(arith_op) | op3(membar_op3) | u_field(15, 18, 14)); }
// pp 222
@@ -1087,8 +1087,8 @@
// pp 224
- void stfa( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia ) { v9_only(); emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stfa( FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a ) { v9_only(); emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stfa( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia ) { v9_only(); emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stfa( FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a ) { v9_only(); emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3 | alt_bit_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// p 226
@@ -1105,16 +1105,16 @@
// pp 177
- void stba( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stba( Register d, Register s1, int simm13a ) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void stha( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stha( Register d, Register s1, int simm13a ) { emit_long( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void stwa( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stwa( Register d, Register s1, int simm13a ) { emit_long( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void stxa( Register d, Register s1, Register s2, int ia ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stxa( Register d, Register s1, int simm13a ) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void stda( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void stda( Register d, Register s1, int simm13a ) { emit_long( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stba( Register d, Register s1, Register s2, int ia ) { emit_int32( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stba( Register d, Register s1, int simm13a ) { emit_int32( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stha( Register d, Register s1, Register s2, int ia ) { emit_int32( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stha( Register d, Register s1, int simm13a ) { emit_int32( op(ldst_op) | rd(d) | op3(sth_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stwa( Register d, Register s1, Register s2, int ia ) { emit_int32( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stwa( Register d, Register s1, int simm13a ) { emit_int32( op(ldst_op) | rd(d) | op3(stw_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stxa( Register d, Register s1, Register s2, int ia ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stxa( Register d, Register s1, int simm13a ) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(stx_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void stda( Register d, Register s1, Register s2, int ia ) { emit_int32( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void stda( Register d, Register s1, int simm13a ) { emit_int32( op(ldst_op) | rd(d) | op3(std_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 97 (v8)
@@ -1129,15 +1129,15 @@
// pp 230
- void sub( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | rs2(s2) ); }
- void sub( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void sub( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | rs2(s2) ); }
+ void sub( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void subcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | rs2(s2) ); }
- void subcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void subc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 ) | rs1(s1) | rs2(s2) ); }
- void subc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void subccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
- void subccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void subcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | rs2(s2) ); }
+ void subcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void subc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 ) | rs1(s1) | rs2(s2) ); }
+ void subc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void subccc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
+ void subccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(subc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 231
@@ -1146,55 +1146,55 @@
// pp 232
- void swapa( Register s1, Register s2, int ia, Register d ) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
- void swapa( Register s1, int simm13a, Register d ) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void swapa( Register s1, Register s2, int ia, Register d ) { v9_dep(); emit_int32( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
+ void swapa( Register s1, int simm13a, Register d ) { v9_dep(); emit_int32( op(ldst_op) | rd(d) | op3(swap_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 234, note op in book is wrong, see pp 268
- void taddcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(taddcc_op3 ) | rs1(s1) | rs2(s2) ); }
- void taddcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(taddcc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void taddcctv( Register s1, Register s2, Register d ) { v9_dep(); emit_long( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | rs2(s2) ); }
- void taddcctv( Register s1, int simm13a, Register d ) { v9_dep(); emit_long( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void taddcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(taddcc_op3 ) | rs1(s1) | rs2(s2) ); }
+ void taddcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(taddcc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void taddcctv( Register s1, Register s2, Register d ) { v9_dep(); emit_int32( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | rs2(s2) ); }
+ void taddcctv( Register s1, int simm13a, Register d ) { v9_dep(); emit_int32( op(arith_op) | rd(d) | op3(taddcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 235
- void tsubcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcc_op3 ) | rs1(s1) | rs2(s2) ); }
- void tsubcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
- void tsubcctv( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | rs2(s2) ); }
- void tsubcctv( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void tsubcc( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcc_op3 ) | rs1(s1) | rs2(s2) ); }
+ void tsubcc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+ void tsubcctv( Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | rs2(s2) ); }
+ void tsubcctv( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(tsubcctv_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
// pp 237
- void trap( Condition c, CC cc, Register s1, Register s2 ) { v8_no_cc(cc); emit_long( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | rs2(s2)); }
- void trap( Condition c, CC cc, Register s1, int trapa ) { v8_no_cc(cc); emit_long( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | immed(true) | u_field(trapa, 6, 0)); }
+ void trap( Condition c, CC cc, Register s1, Register s2 ) { v8_no_cc(cc); emit_int32( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | rs2(s2)); }
+ void trap( Condition c, CC cc, Register s1, int trapa ) { v8_no_cc(cc); emit_int32( op(arith_op) | cond(c) | op3(trap_op3) | rs1(s1) | trapcc(cc) | immed(true) | u_field(trapa, 6, 0)); }
// simple uncond. trap
void trap( int trapa ) { trap( always, icc, G0, trapa ); }
// pp 239 omit write priv register for now
- inline void wry( Register d) { v9_dep(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(0, 29, 25)); }
- inline void wrccr(Register s) { v9_only(); emit_long( op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25)); }
- inline void wrccr(Register s, int simm13a) { v9_only(); emit_long( op(arith_op) |
+ inline void wry( Register d) { v9_dep(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(0, 29, 25)); }
+ inline void wrccr(Register s) { v9_only(); emit_int32( op(arith_op) | rs1(s) | op3(wrreg_op3) | u_field(2, 29, 25)); }
+ inline void wrccr(Register s, int simm13a) { v9_only(); emit_int32( op(arith_op) |
rs1(s) |
op3(wrreg_op3) |
u_field(2, 29, 25) |
immed(true) |
simm(simm13a, 13)); }
- inline void wrasi(Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
+ inline void wrasi(Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
// wrasi(d, imm) stores (d xor imm) to asi
- inline void wrasi(Register d, int simm13a) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) |
+ inline void wrasi(Register d, int simm13a) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) |
u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
- inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
+ inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
// VIS3 instructions
- void movstosw( FloatRegister s, Register d ) { vis3_only(); emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
- void movstouw( FloatRegister s, Register d ) { vis3_only(); emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstouw_opf) | fs2(s, FloatRegisterImpl::S)); }
- void movdtox( FloatRegister s, Register d ) { vis3_only(); emit_long( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mdtox_opf) | fs2(s, FloatRegisterImpl::D)); }
+ void movstosw( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
+ void movstouw( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstouw_opf) | fs2(s, FloatRegisterImpl::S)); }
+ void movdtox( FloatRegister s, Register d ) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mdtox_opf) | fs2(s, FloatRegisterImpl::D)); }
- void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_long( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
- void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_long( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
+ void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
+ void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -35,24 +35,24 @@
# endif
}
-inline void Assembler::emit_long(int x) {
+inline void Assembler::emit_int32(int x) {
check_delay();
- AbstractAssembler::emit_long(x);
+ AbstractAssembler::emit_int32(x);
}
inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
relocate(rtype);
- emit_long(x);
+ emit_int32(x);
}
inline void Assembler::emit_data(int x, RelocationHolder const& rspec) {
relocate(rspec);
- emit_long(x);
+ emit_int32(x);
}
-inline void Assembler::add(Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
-inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
+inline void Assembler::add(Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); }
inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); }
@@ -79,93 +79,93 @@
inline void Assembler::call( address d, relocInfo::relocType rt ) { cti(); emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt); has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
inline void Assembler::call( Label& L, relocInfo::relocType rt ) { call( target(L), rt); }
-inline void Assembler::flush( Register s1, Register s2) { emit_long( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
+inline void Assembler::flush( Register s1, Register s2) { emit_int32( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); }
-inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); }
-inline void Assembler::ldfsr( Register s1, Register s2) { v9_dep(); emit_long( op(ldst_op) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldfsr( Register s1, Register s2) { v9_dep(); emit_int32( op(ldst_op) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldfsr( Register s1, int simm13a) { v9_dep(); emit_data( op(ldst_op) | op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldxfsr( Register s1, Register s2) { v9_only(); emit_long( op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldxfsr( Register s1, Register s2) { v9_only(); emit_int32( op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldxfsr( Register s1, int simm13a) { v9_only(); emit_data( op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldc( Register s1, Register s2, int crd) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(ldc_op3 ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldc( Register s1, Register s2, int crd) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(ldc_op3 ) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldc( Register s1, int simm13a, int crd) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(ldc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lddc( Register s1, Register s2, int crd) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lddc( Register s1, Register s2, int crd) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | rs2(s2) ); }
inline void Assembler::lddc( Register s1, int simm13a, int crd) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(lddc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldcsr( Register s1, Register s2, int crd) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldcsr( Register s1, Register s2, int crd) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldcsr( Register s1, int simm13a, int crd) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(ldcsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldsb( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsb( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldsb( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldsh( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsh( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldsh( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldsw( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldsw( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldsw( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldsw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldub( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldub( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldub( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lduh( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lduh( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::lduh( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(lduh_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::lduw( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::lduw( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::lduw( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(lduw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldx( Register s1, Register s2, Register d) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldx( Register s1, Register s2, Register d) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldx( Register s1, int simm13a, Register d) { v9_only(); emit_data( op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldd( Register s1, Register s2, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldd( Register s1, Register s2, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_int32( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldd( Register s1, int simm13a, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::ldstub( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::ldstub( Register s1, Register s2, Register d) { emit_int32( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::ldstub( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::rett( Register s1, Register s2 ) { cti(); emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
+inline void Assembler::rett( Register s1, Register s2 ) { cti(); emit_int32( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); }
inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { cti(); emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt); has_delay_slot(); }
inline void Assembler::sethi( int imm22a, Register d, RelocationHolder const& rspec ) { emit_data( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(imm22a), rspec); }
// pp 222
-inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stfsr( Register s1, Register s2) { v9_dep(); emit_long( op(ldst_op) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stfsr( Register s1, Register s2) { v9_dep(); emit_int32( op(ldst_op) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stfsr( Register s1, int simm13a) { v9_dep(); emit_data( op(ldst_op) | op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stxfsr( Register s1, Register s2) { v9_only(); emit_long( op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stxfsr( Register s1, Register s2) { v9_only(); emit_int32( op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stxfsr( Register s1, int simm13a) { v9_only(); emit_data( op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
// p 226
-inline void Assembler::stb( Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stb( Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stb( Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(stb_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::sth( Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::sth( Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::sth( Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(sth_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stw( Register d, Register s1, Register s2) { emit_long( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stw( Register d, Register s1, Register s2) { emit_int32( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stw( Register d, Register s1, int simm13a) { emit_data( op(ldst_op) | rd(d) | op3(stw_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stx( Register d, Register s1, Register s2) { v9_only(); emit_long( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stx( Register d, Register s1, Register s2) { v9_only(); emit_int32( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stx( Register d, Register s1, int simm13a) { v9_only(); emit_data( op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::std( Register d, Register s1, Register s2) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::std( Register d, Register s1, Register s2) { v9_dep(); assert(d->is_even(), "not even"); emit_int32( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::std( Register d, Register s1, int simm13a) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
// v8 p 99
-inline void Assembler::stc( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stc( int crd, Register s1, Register s2) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stc( int crd, Register s1, int simm13a) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stdc( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stdc( int crd, Register s1, Register s2) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stdc( int crd, Register s1, int simm13a) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(stdc_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stcsr( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stcsr( int crd, Register s1, Register s2) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stcsr( int crd, Register s1, int simm13a) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(stcsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
-inline void Assembler::stdcq( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::stdcq( int crd, Register s1, Register s2) { v8_only(); emit_int32( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::stdcq( int crd, Register s1, int simm13a) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
// pp 231
-inline void Assembler::swap( Register s1, Register s2, Register d) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); }
+inline void Assembler::swap( Register s1, Register s2, Register d) { v9_dep(); emit_int32( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); }
inline void Assembler::swap( Register s1, int simm13a, Register d) { v9_dep(); emit_data( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
--- a/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -137,7 +137,7 @@
}
__ ret(); // return from interpreter activation
__ delayed()->restore(I5_savedSP, G0, SP); // remove interpreter frame
- NOT_PRODUCT(__ emit_long(0);) // marker for disassembly
+ NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly
return entry;
}
@@ -232,7 +232,7 @@
}
__ retl(); // return from interpreter activation
__ delayed()->nop(); // schedule this better
- NOT_PRODUCT(__ emit_long(0);) // marker for disassembly
+ NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly
return entry;
}
@@ -1473,7 +1473,7 @@
__ brx(Assembler::equal, false, Assembler::pt, skip); \
__ delayed()->nop(); \
__ breakpoint_trap(); \
- __ emit_long(marker); \
+ __ emit_int32(marker); \
__ bind(skip); \
}
#else
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -1224,7 +1224,7 @@
// Relocation with special format (see relocInfo_sparc.hpp).
relocate(rspec, 1);
// Assembler::sethi(0x3fffff, d);
- emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
+ emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
// Don't add relocation for 'add'. Do patching during 'sethi' processing.
add(d, 0x3ff, d);
@@ -1240,7 +1240,7 @@
// Relocation with special format (see relocInfo_sparc.hpp).
relocate(rspec, 1);
// Assembler::sethi(encoded_k, d);
- emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) );
+ emit_int32( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) );
// Don't add relocation for 'add'. Do patching during 'sethi' processing.
add(d, low10(encoded_k), d);
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -259,7 +259,7 @@
}
__ ret(); // return from interpreter activation
__ delayed()->restore(I5_savedSP, G0, SP); // remove interpreter frame
- NOT_PRODUCT(__ emit_long(0);) // marker for disassembly
+ NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly
return entry;
}
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -182,7 +182,7 @@
// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
if (rtype == relocInfo::none)
- emit_long(data);
+ emit_int32(data);
else emit_data(data, Relocation::spec_simple(rtype), format);
}
@@ -202,7 +202,7 @@
else
code_section()->relocate(inst_mark(), rspec, format);
}
- emit_long(data);
+ emit_int32(data);
}
static int encode(Register r) {
@@ -243,7 +243,7 @@
} else {
emit_int8(op1);
emit_int8(op2 | encode(dst));
- emit_long(imm32);
+ emit_int32(imm32);
}
}
@@ -254,7 +254,7 @@
assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
emit_int8(op1);
emit_int8(op2 | encode(dst));
- emit_long(imm32);
+ emit_int32(imm32);
}
// immediate-to-memory forms
@@ -268,7 +268,7 @@
} else {
emit_int8(op1);
emit_operand(rm, adr, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
}
@@ -976,7 +976,7 @@
emit_int8(0x1F);
emit_int8((unsigned char)0x80);
// emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
- emit_long(0); // 32-bits offset (4 bytes)
+ emit_int32(0); // 32-bits offset (4 bytes)
}
void Assembler::addr_nop_8() {
@@ -987,7 +987,7 @@
emit_int8((unsigned char)0x84);
// emit_rm(cbuf, 0x2, EAX_enc, 0x4);
emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
- emit_long(0); // 32-bits offset (4 bytes)
+ emit_int32(0); // 32-bits offset (4 bytes)
}
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
@@ -1076,7 +1076,7 @@
prefix(dst);
emit_int8((unsigned char)0x81);
emit_operand(rsp, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::andl(Register dst, int32_t imm32) {
@@ -1204,7 +1204,7 @@
prefix(dst);
emit_int8((unsigned char)0x81);
emit_operand(rdi, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::cmpl(Register dst, int32_t imm32) {
@@ -1408,7 +1408,7 @@
} else {
emit_int8(0x69);
emit_int8((unsigned char)(0xC0 | encode));
- emit_long(value);
+ emit_int32(value);
}
}
@@ -1440,7 +1440,7 @@
"must be 32bit offset (call4)");
emit_int8(0x0F);
emit_int8((unsigned char)(0x80 | cc));
- emit_long(offs - long_size);
+ emit_int32(offs - long_size);
}
} else {
// Note: could eliminate cond. jumps to this jump if condition
@@ -1450,7 +1450,7 @@
L.add_patch_at(code(), locator());
emit_int8(0x0F);
emit_int8((unsigned char)(0x80 | cc));
- emit_long(0);
+ emit_int32(0);
}
}
@@ -1498,7 +1498,7 @@
emit_int8((offs - short_size) & 0xFF);
} else {
emit_int8((unsigned char)0xE9);
- emit_long(offs - long_size);
+ emit_int32(offs - long_size);
}
} else {
// By default, forward jumps are always 32-bit displacements, since
@@ -1508,7 +1508,7 @@
InstructionMark im(this);
L.add_patch_at(code(), locator());
emit_int8((unsigned char)0xE9);
- emit_long(0);
+ emit_int32(0);
}
}
@@ -1732,7 +1732,7 @@
void Assembler::movl(Register dst, int32_t imm32) {
int encode = prefix_and_encode(dst->encoding());
emit_int8((unsigned char)(0xB8 | encode));
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::movl(Register dst, Register src) {
@@ -1753,7 +1753,7 @@
prefix(dst);
emit_int8((unsigned char)0xC7);
emit_operand(rax, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::movl(Address dst, Register src) {
@@ -2468,6 +2468,26 @@
emit_int8((unsigned char)(0xC0 | encode));
}
+void Assembler::vptest(XMMRegister dst, Address src) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionMark im(this);
+ bool vector256 = true;
+ assert(dst != xnoreg, "sanity");
+ int dst_enc = dst->encoding();
+ // swap src<->dst for encoding
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+ emit_int8(0x17);
+ emit_operand(dst, src);
+}
+
+void Assembler::vptest(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ emit_int8(0x17);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -2499,7 +2519,7 @@
// in 64bits we push 64bits onto the stack but only
// take a 32bit immediate
emit_int8(0x68);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::push(Register src) {
@@ -2544,12 +2564,18 @@
emit_int8((unsigned char)0xA5);
}
+// sets rcx bytes with rax, value at [edi]
+void Assembler::rep_stosb() {
+ emit_int8((unsigned char)0xF3); // REP
+ LP64_ONLY(prefix(REX_W));
+ emit_int8((unsigned char)0xAA); // STOSB
+}
+
// sets rcx pointer sized words with rax, value at [edi]
// generic
-void Assembler::rep_set() { // rep_set
- emit_int8((unsigned char)0xF3);
- // STOSQ
- LP64_ONLY(prefix(REX_W));
+void Assembler::rep_stos() {
+ emit_int8((unsigned char)0xF3); // REP
+ LP64_ONLY(prefix(REX_W)); // LP64:STOSQ, LP32:STOSD
emit_int8((unsigned char)0xAB);
}
@@ -2785,7 +2811,7 @@
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::testl(Register dst, Register src) {
@@ -3650,6 +3676,15 @@
emit_int8(0x01);
}
+// duplicate 4-bytes integer data from src into 8 locations in dest
+void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_avx2(), "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+ emit_int8(0x58);
+ emit_int8((unsigned char)(0xC0 | encode));
+}
+
void Assembler::vzeroupper() {
assert(VM_Version::supports_avx(), "");
(void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
@@ -4720,7 +4755,7 @@
prefixq(dst);
emit_int8((unsigned char)0x81);
emit_operand(rsp, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::andq(Register dst, int32_t imm32) {
@@ -4793,7 +4828,7 @@
prefixq(dst);
emit_int8((unsigned char)0x81);
emit_operand(rdi, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::cmpq(Register dst, int32_t imm32) {
@@ -4932,7 +4967,7 @@
} else {
emit_int8(0x69);
emit_int8((unsigned char)(0xC0 | encode));
- emit_long(value);
+ emit_int32(value);
}
}
@@ -5085,7 +5120,7 @@
InstructionMark im(this);
int encode = prefixq_and_encode(dst->encoding());
emit_int8((unsigned char)(0xC7 | encode));
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::movslq(Address dst, int32_t imm32) {
@@ -5094,7 +5129,7 @@
prefixq(dst);
emit_int8((unsigned char)0xC7);
emit_operand(rax, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::movslq(Register dst, Address src) {
@@ -5172,7 +5207,7 @@
prefixq(dst);
emit_int8((unsigned char)0x81);
emit_operand(rcx, dst, 4);
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::orq(Register dst, int32_t imm32) {
@@ -5407,7 +5442,7 @@
emit_int8((unsigned char)0xF7);
emit_int8((unsigned char)(0xC0 | encode));
}
- emit_long(imm32);
+ emit_int32(imm32);
}
void Assembler::testq(Register dst, Register src) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -832,7 +832,8 @@
// These do register sized moves/scans
void rep_mov();
- void rep_set();
+ void rep_stos();
+ void rep_stosb();
void repne_scan();
#ifdef _LP64
void repne_scanl();
@@ -1443,9 +1444,12 @@
// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
- // Logical Compare Double Quadword
+ // Logical Compare 128bit
void ptest(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, Address src);
+ // Logical Compare 256bit
+ void vptest(XMMRegister dst, XMMRegister src);
+ void vptest(XMMRegister dst, Address src);
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
@@ -1753,6 +1757,9 @@
void vextractf128h(Address dst, XMMRegister src);
void vextracti128h(Address dst, XMMRegister src);
+ // duplicate 4-bytes integer data from src into 8 locations in dest
+ void vpbroadcastd(XMMRegister dst, XMMRegister src);
+
// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transaction penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -120,6 +120,9 @@
product(bool, UseUnalignedLoadStores, false, \
"Use SSE2 MOVDQU instruction for Arraycopy") \
\
+ product(bool, UseFastStosb, false, \
+ "Use fast-string operation for zeroing: rep stosb") \
+ \
/* assembler */ \
product(bool, Use486InstrsOnly, false, \
"Use 80486 Compliant instruction subset") \
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -2540,7 +2540,7 @@
// 0000 1111 1000 tttn #32-bit disp
emit_int8(0x0F);
emit_int8((unsigned char)(0x80 | cc));
- emit_long(offs - long_size);
+ emit_int32(offs - long_size);
}
} else {
#ifdef ASSERT
@@ -5224,6 +5224,22 @@
}
+void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
+ // cnt - number of qwords (8-byte words).
+ // base - start address, qword aligned.
+ assert(base==rdi, "base register must be edi for rep stos");
+ assert(tmp==rax, "tmp register must be eax for rep stos");
+ assert(cnt==rcx, "cnt register must be ecx for rep stos");
+
+ xorptr(tmp, tmp);
+ if (UseFastStosb) {
+ shlptr(cnt,3); // convert to number of bytes
+ rep_stosb();
+ } else {
+ NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
+ rep_stos();
+ }
+}
// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
@@ -5659,42 +5675,114 @@
testl(cnt2, cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
- // Load first characters
+ // Compare first characters
load_unsigned_short(result, Address(str1, 0));
load_unsigned_short(cnt1, Address(str2, 0));
-
- // Compare first characters
subl(result, cnt1);
jcc(Assembler::notZero, POP_LABEL);
- decrementl(cnt2);
- jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
- {
- // Check after comparing first character to see if strings are equivalent
- Label LSkip2;
- // Check if the strings start at same location
- cmpptr(str1, str2);
- jccb(Assembler::notEqual, LSkip2);
-
- // Check if the length difference is zero (from stack)
- cmpl(Address(rsp, 0), 0x0);
- jcc(Assembler::equal, LENGTH_DIFF_LABEL);
-
- // Strings might not be equivalent
- bind(LSkip2);
- }
+ cmpl(cnt2, 1);
+ jcc(Assembler::equal, LENGTH_DIFF_LABEL);
+
+ // Check if the strings start at the same location.
+ cmpptr(str1, str2);
+ jcc(Assembler::equal, LENGTH_DIFF_LABEL);
Address::ScaleFactor scale = Address::times_2;
int stride = 8;
- // Advance to next element
- addptr(str1, 16/stride);
- addptr(str2, 16/stride);
-
- if (UseSSE42Intrinsics) {
+ if (UseAVX >= 2) {
+ Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
+ Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
+ Label COMPARE_TAIL_LONG;
+ int pcmpmask = 0x19;
+
+ // Setup to compare 16-chars (32-bytes) vectors,
+ // start from first character again because it has aligned address.
+ int stride2 = 16;
+ int adr_stride = stride << scale;
+ int adr_stride2 = stride2 << scale;
+
+ assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
+ // rax and rdx are used by pcmpestri as elements counters
+ movl(result, cnt2);
+ andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
+ jcc(Assembler::zero, COMPARE_TAIL_LONG);
+
+ // fast path : compare first 2 8-char vectors.
+ bind(COMPARE_16_CHARS);
+ movdqu(vec1, Address(str1, 0));
+ pcmpestri(vec1, Address(str2, 0), pcmpmask);
+ jccb(Assembler::below, COMPARE_INDEX_CHAR);
+
+ movdqu(vec1, Address(str1, adr_stride));
+ pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
+ jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
+ addl(cnt1, stride);
+
+ // Compare the characters at index in cnt1
+ bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
+ load_unsigned_short(result, Address(str1, cnt1, scale));
+ load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+ subl(result, cnt2);
+ jmp(POP_LABEL);
+
+ // Setup the registers to start vector comparison loop
+ bind(COMPARE_WIDE_VECTORS);
+ lea(str1, Address(str1, result, scale));
+ lea(str2, Address(str2, result, scale));
+ subl(result, stride2);
+ subl(cnt2, stride2);
+ jccb(Assembler::zero, COMPARE_WIDE_TAIL);
+ negptr(result);
+
+ // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
+ bind(COMPARE_WIDE_VECTORS_LOOP);
+ vmovdqu(vec1, Address(str1, result, scale));
+ vpxor(vec1, Address(str2, result, scale));
+ vptest(vec1, vec1);
+ jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
+ addptr(result, stride2);
+ subl(cnt2, stride2);
+ jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
+
+ // compare wide vectors tail
+ bind(COMPARE_WIDE_TAIL);
+ testptr(result, result);
+ jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+
+ movl(result, stride2);
+ movl(cnt2, result);
+ negptr(result);
+ jmpb(COMPARE_WIDE_VECTORS_LOOP);
+
+ // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
+ bind(VECTOR_NOT_EQUAL);
+ lea(str1, Address(str1, result, scale));
+ lea(str2, Address(str2, result, scale));
+ jmp(COMPARE_16_CHARS);
+
+ // Compare tail chars, length between 1 to 15 chars
+ bind(COMPARE_TAIL_LONG);
+ movl(cnt2, result);
+ cmpl(cnt2, stride);
+ jccb(Assembler::less, COMPARE_SMALL_STR);
+
+ movdqu(vec1, Address(str1, 0));
+ pcmpestri(vec1, Address(str2, 0), pcmpmask);
+ jcc(Assembler::below, COMPARE_INDEX_CHAR);
+ subptr(cnt2, stride);
+ jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+ lea(str1, Address(str1, result, scale));
+ lea(str2, Address(str2, result, scale));
+ negptr(cnt2);
+ jmpb(WHILE_HEAD_LABEL);
+
+ bind(COMPARE_SMALL_STR);
+ } else if (UseSSE42Intrinsics) {
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
int pcmpmask = 0x19;
- // Setup to compare 16-byte vectors
+ // Setup to compare 8-char (16-byte) vectors,
+ // start from first character again because it has aligned address.
movl(result, cnt2);
andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
jccb(Assembler::zero, COMPARE_TAIL);
@@ -5726,7 +5814,7 @@
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
// compare wide vectors tail
- testl(result, result);
+ testptr(result, result);
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
movl(cnt2, stride);
@@ -5738,21 +5826,20 @@
// Mismatched characters in the vectors
bind(VECTOR_NOT_EQUAL);
- addptr(result, cnt1);
- movptr(cnt2, result);
- load_unsigned_short(result, Address(str1, cnt2, scale));
- load_unsigned_short(cnt1, Address(str2, cnt2, scale));
- subl(result, cnt1);
+ addptr(cnt1, result);
+ load_unsigned_short(result, Address(str1, cnt1, scale));
+ load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+ subl(result, cnt2);
jmpb(POP_LABEL);
bind(COMPARE_TAIL); // limit is zero
movl(cnt2, result);
// Fallthru to tail compare
}
-
// Shift str2 and str1 to the end of the arrays, negate min
- lea(str1, Address(str1, cnt2, scale, 0));
- lea(str2, Address(str2, cnt2, scale, 0));
+ lea(str1, Address(str1, cnt2, scale));
+ lea(str2, Address(str2, cnt2, scale));
+ decrementl(cnt2); // first character was compared already
negptr(cnt2);
// Compare the rest of the elements
@@ -5817,7 +5904,44 @@
shll(limit, 1); // byte count != 0
movl(result, limit); // copy
- if (UseSSE42Intrinsics) {
+ if (UseAVX >= 2) {
+ // With AVX2, use 32-byte vector compare
+ Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+ // Compare 32-byte vectors
+ andl(result, 0x0000001e); // tail count (in bytes)
+ andl(limit, 0xffffffe0); // vector count (in bytes)
+ jccb(Assembler::zero, COMPARE_TAIL);
+
+ lea(ary1, Address(ary1, limit, Address::times_1));
+ lea(ary2, Address(ary2, limit, Address::times_1));
+ negptr(limit);
+
+ bind(COMPARE_WIDE_VECTORS);
+ vmovdqu(vec1, Address(ary1, limit, Address::times_1));
+ vmovdqu(vec2, Address(ary2, limit, Address::times_1));
+ vpxor(vec1, vec2);
+
+ vptest(vec1, vec1);
+ jccb(Assembler::notZero, FALSE_LABEL);
+ addptr(limit, 32);
+ jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+ testl(result, result);
+ jccb(Assembler::zero, TRUE_LABEL);
+
+ vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
+ vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
+ vpxor(vec1, vec2);
+
+ vptest(vec1, vec1);
+ jccb(Assembler::notZero, FALSE_LABEL);
+ jmpb(TRUE_LABEL);
+
+ bind(COMPARE_TAIL); // limit is zero
+ movl(limit, result);
+ // Fallthru to tail compare
+ } else if (UseSSE42Intrinsics) {
// With SSE4.2, use double quad vector compare
Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
@@ -5995,29 +6119,53 @@
{
assert( UseSSE >= 2, "supported cpu only" );
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
- // Fill 32-byte chunks
movdl(xtmp, value);
- pshufd(xtmp, xtmp, 0);
-
- subl(count, 8 << shift);
- jcc(Assembler::less, L_check_fill_8_bytes);
- align(16);
-
- BIND(L_fill_32_bytes_loop);
-
- if (UseUnalignedLoadStores) {
- movdqu(Address(to, 0), xtmp);
- movdqu(Address(to, 16), xtmp);
+ if (UseAVX >= 2 && UseUnalignedLoadStores) {
+ // Fill 64-byte chunks
+ Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
+ vpbroadcastd(xtmp, xtmp);
+
+ subl(count, 16 << shift);
+ jcc(Assembler::less, L_check_fill_32_bytes);
+ align(16);
+
+ BIND(L_fill_64_bytes_loop);
+ vmovdqu(Address(to, 0), xtmp);
+ vmovdqu(Address(to, 32), xtmp);
+ addptr(to, 64);
+ subl(count, 16 << shift);
+ jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
+
+ BIND(L_check_fill_32_bytes);
+ addl(count, 8 << shift);
+ jccb(Assembler::less, L_check_fill_8_bytes);
+ vmovdqu(Address(to, 0), xtmp);
+ addptr(to, 32);
+ subl(count, 8 << shift);
} else {
- movq(Address(to, 0), xtmp);
- movq(Address(to, 8), xtmp);
- movq(Address(to, 16), xtmp);
- movq(Address(to, 24), xtmp);
+ // Fill 32-byte chunks
+ pshufd(xtmp, xtmp, 0);
+
+ subl(count, 8 << shift);
+ jcc(Assembler::less, L_check_fill_8_bytes);
+ align(16);
+
+ BIND(L_fill_32_bytes_loop);
+
+ if (UseUnalignedLoadStores) {
+ movdqu(Address(to, 0), xtmp);
+ movdqu(Address(to, 16), xtmp);
+ } else {
+ movq(Address(to, 0), xtmp);
+ movq(Address(to, 8), xtmp);
+ movq(Address(to, 16), xtmp);
+ movq(Address(to, 24), xtmp);
+ }
+
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
}
-
- addptr(to, 32);
- subl(count, 8 << shift);
- jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
BIND(L_check_fill_8_bytes);
addl(count, 8 << shift);
jccb(Assembler::zero, L_exit);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -1011,6 +1011,10 @@
Assembler::vxorpd(dst, nds, src, vector256);
}
+ // Simple version for AVX2 256bit vectors
+ void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
+ void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
+
// Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
if (UseAVX > 1) // vinserti128h is available only in AVX2
@@ -1096,6 +1100,9 @@
// C2 compiled method's prolog code.
void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
+ // clear memory of size 'cnt' qwords, starting at 'base'.
+ void clear_mem(Register base, Register cnt, Register rtmp);
+
// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -796,16 +796,22 @@
__ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);
- if(UseUnalignedLoadStores) {
- __ movdqu(xmm0, Address(from, 0));
- __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
- __ movdqu(xmm1, Address(from, 16));
- __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
- __ movdqu(xmm2, Address(from, 32));
- __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
- __ movdqu(xmm3, Address(from, 48));
- __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
-
+ if (UseUnalignedLoadStores) {
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0, Address(from, 0));
+ __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+ __ vmovdqu(xmm1, Address(from, 32));
+ __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
+ } else {
+ __ movdqu(xmm0, Address(from, 0));
+ __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+ __ movdqu(xmm1, Address(from, 16));
+ __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+ __ movdqu(xmm2, Address(from, 32));
+ __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+ __ movdqu(xmm3, Address(from, 48));
+ __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+ }
} else {
__ movq(xmm0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1, 0), xmm0);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -1286,23 +1286,54 @@
// end_to - destination array end address
// qword_count - 64-bits element count, negative
// to - scratch
- // L_copy_32_bytes - entry label
+ // L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
- void copy_32_bytes_forward(Register end_from, Register end_to,
+ void copy_bytes_forward(Register end_from, Register end_to,
Register qword_count, Register to,
- Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+ Label& L_copy_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
- __ BIND(L_loop);
- if(UseUnalignedLoadStores) {
- __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
- __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
- __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
- __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
-
+ if (UseUnalignedLoadStores) {
+ Label L_end;
+ // Copy 64-bytes per iteration
+ __ BIND(L_loop);
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+ __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
+ } else {
+ __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+ __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+ __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+ __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
+ }
+ __ BIND(L_copy_bytes);
+ __ addptr(qword_count, 8);
+ __ jcc(Assembler::lessEqual, L_loop);
+ __ subptr(qword_count, 4); // sub(8) and add(4)
+ __ jccb(Assembler::greater, L_end);
+ // Copy trailing 32 bytes
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+ __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+ } else {
+ __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+ __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+ }
+ __ addptr(qword_count, 4);
+ __ BIND(L_end);
} else {
+ // Copy 32-bytes per iteration
+ __ BIND(L_loop);
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
@@ -1311,15 +1342,15 @@
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+
+ __ BIND(L_copy_bytes);
+ __ addptr(qword_count, 4);
+ __ jcc(Assembler::lessEqual, L_loop);
}
- __ BIND(L_copy_32_bytes);
- __ addptr(qword_count, 4);
- __ jcc(Assembler::lessEqual, L_loop);
__ subptr(qword_count, 4);
__ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
}
-
// Copy big chunks backward
//
// Inputs:
@@ -1327,23 +1358,55 @@
// dest - destination array address
// qword_count - 64-bits element count
// to - scratch
- // L_copy_32_bytes - entry label
+ // L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
- void copy_32_bytes_backward(Register from, Register dest,
+ void copy_bytes_backward(Register from, Register dest,
Register qword_count, Register to,
- Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+ Label& L_copy_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
- __ BIND(L_loop);
- if(UseUnalignedLoadStores) {
- __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
- __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
- __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
- __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
-
+ if (UseUnalignedLoadStores) {
+ Label L_end;
+ // Copy 64-bytes per iteration
+ __ BIND(L_loop);
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
+ __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+ } else {
+ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+ __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+ __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
+ }
+ __ BIND(L_copy_bytes);
+ __ subptr(qword_count, 8);
+ __ jcc(Assembler::greaterEqual, L_loop);
+
+ __ addptr(qword_count, 4); // add(8) and sub(4)
+ __ jccb(Assembler::less, L_end);
+ // Copy trailing 32 bytes
+ if (UseAVX >= 2) {
+ __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
+ } else {
+ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+ }
+ __ subptr(qword_count, 4);
+ __ BIND(L_end);
} else {
+ // Copy 32-bytes per iteration
+ __ BIND(L_loop);
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
@@ -1352,10 +1415,11 @@
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
+
+ __ BIND(L_copy_bytes);
+ __ subptr(qword_count, 4);
+ __ jcc(Assembler::greaterEqual, L_loop);
}
- __ BIND(L_copy_32_bytes);
- __ subptr(qword_count, 4);
- __ jcc(Assembler::greaterEqual, L_loop);
__ addptr(qword_count, 4);
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
@@ -1385,7 +1449,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
Label L_copy_byte, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
@@ -1417,7 +1481,7 @@
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count); // make the count negative
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1460,8 +1524,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy in 32-bytes chunks
- copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
@@ -1488,7 +1552,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
@@ -1531,10 +1595,10 @@
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
__ testl(byte_count, 4);
- __ jcc(Assembler::zero, L_copy_32_bytes);
+ __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1549,8 +1613,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy in 32-bytes chunks
- copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1585,7 +1649,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
@@ -1616,7 +1680,7 @@
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1652,8 +1716,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy in 32-bytes chunks
- copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
@@ -1700,7 +1764,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
@@ -1735,10 +1799,10 @@
// Check for and copy trailing dword
__ BIND(L_copy_4_bytes);
__ testl(word_count, 2);
- __ jcc(Assembler::zero, L_copy_32_bytes);
+ __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1753,8 +1817,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy in 32-bytes chunks
- copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1790,7 +1854,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
@@ -1826,7 +1890,7 @@
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1853,8 +1917,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy 32-bytes chunks
- copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
@@ -1882,7 +1946,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
+ Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
@@ -1916,10 +1980,10 @@
// Check for and copy trailing dword
__ testl(dword_count, 1);
- __ jcc(Assembler::zero, L_copy_32_bytes);
+ __ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, dword_count, Address::times_4, -4));
__ movl(Address(to, dword_count, Address::times_4, -4), rax);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -1937,8 +2001,8 @@
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
- // Copy in 32-bytes chunks
- copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ bind(L_exit);
if (is_oop) {
@@ -1976,7 +2040,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+ Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
@@ -2008,7 +2072,7 @@
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -2027,8 +2091,8 @@
__ ret(0);
}
- // Copy 64-byte chunks
- copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
@@ -2065,7 +2129,7 @@
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
- Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+ Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
@@ -2091,7 +2155,7 @@
gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
}
- __ jmp(L_copy_32_bytes);
+ __ jmp(L_copy_bytes);
// Copy trailing qwords
__ BIND(L_copy_8_bytes);
@@ -2110,8 +2174,8 @@
__ ret(0);
}
- // Copy in 32-bytes chunks
- copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+ // Copy in multi-bytes chunks
+ copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -429,7 +429,7 @@
}
char buf[256];
- jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@@ -446,6 +446,7 @@
(supports_avx() ? ", avx" : ""),
(supports_avx2() ? ", avx2" : ""),
(supports_aes() ? ", aes" : ""),
+ (supports_erms() ? ", erms" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
(supports_lzcnt() ? ", lzcnt": ""),
@@ -671,6 +672,16 @@
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
}
+ // Use fast-string operations if available.
+ if (supports_erms()) {
+ if (FLAG_IS_DEFAULT(UseFastStosb)) {
+ UseFastStosb = true;
+ }
+ } else if (UseFastStosb) {
+ warning("fast-string operations are not available on this CPU");
+ FLAG_SET_DEFAULT(UseFastStosb, false);
+ }
+
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(AlignVector)) {
// Modern processors allow misaligned memory operations for vectors.
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -204,7 +204,8 @@
avx2 : 1,
: 2,
bmi2 : 1,
- : 23;
+ erms : 1,
+ : 22;
} bits;
};
@@ -247,7 +248,8 @@
CPU_TSCINV = (1 << 16),
CPU_AVX = (1 << 17),
CPU_AVX2 = (1 << 18),
- CPU_AES = (1 << 19)
+ CPU_AES = (1 << 19),
+ CPU_ERMS = (1 << 20) // enhanced 'rep movsb/stosb' instructions
} cpuFeatureFlags;
enum {
@@ -425,6 +427,8 @@
result |= CPU_TSCINV;
if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
result |= CPU_AES;
+ if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
+ result |= CPU_ERMS;
// AMD features.
if (is_amd()) {
@@ -489,7 +493,7 @@
return (_cpuid_info.std_max_function >= 0xB) &&
// eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
// Some cpus have max cpuid >= 0xB but do not support processor topology.
- ((_cpuid_info.tpl_cpuidB0_eax & 0x1f | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
+ (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
}
static uint cores_per_cpu() {
@@ -550,6 +554,7 @@
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
+ static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Thu Jan 10 10:00:43 2013 -0800
@@ -11572,15 +11572,28 @@
// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+ predicate(!UseFastStosb);
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
- format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
- "XOR EAX,EAX\n\t"
+ format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
+ "SHL ECX,1\t# Convert doublewords to words\n\t"
"REP STOS\t# store EAX into [EDI++] while ECX--" %}
- opcode(0,0x4);
- ins_encode( Opcode(0xD1), RegOpc(ECX),
- OpcRegReg(0x33,EAX,EAX),
- Opcode(0xF3), Opcode(0xAB) );
+ ins_encode %{
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+ predicate(UseFastStosb);
+ match(Set dummy (ClearArray cnt base));
+ effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+ format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
+ "SHL ECX,3\t# Convert doublewords to bytes\n\t"
+ "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
+ ins_encode %{
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+ %}
ins_pipe( pipe_slow );
%}
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Thu Jan 10 10:00:43 2013 -0800
@@ -10374,16 +10374,33 @@
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
rFlagsReg cr)
%{
+ predicate(!UseFastStosb);
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
- format %{ "xorl rax, rax\t# ClearArray:\n\t"
- "rep stosq\t# Store rax to *rdi++ while rcx--" %}
- ins_encode(opc_reg_reg(0x33, RAX, RAX), // xorl %eax, %eax
- Opcode(0xF3), Opcode(0x48), Opcode(0xAB)); // rep REX_W stos
+ format %{ "xorq rax, rax\t# ClearArray:\n\t"
+ "rep stosq\t# Store rax to *rdi++ while rcx--" %}
+ ins_encode %{
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+ %}
ins_pipe(pipe_slow);
%}
+instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
+ rFlagsReg cr)
+%{
+ predicate(UseFastStosb);
+ match(Set dummy (ClearArray cnt base));
+ effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+ format %{ "xorq rax, rax\t# ClearArray:\n\t"
+ "shlq rcx,3\t# Convert doublewords to bytes\n\t"
+ "rep stosb\t# Store rax to *rdi++ while rcx--" %}
+ ins_encode %{
+ __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, regD tmp1, rFlagsReg cr)
%{
--- a/hotspot/src/share/vm/asm/assembler.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/asm/assembler.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -216,8 +216,6 @@
bool isByte(int x) const { return 0 <= x && x < 0x100; }
bool isShiftCount(int x) const { return 0 <= x && x < 32; }
- void emit_long(jint x) { emit_int32(x); } // deprecated
-
// Instruction boundaries (required when emitting relocatable values).
class InstructionMark: public StackObj {
private:
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -46,7 +46,8 @@
_method(callee),
_site_invoke_ratio(site_invoke_ratio),
_max_inline_level(max_inline_level),
- _count_inline_bcs(method()->code_size_for_inlining())
+ _count_inline_bcs(method()->code_size_for_inlining()),
+ _subtrees(c->comp_arena(), 2, 0, NULL)
{
NOT_PRODUCT(_count_inlines = 0;)
if (_caller_jvms != NULL) {
@@ -209,16 +210,18 @@
if ( callee_method->dont_inline()) return "don't inline by annotation";
if ( callee_method->has_unloaded_classes_in_signature()) return "unloaded signature classes";
- if (callee_method->force_inline() || callee_method->should_inline()) {
+ if (callee_method->should_inline()) {
// ignore heuristic controls on inlining
return NULL;
}
// Now perform checks which are heuristic
- if (callee_method->has_compiled_code() &&
- callee_method->instructions_size() > InlineSmallCode) {
+ if (!callee_method->force_inline()) {
+ if (callee_method->has_compiled_code() &&
+ callee_method->instructions_size() > InlineSmallCode) {
return "already compiled into a big method";
+ }
}
// don't inline exception code unless the top method belongs to an
@@ -277,12 +280,15 @@
//-----------------------------try_to_inline-----------------------------------
// return NULL if ok, reason for not inlining otherwise
// Relocated from "InliningClosure::try_to_inline"
-const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) {
-
+const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay) {
// Old algorithm had funny accumulating BC-size counters
if (UseOldInlining && ClipInlining
&& (int)count_inline_bcs() >= DesiredMethodLimit) {
- return "size > DesiredMethodLimit";
+ if (!callee_method->force_inline() || !IncrementalInline) {
+ return "size > DesiredMethodLimit";
+ } else if (!C->inlining_incrementally()) {
+ should_delay = true;
+ }
}
const char *msg = NULL;
@@ -303,8 +309,13 @@
if (callee_method->code_size() > MaxTrivialSize) {
// don't inline into giant methods
- if (C->unique() > (uint)NodeCountInliningCutoff) {
- return "NodeCountInliningCutoff";
+ if (C->over_inlining_cutoff()) {
+ if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form())
+ || !IncrementalInline) {
+ return "NodeCountInliningCutoff";
+ } else {
+ should_delay = true;
+ }
}
if ((!UseInterpreter || CompileTheWorld) &&
@@ -323,7 +334,11 @@
return "not an accessor";
}
if (inline_level() > _max_inline_level) {
- return "inlining too deep";
+ if (!callee_method->force_inline() || !IncrementalInline) {
+ return "inlining too deep";
+ } else if (!C->inlining_incrementally()) {
+ should_delay = true;
+ }
}
// detect direct and indirect recursive inlining
@@ -348,7 +363,11 @@
if (UseOldInlining && ClipInlining
&& (int)count_inline_bcs() + size >= DesiredMethodLimit) {
- return "size > DesiredMethodLimit";
+ if (!callee_method->force_inline() || !IncrementalInline) {
+ return "size > DesiredMethodLimit";
+ } else if (!C->inlining_incrementally()) {
+ should_delay = true;
+ }
}
// ok, inline this method
@@ -413,8 +432,9 @@
}
//------------------------------ok_to_inline-----------------------------------
-WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
+WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci, bool& should_delay) {
assert(callee_method != NULL, "caller checks for optimized virtual!");
+ assert(!should_delay, "should be initialized to false");
#ifdef ASSERT
// Make sure the incoming jvms has the same information content as me.
// This means that we can eventually make this whole class AllStatic.
@@ -444,7 +464,7 @@
// Check if inlining policy says no.
WarmCallInfo wci = *(initial_wci);
- failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci);
+ failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci, should_delay);
if (failure_msg != NULL && C->log() != NULL) {
C->log()->inline_fail(failure_msg);
}
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -606,6 +606,16 @@
\
develop(bool, VerifyAliases, false, \
"perform extra checks on the results of alias analysis") \
+ \
+ product(bool, IncrementalInline, true, \
+ "do post parse inlining") \
+ \
+ develop(bool, AlwaysIncrementalInline, false, \
+ "do all inlining incrementally") \
+ \
+ product(intx, LiveNodeCountInliningCutoff, 20000, \
+ "max number of live nodes in a method") \
+
C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/hotspot/src/share/vm/opto/callGenerator.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -262,8 +262,11 @@
// Allow inlining decisions to be delayed
class LateInlineCallGenerator : public DirectCallGenerator {
+ protected:
CallGenerator* _inline_cg;
+ virtual bool do_late_inline_check(JVMState* jvms) { return true; }
+
public:
LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
DirectCallGenerator(method, true), _inline_cg(inline_cg) {}
@@ -279,7 +282,9 @@
// Record that this call site should be revisited once the main
// parse is finished.
- Compile::current()->add_late_inline(this);
+ if (!is_mh_late_inline()) {
+ C->add_late_inline(this);
+ }
// Emit the CallStaticJava and request separate projections so
// that the late inlining logic can distinguish between fall
@@ -287,15 +292,34 @@
// as is done for allocations and macro expansion.
return DirectCallGenerator::generate(jvms);
}
+
+ virtual void print_inlining_late(const char* msg) {
+ CallNode* call = call_node();
+ Compile* C = Compile::current();
+ C->print_inlining_insert(this);
+ C->print_inlining(method(), call->jvms()->depth()-1, call->jvms()->bci(), msg);
+ }
+
};
-
void LateInlineCallGenerator::do_late_inline() {
// Can't inline it
if (call_node() == NULL || call_node()->outcnt() == 0 ||
call_node()->in(0) == NULL || call_node()->in(0)->is_top())
return;
+ for (int i1 = 0; i1 < method()->arg_size(); i1++) {
+ if (call_node()->in(TypeFunc::Parms + i1)->is_top()) {
+ assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+ return;
+ }
+ }
+
+ if (call_node()->in(TypeFunc::Memory)->is_top()) {
+ assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing");
+ return;
+ }
+
CallStaticJavaNode* call = call_node();
// Make a clone of the JVMState that appropriate to use for driving a parse
@@ -324,6 +348,11 @@
}
}
+ if (!do_late_inline_check(jvms)) {
+ map->disconnect_inputs(NULL, C);
+ return;
+ }
+
C->print_inlining_insert(this);
CompileLog* log = C->log();
@@ -360,6 +389,10 @@
result = (result_size == 1) ? kit.pop() : kit.pop_pair();
}
+ C->set_has_loops(C->has_loops() || _inline_cg->method()->has_loops());
+ C->env()->notice_inlined_method(_inline_cg->method());
+ C->set_inlining_progress(true);
+
kit.replace_call(call, result);
}
@@ -368,6 +401,83 @@
return new LateInlineCallGenerator(method, inline_cg);
}
+class LateInlineMHCallGenerator : public LateInlineCallGenerator {
+ ciMethod* _caller;
+ int _attempt;
+ bool _input_not_const;
+
+ virtual bool do_late_inline_check(JVMState* jvms);
+ virtual bool already_attempted() const { return _attempt > 0; }
+
+ public:
+ LateInlineMHCallGenerator(ciMethod* caller, ciMethod* callee, bool input_not_const) :
+ LateInlineCallGenerator(callee, NULL), _caller(caller), _attempt(0), _input_not_const(input_not_const) {}
+
+ virtual bool is_mh_late_inline() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms) {
+ JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
+ if (_input_not_const) {
+ // inlining won't be possible so no need to enqueue right now.
+ call_node()->set_generator(this);
+ } else {
+ Compile::current()->add_late_inline(this);
+ }
+ return new_jvms;
+ }
+
+ virtual void print_inlining_late(const char* msg) {
+ if (!_input_not_const) return;
+ LateInlineCallGenerator::print_inlining_late(msg);
+ }
+};
+
+bool LateInlineMHCallGenerator::do_late_inline_check(JVMState* jvms) {
+
+ CallGenerator* cg = for_method_handle_inline(jvms, _caller, method(), _input_not_const);
+
+ if (!_input_not_const) {
+ _attempt++;
+ }
+
+ if (cg != NULL) {
+ assert(!cg->is_late_inline() && cg->is_inline(), "we're doing late inlining");
+ _inline_cg = cg;
+ Compile::current()->dec_number_of_mh_late_inlines();
+ return true;
+ }
+
+ call_node()->set_generator(this);
+ return false;
+}
+
+CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const) {
+ Compile::current()->inc_number_of_mh_late_inlines();
+ CallGenerator* cg = new LateInlineMHCallGenerator(caller, callee, input_not_const);
+ return cg;
+}
+
+class LateInlineStringCallGenerator : public LateInlineCallGenerator {
+
+ public:
+ LateInlineStringCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
+ LateInlineCallGenerator(method, inline_cg) {}
+
+ virtual JVMState* generate(JVMState* jvms) {
+ Compile *C = Compile::current();
+ C->print_inlining_skip(this);
+
+ C->add_string_late_inline(this);
+
+ JVMState* new_jvms = DirectCallGenerator::generate(jvms);
+ return new_jvms;
+ }
+};
+
+CallGenerator* CallGenerator::for_string_late_inline(ciMethod* method, CallGenerator* inline_cg) {
+ return new LateInlineStringCallGenerator(method, inline_cg);
+}
+
//---------------------------WarmCallGenerator--------------------------------
// Internal class which handles initial deferral of inlining decisions.
@@ -586,35 +696,53 @@
}
-CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden) {
assert(callee->is_method_handle_intrinsic() ||
callee->is_compiled_lambda_form(), "for_method_handle_call mismatch");
- CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee);
- if (cg != NULL)
- return cg;
- return CallGenerator::for_direct_call(callee);
+ bool input_not_const;
+ CallGenerator* cg = CallGenerator::for_method_handle_inline(jvms, caller, callee, input_not_const);
+ Compile* C = Compile::current();
+ if (cg != NULL) {
+ if (!delayed_forbidden && AlwaysIncrementalInline) {
+ return CallGenerator::for_late_inline(callee, cg);
+ } else {
+ return cg;
+ }
+ }
+ int bci = jvms->bci();
+ ciCallProfile profile = caller->call_profile_at_bci(bci);
+ int call_site_count = caller->scale_count(profile.count());
+
+ if (IncrementalInline && call_site_count > 0 &&
+ (input_not_const || !C->inlining_incrementally() || C->over_inlining_cutoff())) {
+ return CallGenerator::for_mh_late_inline(caller, callee, input_not_const);
+ } else {
+ // Out-of-line call.
+ return CallGenerator::for_direct_call(callee);
+ }
}
-CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee) {
+CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const) {
GraphKit kit(jvms);
PhaseGVN& gvn = kit.gvn();
Compile* C = kit.C;
vmIntrinsics::ID iid = callee->intrinsic_id();
+ input_not_const = true;
switch (iid) {
case vmIntrinsics::_invokeBasic:
{
// Get MethodHandle receiver:
Node* receiver = kit.argument(0);
if (receiver->Opcode() == Op_ConP) {
+ input_not_const = false;
const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove
const int vtable_index = Method::invalid_vtable_index;
- CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS);
+ CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, true, true);
+ assert(!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
if (cg != NULL && cg->is_inline())
return cg;
- } else {
- if (PrintInlining) C->print_inlining(callee, jvms->depth() - 1, jvms->bci(), "receiver not constant");
}
}
break;
@@ -627,6 +755,7 @@
// Get MemberName argument:
Node* member_name = kit.argument(callee->arg_size() - 1);
if (member_name->Opcode() == Op_ConP) {
+ input_not_const = false;
const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr();
ciMethod* target = oop_ptr->const_oop()->as_member_name()->get_vmtarget();
@@ -659,9 +788,25 @@
}
}
}
- const int vtable_index = Method::invalid_vtable_index;
- const bool call_is_virtual = target->is_abstract(); // FIXME workaround
- CallGenerator* cg = C->call_generator(target, vtable_index, call_is_virtual, jvms, true, PROB_ALWAYS);
+
+ // Try to get the most accurate receiver type
+ const bool is_virtual = (iid == vmIntrinsics::_linkToVirtual);
+ const bool is_virtual_or_interface = (is_virtual || iid == vmIntrinsics::_linkToInterface);
+ int vtable_index = Method::invalid_vtable_index;
+ bool call_does_dispatch = false;
+
+ if (is_virtual_or_interface) {
+ ciInstanceKlass* klass = target->holder();
+ Node* receiver_node = kit.argument(0);
+ const TypeOopPtr* receiver_type = gvn.type(receiver_node)->isa_oopptr();
+ // call_does_dispatch and vtable_index are out-parameters. They might be changed.
+ target = C->optimize_virtual_call(caller, jvms->bci(), klass, target, receiver_type,
+ is_virtual,
+ call_does_dispatch, vtable_index); // out-parameters
+ }
+
+ CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms, true, PROB_ALWAYS, true, true);
+ assert(!cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
if (cg != NULL && cg->is_inline())
return cg;
}
--- a/hotspot/src/share/vm/opto/callGenerator.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/callGenerator.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -68,6 +68,12 @@
// is_late_inline: supports conversion of call into an inline
virtual bool is_late_inline() const { return false; }
+ // same but for method handle calls
+ virtual bool is_mh_late_inline() const { return false; }
+
+ // for method handle calls: have we tried inlinining the call already?
+ virtual bool already_attempted() const { ShouldNotReachHere(); return false; }
+
// Replace the call with an inline version of the code
virtual void do_late_inline() { ShouldNotReachHere(); }
@@ -112,11 +118,13 @@
static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface
static CallGenerator* for_dynamic_call(ciMethod* m); // invokedynamic
- static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee);
- static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee);
+ static CallGenerator* for_method_handle_call( JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden);
+ static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const);
// How to generate a replace a direct call with an inline version
static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
+ static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
+ static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
// How to make a call but defer the decision whether to inline or not.
static CallGenerator* for_warm_call(WarmCallInfo* ci,
@@ -147,6 +155,8 @@
CallGenerator* cg);
virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
+ virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
+
static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) {
if (PrintInlining)
C->print_inlining(callee, inline_level, bci, msg);
--- a/hotspot/src/share/vm/opto/callnode.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/callnode.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "ci/bcEscapeAnalyzer.hpp"
#include "compiler/oopMap.hpp"
+#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/escape.hpp"
#include "opto/locknode.hpp"
@@ -775,16 +776,38 @@
// and the exception object may not exist if an exception handler
// swallows the exception but all the other must exist and be found.
assert(projs->fallthrough_proj != NULL, "must be found");
- assert(projs->fallthrough_catchproj != NULL, "must be found");
- assert(projs->fallthrough_memproj != NULL, "must be found");
- assert(projs->fallthrough_ioproj != NULL, "must be found");
- assert(projs->catchall_catchproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->fallthrough_catchproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->fallthrough_memproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->fallthrough_ioproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->catchall_catchproj != NULL, "must be found");
if (separate_io_proj) {
- assert(projs->catchall_memproj != NULL, "must be found");
- assert(projs->catchall_ioproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->catchall_memproj != NULL, "must be found");
+ assert(Compile::current()->inlining_incrementally() || projs->catchall_ioproj != NULL, "must be found");
}
}
+Node *CallNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ CallGenerator* cg = generator();
+ if (can_reshape && cg != NULL && cg->is_mh_late_inline() && !cg->already_attempted()) {
+ // Check whether this MH handle call becomes a candidate for inlining
+ ciMethod* callee = cg->method();
+ vmIntrinsics::ID iid = callee->intrinsic_id();
+ if (iid == vmIntrinsics::_invokeBasic) {
+ if (in(TypeFunc::Parms)->Opcode() == Op_ConP) {
+ phase->C->prepend_late_inline(cg);
+ set_generator(NULL);
+ }
+ } else {
+ assert(callee->has_member_arg(), "wrong type of call?");
+ if (in(TypeFunc::Parms + callee->arg_size() - 1)->Opcode() == Op_ConP) {
+ phase->C->prepend_late_inline(cg);
+ set_generator(NULL);
+ }
+ }
+ }
+ return SafePointNode::Ideal(phase, can_reshape);
+}
+
//=============================================================================
uint CallJavaNode::size_of() const { return sizeof(*this); }
--- a/hotspot/src/share/vm/opto/callnode.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/callnode.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -507,6 +507,7 @@
Node* exobj;
};
+class CallGenerator;
//------------------------------CallNode---------------------------------------
// Call nodes now subsume the function of debug nodes at callsites, so they
@@ -517,26 +518,31 @@
const TypeFunc *_tf; // Function type
address _entry_point; // Address of method being called
float _cnt; // Estimate of number of times called
+ CallGenerator* _generator; // corresponding CallGenerator for some late inline calls
CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type)
: SafePointNode(tf->domain()->cnt(), NULL, adr_type),
_tf(tf),
_entry_point(addr),
- _cnt(COUNT_UNKNOWN)
+ _cnt(COUNT_UNKNOWN),
+ _generator(NULL)
{
init_class_id(Class_Call);
}
- const TypeFunc* tf() const { return _tf; }
- const address entry_point() const { return _entry_point; }
- const float cnt() const { return _cnt; }
+ const TypeFunc* tf() const { return _tf; }
+ const address entry_point() const { return _entry_point; }
+ const float cnt() const { return _cnt; }
+ CallGenerator* generator() const { return _generator; }
- void set_tf(const TypeFunc* tf) { _tf = tf; }
- void set_entry_point(address p) { _entry_point = p; }
- void set_cnt(float c) { _cnt = c; }
+ void set_tf(const TypeFunc* tf) { _tf = tf; }
+ void set_entry_point(address p) { _entry_point = p; }
+ void set_cnt(float c) { _cnt = c; }
+ void set_generator(CallGenerator* cg) { _generator = cg; }
virtual const Type *bottom_type() const;
virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual Node *Identity( PhaseTransform *phase ) { return this; }
virtual uint cmp( const Node &n ) const;
virtual uint size_of() const = 0;
--- a/hotspot/src/share/vm/opto/cfgnode.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -363,6 +363,49 @@
return true; // The Region node is unreachable - it is dead.
}
+bool RegionNode::try_clean_mem_phi(PhaseGVN *phase) {
+ // Incremental inlining + PhaseStringOpts sometimes produce:
+ //
+ // cmpP with 1 top input
+ // |
+ // If
+ // / \
+ // IfFalse IfTrue /- Some Node
+ // \ / / /
+ // Region / /-MergeMem
+ // \---Phi
+ //
+ //
+ // It's expected by PhaseStringOpts that the Region goes away and is
+ // replaced by If's control input but because there's still a Phi,
+ // the Region stays in the graph. The top input from the cmpP is
+ // propagated forward and a subgraph that is useful goes away. The
+ // code below replaces the Phi with the MergeMem so that the Region
+ // is simplified.
+
+ PhiNode* phi = has_unique_phi();
+ if (phi && phi->type() == Type::MEMORY && req() == 3 && phi->is_diamond_phi(true)) {
+ MergeMemNode* m = NULL;
+ assert(phi->req() == 3, "same as region");
+ for (uint i = 1; i < 3; ++i) {
+ Node *mem = phi->in(i);
+ if (mem && mem->is_MergeMem() && in(i)->outcnt() == 1) {
+ // Nothing is control-dependent on path #i except the region itself.
+ m = mem->as_MergeMem();
+ uint j = 3 - i;
+ Node* other = phi->in(j);
+ if (other && other == m->base_memory()) {
+ // m is a successor memory to other, and is not pinned inside the diamond, so push it out.
+ // This will allow the diamond to collapse completely.
+ phase->is_IterGVN()->replace_node(phi, m);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Must preserve
// the CFG, but we can still strip out dead paths.
@@ -375,6 +418,10 @@
bool has_phis = false;
if (can_reshape) { // Need DU info to check for Phi users
has_phis = (has_phi() != NULL); // Cache result
+ if (has_phis && try_clean_mem_phi(phase)) {
+ has_phis = false;
+ }
+
if (!has_phis) { // No Phi users? Nothing merging?
for (uint i = 1; i < req()-1; i++) {
Node *if1 = in(i);
@@ -1005,7 +1052,9 @@
//------------------------------is_diamond_phi---------------------------------
// Does this Phi represent a simple well-shaped diamond merge? Return the
// index of the true path or 0 otherwise.
-int PhiNode::is_diamond_phi() const {
+// If check_control_only is true, do not inspect the If node at the
+// top, and return -1 (not an edge number) on success.
+int PhiNode::is_diamond_phi(bool check_control_only) const {
// Check for a 2-path merge
Node *region = in(0);
if( !region ) return 0;
@@ -1018,6 +1067,7 @@
Node *iff = ifp1->in(0);
if( !iff || !iff->is_If() ) return 0;
if( iff != ifp2->in(0) ) return 0;
+ if (check_control_only) return -1;
// Check for a proper bool/cmp
const Node *b = iff->in(1);
if( !b->is_Bool() ) return 0;
--- a/hotspot/src/share/vm/opto/cfgnode.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/cfgnode.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -95,6 +95,7 @@
virtual Node *Identity( PhaseTransform *phase );
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const RegMask &out_RegMask() const;
+ bool try_clean_mem_phi(PhaseGVN *phase);
};
//------------------------------JProjNode--------------------------------------
@@ -181,7 +182,7 @@
LoopSafety simple_data_loop_check(Node *in) const;
// Is it unsafe data loop? It becomes a dead loop if this phi node removed.
bool is_unsafe_data_reference(Node *in) const;
- int is_diamond_phi() const;
+ int is_diamond_phi(bool check_control_only = false) const;
virtual int Opcode() const;
virtual bool pinned() const { return in(0) != 0; }
virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
--- a/hotspot/src/share/vm/opto/compile.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/compile.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -136,7 +136,7 @@
void Compile::register_intrinsic(CallGenerator* cg) {
if (_intrinsics == NULL) {
- _intrinsics = new GrowableArray<CallGenerator*>(60);
+ _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
}
// This code is stolen from ciObjectFactory::insert.
// Really, GrowableArray should have methods for
@@ -365,6 +365,21 @@
}
}
+void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
+ int shift = 0;
+ for (int i = 0; i < inlines->length(); i++) {
+ CallGenerator* cg = inlines->at(i);
+ CallNode* call = cg->call_node();
+ if (shift > 0) {
+ inlines->at_put(i-shift, cg);
+ }
+ if (!useful.member(call)) {
+ shift++;
+ }
+ }
+ inlines->trunc_to(inlines->length()-shift);
+}
+
// Disconnect all useless nodes by disconnecting those at the boundary.
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
uint next = 0;
@@ -394,6 +409,9 @@
remove_macro_node(n);
}
}
+ // clean up the late inline lists
+ remove_useless_late_inlines(&_string_late_inlines, useful);
+ remove_useless_late_inlines(&_late_inlines, useful);
debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}
@@ -611,6 +629,12 @@
_printer(IdealGraphPrinter::printer()),
#endif
_congraph(NULL),
+ _late_inlines(comp_arena(), 2, 0, NULL),
+ _string_late_inlines(comp_arena(), 2, 0, NULL),
+ _late_inlines_pos(0),
+ _number_of_mh_late_inlines(0),
+ _inlining_progress(false),
+ _inlining_incrementally(false),
_print_inlining_list(NULL),
_print_inlining(0) {
C = this;
@@ -737,29 +761,13 @@
rethrow_exceptions(kit.transfer_exceptions_into_jvms());
}
- if (!failing() && has_stringbuilder()) {
- {
- // remove useless nodes to make the usage analysis simpler
- ResourceMark rm;
- PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
- }
-
- {
- ResourceMark rm;
- print_method("Before StringOpts", 3);
- PhaseStringOpts pso(initial_gvn(), &for_igvn);
- print_method("After StringOpts", 3);
- }
-
- // now inline anything that we skipped the first time around
- while (_late_inlines.length() > 0) {
- CallGenerator* cg = _late_inlines.pop();
- cg->do_late_inline();
- if (failing()) return;
- }
+ assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
+
+ if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
+ inline_string_calls(true);
}
- assert(_late_inlines.length() == 0, "should have been processed");
- dump_inlining();
+
+ if (failing()) return;
print_method("Before RemoveUseless", 3);
@@ -906,6 +914,9 @@
_dead_node_list(comp_arena()),
_dead_node_count(0),
_congraph(NULL),
+ _number_of_mh_late_inlines(0),
+ _inlining_progress(false),
+ _inlining_incrementally(false),
_print_inlining_list(NULL),
_print_inlining(0) {
C = this;
@@ -1760,6 +1771,124 @@
assert(predicate_count()==0, "should be clean!");
}
+// StringOpts and late inlining of string methods
+void Compile::inline_string_calls(bool parse_time) {
+ {
+ // remove useless nodes to make the usage analysis simpler
+ ResourceMark rm;
+ PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+ }
+
+ {
+ ResourceMark rm;
+ print_method("Before StringOpts", 3);
+ PhaseStringOpts pso(initial_gvn(), for_igvn());
+ print_method("After StringOpts", 3);
+ }
+
+ // now inline anything that we skipped the first time around
+ if (!parse_time) {
+ _late_inlines_pos = _late_inlines.length();
+ }
+
+ while (_string_late_inlines.length() > 0) {
+ CallGenerator* cg = _string_late_inlines.pop();
+ cg->do_late_inline();
+ if (failing()) return;
+ }
+ _string_late_inlines.trunc_to(0);
+}
+
+void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
+ assert(IncrementalInline, "incremental inlining should be on");
+ PhaseGVN* gvn = initial_gvn();
+
+ set_inlining_progress(false);
+ for_igvn()->clear();
+ gvn->replace_with(&igvn);
+
+ int i = 0;
+
+ for (; i <_late_inlines.length() && !inlining_progress(); i++) {
+ CallGenerator* cg = _late_inlines.at(i);
+ _late_inlines_pos = i+1;
+ cg->do_late_inline();
+ if (failing()) return;
+ }
+ int j = 0;
+ for (; i < _late_inlines.length(); i++, j++) {
+ _late_inlines.at_put(j, _late_inlines.at(i));
+ }
+ _late_inlines.trunc_to(j);
+
+ {
+ ResourceMark rm;
+ PhaseRemoveUseless pru(C->initial_gvn(), C->for_igvn());
+ }
+
+ igvn = PhaseIterGVN(gvn);
+}
+
+// Perform incremental inlining until bound on number of live nodes is reached
+void Compile::inline_incrementally(PhaseIterGVN& igvn) {
+ PhaseGVN* gvn = initial_gvn();
+
+ set_inlining_incrementally(true);
+ set_inlining_progress(true);
+ uint low_live_nodes = 0;
+
+ while(inlining_progress() && _late_inlines.length() > 0) {
+
+ if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+ if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
+ // PhaseIdealLoop is expensive so we only try it once we are
+ // out of loop and we only try it again if the previous helped
+ // got the number of nodes down significantly
+ PhaseIdealLoop ideal_loop( igvn, false, true );
+ if (failing()) return;
+ low_live_nodes = live_nodes();
+ _major_progress = true;
+ }
+
+ if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+ break;
+ }
+ }
+
+ inline_incrementally_one(igvn);
+
+ if (failing()) return;
+
+ igvn.optimize();
+
+ if (failing()) return;
+ }
+
+ assert( igvn._worklist.size() == 0, "should be done with igvn" );
+
+ if (_string_late_inlines.length() > 0) {
+ assert(has_stringbuilder(), "inconsistent");
+ for_igvn()->clear();
+ initial_gvn()->replace_with(&igvn);
+
+ inline_string_calls(false);
+
+ if (failing()) return;
+
+ {
+ ResourceMark rm;
+ PhaseRemoveUseless pru(initial_gvn(), for_igvn());
+ }
+
+ igvn = PhaseIterGVN(gvn);
+
+ igvn.optimize();
+ }
+
+ set_inlining_incrementally(false);
+}
+
+
//------------------------------Optimize---------------------------------------
// Given a graph, optimize it.
void Compile::Optimize() {
@@ -1792,6 +1921,12 @@
if (failing()) return;
+ inline_incrementally(igvn);
+
+ print_method("Incremental Inline", 2);
+
+ if (failing()) return;
+
// Perform escape analysis
if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
if (has_loops()) {
@@ -1914,6 +2049,7 @@
} // (End scope of igvn; run destructor if necessary for asserts.)
+ dump_inlining();
// A method with only infinite loops has no edges entering loops from root
{
NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
@@ -3362,6 +3498,28 @@
void Compile::dump_inlining() {
if (PrintInlining) {
+ // Print inlining message for candidates that we couldn't inline
+ // for lack of space or non constant receiver
+ for (int i = 0; i < _late_inlines.length(); i++) {
+ CallGenerator* cg = _late_inlines.at(i);
+ cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");
+ }
+ Unique_Node_List useful;
+ useful.push(root());
+ for (uint next = 0; next < useful.size(); ++next) {
+ Node* n = useful.at(next);
+ if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) {
+ CallNode* call = n->as_Call();
+ CallGenerator* cg = call->generator();
+ cg->print_inlining_late("receiver not constant");
+ }
+ uint max = n->len();
+ for ( uint i = 0; i < max; ++i ) {
+ Node *m = n->in(i);
+ if ( m == NULL ) continue;
+ useful.push(m);
+ }
+ }
for (int i = 0; i < _print_inlining_list->length(); i++) {
tty->print(_print_inlining_list->at(i).ss()->as_string());
}
--- a/hotspot/src/share/vm/opto/compile.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/compile.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -72,6 +72,7 @@
class JVMState;
class TypeData;
class TypePtr;
+class TypeOopPtr;
class TypeFunc;
class Unique_Node_List;
class nmethod;
@@ -280,6 +281,8 @@
int _orig_pc_slot_offset_in_bytes;
int _major_progress; // Count of something big happening
+ bool _inlining_progress; // progress doing incremental inlining?
+ bool _inlining_incrementally;// Are we doing incremental inlining (post parse)
bool _has_loops; // True if the method _may_ have some loops
bool _has_split_ifs; // True if the method _may_ have some split-if
bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores.
@@ -367,8 +370,13 @@
Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN
WarmCallInfo* _warm_calls; // Sorted work-list for heat-based inlining.
- GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after
- // main parsing has finished.
+ GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after
+ // main parsing has finished.
+ GrowableArray<CallGenerator*> _string_late_inlines; // same but for string operations
+
+ int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
+ uint _number_of_mh_late_inlines; // number of method handle late inlining still pending
+
// Inlining may not happen in parse order which would make
// PrintInlining output confusing. Keep track of PrintInlining
@@ -491,6 +499,10 @@
int fixed_slots() const { assert(_fixed_slots >= 0, ""); return _fixed_slots; }
void set_fixed_slots(int n) { _fixed_slots = n; }
int major_progress() const { return _major_progress; }
+ void set_inlining_progress(bool z) { _inlining_progress = z; }
+ int inlining_progress() const { return _inlining_progress; }
+ void set_inlining_incrementally(bool z) { _inlining_incrementally = z; }
+ int inlining_incrementally() const { return _inlining_incrementally; }
void set_major_progress() { _major_progress++; }
void clear_major_progress() { _major_progress = 0; }
int num_loop_opts() const { return _num_loop_opts; }
@@ -729,9 +741,17 @@
// Decide how to build a call.
// The profile factor is a discount to apply to this site's interp. profile.
- CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true);
+ CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_does_dispatch, JVMState* jvms, bool allow_inline, float profile_factor, bool allow_intrinsics = true, bool delayed_forbidden = false);
bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);
+ // Helper functions to identify inlining potential at call-site
+ ciMethod* optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod* callee, const TypeOopPtr* receiver_type,
+ bool is_virtual,
+ bool &call_does_dispatch, int &vtable_index);
+ ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod* callee, const TypeOopPtr* receiver_type);
+
// Report if there were too many traps at a current method and bci.
// Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
// If there is no MDO at all, report no trap unless told to assume it.
@@ -765,10 +785,39 @@
WarmCallInfo* pop_warm_call();
// Record this CallGenerator for inlining at the end of parsing.
- void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
+ void add_late_inline(CallGenerator* cg) {
+ _late_inlines.insert_before(_late_inlines_pos, cg);
+ _late_inlines_pos++;
+ }
+
+ void prepend_late_inline(CallGenerator* cg) {
+ _late_inlines.insert_before(0, cg);
+ }
+
+ void add_string_late_inline(CallGenerator* cg) {
+ _string_late_inlines.push(cg);
+ }
+
+ void remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful);
void dump_inlining();
+ bool over_inlining_cutoff() const {
+ if (!inlining_incrementally()) {
+ return unique() > (uint)NodeCountInliningCutoff;
+ } else {
+ return live_nodes() > (uint)LiveNodeCountInliningCutoff;
+ }
+ }
+
+ void inc_number_of_mh_late_inlines() { _number_of_mh_late_inlines++; }
+ void dec_number_of_mh_late_inlines() { assert(_number_of_mh_late_inlines > 0, "_number_of_mh_late_inlines < 0 !"); _number_of_mh_late_inlines--; }
+ bool has_mh_late_inlines() const { return _number_of_mh_late_inlines > 0; }
+
+ void inline_incrementally_one(PhaseIterGVN& igvn);
+ void inline_incrementally(PhaseIterGVN& igvn);
+ void inline_string_calls(bool parse_time);
+
// Matching, CFG layout, allocation, code generation
PhaseCFG* cfg() { return _cfg; }
bool select_24_bit_instr() const { return _select_24_bit_instr; }
--- a/hotspot/src/share/vm/opto/doCall.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/doCall.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -61,9 +61,9 @@
}
}
-CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual,
+CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_does_dispatch,
JVMState* jvms, bool allow_inline,
- float prof_factor, bool allow_intrinsics) {
+ float prof_factor, bool allow_intrinsics, bool delayed_forbidden) {
ciMethod* caller = jvms->method();
int bci = jvms->bci();
Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
@@ -82,7 +82,7 @@
// See how many times this site has been invoked.
int site_count = profile.count();
int receiver_count = -1;
- if (call_is_virtual && UseTypeProfile && profile.has_receiver(0)) {
+ if (call_does_dispatch && UseTypeProfile && profile.has_receiver(0)) {
// Receivers in the profile structure are ordered by call counts
// so that the most called (major) receiver is profile.receiver(0).
receiver_count = profile.receiver_count(0);
@@ -94,7 +94,7 @@
int r2id = (rid != -1 && profile.has_receiver(1))? log->identify(profile.receiver(1)):-1;
log->begin_elem("call method='%d' count='%d' prof_factor='%g'",
log->identify(callee), site_count, prof_factor);
- if (call_is_virtual) log->print(" virtual='1'");
+ if (call_does_dispatch) log->print(" virtual='1'");
if (allow_inline) log->print(" inline='1'");
if (receiver_count >= 0) {
log->print(" receiver='%d' receiver_count='%d'", rid, receiver_count);
@@ -111,12 +111,12 @@
// We do this before the strict f.p. check below because the
// intrinsics handle strict f.p. correctly.
if (allow_inline && allow_intrinsics) {
- CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
+ CallGenerator* cg = find_intrinsic(callee, call_does_dispatch);
if (cg != NULL) {
if (cg->is_predicted()) {
// Code without intrinsic but, hopefully, inlined.
CallGenerator* inline_cg = this->call_generator(callee,
- vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false);
+ vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, false);
if (inline_cg != NULL) {
cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
}
@@ -130,7 +130,9 @@
// MethodHandle.invoke* are native methods which obviously don't
// have bytecodes and so normal inlining fails.
if (callee->is_method_handle_intrinsic()) {
- return CallGenerator::for_method_handle_call(jvms, caller, callee);
+ CallGenerator* cg = CallGenerator::for_method_handle_call(jvms, caller, callee, delayed_forbidden);
+ assert(cg == NULL || !delayed_forbidden || !cg->is_late_inline() || cg->is_mh_late_inline(), "unexpected CallGenerator");
+ return cg;
}
// Do not inline strict fp into non-strict code, or the reverse
@@ -147,7 +149,7 @@
float expected_uses = past_uses;
// Try inlining a bytecoded method:
- if (!call_is_virtual) {
+ if (!call_does_dispatch) {
InlineTree* ilt;
if (UseOldInlining) {
ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
@@ -161,32 +163,39 @@
WarmCallInfo scratch_ci;
if (!UseOldInlining)
scratch_ci.init(jvms, callee, profile, prof_factor);
- WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci);
+ bool should_delay = false;
+ WarmCallInfo* ci = ilt->ok_to_inline(callee, jvms, profile, &scratch_ci, should_delay);
assert(ci != &scratch_ci, "do not let this pointer escape");
bool allow_inline = (ci != NULL && !ci->is_cold());
bool require_inline = (allow_inline && ci->is_hot());
if (allow_inline) {
CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
- if (require_inline && cg != NULL && should_delay_inlining(callee, jvms)) {
+
+ if (require_inline && cg != NULL) {
// Delay the inlining of this method to give us the
// opportunity to perform some high level optimizations
// first.
- return CallGenerator::for_late_inline(callee, cg);
+ if (should_delay_inlining(callee, jvms)) {
+ assert(!delayed_forbidden, "strange");
+ return CallGenerator::for_string_late_inline(callee, cg);
+ } else if ((should_delay || AlwaysIncrementalInline) && !delayed_forbidden) {
+ return CallGenerator::for_late_inline(callee, cg);
+ }
}
- if (cg == NULL) {
+ if (cg == NULL || should_delay) {
// Fall through.
} else if (require_inline || !InlineWarmCalls) {
return cg;
} else {
- CallGenerator* cold_cg = call_generator(callee, vtable_index, call_is_virtual, jvms, false, prof_factor);
+ CallGenerator* cold_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms, false, prof_factor);
return CallGenerator::for_warm_call(ci, cold_cg, cg);
}
}
}
// Try using the type profile.
- if (call_is_virtual && site_count > 0 && receiver_count > 0) {
+ if (call_does_dispatch && site_count > 0 && receiver_count > 0) {
// The major receiver's count >= TypeProfileMajorReceiverPercent of site_count.
bool have_major_receiver = (100.*profile.receiver_prob(0) >= (float)TypeProfileMajorReceiverPercent);
ciMethod* receiver_method = NULL;
@@ -200,7 +209,7 @@
if (receiver_method != NULL) {
// The single majority receiver sufficiently outweighs the minority.
CallGenerator* hit_cg = this->call_generator(receiver_method,
- vtable_index, !call_is_virtual, jvms, allow_inline, prof_factor);
+ vtable_index, !call_does_dispatch, jvms, allow_inline, prof_factor);
if (hit_cg != NULL) {
// Look up second receiver.
CallGenerator* next_hit_cg = NULL;
@@ -210,7 +219,7 @@
profile.receiver(1));
if (next_receiver_method != NULL) {
next_hit_cg = this->call_generator(next_receiver_method,
- vtable_index, !call_is_virtual, jvms,
+ vtable_index, !call_does_dispatch, jvms,
allow_inline, prof_factor);
if (next_hit_cg != NULL && !next_hit_cg->is_inline() &&
have_major_receiver && UseOnlyInlinedBimorphic) {
@@ -256,7 +265,7 @@
// There was no special inlining tactic, or it bailed out.
// Use a more generic tactic, like a simple call.
- if (call_is_virtual) {
+ if (call_does_dispatch) {
return CallGenerator::for_virtual_call(callee, vtable_index);
} else {
// Class Hierarchy Analysis or Type Profile reveals a unique target,
@@ -388,6 +397,7 @@
// orig_callee is the resolved callee which's signature includes the
// appendix argument.
const int nargs = orig_callee->arg_size();
+ const bool is_signature_polymorphic = MethodHandles::is_signature_polymorphic(orig_callee->intrinsic_id());
// Push appendix argument (MethodType, CallSite, etc.), if one.
if (iter().has_appendix()) {
@@ -404,25 +414,18 @@
// Then we may introduce a run-time check and inline on the path where it succeeds.
// The other path may uncommon_trap, check for another receiver, or do a v-call.
- // Choose call strategy.
- bool call_is_virtual = is_virtual_or_interface;
- int vtable_index = Method::invalid_vtable_index;
- ciMethod* callee = orig_callee;
+ // Try to get the most accurate receiver type
+ ciMethod* callee = orig_callee;
+ int vtable_index = Method::invalid_vtable_index;
+ bool call_does_dispatch = false;
- // Try to get the most accurate receiver type
if (is_virtual_or_interface) {
Node* receiver_node = stack(sp() - nargs);
const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
- ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, orig_callee, receiver_type);
-
- // Have the call been sufficiently improved such that it is no longer a virtual?
- if (optimized_virtual_method != NULL) {
- callee = optimized_virtual_method;
- call_is_virtual = false;
- } else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
- // We can make a vtable call at this site
- vtable_index = callee->resolve_vtable_index(method()->holder(), klass);
- }
+ // call_does_dispatch and vtable_index are out-parameters. They might be changed.
+ callee = C->optimize_virtual_call(method(), bci(), klass, orig_callee, receiver_type,
+ is_virtual,
+ call_does_dispatch, vtable_index); // out-parameters
}
// Note: It's OK to try to inline a virtual call.
@@ -438,7 +441,7 @@
// Decide call tactic.
// This call checks with CHA, the interpreter profile, intrinsics table, etc.
// It decides whether inlining is desirable or not.
- CallGenerator* cg = C->call_generator(callee, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+ CallGenerator* cg = C->call_generator(callee, vtable_index, call_does_dispatch, jvms, try_inline, prof_factor());
// NOTE: Don't use orig_callee and callee after this point! Use cg->method() instead.
orig_callee = callee = NULL;
@@ -478,7 +481,7 @@
// the call site, perhaps because it did not match a pattern the
// intrinsic was expecting to optimize. Should always be possible to
// get a normal java call that may inline in that case
- cg = C->call_generator(cg->method(), vtable_index, call_is_virtual, jvms, try_inline, prof_factor(), /* allow_intrinsics= */ false);
+ cg = C->call_generator(cg->method(), vtable_index, call_does_dispatch, jvms, try_inline, prof_factor(), /* allow_intrinsics= */ false);
if ((new_jvms = cg->generate(jvms)) == NULL) {
guarantee(failing(), "call failed to generate: calls should work");
return;
@@ -513,55 +516,44 @@
round_double_result(cg->method());
ciType* rtype = cg->method()->return_type();
- if (Bytecodes::has_optional_appendix(iter().cur_bc_raw())) {
+ ciType* ctype = declared_signature->return_type();
+
+ if (Bytecodes::has_optional_appendix(iter().cur_bc_raw()) || is_signature_polymorphic) {
// Be careful here with return types.
- ciType* ctype = declared_signature->return_type();
if (ctype != rtype) {
BasicType rt = rtype->basic_type();
BasicType ct = ctype->basic_type();
- Node* retnode = peek();
if (ct == T_VOID) {
// It's OK for a method to return a value that is discarded.
// The discarding does not require any special action from the caller.
// The Java code knows this, at VerifyType.isNullConversion.
pop_node(rt); // whatever it was, pop it
- retnode = top();
} else if (rt == T_INT || is_subword_type(rt)) {
- // FIXME: This logic should be factored out.
- if (ct == T_BOOLEAN) {
- retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0x1)) );
- } else if (ct == T_CHAR) {
- retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0xFFFF)) );
- } else if (ct == T_BYTE) {
- retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(24)) );
- retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(24)) );
- } else if (ct == T_SHORT) {
- retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(16)) );
- retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(16)) );
- } else {
- assert(ct == T_INT, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
- }
+ // Nothing. These cases are handled in lambda form bytecode.
+ assert(ct == T_INT || is_subword_type(ct), err_msg_res("must match: rt=%s, ct=%s", type2name(rt), type2name(ct)));
} else if (rt == T_OBJECT || rt == T_ARRAY) {
assert(ct == T_OBJECT || ct == T_ARRAY, err_msg_res("rt=%s, ct=%s", type2name(rt), type2name(ct)));
if (ctype->is_loaded()) {
const TypeOopPtr* arg_type = TypeOopPtr::make_from_klass(rtype->as_klass());
const Type* sig_type = TypeOopPtr::make_from_klass(ctype->as_klass());
if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
+ Node* retnode = pop();
Node* cast_obj = _gvn.transform(new (C) CheckCastPPNode(control(), retnode, sig_type));
- pop();
push(cast_obj);
}
}
} else {
- assert(ct == rt, err_msg("unexpected mismatch rt=%d, ct=%d", rt, ct));
+ assert(rt == ct, err_msg_res("unexpected mismatch: rt=%s, ct=%s", type2name(rt), type2name(ct)));
// push a zero; it's better than getting an oop/int mismatch
- retnode = pop_node(rt);
- retnode = zerocon(ct);
+ pop_node(rt);
+ Node* retnode = zerocon(ct);
push_node(ct, retnode);
}
// Now that the value is well-behaved, continue with the call-site type.
rtype = ctype;
}
+ } else {
+ assert(rtype == ctype, "mismatched return types"); // symbolic resolution enforces this
}
// If the return type of the method is not loaded, assert that the
@@ -879,17 +871,39 @@
#endif //PRODUCT
+ciMethod* Compile::optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod* callee, const TypeOopPtr* receiver_type,
+ bool is_virtual,
+ bool& call_does_dispatch, int& vtable_index) {
+ // Set default values for out-parameters.
+ call_does_dispatch = true;
+ vtable_index = Method::invalid_vtable_index;
+
+ // Choose call strategy.
+ ciMethod* optimized_virtual_method = optimize_inlining(caller, bci, klass, callee, receiver_type);
+
+ // Have the call been sufficiently improved such that it is no longer a virtual?
+ if (optimized_virtual_method != NULL) {
+ callee = optimized_virtual_method;
+ call_does_dispatch = false;
+ } else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
+ // We can make a vtable call at this site
+ vtable_index = callee->resolve_vtable_index(caller->holder(), klass);
+ }
+ return callee;
+}
+
// Identify possible target method and inlining style
-ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
- ciMethod *dest_method, const TypeOopPtr* receiver_type) {
+ciMethod* Compile::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod* callee, const TypeOopPtr* receiver_type) {
// only use for virtual or interface calls
// If it is obviously final, do not bother to call find_monomorphic_target,
// because the class hierarchy checks are not needed, and may fail due to
// incompletely loaded classes. Since we do our own class loading checks
// in this module, we may confidently bind to any method.
- if (dest_method->can_be_statically_bound()) {
- return dest_method;
+ if (callee->can_be_statically_bound()) {
+ return callee;
}
// Attempt to improve the receiver
@@ -898,8 +912,8 @@
if (receiver_type != NULL) {
// Array methods are all inherited from Object, and are monomorphic.
if (receiver_type->isa_aryptr() &&
- dest_method->holder() == env()->Object_klass()) {
- return dest_method;
+ callee->holder() == env()->Object_klass()) {
+ return callee;
}
// All other interesting cases are instance klasses.
@@ -919,7 +933,7 @@
}
ciInstanceKlass* calling_klass = caller->holder();
- ciMethod* cha_monomorphic_target = dest_method->find_monomorphic_target(calling_klass, klass, actual_receiver);
+ ciMethod* cha_monomorphic_target = callee->find_monomorphic_target(calling_klass, klass, actual_receiver);
if (cha_monomorphic_target != NULL) {
assert(!cha_monomorphic_target->is_abstract(), "");
// Look at the method-receiver type. Does it add "too much information"?
@@ -937,10 +951,10 @@
cha_monomorphic_target->print();
tty->cr();
}
- if (C->log() != NULL) {
- C->log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
- C->log()->identify(klass),
- C->log()->identify(cha_monomorphic_target));
+ if (log() != NULL) {
+ log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
+ log()->identify(klass),
+ log()->identify(cha_monomorphic_target));
}
cha_monomorphic_target = NULL;
}
@@ -952,7 +966,7 @@
// by dynamic class loading. Be sure to test the "static" receiver
// dest_method here, as opposed to the actual receiver, which may
// falsely lead us to believe that the receiver is final or private.
- C->dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
+ dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
return cha_monomorphic_target;
}
@@ -961,7 +975,7 @@
if (actual_receiver_is_exact) {
// In case of evolution, there is a dependence on every inlined method, since each
// such method can be changed when its class is redefined.
- ciMethod* exact_method = dest_method->resolve_invoke(calling_klass, actual_receiver);
+ ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver);
if (exact_method != NULL) {
#ifndef PRODUCT
if (PrintOpto) {
--- a/hotspot/src/share/vm/opto/graphKit.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/graphKit.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -1794,10 +1794,15 @@
if (ejvms == NULL) {
// No exception edges to simply kill off those paths
- C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
- C->gvn_replace_by(callprojs.catchall_memproj, C->top());
- C->gvn_replace_by(callprojs.catchall_ioproj, C->top());
-
+ if (callprojs.catchall_catchproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
+ }
+ if (callprojs.catchall_memproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_memproj, C->top());
+ }
+ if (callprojs.catchall_ioproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_ioproj, C->top());
+ }
// Replace the old exception object with top
if (callprojs.exobj != NULL) {
C->gvn_replace_by(callprojs.exobj, C->top());
@@ -1809,10 +1814,15 @@
SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states();
Node* ex_oop = ekit.use_exception_state(ex_map);
-
- C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
- C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory());
- C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o());
+ if (callprojs.catchall_catchproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
+ }
+ if (callprojs.catchall_memproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory());
+ }
+ if (callprojs.catchall_ioproj != NULL) {
+ C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o());
+ }
// Replace the old exception object with the newly created one
if (callprojs.exobj != NULL) {
--- a/hotspot/src/share/vm/opto/memnode.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/memnode.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -2725,10 +2725,8 @@
zend = phase->transform( new(C) URShiftXNode(zend, shift) );
}
+ // Bulk clear double-words
Node* zsize = phase->transform( new(C) SubXNode(zend, zbase) );
- Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT);
-
- // Bulk clear double-words
Node* adr = phase->transform( new(C) AddPNode(dest, dest, start_offset) );
mem = new (C) ClearArrayNode(ctl, mem, zsize, adr);
return phase->transform(mem);
--- a/hotspot/src/share/vm/opto/parse.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/parse.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -70,7 +70,7 @@
InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
JVMState* caller_jvms,
int caller_bci);
- const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
+ const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result, bool& should_delay);
const char* should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
const char* should_not_inline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const;
void print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const;
@@ -107,7 +107,7 @@
// and may be accessed by find_subtree_from_root.
// The call_method is the dest_method for a special or static invocation.
// The call_method is an optimized virtual method candidate otherwise.
- WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci);
+ WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci, bool& should_delay);
// Information about inlined method
JVMState* caller_jvms() const { return _caller_jvms; }
@@ -469,10 +469,6 @@
// Helper function to uncommon-trap or bailout for non-compilable call-sites
bool can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass *klass);
- // Helper function to identify inlining potential at call-site
- ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
- ciMethod *dest_method, const TypeOopPtr* receiver_type);
-
// Helper function to setup for type-profile based inlining
bool prepare_type_profile_inline(ciInstanceKlass* prof_klass, ciMethod* prof_method);
--- a/hotspot/src/share/vm/opto/parse1.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/parse1.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -1404,7 +1404,8 @@
do_one_bytecode();
- assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth, "correct depth prediction");
+ assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth,
+ err_msg_res("incorrect depth prediction: sp=%d, pre_bc_sp=%d, depth=%d", sp(), pre_bc_sp, depth));
do_exceptions();
--- a/hotspot/src/share/vm/opto/phaseX.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/phaseX.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -75,6 +75,13 @@
// nh->_sentinel must be in the current node space
}
+void NodeHash::replace_with(NodeHash *nh) {
+ debug_only(_table = (Node**)badAddress); // interact correctly w/ operator=
+ // just copy in all the fields
+ *this = *nh;
+ // nh->_sentinel must be in the current node space
+}
+
//------------------------------hash_find--------------------------------------
// Find in hash table
Node *NodeHash::hash_find( const Node *n ) {
--- a/hotspot/src/share/vm/opto/phaseX.hpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/phaseX.hpp Thu Jan 10 10:00:43 2013 -0800
@@ -92,6 +92,7 @@
}
void remove_useless_nodes(VectorSet &useful); // replace with sentinel
+ void replace_with(NodeHash* nh);
Node *sentinel() { return _sentinel; }
@@ -386,6 +387,11 @@
Node *transform( Node *n );
Node *transform_no_reclaim( Node *n );
+ void replace_with(PhaseGVN* gvn) {
+ _table.replace_with(&gvn->_table);
+ _types = gvn->_types;
+ }
+
// Check for a simple dead loop when a data node references itself.
DEBUG_ONLY(void dead_loop_check(Node *n);)
};
--- a/hotspot/src/share/vm/opto/stringopts.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/opto/stringopts.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -265,7 +265,8 @@
} else if (n->is_IfTrue()) {
Compile* C = _stringopts->C;
C->gvn_replace_by(n, n->in(0)->in(0));
- C->gvn_replace_by(n->in(0), C->top());
+ // get rid of the other projection
+ C->gvn_replace_by(n->in(0)->as_If()->proj_out(false), C->top());
}
}
}
@@ -439,7 +440,7 @@
}
// Find the constructor call
Node* result = alloc->result_cast();
- if (result == NULL || !result->is_CheckCastPP()) {
+ if (result == NULL || !result->is_CheckCastPP() || alloc->in(TypeFunc::Memory)->is_top()) {
// strange looking allocation
#ifndef PRODUCT
if (PrintOptimizeStringConcat) {
@@ -834,6 +835,9 @@
ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) {
// Simple diamond.
// XXX should check for possibly merging stores. simple data merges are ok.
+ // The IGVN will make this simple diamond go away when it
+ // transforms the Region. Make sure it sees it.
+ Compile::current()->record_for_igvn(ptr);
ptr = ptr->in(1)->in(0)->in(0);
continue;
}
--- a/hotspot/src/share/vm/runtime/arguments.cpp Thu Jan 10 07:32:32 2013 -0800
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Thu Jan 10 10:00:43 2013 -0800
@@ -3303,6 +3303,18 @@
if (!EliminateLocks) {
EliminateNestedLocks = false;
}
+ if (!Inline) {
+ IncrementalInline = false;
+ }
+#ifndef PRODUCT
+ if (!IncrementalInline) {
+ AlwaysIncrementalInline = false;
+ }
+#endif
+ if (IncrementalInline && FLAG_IS_DEFAULT(MaxNodeLimit)) {
+ // incremental inlining: bump MaxNodeLimit
+ FLAG_SET_DEFAULT(MaxNodeLimit, (intx)75000);
+ }
#endif
if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/8005419/Test8005419.java Thu Jan 10 10:00:43 2013 -0800
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8005419
+ * @summary Improve intrinsics code performance on x86 by using AVX2
+ * @run main/othervm -Xbatch -Xmx64m Test8005419
+ *
+ */
+
+public class Test8005419 {
+ public static int SIZE = 64;
+
+ public static void main(String[] args) {
+ char[] a = new char[SIZE];
+ char[] b = new char[SIZE];
+
+ for (int i = 16; i < SIZE; i++) {
+ a[i] = (char)i;
+ b[i] = (char)i;
+ }
+ String s1 = new String(a);
+ String s2 = new String(b);
+
+ // Warm up
+ boolean failed = false;
+ int result = 0;
+ for (int i = 0; i < 10000; i++) {
+ result += test(s1, s2);
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += test(s1, s2);
+ }
+ for (int i = 0; i < 10000; i++) {
+ result += test(s1, s2);
+ }
+ if (result != 0) failed = true;
+
+ System.out.println("Start testing");
+ // Compare same string
+ result = test(s1, s1);
+ if (result != 0) {
+ failed = true;
+ System.out.println("Failed same: result = " + result + ", expected 0");
+ }
+ // Compare equal strings
+ for (int i = 1; i <= SIZE; i++) {
+ s1 = new String(a, 0, i);
+ s2 = new String(b, 0, i);
+ result = test(s1, s2);
+ if (result != 0) {
+ failed = true;
+ System.out.println("Failed equals s1[" + i + "], s2[" + i + "]: result = " + result + ", expected 0");
+ }
+ }
+ // Compare equal strings but different sizes
+ for (int i = 1; i <= SIZE; i++) {
+ s1 = new String(a, 0, i);
+ for (int j = 1; j <= SIZE; j++) {
+ s2 = new String(b, 0, j);
+ result = test(s1, s2);
+ if (result != (i-j)) {
+ failed = true;
+ System.out.println("Failed diff size s1[" + i + "], s2[" + j + "]: result = " + result + ", expected " + (i-j));
+ }
+ }
+ }
+ // Compare strings with one char different and different sizes
+ for (int i = 1; i <= SIZE; i++) {
+ s1 = new String(a, 0, i);
+ for (int j = 0; j < i; j++) {
+ b[j] -= 3; // change char
+ s2 = new String(b, 0, i);
+ result = test(s1, s2);
+ int chdiff = a[j] - b[j];
+ if (result != chdiff) {
+ failed = true;
+ System.out.println("Failed diff char s1[" + i + "], s2[" + i + "]: result = " + result + ", expected " + chdiff);
+ }
+ result = test(s2, s1);
+ chdiff = b[j] - a[j];
+ if (result != chdiff) {
+ failed = true;
+ System.out.println("Failed diff char s2[" + i + "], s1[" + i + "]: result = " + result + ", expected " + chdiff);
+ }
+ b[j] += 3; // restore
+ }
+ }
+ if (failed) {
+ System.out.println("FAILED");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ private static int test(String str1, String str2) {
+ return str1.compareTo(str2);
+ }
+}