8222271: [s390] optimize register usage in C2 instruction forms for clearing arrays
author rrich
Tue, 16 Apr 2019 08:51:01 +0200
changeset 54542 0a4214c90a55
parent 54541 9ff8d175035d
child 54543 4fc566b7a9c0
8222271: [s390] optimize register usage in C2 instruction forms for clearing arrays
Reviewed-by: mdoerr, lucy
src/hotspot/cpu/s390/macroAssembler_s390.cpp
src/hotspot/cpu/s390/macroAssembler_s390.hpp
src/hotspot/cpu/s390/s390.ad
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Tue Apr 09 18:46:51 2019 +0800
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Tue Apr 16 08:51:01 2019 +0200
@@ -4355,12 +4355,9 @@
 // Emitter does not KILL cnt and base arguments, since they need to be copied to
 // work registers anyway.
 // Actually, only r0, r1, and r5 are killed.
-unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
-  // Src_addr is evenReg.
-  // Src_len is odd_Reg.
+unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) {
 
   int      block_start = offset();
-  Register tmp_reg  = src_len; // Holds target instr addr for EX.
   Register dst_len  = Z_R1;    // Holds dst len  for MVCLE.
   Register dst_addr = Z_R0;    // Holds dst addr for MVCLE.
 
@@ -4369,7 +4366,7 @@
   BLOCK_COMMENT("Clear_Array {");
 
   // Check for zero len and convert to long.
-  z_ltgfr(src_len, cnt_arg);      // Remember casted value for doSTG case.
+  z_ltgfr(odd_tmp_reg, cnt_arg);
   z_bre(done);                    // Nothing to do if len == 0.
 
   // Prefetch data to be cleared.
@@ -4378,16 +4375,17 @@
     z_pfd(0x02, 256, Z_R0, base_pointer_arg);
   }
 
-  z_sllg(dst_len, src_len, 3);    // #bytes to clear.
-  z_cghi(src_len, 32);            // Check for len <= 256 bytes (<=32 DW).
-  z_brnh(doXC);                   // If so, use executed XC to clear.
+  z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear.
+  z_cghi(odd_tmp_reg, 32);         // Check for len <= 256 bytes (<=32 DW).
+  z_brnh(doXC);                    // If so, use executed XC to clear.
 
   // MVCLE: initialize long arrays (general case).
   bind(doMVCLE);
   z_lgr(dst_addr, base_pointer_arg);
-  clear_reg(src_len, true, false); // Src len of MVCLE is zero.
-
-  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
+  // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
+  // The even register of the register pair is not killed.
+  clear_reg(odd_tmp_reg, true, false);
+  MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0);
   z_bru(done);
 
   // XC: initialize short arrays.
@@ -4396,12 +4394,12 @@
     z_xc(0,0,base_pointer_arg,0,base_pointer_arg);
 
   bind(doXC);
-    add2reg(dst_len, -1);             // Get #bytes-1 for EXECUTE.
+    add2reg(dst_len, -1);               // Get #bytes-1 for EXECUTE.
     if (VM_Version::has_ExecuteExtensions()) {
-      z_exrl(dst_len, XC_template);   // Execute XC with var. len.
+      z_exrl(dst_len, XC_template);     // Execute XC with var. len.
     } else {
-      z_larl(tmp_reg, XC_template);
-      z_ex(dst_len,0,Z_R0,tmp_reg);   // Execute XC with var. len.
+      z_larl(odd_tmp_reg, XC_template);
+      z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len.
     }
     // z_bru(done);      // fallthru
 
@@ -4463,7 +4461,7 @@
 // Compiler ensures base is doubleword aligned and cnt is #doublewords.
 // Emitter does not KILL cnt and base arguments, since they need to be copied to
 // work registers anyway.
-// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
+// Actually, only r0 and r1 (which are work registers) and odd_tmp_reg are killed.
 //
 // For very large arrays, exploit MVCLE H/W support.
 // MVCLE instruction automatically exploits H/W-optimized page mover.
@@ -4471,9 +4469,7 @@
 // - All full pages are cleared with the page mover H/W assist.
 // - Remaining bytes are again cleared by a series of XC to self.
 //
-unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
-  // Src_addr is evenReg.
-  // Src_len is odd_Reg.
+unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) {
 
   int      block_start = offset();
   Register dst_len  = Z_R1;      // Holds dst len  for MVCLE.
@@ -4486,11 +4482,10 @@
 
   // Prepare other args to MVCLE.
   z_lgr(dst_addr, base_pointer_arg);
-  // Indicate unused result.
-  (void) clear_reg(src_len, true, false);  // Src len of MVCLE is zero.
-
-  // Clear.
-  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
+  // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
+  // The even register of the register pair is not killed.
+  (void) clear_reg(odd_tmp_reg, true, false);  // Src len of MVCLE is zero.
+  MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0);
   BLOCK_COMMENT("} Clear_Array_Const_Big");
 
   int block_end = offset();
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp	Tue Apr 09 18:46:51 2019 +0800
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp	Tue Apr 16 08:51:01 2019 +0200
@@ -828,9 +828,9 @@
   //--------------------------
   //---  Operations on arrays.
   //--------------------------
-  unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg);
   unsigned int Clear_Array_Const(long cnt, Register base);
-  unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg);
   unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
                                              Register cnt_reg,
                                              Register tmp1_reg, Register tmp2_reg);
--- a/src/hotspot/cpu/s390/s390.ad	Tue Apr 09 18:46:51 2019 +0800
+++ b/src/hotspot/cpu/s390/s390.ad	Tue Apr 16 08:51:01 2019 +0200
@@ -474,6 +474,19 @@
 /*Z_R15_H,Z_R15*/   // SP
 );
 
+// z_long_reg without even registers
+reg_class z_long_odd_reg(
+/*Z_R0_H,Z_R0*/     // R0
+/*Z_R1_H,Z_R1*/
+  Z_R3_H,Z_R3,
+  Z_R5_H,Z_R5,
+  Z_R7_H,Z_R7,
+  Z_R9_H,Z_R9,
+  Z_R11_H,Z_R11,
+  Z_R13_H,Z_R13
+/*Z_R14_H,Z_R14,*/  // return_pc
+/*Z_R15_H,Z_R15*/   // SP
+);
 
 // Special Class for Condition Code Flags Register
 
@@ -3378,6 +3391,7 @@
   match(RegL);
   match(revenRegL);
   match(roddRegL);
+  match(allRoddRegL);
   match(rarg1RegL);
   match(rarg5RegL);
   format %{ %}
@@ -3400,6 +3414,14 @@
   interface(REG_INTER);
 %}
 
+// available odd registers for iRegL
+operand allRoddRegL() %{
+  constraint(ALLOC_IN_RC(z_long_odd_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 operand rarg1RegL() %{
   constraint(ALLOC_IN_RC(z_rarg1_long_reg));
   match(iRegL);
@@ -9899,23 +9921,23 @@
   ins_pipe(pipe_class_dummy);
 %}
 
-instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
   match(Set dummy (ClearArray cnt base));
-  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
   ins_cost(200);
   // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to optimized constant loader.
   format %{ "ClearArrayConstBig $cnt,$base" %}
-  ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
-  ins_pipe(pipe_class_dummy);
-%}
-
-instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+  ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $tmpL$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
   match(Set dummy (ClearArray cnt base));
-  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
   ins_cost(300);
   // TODO: s390 port size(FIXED_SIZE);  // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
   format %{ "ClearArrayVar $cnt,$base" %}
-  ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+  ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $tmpL$$Register); %}
   ins_pipe(pipe_class_dummy);
 %}