hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp
changeset 46726 7801367e3cc9
parent 46369 3bf4544bec14
child 46961 c9094b1e5f87
--- a/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp	Fri Jul 21 12:43:47 2017 +0200
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp	Thu Jul 27 15:36:15 2017 +0200
@@ -2022,17 +2022,41 @@
   z_stg(fp, _z_abi(callers_sp), Z_SP);
 }
 
-// Resize_frame with SP(new) = [addr].
-void MacroAssembler::resize_frame_absolute(Register addr, Register fp, bool load_fp) {
-  assert_different_registers(addr, fp, Z_SP);
-  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
-
-  if (addr != Z_R0) {
-    // Minimize stalls by not using Z_SP immediately after update.
-    z_stg(fp, _z_abi(callers_sp), addr);
-    z_lgr(Z_SP, addr);
+// Resize_frame with SP(new) = [newSP] + offset.
+//   This emitter is useful if we already have calculated a pointer
+//   into the to-be-allocated stack space, e.g. with special alignment properties,
+//   but need some additional space, e.g. for spilling.
+//   newSP    is the pre-calculated pointer. It must not be modified.
+//   fp       holds, or is filled with, the frame pointer.
+//   offset   is the additional increment which is added to addr to form the new SP.
+//            Note: specify a negative value to reserve more space!
+//   load_fp == true  only indicates that fp is not pre-filled with the frame pointer.
+//                    It does not guarantee that fp contains the frame pointer at the end.
+void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
+  assert_different_registers(newSP, fp, Z_SP);
+
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
+  }
+
+  add2reg(Z_SP, offset, newSP);
+  z_stg(fp, _z_abi(callers_sp), Z_SP);
+}
+
+// Resize_frame with SP(new) = [newSP].
+//   load_fp == true  only indicates that fp is not pre-filled with the frame pointer.
+//                    It does not guarantee that fp contains the frame pointer at the end.
+void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
+  assert_different_registers(newSP, fp, Z_SP);
+
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
+  }
+
+  z_lgr(Z_SP, newSP);
+  if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
+    z_stg(fp, _z_abi(callers_sp), newSP);
   } else {
-    z_lgr(Z_SP, addr);
     z_stg(fp, _z_abi(callers_sp), Z_SP);
   }
 }
@@ -2040,17 +2064,12 @@
 // Resize_frame with SP(new) = SP(old) + offset.
 void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
   assert_different_registers(fp, Z_SP);
-  if (load_fp) z_lg(fp, _z_abi(callers_sp), Z_SP);
-
-  if (Displacement::is_validDisp((int)_z_abi(callers_sp) + offset.constant_or_zero())) {
-    // Minimize stalls by first using, then updating Z_SP.
-    // Do that only if we have a small positive offset or if ExtImm are available.
-    z_stg(fp, Address(Z_SP, offset, _z_abi(callers_sp)));
-    add64(Z_SP, offset);
-  } else {
-    add64(Z_SP, offset);
-    z_stg(fp, _z_abi(callers_sp), Z_SP);
-  }
+
+  if (load_fp) {
+    z_lg(fp, _z_abi(callers_sp), Z_SP);
+  }
+  add64(Z_SP, offset);
+  z_stg(fp, _z_abi(callers_sp), Z_SP);
 }
 
 void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
@@ -2063,32 +2082,32 @@
 #endif
   if (copy_sp) { z_lgr(old_sp, Z_SP); }
   if (bytes_with_inverted_sign) {
-    z_stg(old_sp, 0, bytes, Z_SP);
-    add2reg_with_index(Z_SP, 0, bytes, Z_SP);
+    z_agr(Z_SP, bytes);
   } else {
     z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
-    z_stg(old_sp, 0, Z_SP);
-  }
+  }
+  z_stg(old_sp, _z_abi(callers_sp), Z_SP);
 }
 
 unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
   long offset = Assembler::align(bytes, frame::alignment_in_bytes);
-
-  if (Displacement::is_validDisp(-offset)) {
-    // Minimize stalls by first using, then updating Z_SP.
-    // Do that only if we have ExtImm available.
-    z_stg(Z_SP, -offset, Z_SP);
-    add2reg(Z_SP, -offset);
-  } else {
-    if (scratch != Z_R0 && scratch != Z_R1) {
-      z_stg(Z_SP, -offset, Z_SP);
-      add2reg(Z_SP, -offset);
-    } else {   // scratch == Z_R0 || scratch == Z_R1
-      z_lgr(scratch, Z_SP);
-      add2reg(Z_SP, -offset);
-      z_stg(scratch, 0, Z_SP);
-    }
-  }
+  assert(offset > 0, "should push a frame with positive size, size = %ld.", offset);
+  assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset);
+
+  // We must not write outside the current stack bounds (given by Z_SP).
+  // Thus, we have to first update Z_SP and then store the previous SP as stack linkage.
+  // We rely on Z_R0 by default to be available as scratch.
+  z_lgr(scratch, Z_SP);
+  add2reg(Z_SP, -offset);
+  z_stg(scratch, _z_abi(callers_sp), Z_SP);
+#ifdef ASSERT
+  // Just make sure nobody uses the value in the default scratch register.
+  // When another register is used, the caller might rely on it containing the frame pointer.
+  if (scratch == Z_R0) {
+    z_iihf(scratch, 0xbaadbabe);
+    z_iilf(scratch, 0xdeadbeef);
+  }
+#endif
   return offset;
 }
 
@@ -2106,6 +2125,20 @@
   Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
 }
 
+// Pop current C frame and restore return PC register (Z_R14).
+void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
+  BLOCK_COMMENT("pop_frame_restore_retPC:");
+  int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes;
+  // If possible, pop frame by add instead of load (a penny saved is a penny got :-).
+  if (Displacement::is_validDisp(retPC_offset)) {
+    z_lg(Z_R14, retPC_offset, Z_SP);
+    add2reg(Z_SP, frame_size_in_bytes);
+  } else {
+    add2reg(Z_SP, frame_size_in_bytes);
+    restore_return_pc();
+  }
+}
+
 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
   if (allow_relocation) {
     call_c(entry_point);
@@ -3485,6 +3518,17 @@
 // Purpose: record the previous value if it is not null.
 // All non-tmps are preserved.
 //------------------------------------------------------
+// Note: Rpre_val needs special attention.
+//   The flag pre_val_needed indicated that the caller of this emitter function
+//   relies on Rpre_val containing the correct value, that is:
+//     either the value it contained on entry to this code segment
+//     or the value that was loaded into the register from (Robj+offset).
+//
+//   Independent from this requirement, the contents of Rpre_val must survive
+//   the push_frame() operation. push_frame() uses Z_R0_scratch by default
+//   to temporarily remember the frame pointer.
+//   If Rpre_val is assigned Z_R0_scratch by the caller, code must be emitted to
+//   save it's value.
 void MacroAssembler::g1_write_barrier_pre(Register           Robj,
                                           RegisterOrConstant offset,
                                           Register           Rpre_val,      // Ideally, this is a non-volatile register.
@@ -3498,6 +3542,16 @@
   const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf());
   const int index_offset  = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index());
   assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp<i> must be Z_R0!!
+  assert_different_registers(Robj, Z_R0_scratch);         // Used for addressing. Furthermore, push_frame destroys Z_R0!!
+  assert_different_registers(Rval, Z_R0_scratch);         // push_frame destroys Z_R0!!
+
+#ifdef ASSERT
+  // make sure the register is not Z_R0. Used for addressing. Furthermore, would be destroyed by push_frame.
+  if (offset.is_register() && offset.as_register()->encoding() == 0) {
+    tty->print_cr("Roffset(g1_write_barrier_pre)  = %%r%d", offset.as_register()->encoding());
+    assert(false, "bad register for offset");
+  }
+#endif
 
   BLOCK_COMMENT("g1_write_barrier_pre {");
 
@@ -3511,7 +3565,10 @@
   }
   z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently.
 
-  // Do we need to load the previous value into Rpre_val?
+  assert(Rpre_val != noreg, "must have a real register");
+
+
+  // If an object is given, we need to load the previous value into Rpre_val.
   if (Robj != noreg) {
     // Load the previous value...
     Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0;
@@ -3521,9 +3578,9 @@
       z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj);
     }
   }
-  assert(Rpre_val != noreg, "must have a real register");
 
   // Is the previous value NULL?
+  // If so, we don't need to record it and we're done.
   // Note: pre_val is loaded, decompressed and stored (directly or via runtime call).
   //       Register contents is preserved across runtime call if caller requests to do so.
   z_ltgr(Rpre_val, Rpre_val);
@@ -3540,6 +3597,7 @@
   // only if index > 0. Otherwise, we need runtime to handle.
   // (The index field is typed as size_t.)
   Register Rbuffer = Rtmp1, Rindex = Rtmp2;
+  assert_different_registers(Rbuffer, Rindex, Rpre_val);
 
   z_lg(Rbuffer, buffer_offset, Z_thread);
 
@@ -3558,16 +3616,8 @@
 
   bind(callRuntime);
 
-  // Save Rpre_val (result) over runtime call.
-  // Requires Rtmp1, Rtmp2, or Rpre_val to be non-volatile.
-  Register Rpre_save = Rpre_val;
-  if (pre_val_needed && Rpre_val->is_volatile()) {
-    guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
-    Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
-  }
-  lgr_if_needed(Rpre_save, Rpre_val);
-
-  // Preserve inputs by spilling them into the top frame.
+  // Save some registers (inputs and result) over runtime call
+  // by spilling them into the top frame.
   if (Robj != noreg && Robj->is_volatile()) {
     z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP);
   }
@@ -3579,11 +3629,20 @@
     z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
   }
 
+  // Save Rpre_val (result) over runtime call.
+  Register Rpre_save = Rpre_val;
+  if ((Rpre_val == Z_R0_scratch) || (pre_val_needed && Rpre_val->is_volatile())) {
+    guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!");
+    Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2;
+  }
+  lgr_if_needed(Rpre_save, Rpre_val);
+
   // Push frame to protect top frame with return pc and spilled register values.
   save_return_pc();
-  push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
-
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, Z_thread);
+  push_frame_abi160(0); // Will use Z_R0 as tmp.
+
+  // Rpre_val may be destroyed by push_frame().
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_save, Z_thread);
 
   pop_frame();
   restore_return_pc();
@@ -3599,9 +3658,9 @@
   if (Rval != noreg && Rval->is_volatile()) {
     z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP);
   }
-
-  // Restore Rpre_val (result) after runtime call.
-  lgr_if_needed(Rpre_val, Rpre_save);
+  if (pre_val_needed && Rpre_val->is_volatile()) {
+    lgr_if_needed(Rpre_val, Rpre_save);
+  }
 
   bind(filtered);
   BLOCK_COMMENT("} g1_write_barrier_pre");
@@ -3654,7 +3713,7 @@
   // calculate address of card
   load_const_optimized(Rbase, (address)bs->byte_map_base);        // Card table base.
   z_srlg(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); // Index into card table.
-  add2reg_with_index(Rcard_addr, 0, Rcard_addr, Rbase);           // Explicit calculation needed for cli.
+  z_algr(Rcard_addr, Rbase);                                      // Explicit calculation needed for cli.
   Rbase = noreg; // end of lifetime
 
   // Filter young.
@@ -3698,6 +3757,7 @@
 
   // TODO: do we need a frame? Introduced to be on the safe side.
   bool needs_frame = true;
+  lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!
 
   // VM call need frame to access(write) O register.
   if (needs_frame) {
@@ -3706,7 +3766,7 @@
   }
 
   // Save the live input values.
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr_x, Z_thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, Z_thread);
 
   if (needs_frame) {
     pop_frame();
@@ -4062,7 +4122,12 @@
 void MacroAssembler::store_klass_gap(Register s, Register d) {
   if (UseCompressedClassPointers) {
     assert(s != d, "not enough registers");
-    z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
+    // Support s = noreg.
+    if (s != noreg) {
+      z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
+    } else {
+      z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
+    }
   }
 }
 
@@ -6621,11 +6686,12 @@
 
   BLOCK_COMMENT("verify_oop {");
   Register tmp = Z_R0;
-  unsigned int nbytes_save = 6 *8;
+  unsigned int nbytes_save = 5*BytesPerWord;
   address entry = StubRoutines::verify_oop_subroutine_entry_address();
+
   save_return_pc();
   push_frame_abi160(nbytes_save);
-  z_stmg(Z_R0, Z_R5, 160, Z_SP);
+  z_stmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);
 
   z_lgr(Z_ARG2, oop);
   load_const(Z_ARG1, (address) msg);
@@ -6633,10 +6699,10 @@
   z_lg(Z_R1, 0, Z_R1);
   call_c(Z_R1);
 
-  z_lmg(Z_R0, Z_R5, 160, Z_SP);
+  z_lmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);
   pop_frame();
-
   restore_return_pc();
+
   BLOCK_COMMENT("} verify_oop ");
 }
 
@@ -6658,8 +6724,8 @@
   // Setup arguments.
   load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
   load_const(Z_ARG2, (void*) msg);
-  get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
-  save_return_pc();    // Saves return pc Z_R14.
+  get_PC(Z_R14);     // Following code pushes a frame without entering a new function. Use current pc as return address.
+  save_return_pc();  // Saves return pc Z_R14.
   push_frame_abi160(0);
   call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
   // The plain disassembler does not recognize illtrap. It instead displays