8154867: PPC64: Better byte behavior
authormdoerr
Fri, 22 Apr 2016 10:46:08 +0200
changeset 37488 ab63cdc0e14e
parent 37487 2ef46fd3ecef
child 37489 b2812e811d0d
8154867: PPC64: Better byte behavior Reviewed-by: goetz
hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp
hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp
hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp
--- a/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Thu Apr 21 13:36:14 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Fri Apr 22 10:46:08 2016 +0200
@@ -360,7 +360,7 @@
     length.set_instruction(x->length());
     length.load_item();
   }
-  if (needs_store_check) {
+  if (needs_store_check || x->check_boolean()) {
     value.load_item();
   } else {
     value.load_for_store(x->elt_type());
@@ -407,7 +407,8 @@
     pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
                 true /* do_load */, false /* patch */, NULL);
   }
-  __ move(value.result(), array_addr, null_check_info);
+  LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
+  __ move(result, array_addr, null_check_info);
   if (obj_store) {
     // Precise card mark.
     post_barrier(LIR_OprFact::address(array_addr), value.result());
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu Apr 21 13:36:14 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Fri Apr 22 10:46:08 2016 +0200
@@ -169,6 +169,7 @@
     case ltos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
                break;
     case btos: // fall through
+    case ztos: // fall through
     case ctos: // fall through
     case stos: // fall through
     case itos: lwz(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
@@ -294,6 +295,7 @@
   switch (state) {
     case atos: push_ptr();                break;
     case btos:
+    case ztos:
     case ctos:
     case stos:
     case itos: push_i();                  break;
@@ -309,6 +311,7 @@
   switch (state) {
     case atos: pop_ptr();            break;
     case btos:
+    case ztos:
     case ctos:
     case stos:
     case itos: pop_i();              break;
@@ -749,6 +752,43 @@
   stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP
 }
 
+void InterpreterMacroAssembler::narrow(Register result) {
+  Register ret_type = R11_scratch1;
+  ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
+  lbz(ret_type, in_bytes(ConstMethod::result_type_offset()), R11_scratch1);
+
+  Label notBool, notByte, notChar, done;
+
+  // common case first
+  cmpwi(CCR0, ret_type, T_INT);
+  beq(CCR0, done);
+
+  cmpwi(CCR0, ret_type, T_BOOLEAN);
+  bne(CCR0, notBool);
+  andi(result, result, 0x1);
+  b(done);
+
+  bind(notBool);
+  cmpwi(CCR0, ret_type, T_BYTE);
+  bne(CCR0, notByte);
+  extsb(result, result);
+  b(done);
+
+  bind(notByte);
+  cmpwi(CCR0, ret_type, T_CHAR);
+  bne(CCR0, notChar);
+  andi(result, result, 0xffff);
+  b(done);
+
+  bind(notChar);
+  // cmpwi(CCR0, ret_type, T_SHORT);  // all that's left
+  // bne(CCR0, done);
+  extsh(result, result);
+
+  // Nothing to do for T_INT
+  bind(done);
+}
+
 // Remove activation.
 //
 // Unlock the receiver if this is a synchronized method.
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu Apr 21 13:36:14 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Fri Apr 22 10:46:08 2016 +0200
@@ -140,6 +140,8 @@
   void get_cpool_and_tags(Register Rcpool, Register Rtags);
   void is_a(Label& L);
 
+  void narrow(Register result);
+
   // Java Call Helpers
   void call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2);
 
--- a/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Thu Apr 21 13:36:14 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Fri Apr 22 10:46:08 2016 +0200
@@ -655,6 +655,7 @@
   switch (state) {
     case ltos:
     case btos:
+    case ztos:
     case ctos:
     case stos:
     case atos:
@@ -701,6 +702,7 @@
   switch (state) {
     case ltos:
     case btos:
+    case ztos:
     case ctos:
     case stos:
     case atos:
@@ -2092,12 +2094,14 @@
   // Copied from TemplateTable::_return.
   // Restoration of lr done by remove_activation.
   switch (state) {
+    // Narrow result if state is itos but result type is smaller.
+    case itos: __ narrow(R17_tos); /* fall through */
     case ltos:
     case btos:
+    case ztos:
     case ctos:
     case stos:
-    case atos:
-    case itos: __ mr(R3_RET, R17_tos); break;
+    case atos: __ mr(R3_RET, R17_tos); break;
     case ftos:
     case dtos: __ fmr(F1_RET, F15_ftos); break;
     case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
@@ -2157,6 +2161,10 @@
     bname = "trace_code_btos {";
     tsize = 2;
     break;
+  case ztos:
+    bname = "trace_code_ztos {";
+    tsize = 2;
+    break;
   case ctos:
     bname = "trace_code_ctos {";
     tsize = 2;
--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Thu Apr 21 13:36:14 2016 -0700
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Fri Apr 22 10:46:08 2016 +0200
@@ -170,6 +170,7 @@
   switch (new_bc) {
     case Bytecodes::_fast_aputfield:
     case Bytecodes::_fast_bputfield:
+    case Bytecodes::_fast_zputfield:
     case Bytecodes::_fast_cputfield:
     case Bytecodes::_fast_dputfield:
     case Bytecodes::_fast_fputfield:
@@ -981,9 +982,21 @@
                  Rarray   = R12_scratch2,
                  Rscratch = R3_ARG1;
   __ pop_i(Rindex);
+  __ pop_ptr(Rarray);
   // tos: val
-  // Rarray: array ptr (popped by index_check)
-  __ index_check(Rarray, Rindex, 0, Rscratch, Rarray);
+
+  // Need to check whether array is boolean or byte
+  // since both types share the bastore bytecode.
+  __ load_klass(Rscratch, Rarray);
+  __ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch);
+  int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
+  __ testbitdi(CCR0, R0, Rscratch, diffbit);
+  Label L_skip;
+  __ bfalse(CCR0, L_skip);
+  __ andi(R17_tos, R17_tos, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
+  __ bind(L_skip);
+
+  __ index_check_without_pop(Rarray, Rindex, 0, Rscratch, Rarray);
   __ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray);
 }
 
@@ -2108,12 +2121,16 @@
   __ remove_activation(state, /* throw_monitor_exception */ true);
   // Restoration of lr done by remove_activation.
   switch (state) {
+    // Narrow result if state is itos but result type is smaller.
+    // Need to narrow in the return bytecode rather than in generate_return_entry
+    // since compiled code callers expect the result to already be narrowed.
+    case itos: __ narrow(R17_tos); /* fall through */
     case ltos:
     case btos:
+    case ztos:
     case ctos:
     case stos:
-    case atos:
-    case itos: __ mr(R3_RET, R17_tos); break;
+    case atos: __ mr(R3_RET, R17_tos); break;
     case ftos:
     case dtos: __ fmr(F1_RET, F15_ftos); break;
     case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
@@ -2519,6 +2536,21 @@
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
   __ align(32, 28, 28); // Align load.
+  // __ bind(Lztos); (same code as btos)
+  __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
+  assert(branch_table[ztos] == 0, "can't compute twice");
+  branch_table[ztos] = __ pc(); // non-volatile_entry point
+  __ lbzx(R17_tos, Rclass_or_obj, Roffset);
+  __ extsb(R17_tos, R17_tos);
+  __ push(ztos);
+  if (!is_static) {
+    // use btos rewriting, no truncating to t/f bit is needed for getfield.
+    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
+  }
+  __ beq(CCR6, Lacquire); // Volatile?
+  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
+
+  __ align(32, 28, 28); // Align load.
   // __ bind(Lctos);
   __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
   assert(branch_table[ctos] == 0, "can't compute twice");
@@ -2618,6 +2650,7 @@
         case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break;
         case Bytecodes::_fast_iputfield: // Fall through
         case Bytecodes::_fast_bputfield: // Fall through
+        case Bytecodes::_fast_zputfield: // Fall through
         case Bytecodes::_fast_cputfield: // Fall through
         case Bytecodes::_fast_sputfield: __ push_i(); offs+=  Interpreter::stackElementSize; break;
         case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break;
@@ -2658,6 +2691,7 @@
       case Bytecodes::_fast_aputfield: __ pop_ptr(); break;
       case Bytecodes::_fast_iputfield: // Fall through
       case Bytecodes::_fast_bputfield: // Fall through
+      case Bytecodes::_fast_zputfield: // Fall through
       case Bytecodes::_fast_cputfield: // Fall through
       case Bytecodes::_fast_sputfield: __ pop_i(); break;
       case Bytecodes::_fast_lputfield: __ pop_l(); break;
@@ -2825,6 +2859,21 @@
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
   __ align(32, 28, 28); // Align pop.
+  // __ bind(Lztos);
+  __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
+  assert(branch_table[ztos] == 0, "can't compute twice");
+  branch_table[ztos] = __ pc(); // non-volatile_entry point
+  __ pop(ztos);
+  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
+  __ andi(R17_tos, R17_tos, 0x1);
+  __ stbx(R17_tos, Rclass_or_obj, Roffset);
+  if (!is_static) { patch_bytecode(Bytecodes::_fast_zputfield, Rbc, Rscratch, true, byte_no); }
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ beq(CR_is_vol, Lvolatile); // Volatile?
+  }
+  __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
+
+  __ align(32, 28, 28); // Align pop.
   // __ bind(Lctos);
   __ release(); // Volatile entry point (one instruction before non-volatile_entry point).
   assert(branch_table[ctos] == 0, "can't compute twice");
@@ -2949,6 +2998,9 @@
       __ stdx(R17_tos, Rclass_or_obj, Roffset);
       break;
 
+    case Bytecodes::_fast_zputfield:
+      __ andi(R17_tos, R17_tos, 0x1);  // boolean is true if LSB is 1
+      // fall through to bputfield
     case Bytecodes::_fast_bputfield:
       __ stbx(R17_tos, Rclass_or_obj, Roffset);
       break;