8205582: PPC64: RTM: Fix counter for aborts on nested transactions
author gromero
Fri, 06 Jul 2018 16:25:07 -0400
changeset 51147 a1a53d240353
parent 51146 08c3167e2d22
child 51148 959dbf7e96d0
8205582: PPC64: RTM: Fix counter for aborts on nested transactions Reviewed-by: mdoerr, goetz
src/hotspot/cpu/ppc/assembler_ppc.hpp
src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp	Tue Jun 26 08:33:17 2018 -0400
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp	Fri Jul 06 16:25:07 2018 -0400
@@ -1635,6 +1635,7 @@
   // TEXASR bit description
   enum transaction_failure_reason {
     // Upper half (TEXASRU):
+    tm_failure_code       =  0, // The Failure Code is copied from tabort or treclaim operand.
     tm_failure_persistent =  7, // The failure is likely to recur on each execution.
     tm_disallowed         =  8, // The instruction is not permitted.
     tm_nesting_of         =  9, // The maximum transaction level was exceeded.
@@ -1650,6 +1651,7 @@
     tm_failure_summary    = 36, // Failure has been detected and recorded.
     tm_tfiar_exact        = 37, // Value in the TFIAR is exact.
     tm_rot                = 38, // Rollback-only transaction.
+    tm_transaction_level  = 52, // Transaction level (nesting depth + 1).
   };
 
   // PPC 1, section 2.4.1 Branch Instructions
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Tue Jun 26 08:33:17 2018 -0400
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Fri Jul 06 16:25:07 2018 -0400
@@ -2412,7 +2412,7 @@
 
 // Update rtm_counters based on abort status
 // input: abort_status
-//        rtm_counters (RTMLockingCounters*)
+//        rtm_counters_Reg (RTMLockingCounters*)
 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
   // Mapping to keep PreciseRTMLockingStatistics similar to x86.
   // x86 ppc (! means inverted, ? means not the same)
@@ -2422,52 +2422,96 @@
   //  3   10  Set if an internal buffer overflowed.
   //  4  ?12  Set if a debug breakpoint was hit.
   //  5  ?32  Set if an abort occurred during execution of a nested transaction.
-  const  int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
-                                 Assembler::tm_failure_persistent, // inverted: transient
-                                 Assembler::tm_trans_cf,
-                                 Assembler::tm_footprint_of,
-                                 Assembler::tm_non_trans_cf,
-                                 Assembler::tm_suspended};
-  const bool tm_failure_inv[] = {false, true, false, false, false, false};
-  assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
-
-  const Register addr_Reg = R0;
-  // Keep track of offset to where rtm_counters_Reg had pointed to.
+  const int failure_bit[] = {tm_tabort, // Signal handler will set this too.
+                             tm_failure_persistent,
+                             tm_non_trans_cf,
+                             tm_trans_cf,
+                             tm_footprint_of,
+                             tm_failure_code,
+                             tm_transaction_level};
+
+  const int num_failure_bits = sizeof(failure_bit) / sizeof(int);
+  const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT;
+
+  const int bit2counter_map[][num_counters] =
+  // 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic
+  // Inverted logic means that if a bit is set, don't count it, and vice versa.
+  // Care must be taken when mapping bits to counters as bits for a given
+  // counter must be mutually exclusive. Otherwise, the counter will be
+  // incremented more than once.
+  // counters:
+  // 0        1        2         3         4         5
+  // abort  , persist, conflict, overflow, debug   , nested         bits:
+  {{ 1      , 0      , 0       , 0       , 0       , 0      },   // abort
+   { 0      , -1     , 0       , 0       , 0       , 0      },   // failure_persistent
+   { 0      , 0      , 1       , 0       , 0       , 0      },   // non_trans_cf
+   { 0      , 0      , 1       , 0       , 0       , 0      },   // trans_cf
+   { 0      , 0      , 0       , 1       , 0       , 0      },   // footprint_of
+   { 0      , 0      , 0       , 0       , -1      , 0      },   // failure_code = 0xD4
+   { 0      , 0      , 0       , 0       , 0       , 1      }};  // transaction_level > 1
+  // ...
+
+  // Move abort_status value to R0 and use abort_status register as a
+  // temporary register because R0 as third operand in ld/std is treated
+  // as base address zero (value). Likewise, R0 as second operand in addi
+  // is problematic because it amounts to li.
+  const Register temp_Reg = abort_status;
+  const Register abort_status_R0 = R0;
+  mr(abort_status_R0, abort_status);
+
+  // Increment total abort counter.
   int counters_offs = RTMLockingCounters::abort_count_offset();
-  addi(addr_Reg, rtm_counters_Reg, counters_offs);
-  const Register temp_Reg = rtm_counters_Reg;
-
-  //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
-  ldx(temp_Reg, addr_Reg);
+  ld(temp_Reg, counters_offs, rtm_counters_Reg);
   addi(temp_Reg, temp_Reg, 1);
-  stdx(temp_Reg, addr_Reg);
-
+  std(temp_Reg, counters_offs, rtm_counters_Reg);
+
+  // Increment specific abort counters.
   if (PrintPreciseRTMLockingStatistics) {
-    int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
-
-    //mftexasr(abort_status); done by caller
-    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
-      counters_offs += counters_offs_delta;
-      li(temp_Reg, counters_offs_delta); // can't use addi with R0
-      add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
-      counters_offs_delta = sizeof(uintx);
-
-      Label check_abort;
-      rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
-      if (tm_failure_inv[i]) {
-        bne(CCR0, check_abort);
-      } else {
-        beq(CCR0, check_abort);
+
+    // #0 counter offset.
+    int abortX_offs = RTMLockingCounters::abortX_count_offset();
+
+    for (int nbit = 0; nbit < num_failure_bits; nbit++) {
+      for (int ncounter = 0; ncounter < num_counters; ncounter++) {
+        if (bit2counter_map[nbit][ncounter] != 0) {
+          Label check_abort;
+          int abort_counter_offs = abortX_offs + (ncounter << 3);
+
+          if (failure_bit[nbit] == tm_transaction_level) {
+            // Don't check outer transaction, TL = 1 (bit 63). Hence only
+            // 11 bits in the TL field are checked to find out if failure
+            // occurred in a nested transaction. This check also matches
+            // the case when nesting_of = 1 (nesting overflow).
+            rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
+          } else if (failure_bit[nbit] == tm_failure_code) {
+            // Check failure code for trap or illegal caught in TM.
+            // Bits 0:7 are tested as bit 7 (persistent) is copied from
+            // tabort or treclaim source operand.
+            // On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
+            rldicl(temp_Reg, abort_status_R0, 8, 56);
+            cmpdi(CCR0, temp_Reg, 0xD4);
+          } else {
+            rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
+          }
+
+          if (bit2counter_map[nbit][ncounter] == 1) {
+            beq(CCR0, check_abort);
+          } else {
+            bne(CCR0, check_abort);
+          }
+
+          // We don't increment atomically.
+          ld(temp_Reg, abort_counter_offs, rtm_counters_Reg);
+          addi(temp_Reg, temp_Reg, 1);
+          std(temp_Reg, abort_counter_offs, rtm_counters_Reg);
+
+          bind(check_abort);
+        }
       }
-      //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
-      ldx(temp_Reg, addr_Reg);
-      addi(temp_Reg, temp_Reg, 1);
-      stdx(temp_Reg, addr_Reg);
-      bind(check_abort);
     }
   }
-  li(temp_Reg, -counters_offs); // can't use addi with R0
-  add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
+  // Restore abort_status.
+  mr(abort_status, abort_status_R0);
 }
 
 // Branch if (random & (count-1) != 0), count is 2^n