8214205: PPC64: Add instructions for counting trailing zeros
author mhorie
Thu, 22 Nov 2018 21:43:37 -0500
changeset 52667 a959583eea01
parent 52666 4bef1957a1d8
child 52668 02747dfbd776
8214205: PPC64: Add instructions for counting trailing zeros
Reviewed-by: mdoerr, gromero
src/hotspot/cpu/ppc/assembler_ppc.hpp
src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
src/hotspot/cpu/ppc/globals_ppc.hpp
src/hotspot/cpu/ppc/ppc.ad
src/hotspot/cpu/ppc/vm_version_ppc.cpp
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp	Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp	Thu Nov 22 21:43:37 2018 -0500
@@ -397,6 +397,7 @@
     LWAX_OPCODE   = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM
 
     CNTLZW_OPCODE = (31u << OPCODE_SHIFT |  26u << XO_21_30_SHIFT), // X-FORM
+    CNTTZW_OPCODE = (31u << OPCODE_SHIFT | 538u << XO_21_30_SHIFT), // X-FORM
 
     // 64 bit opcode encodings
 
@@ -428,6 +429,7 @@
     DIVD_OPCODE   = (31u << OPCODE_SHIFT | 489u << 1),              // XO-FORM
 
     CNTLZD_OPCODE = (31u << OPCODE_SHIFT |  58u << XO_21_30_SHIFT), // X-FORM
+    CNTTZD_OPCODE = (31u << OPCODE_SHIFT | 570u << XO_21_30_SHIFT), // X-FORM
     NAND_OPCODE   = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM
     NOR_OPCODE    = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM
 
@@ -1500,6 +1502,10 @@
   inline void cntlzw_( Register a, Register s);
   inline void cntlzd(  Register a, Register s);
   inline void cntlzd_( Register a, Register s);
+  inline void cnttzw(  Register a, Register s);
+  inline void cnttzw_( Register a, Register s);
+  inline void cnttzd(  Register a, Register s);
+  inline void cnttzd_( Register a, Register s);
 
   // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
   inline void sld(     Register a, Register s, Register b);
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp	Thu Nov 22 21:43:37 2018 -0500
@@ -235,6 +235,10 @@
 inline void Assembler::cntlzw_( Register a, Register s)              { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); }
 inline void Assembler::cntlzd(  Register a, Register s)              { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); }
 inline void Assembler::cntlzd_( Register a, Register s)              { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); }
+inline void Assembler::cnttzw(  Register a, Register s)              { emit_int32(CNTTZW_OPCODE | rta(a) | rs(s) | rc(0)); } // Emits CNTTZW with rc(0); ppc.ad uses it for CountTrailingZerosI.
+inline void Assembler::cnttzw_( Register a, Register s)              { emit_int32(CNTTZW_OPCODE | rta(a) | rs(s) | rc(1)); } // Same CNTTZW encoding as cnttzw, but with rc(1) instead of rc(0).
+inline void Assembler::cnttzd(  Register a, Register s)              { emit_int32(CNTTZD_OPCODE | rta(a) | rs(s) | rc(0)); } // Emits CNTTZD with rc(0); ppc.ad uses it for CountTrailingZerosL.
+inline void Assembler::cnttzd_( Register a, Register s)              { emit_int32(CNTTZD_OPCODE | rta(a) | rs(s) | rc(1)); } // Same CNTTZD encoding as cnttzd, but with rc(1) instead of rc(0).
 
 // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
 inline void Assembler::sld(     Register a, Register s, Register b)  { emit_int32(SLD_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp	Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp	Thu Nov 22 21:43:37 2018 -0500
@@ -126,6 +126,9 @@
   product(bool, UseCountLeadingZerosInstructionsPPC64, true,                \
           "Use count leading zeros instructions.")                          \
                                                                             \
+  product(bool, UseCountTrailingZerosInstructionsPPC64, false,              \
+          "Use count trailing zeros instructions.")                         \
+                                                                            \
   product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false,          \
           "Use extended versions of load-and-reserve instructions.")        \
                                                                             \
--- a/src/hotspot/cpu/ppc/ppc.ad	Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/ppc.ad	Thu Nov 22 21:43:37 2018 -0500
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2012, 2017 SAP SE. All rights reserved.
+// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2012, 2018 SAP SE. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -2209,9 +2209,13 @@
     return VM_Version::has_fsqrt();
   case Op_CountLeadingZerosI:
   case Op_CountLeadingZerosL:
+    if (!UseCountLeadingZerosInstructionsPPC64)
+      return false;
+    break;
   case Op_CountTrailingZerosI:
   case Op_CountTrailingZerosL:
-    if (!UseCountLeadingZerosInstructionsPPC64)
+    if (!UseCountLeadingZerosInstructionsPPC64 &&
+        !UseCountTrailingZerosInstructionsPPC64)
       return false;
     break;
 
@@ -13425,7 +13429,7 @@
 
 instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CountTrailingZerosI src));
-  predicate(UseCountLeadingZerosInstructionsPPC64);
+  predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -13442,9 +13446,22 @@
   %}
 %}
 
+instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
+  match(Set dst (CountTrailingZerosI src));  // Int count-trailing-zeros, implemented by one CNTTZW.
+  predicate(UseCountTrailingZerosInstructionsPPC64);  // Complements countTrailingZerosI_Ex, whose predicate requires this flag to be off.
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CNTTZW  $dst, $src" %}
+  size(4);  // The encoding below emits a single 32-bit instruction word (emit_int32).
+  ins_encode %{
+    __ cnttzw($dst$$Register, $src$$Register);  // rc(0) variant of the assembler helper.
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
   match(Set dst (CountTrailingZerosL src));
-  predicate(UseCountLeadingZerosInstructionsPPC64);
+  predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
   ins_cost(DEFAULT_COST);
 
   expand %{
@@ -13460,6 +13477,19 @@
  %}
 %}
 
+instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
+  match(Set dst (CountTrailingZerosL src));  // Long count-trailing-zeros, implemented by one CNTTZD; result goes to an int register.
+  predicate(UseCountTrailingZerosInstructionsPPC64);  // Complements countTrailingZerosL_Ex, whose predicate requires this flag to be off.
+  ins_cost(DEFAULT_COST);
+
+  format %{ "CNTTZD  $dst, $src" %}
+  size(4);  // The encoding below emits a single 32-bit instruction word (emit_int32).
+  ins_encode %{
+    __ cnttzd($dst$$Register, $src$$Register);  // rc(0) variant of the assembler helper.
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // Expand nodes for byte_reverse_int.
 instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
   effect(DEF dst, USE src, USE pos, USE shift);
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp	Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp	Thu Nov 22 21:43:37 2018 -0500
@@ -129,6 +129,17 @@
     }
   }
   MaxVectorSize = SuperwordUseVSX ? 16 : 8;
+
+  if (PowerArchitecturePPC64 >= 9) {  // Power9 or newer: the trailing-zero count instructions may be used.
+    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionsPPC64)) {  // Only touch the flag while it is still at its default.
+      FLAG_SET_ERGO(bool, UseCountTrailingZerosInstructionsPPC64, true);  // Switch it on ergonomically.
+    }
+  } else {
+    if (UseCountTrailingZerosInstructionsPPC64) {  // Declared default is false (globals_ppc.hpp), so the flag was switched on explicitly.
+      warning("UseCountTrailingZerosInstructionsPPC64 specified, but needs at least Power9.");
+      FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionsPPC64, false);  // Force the flag back off on pre-Power9 CPUs.
+    }
+  }
 #endif
 
   // Create and print feature-string.