8214205: PPC64: Add instructions for counting trailing zeros
Reviewed-by: mdoerr, gromero
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp Thu Nov 22 21:43:37 2018 -0500
@@ -397,6 +397,7 @@
LWAX_OPCODE = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM
CNTLZW_OPCODE = (31u << OPCODE_SHIFT | 26u << XO_21_30_SHIFT), // X-FORM
+ CNTTZW_OPCODE = (31u << OPCODE_SHIFT | 538u << XO_21_30_SHIFT), // X-FORM
// 64 bit opcode encodings
@@ -428,6 +429,7 @@
DIVD_OPCODE = (31u << OPCODE_SHIFT | 489u << 1), // XO-FORM
CNTLZD_OPCODE = (31u << OPCODE_SHIFT | 58u << XO_21_30_SHIFT), // X-FORM
+ CNTTZD_OPCODE = (31u << OPCODE_SHIFT | 570u << XO_21_30_SHIFT), // X-FORM
NAND_OPCODE = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM
NOR_OPCODE = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM
@@ -1500,6 +1502,10 @@
inline void cntlzw_( Register a, Register s);
inline void cntlzd( Register a, Register s);
inline void cntlzd_( Register a, Register s);
+ inline void cnttzw( Register a, Register s); // Count trailing zeros, word form: a receives the result, s is the source (emitted with rc = 0).
+ inline void cnttzw_( Register a, Register s); // Same as cnttzw, but emitted with rc = 1.
+ inline void cnttzd( Register a, Register s); // Count trailing zeros, doubleword form: a receives the result, s is the source (emitted with rc = 0).
+ inline void cnttzd_( Register a, Register s); // Same as cnttzd, but emitted with rc = 1.
// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
inline void sld( Register a, Register s, Register b);
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp Thu Nov 22 21:43:37 2018 -0500
@@ -235,6 +235,10 @@
inline void Assembler::cntlzw_( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); }
inline void Assembler::cntlzd( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); }
inline void Assembler::cntlzd_( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); }
+inline void Assembler::cnttzw( Register a, Register s) { emit_int32(CNTTZW_OPCODE | rta(a) | rs(s) | rc(0)); } // Count trailing zeros, word form (ppc.ad uses it for CountTrailingZerosI); rc = 0.
+inline void Assembler::cnttzw_( Register a, Register s) { emit_int32(CNTTZW_OPCODE | rta(a) | rs(s) | rc(1)); } // Same encoding as cnttzw with rc = 1.
+inline void Assembler::cnttzd( Register a, Register s) { emit_int32(CNTTZD_OPCODE | rta(a) | rs(s) | rc(0)); } // Count trailing zeros, doubleword form (ppc.ad uses it for CountTrailingZerosL); rc = 0.
+inline void Assembler::cnttzd_( Register a, Register s) { emit_int32(CNTTZD_OPCODE | rta(a) | rs(s) | rc(1)); } // Same encoding as cnttzd with rc = 1.
// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
inline void Assembler::sld( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp Thu Nov 22 21:43:37 2018 -0500
@@ -126,6 +126,9 @@
product(bool, UseCountLeadingZerosInstructionsPPC64, true, \
"Use count leading zeros instructions.") \
\
+ product(bool, UseCountTrailingZerosInstructionsPPC64, false, \
+ "Use count trailing zeros instructions.") \
+ \
product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false, \
"Use extended versions of load-and-reserve instructions.") \
\
--- a/src/hotspot/cpu/ppc/ppc.ad Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/ppc.ad Thu Nov 22 21:43:37 2018 -0500
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2012, 2017 SAP SE. All rights reserved.
+// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2012, 2018 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -2209,9 +2209,13 @@
return VM_Version::has_fsqrt();
case Op_CountLeadingZerosI:
case Op_CountLeadingZerosL:
+ if (!UseCountLeadingZerosInstructionsPPC64)
+ return false;
+ break;
case Op_CountTrailingZerosI:
case Op_CountTrailingZerosL:
- if (!UseCountLeadingZerosInstructionsPPC64)
+ if (!UseCountLeadingZerosInstructionsPPC64 &&
+ !UseCountTrailingZerosInstructionsPPC64)
return false;
break;
@@ -13425,7 +13429,7 @@
instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
match(Set dst (CountTrailingZerosI src));
- predicate(UseCountLeadingZerosInstructionsPPC64);
+ predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
ins_cost(DEFAULT_COST);
expand %{
@@ -13442,9 +13446,22 @@
%}
%}
+instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
+ match(Set dst (CountTrailingZerosI src)); // Int count-trailing-zeros node, implemented with a single cnttzw.
+ predicate(UseCountTrailingZerosInstructionsPPC64); // Only when the flag is on; countTrailingZerosI_Ex covers the flag-off case.
+ ins_cost(DEFAULT_COST);
+
+ format %{ "CNTTZW $dst, $src" %}
+ size(4); // Exactly one 32-bit instruction word is emitted.
+ ins_encode %{
+ __ cnttzw($dst$$Register, $src$$Register); // dst = number of trailing zeros of src (rc = 0 form).
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
match(Set dst (CountTrailingZerosL src));
- predicate(UseCountLeadingZerosInstructionsPPC64);
+ predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
ins_cost(DEFAULT_COST);
expand %{
@@ -13460,6 +13477,19 @@
%}
%}
+instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
+ match(Set dst (CountTrailingZerosL src)); // Long count-trailing-zeros node; long source register, int destination register.
+ predicate(UseCountTrailingZerosInstructionsPPC64); // Only when the flag is on; countTrailingZerosL_Ex covers the flag-off case.
+ ins_cost(DEFAULT_COST);
+
+ format %{ "CNTTZD $dst, $src" %}
+ size(4); // Exactly one 32-bit instruction word is emitted.
+ ins_encode %{
+ __ cnttzd($dst$$Register, $src$$Register); // dst = number of trailing zeros of src (rc = 0 form).
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Expand nodes for byte_reverse_int.
instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
effect(DEF dst, USE src, USE pos, USE shift);
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp Thu Nov 22 10:15:32 2018 -0800
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp Thu Nov 22 21:43:37 2018 -0500
@@ -129,6 +129,17 @@
}
}
MaxVectorSize = SuperwordUseVSX ? 16 : 8;
+
+ if (PowerArchitecturePPC64 >= 9) { // Power9 or newer: the count-trailing-zeros instructions may be used.
+ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionsPPC64)) { // Turn the flag on only while it still has its default value.
+ FLAG_SET_ERGO(bool, UseCountTrailingZerosInstructionsPPC64, true);
+ }
+ } else { // Older than Power9: the flag cannot be honored.
+ if (UseCountTrailingZerosInstructionsPPC64) { // The declared default is false, so presumably true here means it was requested explicitly.
+ warning("UseCountTrailingZerosInstructionsPPC64 specified, but needs at least Power9.");
+ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionsPPC64, false); // Switch it back off so the cnttzw/cnttzd match rules are not selected.
+ }
+ }
#endif
// Create and print feature-string.