8129426: aarch64: add support for PopCount in C2
Summary: Add support for PopCount using SIMD cnt and addv inst
Reviewed-by: kvn, aph
Contributed-by: alexander.alexeev@caviumnetworks.com
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Thu Jun 25 08:52:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Thu Jun 25 13:41:29 2015 +0000
@@ -7464,6 +7464,96 @@
ins_pipe(ialu_reg);
%}
+//---------- Population Count Instructions -------------------------------------
+//
+
+// PopCountI: dst = number of set bits in the 32-bit value src (SIMD cnt per byte, then addv).
+instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountI src));
+ effect(TEMP tmp);
+ ins_cost(INSN_COST * 13);
+
+ format %{ "fmovs $tmp, $src\t# vector (1S)\n\t"
+ "cnt $tmp, $tmp\t# vector (8B)\n\t"
+ "addv $tmp, $tmp\t# vector (8B)\n\t"
+ "mov $dst, $tmp\t# vector (1D)" %}
+ ins_encode %{
+ // src is input-only (possibly a live long seen through L2I): copy its low 32 bits, never write it.
+ __ fmovs($tmp$$FloatRegister, $src$$Register);
+ __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+ __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+ __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountI (LoadI mem)));
+ effect(TEMP tmp);
+ ins_cost(INSN_COST * 13);
+
+ format %{ "ldrs $tmp, $mem\n\t"
+ "cnt $tmp, $tmp\t# vector (8B)\n\t"
+ "addv $tmp, $tmp\t# vector (8B)\n\t"
+ "mov $dst, $tmp\t# vector (1D)" %}
+ ins_encode %{
+ FloatRegister vtmp = as_FloatRegister($tmp$$reg); // scratch SIMD register used by every step below
+ loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, vtmp, $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); // load the int operand straight into vtmp
+ __ cnt(vtmp, __ T8B, vtmp); // count over the 8B arrangement
+ __ addv(vtmp, __ T8B, vtmp); // reduce the 8B lanes
+ __ mov($dst$$Register, vtmp, __ T1D, 0); // move element 0 to the integer result
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL src));
+ effect(TEMP tmp);
+ ins_cost(INSN_COST * 13);
+
+ format %{ "mov $tmp, $src\t# vector (1D)\n\t"
+ "cnt $tmp, $tmp\t# vector (8B)\n\t"
+ "addv $tmp, $tmp\t# vector (8B)\n\t"
+ "mov $dst, $tmp\t# vector (1D)" %}
+ ins_encode %{
+ FloatRegister vtmp = as_FloatRegister($tmp$$reg); // scratch SIMD register used by every step below
+ __ mov(vtmp, __ T1D, 0, $src$$Register); // copy the long operand into element 0 of vtmp
+ __ cnt(vtmp, __ T8B, vtmp); // count over the 8B arrangement
+ __ addv(vtmp, __ T8B, vtmp); // reduce the 8B lanes
+ __ mov($dst$$Register, vtmp, __ T1D, 0); // move element 0 to the integer result
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
+ predicate(UsePopCountInstruction);
+ match(Set dst (PopCountL (LoadL mem)));
+ effect(TEMP tmp);
+ ins_cost(INSN_COST * 13);
+
+ format %{ "ldrd $tmp, $mem\n\t"
+ "cnt $tmp, $tmp\t# vector (8B)\n\t"
+ "addv $tmp, $tmp\t# vector (8B)\n\t"
+ "mov $dst, $tmp\t# vector (1D)" %}
+ ins_encode %{
+ FloatRegister vtmp = as_FloatRegister($tmp$$reg); // scratch SIMD register used by every step below
+ loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, vtmp, $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); // load the long operand straight into vtmp
+ __ cnt(vtmp, __ T8B, vtmp); // count over the 8B arrangement
+ __ addv(vtmp, __ T8B, vtmp); // reduce the 8B lanes
+ __ mov($dst$$Register, vtmp, __ T1D, 0); // move element 0 to the integer result
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
// ============================================================================
// MemBar Instruction
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Jun 25 08:52:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Jun 25 13:41:29 2015 +0000
@@ -2055,6 +2055,9 @@
INSN(negr, 1, 0b100000101110);
INSN(notr, 1, 0b100000010110);
INSN(addv, 0, 0b110001101110);
+ INSN(cls, 0, 0b100000010010); // NOTE(review): presumably AdvSIMD CLS (count leading sign bits) -- confirm against the Arm ARM
+ INSN(clz, 1, 0b100000010010); // same opcode bits as cls; only the second macro argument differs (presumably AdvSIMD CLZ)
+ INSN(cnt, 0, 0b100000010110); // vector cnt; emitted by the popCount* rules in aarch64.ad
#undef INSN
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jun 25 08:52:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jun 25 13:41:29 2015 +0000
@@ -36,6 +36,7 @@
class MacroAssembler: public Assembler {
friend class LIR_Assembler;
+ public:
using Assembler::mov;
using Assembler::movi;
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Jun 25 08:52:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Jun 25 13:41:29 2015 +0000
@@ -257,6 +257,10 @@
UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0;
}
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+ UsePopCountInstruction = true;
+ }
+
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;