8214239: Missing x86_64.ad patterns for clearing and setting long vector bits
author bsrbnd
Wed, 13 Nov 2019 11:21:15 +0100
changeset 59051 f0312c7d5b37
parent 59050 7bbaa3c416e7
child 59053 ba6c248cae19
8214239: Missing x86_64.ad patterns for clearing and setting long vector bits
Reviewed-by: kvn, vlivanov, jrose, sviswanathan
src/hotspot/cpu/x86/assembler_x86.cpp
src/hotspot/cpu/x86/assembler_x86.hpp
src/hotspot/cpu/x86/x86_64.ad
test/hotspot/jtreg/compiler/c2/TestBitSetAndReset.java
test/micro/org/openjdk/bench/vm/compiler/BitSetAndReset.java
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Oct 21 19:58:16 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Wed Nov 13 11:21:15 2019 +0100
@@ -9163,6 +9163,26 @@
   emit_int8((unsigned char)(0xD0 | encode));
 }
 
+void Assembler::btsq(Address dst, int imm8) {  // emits bts with an immediate bit index on a 64-bit memory operand: sets bit imm8 of dst
+  assert(isByte(imm8), "not a byte");  // the bit index is emitted below as a single byte
+  InstructionMark im(this);
+  prefixq(dst);  // 64-bit operand prefix for the memory operand (helper defined elsewhere)
+  emit_int8((unsigned char)0x0F);  // two-byte opcode 0x0F 0xBA, the same opcode btrq uses
+  emit_int8((unsigned char)0xBA);
+  emit_operand(rbp /* 5 */, dst, 1);  // rbp (register number 5) selects opcode extension /5 = BTS; the trailing 1 presumably accounts for the imm8 byte that follows -- confirm in emit_operand
+  emit_int8(imm8);  // bit index
+}
+
+void Assembler::btrq(Address dst, int imm8) {  // emits btr with an immediate bit index on a 64-bit memory operand: clears bit imm8 of dst
+  assert(isByte(imm8), "not a byte");  // the bit index is emitted below as a single byte
+  InstructionMark im(this);
+  prefixq(dst);  // 64-bit operand prefix for the memory operand (helper defined elsewhere)
+  emit_int8((unsigned char)0x0F);  // two-byte opcode 0x0F 0xBA, the same opcode btsq uses
+  emit_int8((unsigned char)0xBA);
+  emit_operand(rsi /* 6 */, dst, 1);  // rsi (register number 6) selects opcode extension /6 = BTR; the trailing 1 presumably accounts for the imm8 byte that follows -- confirm in emit_operand
+  emit_int8(imm8);  // bit index
+}
+
 void Assembler::orq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Mon Oct 21 19:58:16 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Wed Nov 13 11:21:15 2019 +0100
@@ -1592,6 +1592,9 @@
 
 #ifdef _LP64
   void notq(Register dst);
+
+  void btsq(Address dst, int imm8);  // bts on a 64-bit memory operand: set the bit whose index is imm8 (must fit in a byte)
+  void btrq(Address dst, int imm8);  // btr on a 64-bit memory operand: clear the bit whose index is imm8 (must fit in a byte)
 #endif
 
   void orl(Address dst, int32_t imm32);
--- a/src/hotspot/cpu/x86/x86_64.ad	Mon Oct 21 19:58:16 2019 +0200
+++ b/src/hotspot/cpu/x86/x86_64.ad	Wed Nov 13 11:21:15 2019 +0100
@@ -3116,6 +3116,26 @@
   interface(CONST_INTER);
 %}
 
+operand immL_Pow2()
+%{
+  predicate(is_power_of_2_long(n->get_long()));  // long constant accepted only if is_power_of_2_long() (defined elsewhere) holds for it; used as the mask operand of btsL_mem_imm
+  match(ConL);
+
+  op_cost(15);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immL_NotPow2()
+%{
+  predicate(is_power_of_2_long(~n->get_long()));  // long constant accepted only if its bitwise complement passes is_power_of_2_long() (defined elsewhere); used as the mask operand of btrL_mem_imm
+  match(ConL);
+
+  op_cost(15);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long Immediate zero
 operand immL0()
 %{
@@ -9841,6 +9861,23 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
+%{
+  // con should be a pure 64-bit immediate given that not(con) is a power of 2
+  // because AND/OR works well enough for 8/32-bit values.
+  predicate(log2_long(~n->in(3)->in(2)->get_long()) > 30);  // only when the bit to clear has index 31..63; n->in(3)->in(2) is presumably the AndL's constant input (con) -- confirm
+
+  match(Set dst (StoreL dst (AndL (LoadL dst) con)));  // in-memory read-modify-write: load dst, AND with con, store back to dst
+  effect(KILL cr);  // the flags register is clobbered
+
+  ins_cost(125);
+  format %{ "btrq    $dst, log2(not($con))\t# long" %}
+  ins_encode %{
+    __ btrq($dst$$Address, log2_long(~$con$$constant));  // bit index passed to btrq = log2 of the complemented mask
+  %}
+  ins_pipe(ialu_mem_imm);
+%}
+
 // BMI1 instructions
 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
@@ -10034,6 +10071,23 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
+%{
+  // con should be a pure 64-bit power of 2 immediate
+  // because AND/OR works well enough for 8/32-bit values.
+  predicate(log2_long(n->in(3)->in(2)->get_long()) > 31);  // only when the bit to set has index 32..63; n->in(3)->in(2) is presumably the OrL's constant input (con) -- confirm
+
+  match(Set dst (StoreL dst (OrL (LoadL dst) con)));  // in-memory read-modify-write: load dst, OR with con, store back to dst
+  effect(KILL cr);  // the flags register is clobbered
+
+  ins_cost(125);
+  format %{ "btsq    $dst, log2($con)\t# long" %}
+  ins_encode %{
+    __ btsq($dst$$Address, log2_long($con$$constant));  // bit index passed to btsq = log2 of the mask
+  %}
+  ins_pipe(ialu_mem_imm);
+%}
+
 // Xor Instructions
 // Xor Register with Register
 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/c2/TestBitSetAndReset.java	Wed Nov 13 11:21:15 2019 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8214239
+ * @summary Missing x86_64.ad patterns for clearing and setting long vector bits
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:-TieredCompilation -XX:CompileThreshold=1000
+ *                   -XX:CompileCommand=print,compiler/c2/TestBitSetAndReset.test*
+ *                   -XX:CompileCommand=compileonly,compiler/c2/TestBitSetAndReset.test*
+ *                   -XX:CompileCommand=dontinline,compiler/c2/TestBitSetAndReset.test*
+ *                   compiler.c2.TestBitSetAndReset
+ */
+
+package compiler.c2;
+
+public class TestBitSetAndReset {
+    private static final int COUNT = 10_000;  // number of rounds run by main()
+    // Single-bit masks: MASKnn has only bit nn set.
+    private static final long MASK63 = 0x8000_0000_0000_0000L;
+    private static final long MASK32 = 0x0000_0001_0000_0000L;
+    private static final long MASK31 = 0x0000_0000_8000_0000L;
+    private static final long MASK15 = 0x0000_0000_0000_8000L;
+    private static final long MASK00 = 0x0000_0000_0000_0001L;
+    // andq has bits cleared from it by the testNN() methods; orq has bits set in it.
+    private static long andq, orq;
+
+    public static void main(String... args) {
+        boolean success = true;  // stays true only if every round produced the expected values
+
+        for (int i=0; i<COUNT; i++) {
+            andq = MASK63 | MASK31 | MASK15 | MASK00;  // MASK32 is not part of the start value, so test32() clears an already-clear bit
+            orq = 0;
+            test63();
+            test32();
+            test31();
+            test15();
+            test00();
+            success &= andq == 0 && orq == (MASK63 | MASK32 | MASK31 | MASK15 | MASK00);  // every bit cleared in andq, all five bits set in orq
+        }
+        if (!success)
+            throw new AssertionError("Failure while setting or clearing long vector bits!");
+    }
+    // Each testNN() clears bit NN of andq and sets bit NN of orq.
+    private static void test63() {
+        andq &= ~MASK63;
+        orq |= MASK63;
+    }
+    private static void test32() {
+        andq &= ~MASK32;
+        orq |= MASK32;
+    }
+    private static void test31() {
+        andq &= ~MASK31;
+        orq |= MASK31;
+    }
+    private static void test15() {
+        andq &= ~MASK15;
+        orq |= MASK15;
+    }
+    private static void test00() {
+        andq &= ~MASK00;
+        orq |= MASK00;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/micro/org/openjdk/bench/vm/compiler/BitSetAndReset.java	Wed Nov 13 11:21:15 2019 +0100
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class BitSetAndReset {
+    private static final int COUNT = 10_000;  // inner-loop repetitions per benchmark method call
+    // Single-bit masks: MASKnn has only bit nn set.
+    private static final long MASK63 = 0x8000_0000_0000_0000L;
+    private static final long MASK31 = 0x0000_0000_8000_0000L;
+    private static final long MASK15 = 0x0000_0000_0000_8000L;
+    private static final long MASK00 = 0x0000_0000_0000_0001L;
+    // andq has bits cleared from it, orq has bits set in it; success records whether every bitSet() round matched.
+    private long andq, orq;
+    private boolean success = true;
+    // Tear-down hook at Level.Iteration: throws if any bitSet() round left unexpected values.
+    @TearDown(Level.Iteration)
+    public void finish() {
+        if (!success)
+            throw new AssertionError("Failure while setting or clearing long vector bits!");
+    }
+    // COUNT times: reset both fields, apply the four testNN() updates and fold the check into success.
+    @Benchmark
+    public void bitSet(Blackhole bh) {
+        for (int i=0; i<COUNT; i++) {
+            andq = MASK63 | MASK31 | MASK15 | MASK00;
+            orq = 0;
+            bh.consume(test63());
+            bh.consume(test31());
+            bh.consume(test15());
+            bh.consume(test00());
+            success &= andq == 0 && orq == (MASK63 | MASK31 | MASK15 | MASK00);  // every bit cleared in andq, all four bits set in orq
+        }
+    }
+    // Each testNN() clears bit NN of andq, sets bit NN of orq and returns a constant 0L that the caller hands to bh.consume().
+    private long test63() {
+        andq &= ~MASK63;
+        orq |= MASK63;
+        return 0L;
+    }
+    private long test31() {
+        andq &= ~MASK31;
+        orq |= MASK31;
+        return 0L;
+    }
+    private long test15() {
+        andq &= ~MASK15;
+        orq |= MASK15;
+        return 0L;
+    }
+    private long test00() {
+        andq &= ~MASK00;
+        orq |= MASK00;
+        return 0L;
+    }
+    // Additional single-bit masks and fields used only by throughput() / testTp().
+    private static final long MASK62 = 0x4000_0000_0000_0000L;
+    private static final long MASK61 = 0x2000_0000_0000_0000L;
+    private static final long MASK60 = 0x1000_0000_0000_0000L;
+
+    private long orq63, orq62, orq61, orq60;
+    // COUNT times: zero the four orqNN fields, then call testTp().
+    @Benchmark
+    public void throughput(Blackhole bh) {
+        for (int i=0; i<COUNT; i++) {
+            orq63 = orq62 = orq61 = orq60 = 0;
+            bh.consume(testTp());
+        }
+    }
+    // Sets one bit (63, 62, 61, 60) in each of four different fields and returns a constant 0L.
+    private long testTp() {
+        orq63 |= MASK63;
+        orq62 |= MASK62;
+        orq61 |= MASK61;
+        orq60 |= MASK60;
+        return 0L;
+    }
+}