8144028: Use AArch64 bit-test instructions in C2
author aph
Wed, 25 Nov 2015 18:13:13 +0000
changeset 34507 636a88905e3b
parent 34506 7af1663b3497
child 34508 838d37db4223
8144028: Use AArch64 bit-test instructions in C2
Reviewed-by: kvn
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
hotspot/test/compiler/codegen/8144028/BitTests.java
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Dec 01 21:16:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 25 18:13:13 2015 +0000
@@ -4306,7 +4306,6 @@
     int disp = $mem$$disp;
     if (index == -1) {
       __ prfm(Address(base, disp), PSTL1KEEP);
-      __ nop();
     } else {
       Register index_reg = as_Register(index);
       if (disp == 0) {
@@ -13844,6 +13843,139 @@
   ins_pipe(pipe_cmp_branch);
 %}
 
+// Test bit and Branch (TBZ/TBNZ on the sign bit or on a single mask bit)
+
+instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpL op1 op2)); // conditional branch on a long compare of op1 with the immL0 operand
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge); // rule applies to lt and ge tests only
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # long" %} // NOTE(review): text prints a cb-prefixed mnemonic, the encoding below emits tbz/tbnz
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::LT)
+      __ tbnz($op1$$Register, 63, *L); // LT: branch when bit 63 (top bit of the long) is set
+    else
+      __ tbz($op1$$Register, 63, *L); // remaining case admitted by the predicate: branch when bit 63 is clear
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpI op1 op2)); // conditional branch on an int compare of op1 with the immI0 operand
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge); // rule applies to lt and ge tests only
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # int" %} // NOTE(review): text prints a cb-prefixed mnemonic, the encoding below emits tbz/tbnz
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::LT)
+      __ tbnz($op1$$Register, 31, *L); // LT: branch when bit 31 (top bit of the int) is set
+    else
+      __ tbz($op1$$Register, 31, *L); // remaining case admitted by the predicate: branch when bit 31 is clear
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpL (AndL op1 op2) op3)); // branch on (op1 AND constant op2) compared with the immL0 operand
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long())); // eq/ne tests only, and the AndL constant must be a power of two
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant); // turn the power-of-two mask into a bit index
+    if (cond == Assembler::EQ)
+      __ tbz($op1$$Register, bit, *L); // EQ: masked value is zero, so branch when the bit is clear
+    else
+      __ tbnz($op1$$Register, bit, *L); // NE (only other test admitted by the predicate): branch when the bit is set
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpI (AndI op1 op2) op3)); // branch on (op1 AND constant op2) compared with the immI0 operand
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int())); // eq/ne tests only, and the AndI constant must be a power of two
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant); // turn the power-of-two mask into a bit index
+    if (cond == Assembler::EQ)
+      __ tbz($op1$$Register, bit, *L); // EQ: masked value is zero, so branch when the bit is clear
+    else
+      __ tbnz($op1$$Register, bit, *L); // NE (only other test admitted by the predicate): branch when the bit is set
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+// Test bits (TST/TSTW rules that set the flags register from op1 AND mask)
+
+instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpL (AndL op1 op2) op3)); // flags from (op1 AND constant op2) compared with the immL0 operand
+  predicate(Assembler::operand_valid_for_logical_immediate
+            (/*is_32*/false, n->in(1)->in(2)->get_long())); // the constant must pass the 64-bit logical-immediate check
+
+  ins_cost(INSN_COST);
+  format %{ "tst $op1, $op2 # long" %}
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$constant); // MacroAssembler::tst(Register, uint64_t) forwards to ands(zr, Rd, imm)
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpI (AndI op1 op2) op3)); // flags from (op1 AND constant op2) compared with the immI0 operand
+  predicate(Assembler::operand_valid_for_logical_immediate
+            (/*is_32*/true, n->in(1)->in(2)->get_int())); // the constant must pass the 32-bit logical-immediate check
+
+  ins_cost(INSN_COST);
+  format %{ "tstw $op1, $op2 # int" %}
+  ins_encode %{
+    __ tstw($op1$$Register, $op2$$constant); // MacroAssembler::tstw(Register, uint64_t) forwards to andsw(zr, Rd, imm)
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpL (AndL op1 op2) op3)); // register-register form: no predicate, the mask is in op2
+
+  ins_cost(INSN_COST);
+  format %{ "tst $op1, $op2 # long" %}
+  ins_encode %{
+    __ tst($op1$$Register, $op2$$Register); // MacroAssembler::tst(Register, Register) forwards to ands(zr, Rd, Rn)
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
+  match(Set cr (CmpI (AndI op1 op2) op3)); // register-register form: no predicate, the mask is in op2
+
+  ins_cost(INSN_COST);
+  format %{ "tstw $op1, $op2 # int" %}
+  ins_encode %{
+    __ tstw($op1$$Register, $op2$$Register); // MacroAssembler::tstw(Register, Register) forwards to andsw(zr, Rd, Rn)
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+
 // Conditional Far Branch
 // Conditional Far Branch Unsigned
 // TODO: fixme
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Dec 01 21:16:12 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Nov 25 18:13:13 2015 +0000
@@ -215,8 +215,11 @@
   inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
   inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }
 
-  inline void tstw(Register Rd, unsigned imm) { andsw(zr, Rd, imm); }
-  inline void tst(Register Rd, unsigned imm) { ands(zr, Rd, imm); }
+  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); } // register-register test used by the int (CmpI) rule in aarch64.ad; zr is passed as the first andsw operand
+  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); } // register-register test used by the long (CmpL) rule in aarch64.ad; zr is passed as the first ands operand
+
+  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); } // immediate test used by the int (CmpI) rule in aarch64.ad; imm parameter is uint64_t here (the removed overload took unsigned)
+  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); } // immediate test used by the long (CmpL) rule in aarch64.ad; imm parameter is uint64_t here (the removed overload took unsigned)
 
   inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
     bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/codegen/8144028/BitTests.java	Wed Nov 25 18:13:13 2015 +0000
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2015, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8144028
+ * @summary Use AArch64 bit-test instructions in C2
+ * @modules java.base
+ * @run main/othervm -Xbatch -XX:CompileCommand=dontinline,BitTests::* -XX:-TieredCompilation BitTests
+ * @run main/othervm -Xbatch -XX:+TieredCompilation -XX:TieredStopAtLevel=1 BitTests
+ * @run main/othervm -Xbatch -XX:+TieredCompilation BitTests
+ *
+ */
+
+// Try to ensure that the bit test instructions TBZ/TBNZ, TST/TSTW
+// don't generate incorrect code.  We can't guarantee that C2 will use
+// bit test instructions for this test and it's not a bug if it
+// doesn't.  However, these test cases are ideal candidates for each
+// of the instruction forms.
+public class BitTests { // drives a fixed pseudo-random bit stream through sign, single-bit and mask tests and checks the total
+
+    private final XorShift r = new XorShift(); // constant-seeded generator, so every run sees the same sequence
+
+    private final long increment(long ctr) { // overload chosen by every call in this class (all pass a long)
+        return ctr + 1;
+    }
+
+    private final int increment(int ctr) { // NOTE(review): no call in this class passes an int, so this overload is unused
+        return ctr + 1;
+    }
+
+    private final long testIntSignedBranch(long counter) { // counts draws whose low 32 bits are negative as an int
+        if ((int)r.nextLong() < 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongSignedBranch(long counter) { // counts draws that are negative as a long
+        if (r.nextLong() < 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testIntBitBranch(long counter) { // two independent draws, each tested on bit 27 of the int value
+        if (((int)r.nextLong() & (1 << 27)) != 0) {
+            counter = increment(counter);
+        }
+        if (((int)r.nextLong() & (1 << 27)) != 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongBitBranch(long counter) { // two independent draws, each tested on bit 50 of the long value
+        if ((r.nextLong() & (1l << 50)) != 0) {
+            counter = increment(counter);
+        }
+        if ((r.nextLong() & (1l << 50)) != 0) {
+            counter = increment(counter);
+        }
+        return counter;
+    }
+
+    private final long testLongMaskBranch(long counter) { // constant long mask with one bit set (bit 35)
+        if (((r.nextLong() & 0x0800000000l) != 0)) {
+            counter++;
+        }
+       return counter;
+    }
+
+    private final long testIntMaskBranch(long counter) { // constant int mask with one bit set (bit 3)
+        if ((((int)r.nextLong() & 0x08) != 0)) {
+            counter++;
+        }
+        return counter;
+    }
+
+    private final long testLongMaskBranch(long counter, long mask) { // mask arrives as an argument rather than a literal
+        if (((r.nextLong() & mask) != 0)) {
+            counter++;
+        }
+       return counter;
+    }
+
+    private final long testIntMaskBranch(long counter, int mask) { // mask arrives as an argument rather than a literal
+        if ((((int)r.nextLong() & mask) != 0)) {
+            counter++;
+        }
+        return counter;
+    }
+
+    private final long step(long counter) { // one round: call order fixes which draw feeds which test, so it must not change
+        counter = testIntSignedBranch(counter);
+        counter = testLongSignedBranch(counter);
+        counter = testIntBitBranch(counter);
+        counter = testLongBitBranch(counter);
+        counter = testIntMaskBranch(counter);
+        counter = testLongMaskBranch(counter);
+        counter = testIntMaskBranch(counter, 0x8000);
+        counter = testLongMaskBranch(counter, 0x800000000l);
+        return counter;
+    }
+
+    // NOTE(review): neither finalBits nor bits is read by any method in this class.
+    private final long finalBits = 3;
+
+    private long bits = 7;
+
+    public static void main(String[] args) {
+        BitTests t = new BitTests();
+
+        long counter = 0;
+        for (int i = 0; i < 10000000; i++) {
+            counter = t.step((int) counter); // narrowed to int on each round; the expected total below fits in an int
+        }
+        if (counter != 50001495) { // reference total for this generator seed and call order
+            System.err.println("FAILED: counter = " + counter + ", should be 50001495.");
+            System.exit(97); // non-zero exit status reports the failure to the test harness
+        }
+        System.out.println("PASSED");
+    }
+
+}
+
+// Marsaglia's xor-shift generator, used here because it is
+// reproducible across all Java implementations.  It is also very
+// fast.
+class XorShift { // package-private helper generator used by BitTests
+
+    private long y; // generator state, updated on every nextLong() call
+
+    XorShift() {
+        y = 2463534242l; // fixed seed: the sequence is identical on every run
+    }
+
+    public long nextLong() {
+        y ^= (y << 13); // step 1: xor with the state shifted left by 13
+        y ^= (y >>> 17); // step 2: xor with the state shifted right (unsigned) by 17
+        return (y ^= (y << 5)); // step 3: xor with the state shifted left by 5; the new state is also the result
+
+    }
+}