8210413: AArch64: Optimize div/rem by constant in C1
authorfyang
Wed, 26 Sep 2018 06:26:54 +0800
changeset 51875 e1368526699d
parent 51874 a0426bc28519
child 51876 d2b381ea8477
8210413: AArch64: Optimize div/rem by constant in C1 Summary: Remove div-by-zero check for non-zero divisor and generate cheap instructions if divisor is power-of-2 Reviewed-by: aph Contributed-by: pengfei.li@arm.com
src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Tue Sep 25 13:57:24 2018 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Wed Sep 26 06:26:54 2018 +0800
@@ -1025,37 +1025,17 @@
   return exact_log2(elem_size);
 }
 
-void LIR_Assembler::arithmetic_idiv(LIR_Op3* op, bool is_irem) {
-  Register Rdividend = op->in_opr1()->as_register();
-  Register Rdivisor  = op->in_opr2()->as_register();
-  Register Rscratch  = op->in_opr3()->as_register();
-  Register Rresult   = op->result_opr()->as_register();
-  int divisor = -1;
-
-  /*
-  TODO: For some reason, using the Rscratch that gets passed in is
-  not possible because the register allocator does not see the tmp reg
-  as used, and assignes it the same register as Rdividend. We use rscratch1
-   instead.
-
-  assert(Rdividend != Rscratch, "");
-  assert(Rdivisor  != Rscratch, "");
-  */
-
-  if (Rdivisor == noreg && is_power_of_2(divisor)) {
-    // convert division by a power of two into some shifts and logical operations
-  }
-
-  __ corrected_idivl(Rresult, Rdividend, Rdivisor, is_irem, rscratch1);
-}
 
 void LIR_Assembler::emit_op3(LIR_Op3* op) {
   switch (op->code()) {
   case lir_idiv:
-    arithmetic_idiv(op, false);
-    break;
   case lir_irem:
-    arithmetic_idiv(op, true);
+    arithmetic_idiv(op->code(),
+                    op->in_opr1(),
+                    op->in_opr2(),
+                    op->in_opr3(),
+                    op->result_opr(),
+                    op->info());
     break;
   case lir_fmad:
     __ fmaddd(op->result_opr()->as_double_reg(),
@@ -1752,16 +1732,43 @@
       }
 
     } else if (right->is_constant()) {
-      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      jlong c = right->as_constant_ptr()->as_jlong();
       Register dreg = as_reg(dest);
-      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
-      if (c == 0 && dreg == lreg_lo) {
-        COMMENT("effective nop elided");
-        return;
-      }
       switch (code) {
-        case lir_add: __ add(dreg, lreg_lo, c); break;
-        case lir_sub: __ sub(dreg, lreg_lo, c); break;
+        case lir_add:
+        case lir_sub:
+          if (c == 0 && dreg == lreg_lo) {
+            COMMENT("effective nop elided");
+            return;
+          }
+          code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c);
+          break;
+        case lir_div:
+          assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+          if (c == 1) {
+            // move lreg_lo to dreg if divisor is 1
+            __ mov(dreg, lreg_lo);
+          } else {
+            unsigned int shift = exact_log2_long(c);
+            // use rscratch1 as intermediate result register
+            __ asr(rscratch1, lreg_lo, 63);
+            __ add(rscratch1, lreg_lo, rscratch1, Assembler::LSR, 64 - shift);
+            __ asr(dreg, rscratch1, shift);
+          }
+          break;
+        case lir_rem:
+          assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+          if (c == 1) {
+            // move 0 to dreg if divisor is 1
+            __ mov(dreg, zr);
+          } else {
+            // use rscratch1 as intermediate result register
+            __ negs(rscratch1, lreg_lo);
+            __ andr(dreg, lreg_lo, c - 1);
+            __ andr(rscratch1, rscratch1, c - 1);
+            __ csneg(dreg, dreg, rscratch1, Assembler::MI);
+          }
+          break;
         default:
           ShouldNotReachHere();
       }
@@ -1862,7 +1869,51 @@
 
 
 
-void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); }
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) {
+
+  // opcode check
+  assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem");
+  bool is_irem = (code == lir_irem);
+
+  // operand check
+  assert(left->is_single_cpu(),   "left must be register");
+  assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
+  assert(result->is_single_cpu(), "result must be register");
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  // power-of-2 constant check and codegen
+  if (right->is_constant()) {
+    int c = right->as_constant_ptr()->as_jint();
+    assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
+    if (is_irem) {
+      if (c == 1) {
+        // move 0 to dreg if divisor is 1
+        __ movw(dreg, zr);
+      } else {
+        // use rscratch1 as intermediate result register
+        __ negsw(rscratch1, lreg);
+        __ andw(dreg, lreg, c - 1);
+        __ andw(rscratch1, rscratch1, c - 1);
+        __ csnegw(dreg, dreg, rscratch1, Assembler::MI);
+      }
+    } else {
+      if (c == 1) {
+        // move lreg to dreg if divisor is 1
+        __ movw(dreg, lreg);
+      } else {
+        unsigned int shift = exact_log2(c);
+        // use rscratch1 as intermediate result register
+        __ asrw(rscratch1, lreg, 31);
+        __ addw(rscratch1, lreg, rscratch1, Assembler::LSR, 32 - shift);
+        __ asrw(dreg, rscratch1, shift);
+      }
+    }
+  } else {
+    Register rreg = right->as_register();
+    __ corrected_idivl(dreg, lreg, rreg, is_irem, rscratch1);
+  }
+}
 
 
 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Tue Sep 25 13:57:24 2018 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Wed Sep 26 06:26:54 2018 +0800
@@ -75,8 +75,6 @@
     _deopt_handler_size = 7 * NativeInstruction::instruction_size
   };
 
-  void arithmetic_idiv(LIR_Op3* op, bool is_irem);
-
 public:
 
   void store_parameter(Register r, int offset_from_esp_in_words);
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Tue Sep 25 13:57:24 2018 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Wed Sep 26 06:26:54 2018 +0800
@@ -440,17 +440,26 @@
 
   if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
 
-    // the check for division by zero destroys the right operand
-    right.set_destroys_register();
-
-    // check for division by zero (destroys registers of right operand!)
-    CodeEmitInfo* info = state_for(x);
-
     left.load_item();
-    right.load_item();
-
-    __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
-    __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jlong c = right.get_jlong_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) need_zero_check = false;
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2_long(c)) {
+        right.dont_load_item();
+      } else {
+        right.load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+    }
 
     rlock_result(x);
     switch (x->op()) {
@@ -506,19 +515,32 @@
   // do not need to load right, as we can handle stack and constants
   if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
 
-    right_arg->load_item();
     rlock_result(x);
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jint c = right.get_jint_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) need_zero_check = false;
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2(c)) {
+        right_arg->dont_load_item();
+      } else {
+        right_arg->load_item();
+      }
+    } else {
+      right_arg->load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+    }
 
-    CodeEmitInfo* info = state_for(x);
-    LIR_Opr tmp = new_register(T_INT);
-    __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
-    __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
-    info = state_for(x);
-
+    LIR_Opr ill = LIR_OprFact::illegalOpr;
     if (x->op() == Bytecodes::_irem) {
-      __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+      __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
     } else if (x->op() == Bytecodes::_idiv) {
-      __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+      __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
     }
 
   } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {