8185786: AArch64: disable some address reshapings.
author njian
Wed, 16 Aug 2017 14:48:41 +0800
changeset 46954 6ad56f307810
parent 46953 39063b484ec2
child 46957 1a5279da09ee
8185786: AArch64: disable some address reshapings.
Summary: LoadS/LoadUS's address reshapings are disabled on Arm Cortex-A family for performance.
Reviewed-by: adinn, aph
Contributed-by: zhongwei.yao@linaro.org
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Aug 23 10:25:25 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Aug 16 14:48:41 2017 +0800
@@ -3806,15 +3806,24 @@
       // Any use that can't embed the address computation?
       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
         Node* u = addp->fast_out(i);
-        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+        if (!u->is_Mem()) {
+          return;
+        }
+        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
           return;
         }
+        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
+          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
+          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
+            return;
+          }
+        }
       }
-      
+
       Node* off = addp->in(AddPNode::Offset);
       Node* addr2 = addp2->in(AddPNode::Address);
       Node* base = addp->in(AddPNode::Base);
-      
+
       Node* new_addr = NULL;
       // Check whether the graph already has the new AddP we need
       // before we create one (no GVN available here).
@@ -3828,7 +3837,7 @@
           break;
         }
       }
-      
+
       if (new_addr == NULL) {
         new_addr = new AddPNode(base, addr2, off);
       }
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed Aug 23 10:25:25 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed Aug 16 14:48:41 2017 +0800
@@ -56,6 +56,17 @@
   static void assert_is_initialized() {
   }
 
+  static bool expensive_load(int ld_size, int scale) {
+    if (cpu_family() == CPU_ARM) {
+      // A half-word load whose index is shifted by 1 (i.e. scale == 2)
+      // has an extra cycle of latency, e.g. ldrsh w0, [x1,w2,sxtw #1].
+      if (ld_size == 2 && scale == 2) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   enum Family {
     CPU_ARM       = 'A',
     CPU_BROADCOM  = 'B',