8157841: aarch64: prefetch ignores cache line size
author enevill
Wed, 25 May 2016 13:30:07 +0000
changeset 38714 170464570e45
parent 38713 4a16e9ea88a0
child 38715 f8a3a82dfaf0
8157841: aarch64: prefetch ignores cache line size
Summary: fix prefetch to take account of cache line size
Reviewed-by: aph
Contributed-by: stuart.monteith@linaro.org, edward.nevill@linaro.org
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed May 25 13:30:07 2016 +0000
@@ -4743,7 +4743,8 @@
       __ br(Assembler::EQ, cont);
     } else {
       Label retry_load;
-      __ prfm(Address(oop), PSTL1STRM);
+      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+        __ prfm(Address(oop), PSTL1STRM);
       __ bind(retry_load);
       __ ldaxr(tmp, oop);
       __ cmp(tmp, disp_hdr);
@@ -4798,7 +4799,8 @@
         __ cmp(rscratch1, disp_hdr);
       } else {
         Label retry_load, fail;
-        __ prfm(Address(tmp), PSTL1STRM);
+        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(tmp), PSTL1STRM);
         __ bind(retry_load);
         __ ldaxr(rscratch1, tmp);
         __ cmp(disp_hdr, rscratch1);
@@ -4892,7 +4894,8 @@
         __ cmp(tmp, box);
       } else {
         Label retry_load;
-        __ prfm(Address(oop), PSTL1STRM);
+        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+          __ prfm(Address(oop), PSTL1STRM);
         __ bind(retry_load);
         __ ldxr(tmp, oop);
         __ cmp(box, tmp);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed May 25 13:30:07 2016 +0000
@@ -1643,7 +1643,8 @@
     return;
   }
   Label retry_load;
-  prfm(Address(counter_addr), PSTL1STRM);
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+    prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
   // flush and load exclusive from the memory location
   ldxrw(tmp, counter_addr);
@@ -2084,7 +2085,8 @@
     membar(AnyAny);
   } else {
     Label retry_load, nope;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     // flush and load exclusive from the memory location
     // and fail if it is not what we expect
@@ -2120,7 +2122,8 @@
     membar(AnyAny);
   } else {
     Label retry_load, nope;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     // flush and load exclusive from the memory location
     // and fail if it is not what we expect
@@ -2155,7 +2158,8 @@
   } else {
     BLOCK_COMMENT("cmpxchg {");
     Label retry_load, done;
-    prfm(Address(addr), PSTL1STRM);
+    if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+      prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
     load_exclusive(tmp, addr, size, acquire);
     if (size == xword)
@@ -2194,7 +2198,8 @@
     result = different(prev, incr, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
-  prfm(Address(addr), PSTL1STRM);                                       \
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   OP(rscratch1, result, incr);                                          \
@@ -2224,7 +2229,8 @@
     result = different(prev, newv, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
-  prfm(Address(addr), PSTL1STRM);                                       \
+  if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))         \
+    prfm(Address(addr), PSTL1STRM);                                     \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   STXR(rscratch1, newv, addr);                                          \
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed May 25 13:30:07 2016 +0000
@@ -545,6 +545,15 @@
     mrs(0b011, 0b0000, 0b0000, 0b111, reg);
   }
 
+  // Read CTR_EL0 into reg via mrs.  Encoding: op1 == 011
+  //                                           CRn == 0000
+  //                                           CRm == 0000
+  //                                           op2 == 001
+  inline void get_ctr_el0(Register reg)
+  {
+    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
+  }
+
   // idiv variant which deals with MINLONG as dividend and -1 as divisor
   int corrected_idivl(Register result, Register ra, Register rb,
                       bool want_remainder, Register tmp = rscratch1);
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed May 25 13:30:07 2016 +0000
@@ -105,6 +105,9 @@
     __ get_dczid_el0(rscratch1);
     __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset())));
 
+    __ get_ctr_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset())));
+
     __ leave();
     __ ret(lr);
 
@@ -124,16 +127,20 @@
 
   getPsrInfo_stub(&_psr_info);
 
+  int dcache_line = VM_Version::dcache_line_size();
+
   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance))
-    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 3*dcache_line);
   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize))
-    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64);
-  FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256);
-  FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256);
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line);
+  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes))
+    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line);
   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes))
-    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256);
-  if ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768)) {
-    warning("PrefetchCopyIntervalInBytes must be a multiple of 8 and < 32768");
+    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line);
+
+  if (PrefetchCopyIntervalInBytes != -1 &&
+       ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
+    warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
     PrefetchCopyIntervalInBytes &= ~7;
     if (PrefetchCopyIntervalInBytes >= 32768)
       PrefetchCopyIntervalInBytes = 32760;
@@ -170,6 +177,7 @@
   // Enable vendor specific features
   if (_cpu == CPU_CAVIUM && _variant == 0) _features |= CPU_DMB_ATOMICS;
   if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
+  if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
   // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
   // we assume the worst and assume we could be on a big little system and have
   // undisclosed A53 cores which we could be swapped to at any stage
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed May 25 13:30:07 2016 +0000
@@ -42,6 +42,7 @@
 
   struct PsrInfo {
     uint32_t dczid_el0;
+    uint32_t ctr_el0;  // low 32 bits of CTR_EL0, stored by the stub through ctr_el0_offset()
   };
   static PsrInfo _psr_info;
   static void get_processor_features();
@@ -78,6 +79,7 @@
     CPU_SHA2         = (1<<6),
     CPU_CRC32        = (1<<7),
     CPU_LSE          = (1<<8),
+    CPU_STXR_PREFETCH= (1 << 29),
     CPU_A53MAC       = (1 << 30),
     CPU_DMB_ATOMICS  = (1 << 31),
   };
@@ -88,6 +90,7 @@
   static int cpu_variant()                    { return _variant; }
   static int cpu_revision()                   { return _revision; }
   static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
+  static ByteSize ctr_el0_offset()   { return byte_offset_of(PsrInfo, ctr_el0); }
   static bool is_zva_enabled() {
     // Check the DZP bit (bit 4) of dczid_el0 is zero
     // and block size (bit 0~3) is not zero.
@@ -98,6 +101,12 @@
     assert(is_zva_enabled(), "ZVA not available");
     return 4 << (_psr_info.dczid_el0 & 0xf);
   }
+  static int icache_line_size() {
+    return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4;  // 4 * 2^ctr_el0[3:0]; presumably bytes (field looks like log2 of 4-byte words, i.e. IminLine) - confirm
+  }
+  static int dcache_line_size() {
+    return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4;  // 4 * 2^ctr_el0[19:16]; presumably bytes (field looks like log2 of 4-byte words, i.e. DminLine) - confirm
+  }
 };
 
 #endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP