8157841: aarch64: prefetch ignores cache line size
Summary: fix prefetch to take account of cache line size
Reviewed-by: aph
Contributed-by: stuart.monteith@linaro.org, edward.nevill@linaro.org
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed May 25 13:30:07 2016 +0000
@@ -4743,7 +4743,8 @@
__ br(Assembler::EQ, cont);
} else {
Label retry_load;
- __ prfm(Address(oop), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ __ prfm(Address(oop), PSTL1STRM);
__ bind(retry_load);
__ ldaxr(tmp, oop);
__ cmp(tmp, disp_hdr);
@@ -4798,7 +4799,8 @@
__ cmp(rscratch1, disp_hdr);
} else {
Label retry_load, fail;
- __ prfm(Address(tmp), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ __ prfm(Address(tmp), PSTL1STRM);
__ bind(retry_load);
__ ldaxr(rscratch1, tmp);
__ cmp(disp_hdr, rscratch1);
@@ -4892,7 +4894,8 @@
__ cmp(tmp, box);
} else {
Label retry_load;
- __ prfm(Address(oop), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ __ prfm(Address(oop), PSTL1STRM);
__ bind(retry_load);
__ ldxr(tmp, oop);
__ cmp(box, tmp);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed May 25 13:30:07 2016 +0000
@@ -1643,7 +1643,8 @@
return;
}
Label retry_load;
- prfm(Address(counter_addr), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ prfm(Address(counter_addr), PSTL1STRM);
bind(retry_load);
// flush and load exclusive from the memory location
ldxrw(tmp, counter_addr);
@@ -2084,7 +2085,8 @@
membar(AnyAny);
} else {
Label retry_load, nope;
- prfm(Address(addr), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ prfm(Address(addr), PSTL1STRM);
bind(retry_load);
// flush and load exclusive from the memory location
// and fail if it is not what we expect
@@ -2120,7 +2122,8 @@
membar(AnyAny);
} else {
Label retry_load, nope;
- prfm(Address(addr), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ prfm(Address(addr), PSTL1STRM);
bind(retry_load);
// flush and load exclusive from the memory location
// and fail if it is not what we expect
@@ -2155,7 +2158,8 @@
} else {
BLOCK_COMMENT("cmpxchg {");
Label retry_load, done;
- prfm(Address(addr), PSTL1STRM);
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
+ prfm(Address(addr), PSTL1STRM);
bind(retry_load);
load_exclusive(tmp, addr, size, acquire);
if (size == xword)
@@ -2194,7 +2198,8 @@
result = different(prev, incr, addr) ? prev : rscratch2; \
\
Label retry_load; \
- prfm(Address(addr), PSTL1STRM); \
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) \
+ prfm(Address(addr), PSTL1STRM); \
bind(retry_load); \
LDXR(result, addr); \
OP(rscratch1, result, incr); \
@@ -2224,7 +2229,8 @@
result = different(prev, newv, addr) ? prev : rscratch2; \
\
Label retry_load; \
- prfm(Address(addr), PSTL1STRM); \
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) \
+ prfm(Address(addr), PSTL1STRM); \
bind(retry_load); \
LDXR(result, addr); \
STXR(rscratch1, newv, addr); \
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed May 25 13:30:07 2016 +0000
@@ -545,6 +545,15 @@
mrs(0b011, 0b0000, 0b0000, 0b111, reg);
}
+ // Emit an mrs that reads system register CTR_EL0 into reg. Encoding: op1 == 011,
+ // CRn == 0000,
+ // CRm == 0000,
+ // op2 == 001 (arguments below are passed in this same order, followed by reg)
+ inline void get_ctr_el0(Register reg)
+ {
+ mrs(0b011, 0b0000, 0b0000, 0b001, reg);
+ }
+
// idiv variant which deals with MINLONG as dividend and -1 as divisor
int corrected_idivl(Register result, Register ra, Register rb,
bool want_remainder, Register tmp = rscratch1);
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Wed May 25 13:30:07 2016 +0000
@@ -105,6 +105,9 @@
__ get_dczid_el0(rscratch1);
__ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset())));
+ __ get_ctr_el0(rscratch1);
+ __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset())));
+
__ leave();
__ ret(lr);
@@ -124,16 +127,20 @@
getPsrInfo_stub(&_psr_info);
+ int dcache_line = VM_Version::dcache_line_size();
+
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance))
- FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 3*dcache_line);
if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize))
- FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64);
- FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256);
- FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256);
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line);
+ if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes))
+ FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line);
if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes))
- FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256);
- if ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768)) {
- warning("PrefetchCopyIntervalInBytes must be a multiple of 8 and < 32768");
+ FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line);
+
+ if (PrefetchCopyIntervalInBytes != -1 &&
+ ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
+ warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
PrefetchCopyIntervalInBytes &= ~7;
if (PrefetchCopyIntervalInBytes >= 32768)
PrefetchCopyIntervalInBytes = 32760;
@@ -170,6 +177,7 @@
// Enable vendor specific features
if (_cpu == CPU_CAVIUM && _variant == 0) _features |= CPU_DMB_ATOMICS;
if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
+ if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
// If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
// we assume the worst and assume we could be on a big little system and have
// undisclosed A53 cores which we could be swapped to at any stage
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp Wed May 25 15:05:26 2016 +0000
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp Wed May 25 13:30:07 2016 +0000
@@ -42,6 +42,7 @@
struct PsrInfo {
uint32_t dczid_el0;
+ uint32_t ctr_el0;
};
static PsrInfo _psr_info;
static void get_processor_features();
@@ -78,6 +79,7 @@
CPU_SHA2 = (1<<6),
CPU_CRC32 = (1<<7),
CPU_LSE = (1<<8),
+ CPU_STXR_PREFETCH= (1 << 29),
CPU_A53MAC = (1 << 30),
CPU_DMB_ATOMICS = (1 << 31),
};
@@ -88,6 +90,7 @@
static int cpu_variant() { return _variant; }
static int cpu_revision() { return _revision; }
static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
+ static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); }
static bool is_zva_enabled() {
// Check the DZP bit (bit 4) of dczid_el0 is zero
// and block size (bit 0~3) is not zero.
@@ -98,6 +101,12 @@
assert(is_zva_enabled(), "ZVA not available");
return 4 << (_psr_info.dczid_el0 & 0xf);
}
+ static int icache_line_size() { // decoded from the low 4 bits of the saved ctr_el0 value
+ return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4; // 2^field scaled by 4; presumably words-to-bytes -- confirm against the CTR_EL0 IminLine definition
+ }
+ static int dcache_line_size() { // decoded from bits 16..19 of the saved ctr_el0 value
+ return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; // 2^field scaled by 4; presumably words-to-bytes -- confirm against the CTR_EL0 DminLine definition
+ }
};
#endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP