8016470: AllocatePrefetchDistance is not changed by command line
Summary: For the related flags, values given on the command line take precedence over platform-based default values
Reviewed-by: thartmann, kvn
--- a/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp Mon Jun 19 01:23:58 2017 -0700
@@ -256,7 +256,9 @@
}
}
- AllocatePrefetchDistance = 128;
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128);
+ }
#ifdef COMPILER2
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
--- a/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp Mon Jun 19 01:23:58 2017 -0700
@@ -201,7 +201,9 @@
}
}
- AllocatePrefetchDistance = 128;
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128);
+ }
#ifdef COMPILER2
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Jun 19 01:23:58 2017 -0700
@@ -37,23 +37,40 @@
assert(_features != 0, "System pre-initialization is not complete.");
guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
- PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
- PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
- PrefetchFieldsAhead = prefetch_fields_ahead();
+ if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
+ FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, prefetch_copy_interval_in_bytes());
+ }
+ if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
+ FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, prefetch_scan_interval_in_bytes());
+ }
+ if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
+ FLAG_SET_DEFAULT(PrefetchFieldsAhead, prefetch_fields_ahead());
+ }
// Allocation prefetch settings
intx cache_line_size = prefetch_data_size();
- if( cache_line_size > AllocatePrefetchStepSize )
- AllocatePrefetchStepSize = cache_line_size;
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
+ (cache_line_size > AllocatePrefetchStepSize)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ }
- AllocatePrefetchDistance = allocate_prefetch_distance();
- AllocatePrefetchStyle = allocate_prefetch_style();
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 512);
+ }
- if (!has_blk_init() || cache_line_size <= 0) {
- if (AllocatePrefetchInstr == 1) {
+ if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
+ assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
+ if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+ warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
+ }
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+ }
+
+ if ((AllocatePrefetchInstr == 1) && (!has_blk_init() || cache_line_size <= 0)) {
+ if (!FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
- FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
}
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
}
UseSSE = 0; // Only on x86 and x64
@@ -121,9 +138,10 @@
}
}
- if (AllocatePrefetchInstr == 1) {
- // Use allocation prefetch style 3 because BIS instructions
- // require aligned memory addresses.
+ if ((AllocatePrefetchInstr == 1) && (AllocatePrefetchStyle != 3)) {
+ if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+ warning("AllocatePrefetchStyle set to 3 because BIS instructions require aligned memory addresses");
+ }
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
}
#endif /* COMPILER2 */
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Jun 19 01:23:58 2017 -0700
@@ -180,30 +180,13 @@
// Prefetch
static intx prefetch_copy_interval_in_bytes() {
- intx interval = PrefetchCopyIntervalInBytes;
- return interval >= 0 ? interval : (has_v9() ? 512 : 0);
+ return (has_v9() ? 512 : 0);
}
static intx prefetch_scan_interval_in_bytes() {
- intx interval = PrefetchScanIntervalInBytes;
- return interval >= 0 ? interval : (has_v9() ? 512 : 0);
+ return (has_v9() ? 512 : 0);
}
static intx prefetch_fields_ahead() {
- intx count = PrefetchFieldsAhead;
- return count >= 0 ? count : (is_ultra3() ? 1 : 0);
- }
-
- static intx allocate_prefetch_distance() {
- // This method should be called before allocate_prefetch_style().
- intx count = AllocatePrefetchDistance;
- if (count < 0) { // default is not defined ?
- count = 512;
- }
- return count;
- }
- static intx allocate_prefetch_style() {
- assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
- // Return 0 if AllocatePrefetchDistance was not defined.
- return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
+ return (is_ultra3() ? 1 : 0);
}
// Assembler testing
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Jun 19 01:23:58 2017 -0700
@@ -1103,18 +1103,18 @@
if ( cpu_family() == 0x15 ) {
// On family 15h processors default is no sw prefetch
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
- AllocatePrefetchStyle = 0;
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
}
// Also, if some other prefetch style is specified, default instruction type is PREFETCHW
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
- AllocatePrefetchInstr = 3;
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
// On family 15h processors use XMM and UnalignedLoadStores for Array Copy
if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
- UseXMMForArrayCopy = true;
+ FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
}
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
- UseUnalignedLoadStores = true;
+ FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
}
@@ -1195,7 +1195,7 @@
}
}
if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
- AllocatePrefetchInstr = 3;
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
}
@@ -1291,45 +1291,68 @@
}
#endif // COMPILER2
- if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
- if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
+ if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
+ if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
+ } else if (!supports_sse() && supports_3dnow_prefetch()) {
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
+ }
+ }
// Allocation prefetch settings
intx cache_line_size = prefetch_data_size();
- if( cache_line_size > AllocatePrefetchStepSize )
- AllocatePrefetchStepSize = cache_line_size;
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
+ (cache_line_size > AllocatePrefetchStepSize)) {
+ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+ }
- AllocatePrefetchDistance = allocate_prefetch_distance();
- AllocatePrefetchStyle = allocate_prefetch_style();
+ if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
+ assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
+ if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+ warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
+ }
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+ }
+
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+ bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
+ }
if (is_intel() && cpu_family() == 6 && supports_sse3()) {
- if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
-#ifdef _LP64
- AllocatePrefetchDistance = 384;
-#else
- AllocatePrefetchDistance = 320;
-#endif
- }
- if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
- AllocatePrefetchDistance = 192;
- if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
- FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
- }
+ if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
+ supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+ FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
}
#ifdef COMPILER2
- if (supports_sse4_2()) {
- if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
- FLAG_SET_DEFAULT(UseFPUForSpilling, true);
- }
+ if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
#ifdef _LP64
// Prefetch settings
- PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
- PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
- PrefetchFieldsAhead = prefetch_fields_ahead();
+
+ // Prefetch interval for GC copy/scan == 9 dcache lines. Derived from
+ // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
+ // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
+ // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
+
+ // gc copy/scan is disabled if prefetchw isn't supported, because
+ // Prefetch::write emits an inlined prefetchw on Linux.
+ // Do not use the 3DNow! prefetchw instruction. It isn't supported on em64t.
+ // The prefetcht0 instruction that is used works on both amd64 and em64t.
+
+ if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
+ FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
+ }
+ if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
+ FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
+ }
+ if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
+ FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
+ }
#endif
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Mon Jun 19 01:23:58 2017 -0700
@@ -782,9 +782,7 @@
static bool supports_compare_and_exchange() { return true; }
- static intx allocate_prefetch_distance() {
- // This method should be called before allocate_prefetch_style().
- //
+ static intx allocate_prefetch_distance(bool use_watermark_prefetch) {
// Hardware prefetching (distance/size in bytes):
// Pentium 3 - 64 / 32
// Pentium 4 - 256 / 128
@@ -800,58 +798,34 @@
// Core - 256 / prefetchnta
// It will be used only when AllocatePrefetchStyle > 0
- intx count = AllocatePrefetchDistance;
- if (count < 0) { // default ?
- if (is_amd()) { // AMD
- if (supports_sse2())
- count = 256; // Opteron
- else
- count = 128; // Athlon
- } else { // Intel
- if (supports_sse2())
- if (cpu_family() == 6) {
- count = 256; // Pentium M, Core, Core2
- } else {
- count = 512; // Pentium 4
- }
- else
- count = 128; // Pentium 3 (and all other old CPUs)
+ if (is_amd()) { // AMD
+ if (supports_sse2()) {
+ return 256; // Opteron
+ } else {
+ return 128; // Athlon
+ }
+ } else { // Intel
+ if (supports_sse3() && cpu_family() == 6) {
+ if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+ return 192;
+ } else if (use_watermark_prefetch) { // watermark prefetching on Core
+#ifdef _LP64
+ return 384;
+#else
+ return 320;
+#endif
+ }
+ }
+ if (supports_sse2()) {
+ if (cpu_family() == 6) {
+ return 256; // Pentium M, Core, Core2
+ } else {
+ return 512; // Pentium 4
+ }
+ } else {
+ return 128; // Pentium 3 (and all other old CPUs)
}
}
- return count;
- }
- static intx allocate_prefetch_style() {
- assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
- // Return 0 if AllocatePrefetchDistance was not defined.
- return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
- }
-
- // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
- // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
- // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
- // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
-
- // gc copy/scan is disabled if prefetchw isn't supported, because
- // Prefetch::write emits an inlined prefetchw on Linux.
- // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
- // The used prefetcht0 instruction works for both amd64 and em64t.
- static intx prefetch_copy_interval_in_bytes() {
- intx interval = PrefetchCopyIntervalInBytes;
- return interval >= 0 ? interval : 576;
- }
- static intx prefetch_scan_interval_in_bytes() {
- intx interval = PrefetchScanIntervalInBytes;
- return interval >= 0 ? interval : 576;
- }
- static intx prefetch_fields_ahead() {
- intx count = PrefetchFieldsAhead;
- return count >= 0 ? count : 1;
- }
- static uint32_t get_xsave_header_lower_segment() {
- return _cpuid_info.xem_xcr0_eax.value;
- }
- static uint32_t get_xsave_header_upper_segment() {
- return _cpuid_info.xem_xcr0_edx;
}
// SSE2 and later processors implement a 'pause' instruction