# HG changeset patch # User rraghavan # Date 1497860638 25200 # Node ID e1b926a0b23f3d85e8a5c7907ce1811c90343923 # Parent 4dba7f5446f3114d9985a44f7e53f4621acafe80 8016470: AllocatePrefetchDistance is not changed by command line Summary: Values from command line given preference for related flags, over default values based on platform Reviewed-by: thartmann, kvn diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp --- a/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp Mon Jun 19 01:23:58 2017 -0700 @@ -256,7 +256,9 @@ } } - AllocatePrefetchDistance = 128; + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128); + } #ifdef COMPILER2 FLAG_SET_DEFAULT(UseFPUForSpilling, true); diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp --- a/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp Mon Jun 19 01:23:58 2017 -0700 @@ -201,7 +201,9 @@ } } - AllocatePrefetchDistance = 128; + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128); + } #ifdef COMPILER2 FLAG_SET_DEFAULT(UseFPUForSpilling, true); diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Jun 19 01:23:58 2017 -0700 @@ -37,23 +37,40 @@ assert(_features != 0, "System pre-initialization is not complete."); guarantee(VM_Version::has_v9(), "only SPARC v9 is supported"); - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); - PrefetchFieldsAhead = prefetch_fields_ahead(); + if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { + FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, prefetch_copy_interval_in_bytes()); + } + if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { + FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, prefetch_scan_interval_in_bytes()); + } + if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) { + FLAG_SET_DEFAULT(PrefetchFieldsAhead, prefetch_fields_ahead()); + } // Allocation prefetch settings intx cache_line_size = prefetch_data_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && + (cache_line_size > AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 512); + } - if (!has_blk_init() || cache_line_size <= 0) { - if (AllocatePrefetchInstr == 1) { + if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { + assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); + if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); + } + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); + } + + if ((AllocatePrefetchInstr == 1) && (!has_blk_init() || cache_line_size <= 0)) { + if (!FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable"); - FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); } + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); } UseSSE = 0; // Only on x86 and x64 @@ -121,9 +138,10 @@ } } - if (AllocatePrefetchInstr == 1) { - // Use allocation prefetch style 3 because BIS instructions - // require aligned memory addresses. + if ((AllocatePrefetchInstr == 1) && (AllocatePrefetchStyle != 3)) { + if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + warning("AllocatePrefetchStyle set to 3 because BIS instructions require aligned memory addresses"); + } FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); } #endif /* COMPILER2 */ diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Jun 19 01:23:58 2017 -0700 @@ -180,30 +180,13 @@ // Prefetch static intx prefetch_copy_interval_in_bytes() { - intx interval = PrefetchCopyIntervalInBytes; - return interval >= 0 ? interval : (has_v9() ? 512 : 0); + return (has_v9() ? 512 : 0); } static intx prefetch_scan_interval_in_bytes() { - intx interval = PrefetchScanIntervalInBytes; - return interval >= 0 ? interval : (has_v9() ? 512 : 0); + return (has_v9() ? 512 : 0); } static intx prefetch_fields_ahead() { - intx count = PrefetchFieldsAhead; - return count >= 0 ? count : (is_ultra3() ? 1 : 0); - } - - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - intx count = AllocatePrefetchDistance; - if (count < 0) { // default is not defined ? - count = 512; - } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined. - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; + return (is_ultra3() ? 1 : 0); } // Assembler testing diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/x86/vm/vm_version_x86.cpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Mon Jun 19 01:23:58 2017 -0700 @@ -1103,18 +1103,18 @@ if ( cpu_family() == 0x15 ) { // On family 15h processors default is no sw prefetch if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { - AllocatePrefetchStyle = 0; + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); } // Also, if some other prefetch style is specified, default instruction type is PREFETCHW if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { - AllocatePrefetchInstr = 3; + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); } // On family 15h processors use XMM and UnalignedLoadStores for Array Copy if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { - UseXMMForArrayCopy = true; + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); } if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { - UseUnalignedLoadStores = true; + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); } } @@ -1195,7 +1195,7 @@ } } if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { - AllocatePrefetchInstr = 3; + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); } } @@ -1291,45 +1291,68 @@ } #endif // COMPILER2 - if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; - if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; + if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { + if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + } else if (!supports_sse() && supports_3dnow_prefetch()) { + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); + } + } // Allocation prefetch settings intx cache_line_size = prefetch_data_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && + (cache_line_size > AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); + if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { + assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); + if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); + } + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); + FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); + } if (is_intel() && cpu_family() == 6 && supports_sse3()) { - if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core -#ifdef _LP64 - AllocatePrefetchDistance = 384; -#else - AllocatePrefetchDistance = 320; -#endif - } - if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus - AllocatePrefetchDistance = 192; - if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { - FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); - } + if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && + supports_sse4_2() && supports_ht()) { // Nehalem based cpus + FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); } #ifdef COMPILER2 - if (supports_sse4_2()) { - if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { - FLAG_SET_DEFAULT(UseFPUForSpilling, true); - } + if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { + FLAG_SET_DEFAULT(UseFPUForSpilling, true); } #endif } #ifdef _LP64 // Prefetch settings - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); - PrefetchFieldsAhead = prefetch_fields_ahead(); + + // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from + // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. + // Tested intervals from 128 to 2048 in increments of 64 == one cache line. + // 256 bytes (4 dcache lines) was the nearest runner-up to 576. + + // gc copy/scan is disabled if prefetchw isn't supported, because + // Prefetch::write emits an inlined prefetchw on Linux. + // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. + // The used prefetcht0 instruction works for both amd64 and em64t. + + if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { + FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); + } + if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { + FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); + } + if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) { + FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1); + } #endif if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && diff -r 4dba7f5446f3 -r e1b926a0b23f hotspot/src/cpu/x86/vm/vm_version_x86.hpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Fri Jun 16 12:06:31 2017 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp Mon Jun 19 01:23:58 2017 -0700 @@ -782,9 +782,7 @@ static bool supports_compare_and_exchange() { return true; } - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - // + static intx allocate_prefetch_distance(bool use_watermark_prefetch) { // Hardware prefetching (distance/size in bytes): // Pentium 3 - 64 / 32 // Pentium 4 - 256 / 128 @@ -800,58 +798,34 @@ // Core - 256 / prefetchnta // It will be used only when AllocatePrefetchStyle > 0 - intx count = AllocatePrefetchDistance; - if (count < 0) { // default ? - if (is_amd()) { // AMD - if (supports_sse2()) - count = 256; // Opteron - else - count = 128; // Athlon - } else { // Intel - if (supports_sse2()) - if (cpu_family() == 6) { - count = 256; // Pentium M, Core, Core2 - } else { - count = 512; // Pentium 4 - } - else - count = 128; // Pentium 3 (and all other old CPUs) + if (is_amd()) { // AMD + if (supports_sse2()) { + return 256; // Opteron + } else { + return 128; // Athlon + } + } else { // Intel + if (supports_sse3() && cpu_family() == 6) { + if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus + return 192; + } else if (use_watermark_prefetch) { // watermark prefetching on Core +#ifdef _LP64 + return 384; +#else + return 320; +#endif + } + } + if (supports_sse2()) { + if (cpu_family() == 6) { + return 256; // Pentium M, Core, Core2 + } else { + return 512; // Pentium 4 + } + } else { + return 128; // Pentium 3 (and all other old CPUs) } } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined. - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; - } - - // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from - // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. - // Tested intervals from 128 to 2048 in increments of 64 == one cache line. - // 256 bytes (4 dcache lines) was the nearest runner-up to 576. - - // gc copy/scan is disabled if prefetchw isn't supported, because - // Prefetch::write emits an inlined prefetchw on Linux. - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. - // The used prefetcht0 instruction works for both amd64 and em64t. - static intx prefetch_copy_interval_in_bytes() { - intx interval = PrefetchCopyIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_scan_interval_in_bytes() { - intx interval = PrefetchScanIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_fields_ahead() { - intx count = PrefetchFieldsAhead; - return count >= 0 ? count : 1; - } - static uint32_t get_xsave_header_lower_segment() { - return _cpuid_info.xem_xcr0_eax.value; - } - static uint32_t get_xsave_header_upper_segment() { - return _cpuid_info.xem_xcr0_edx; } // SSE2 and later processors implement a 'pause' instruction