8016470: AllocatePrefetchDistance is not changed by command line
author rraghavan
Mon, 19 Jun 2017 01:23:58 -0700
changeset 46547 e1b926a0b23f
parent 46546 4dba7f5446f3
child 46548 2d4e14c79154
child 46549 744e7a498dac
8016470: AllocatePrefetchDistance is not changed by command line
Summary: Values from command line given preference for related flags, over default values based on platform
Reviewed-by: thartmann, kvn
hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp
hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp
hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp
hotspot/src/cpu/x86/vm/vm_version_x86.cpp
hotspot/src/cpu/x86/vm/vm_version_x86.hpp
--- a/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_32.cpp	Mon Jun 19 01:23:58 2017 -0700
@@ -256,7 +256,9 @@
     }
   }
 
-  AllocatePrefetchDistance = 128;
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128);
+  }
 
 #ifdef COMPILER2
   FLAG_SET_DEFAULT(UseFPUForSpilling, true);
--- a/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/arm/vm/vm_version_arm_64.cpp	Mon Jun 19 01:23:58 2017 -0700
@@ -201,7 +201,9 @@
     }
   }
 
-  AllocatePrefetchDistance = 128;
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 128);
+  }
 
 #ifdef COMPILER2
   FLAG_SET_DEFAULT(UseFPUForSpilling, true);
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Jun 19 01:23:58 2017 -0700
@@ -37,23 +37,40 @@
   assert(_features != 0, "System pre-initialization is not complete.");
   guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
 
-  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
-  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
-  PrefetchFieldsAhead         = prefetch_fields_ahead();
+  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
+    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, prefetch_copy_interval_in_bytes());
+  }
+  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
+    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, prefetch_scan_interval_in_bytes());
+  }
+  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
+    FLAG_SET_DEFAULT(PrefetchFieldsAhead, prefetch_fields_ahead());
+  }
 
   // Allocation prefetch settings
   intx cache_line_size = prefetch_data_size();
-  if( cache_line_size > AllocatePrefetchStepSize )
-    AllocatePrefetchStepSize = cache_line_size;
+  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
+      (cache_line_size > AllocatePrefetchStepSize)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+  }
 
-  AllocatePrefetchDistance = allocate_prefetch_distance();
-  AllocatePrefetchStyle    = allocate_prefetch_style();
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 512);
+  }
 
-  if (!has_blk_init() || cache_line_size <= 0) {
-    if (AllocatePrefetchInstr == 1) {
+  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
+    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
+    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+      warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
+    }
+    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+  }
+
+  if ((AllocatePrefetchInstr == 1) && (!has_blk_init() || cache_line_size <= 0)) {
+    if (!FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
       warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
-      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
     }
+    FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
   }
 
   UseSSE = 0; // Only on x86 and x64
@@ -121,9 +138,10 @@
       }
     }
 
-    if (AllocatePrefetchInstr == 1) {
-      // Use allocation prefetch style 3 because BIS instructions
-      // require aligned memory addresses.
+    if ((AllocatePrefetchInstr == 1) && (AllocatePrefetchStyle != 3)) {
+      if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+        warning("AllocatePrefetchStyle set to 3 because BIS instructions require aligned memory addresses");
+      }
       FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
     }
 #endif /* COMPILER2 */
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Jun 19 01:23:58 2017 -0700
@@ -180,30 +180,13 @@
 
   // Prefetch
   static intx prefetch_copy_interval_in_bytes() {
-    intx interval = PrefetchCopyIntervalInBytes;
-    return interval >= 0 ? interval : (has_v9() ? 512 : 0);
+    return (has_v9() ? 512 : 0);
   }
   static intx prefetch_scan_interval_in_bytes() {
-    intx interval = PrefetchScanIntervalInBytes;
-    return interval >= 0 ? interval : (has_v9() ? 512 : 0);
+    return (has_v9() ? 512 : 0);
   }
   static intx prefetch_fields_ahead() {
-    intx count = PrefetchFieldsAhead;
-    return count >= 0 ? count : (is_ultra3() ? 1 : 0);
-  }
-
-  static intx allocate_prefetch_distance() {
-    // This method should be called before allocate_prefetch_style().
-    intx count = AllocatePrefetchDistance;
-    if (count < 0) { // default is not defined ?
-      count = 512;
-    }
-    return count;
-  }
-  static intx allocate_prefetch_style() {
-    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
-    // Return 0 if AllocatePrefetchDistance was not defined.
-    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
+    return (is_ultra3() ? 1 : 0);
   }
 
   // Assembler testing
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Mon Jun 19 01:23:58 2017 -0700
@@ -1103,18 +1103,18 @@
     if ( cpu_family() == 0x15 ) {
       // On family 15h processors default is no sw prefetch
       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
-        AllocatePrefetchStyle = 0;
+        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
       }
       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
-        AllocatePrefetchInstr = 3;
+        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
       }
       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
-        UseXMMForArrayCopy = true;
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
       }
       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
-        UseUnalignedLoadStores = true;
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
       }
     }
 
@@ -1195,7 +1195,7 @@
       }
     }
     if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
-      AllocatePrefetchInstr = 3;
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
     }
   }
 
@@ -1291,45 +1291,68 @@
   }
 #endif // COMPILER2
 
-  if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
-  if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
+  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
+    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
+    } else if (!supports_sse() && supports_3dnow_prefetch()) {
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
+    }
+  }
 
   // Allocation prefetch settings
   intx cache_line_size = prefetch_data_size();
-  if( cache_line_size > AllocatePrefetchStepSize )
-    AllocatePrefetchStepSize = cache_line_size;
+  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
+      (cache_line_size > AllocatePrefetchStepSize)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
+  }
 
-  AllocatePrefetchDistance = allocate_prefetch_distance();
-  AllocatePrefetchStyle    = allocate_prefetch_style();
+  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
+    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
+    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+      warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
+    }
+    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+  }
+
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
+  }
 
   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
-    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
-#ifdef _LP64
-      AllocatePrefetchDistance = 384;
-#else
-      AllocatePrefetchDistance = 320;
-#endif
-    }
-    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
-      AllocatePrefetchDistance = 192;
-      if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
-        FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
-      }
+    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
+        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
     }
 #ifdef COMPILER2
-    if (supports_sse4_2()) {
-      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
-        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
-      }
+    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
+      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
     }
 #endif
   }
 
 #ifdef _LP64
   // Prefetch settings
-  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
-  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
-  PrefetchFieldsAhead         = prefetch_fields_ahead();
+
+  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
+  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
+  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
+  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
+
+  // gc copy/scan is disabled if prefetchw isn't supported, because
+  // Prefetch::write emits an inlined prefetchw on Linux.
+  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
+  // The used prefetcht0 instruction works for both amd64 and em64t.
+
+  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
+    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
+  }
+  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
+    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
+  }
+  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
+    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
+  }
 #endif
 
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Fri Jun 16 12:06:31 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Mon Jun 19 01:23:58 2017 -0700
@@ -782,9 +782,7 @@
 
   static bool supports_compare_and_exchange() { return true; }
 
-  static intx allocate_prefetch_distance() {
-    // This method should be called before allocate_prefetch_style().
-    //
+  static intx allocate_prefetch_distance(bool use_watermark_prefetch) {
     // Hardware prefetching (distance/size in bytes):
     // Pentium 3 -  64 /  32
     // Pentium 4 - 256 / 128
@@ -800,58 +798,34 @@
     // Core      - 256 / prefetchnta
     // It will be used only when AllocatePrefetchStyle > 0
 
-    intx count = AllocatePrefetchDistance;
-    if (count < 0) {   // default ?
-      if (is_amd()) {  // AMD
-        if (supports_sse2())
-          count = 256; // Opteron
-        else
-          count = 128; // Athlon
-      } else {         // Intel
-        if (supports_sse2())
-          if (cpu_family() == 6) {
-            count = 256; // Pentium M, Core, Core2
-          } else {
-            count = 512; // Pentium 4
-          }
-        else
-          count = 128; // Pentium 3 (and all other old CPUs)
+    if (is_amd()) { // AMD
+      if (supports_sse2()) {
+        return 256; // Opteron
+      } else {
+        return 128; // Athlon
+      }
+    } else { // Intel
+      if (supports_sse3() && cpu_family() == 6) {
+        if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+          return 192;
+        } else if (use_watermark_prefetch) { // watermark prefetching on Core
+#ifdef _LP64
+          return 384;
+#else
+          return 320;
+#endif
+        }
+      }
+      if (supports_sse2()) {
+        if (cpu_family() == 6) {
+          return 256; // Pentium M, Core, Core2
+        } else {
+          return 512; // Pentium 4
+        }
+      } else {
+        return 128; // Pentium 3 (and all other old CPUs)
       }
     }
-    return count;
-  }
-  static intx allocate_prefetch_style() {
-    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
-    // Return 0 if AllocatePrefetchDistance was not defined.
-    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
-  }
-
-  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
-  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
-  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
-  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
-
-  // gc copy/scan is disabled if prefetchw isn't supported, because
-  // Prefetch::write emits an inlined prefetchw on Linux.
-  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
-  // The used prefetcht0 instruction works for both amd64 and em64t.
-  static intx prefetch_copy_interval_in_bytes() {
-    intx interval = PrefetchCopyIntervalInBytes;
-    return interval >= 0 ? interval : 576;
-  }
-  static intx prefetch_scan_interval_in_bytes() {
-    intx interval = PrefetchScanIntervalInBytes;
-    return interval >= 0 ? interval : 576;
-  }
-  static intx prefetch_fields_ahead() {
-    intx count = PrefetchFieldsAhead;
-    return count >= 0 ? count : 1;
-  }
-  static uint32_t get_xsave_header_lower_segment() {
-    return _cpuid_info.xem_xcr0_eax.value;
-  }
-  static uint32_t get_xsave_header_upper_segment() {
-    return _cpuid_info.xem_xcr0_edx;
   }
 
   // SSE2 and later processors implement a 'pause' instruction