8153340: Disallow misconfiguration and improve the consistency of allocation prefetching
author zmajo
Fri, 29 Apr 2016 08:32:42 +0200
changeset 38220 8d86b82e0ac7
parent 38130 7ef594f39eb2
child 38221 0a7813e6b50e
8153340: Disallow misconfiguration and improve the consistency of allocation prefetching Summary: Improve allocation prefetching. Reviewed-by: kvn
hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java
hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp
hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp
hotspot/src/share/vm/opto/macro.cpp
hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/vmStructs.cpp
hotspot/src/share/vm/runtime/vm_version.cpp
hotspot/src/share/vm/runtime/vm_version.hpp
hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -49,9 +49,11 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
-    warning("BIS instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
+  if (!has_blk_init()) {
+    if (AllocatePrefetchInstr == 1) {
+      warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
+    }
   }
 
   UseSSE = 0; // Only on x86 and x64
@@ -88,11 +90,13 @@
       if (has_blk_init() && UseTLAB &&
           FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
         // Use BIS instruction for TLAB allocation prefetch.
-        FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
-        if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
-          FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
-        }
-        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
+      }
+      if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        if (AllocatePrefetchInstr == 0) {
+          // Use different prefetch distance without BIS
+          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+        } else {
           // Use smaller prefetch distance with BIS
           FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
         }
@@ -107,25 +111,14 @@
           FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
         }
       }
-      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-        // Use different prefetch distance without BIS
-        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
-      }
-      if (AllocatePrefetchInstr == 1) {
-        // Need extra space at the end of TLAB for BIS, otherwise prefetching
-        // instructions will fault (due to accessing memory outside of heap).
-        // The amount of space is the max of the number of lines to
-        // prefetch for array and for instance allocations. (Extra space must be
-        // reserved to accomodate both types of allocations.)
+    }
 
-        // +1 for rounding up to next cache line, +1 to be safe
-        int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
-        int step_size = AllocatePrefetchStepSize;
-        int distance = AllocatePrefetchDistance;
-        _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
-      }
+    if (AllocatePrefetchInstr == 1) {
+      // Use allocation prefetch style 3 because BIS instructions
+      // require aligned memory addresses.
+      FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
     }
-#endif
+#endif /* COMPILER2 */
   }
 
   // Use hardware population count instruction if available.
--- a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Apr 29 08:32:42 2016 +0200
@@ -324,8 +324,9 @@
        Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue();
        vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr);
 
+       Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer");
        CIntegerType intType = (CIntegerType) db.lookupType("int");
-       CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch");
+       CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch");
        reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType);
     } catch (Exception exp) {
        throw new RuntimeException("can't determine target's VM version : " + exp.getMessage());
--- a/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -36,6 +36,7 @@
 
 // static member initialization
 size_t           ThreadLocalAllocBuffer::_max_size       = 0;
+int              ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0; // in heap words; computed later in this file from the AllocatePrefetch* flags
 unsigned         ThreadLocalAllocBuffer::_target_refills = 0;
 GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats   = NULL;
 
@@ -215,6 +216,23 @@
 
   _global_stats = new GlobalTLABStats();
 
+  // Need extra space at the end of TLAB, otherwise prefetching
+  // instructions will fault (due to accessing memory outside of heap).
+  // The amount of space is the max of the number of lines to
+  // prefetch for array and for instance allocations. (Extra space must be
+  // reserved to accommodate both types of allocations.)
+  //
+  // Only SPARC-specific BIS instructions are known to fault. (Those
+  // instructions are generated if AllocatePrefetchStyle==3 and
+  // AllocatePrefetchInstr==1). To be on the safe side, however,
+  // extra space is reserved for all combinations of
+  // AllocatePrefetchStyle and AllocatePrefetchInstr.
+
+  // +1 for rounding up to next cache line, +1 to be safe
+  int lines =  MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
+  _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
+                                     (int)HeapWordSize;
+
   // During jvm startup, the main (primordial) thread is initialized
   // before the heap is initialized.  So reinitialize it now.
   guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
--- a/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp	Fri Apr 29 08:32:42 2016 +0200
@@ -49,8 +49,9 @@
   size_t    _refill_waste_limit;                 // hold onto tlab if free() is larger than this
   size_t    _allocated_before_last_gc;           // total bytes allocated up until the last gc
 
-  static size_t   _max_size;                     // maximum size of any TLAB
-  static unsigned _target_refills;               // expected number of refills between GCs
+  static size_t   _max_size;                          // maximum size of any TLAB
+  static int      _reserve_for_allocation_prefetch;   // Reserve at the end of the TLAB
+  static unsigned _target_refills;                    // expected number of refills between GCs
 
   unsigned  _number_of_refills;
   unsigned  _fast_refill_waste;
@@ -129,7 +130,7 @@
   // Reserve space at the end of TLAB
   static size_t end_reserve() {
     int reserve_size = typeArrayOopDesc::header_size(T_INT);
-    return MAX2(reserve_size, VM_Version::reserve_for_allocation_prefetch());
+    return MAX2(reserve_size, _reserve_for_allocation_prefetch);  // larger of the int[] header size and the prefetch reserve
   }
   static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
--- a/hotspot/src/share/vm/opto/macro.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/opto/macro.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -1897,7 +1897,7 @@
 
       Node *prefetch_adr;
       Node *prefetch;
-      uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
+      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = 0;
 
@@ -1926,12 +1926,8 @@
       contended_phi_rawmem = pf_phi_rawmem;
       i_o = pf_phi_abio;
    } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
-      // Insert a prefetch for each allocation.
-      // This code is used for Sparc with BIS.
-      Node *pf_region = new RegionNode(3);
-      Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
-                                             TypeRawPtr::BOTTOM );
-      transform_later(pf_region);
+      // Insert a prefetch instruction for each allocation.
+      // This code is used for SPARC with BIS.
 
       // Generate several prefetch instructions.
       uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
@@ -1940,10 +1936,15 @@
 
       // Next cache address.
       Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
-                                            _igvn.MakeConX(distance));
+                                     _igvn.MakeConX(step_size + distance));
       transform_later(cache_adr);
       cache_adr = new CastP2XNode(needgc_false, cache_adr);
       transform_later(cache_adr);
+      // For BIS instructions to be emitted, the address must be aligned at cache line size.
+      // (The VM sets AllocatePrefetchStepSize to the cache line size, unless a value is
+      // specified at the command line.) If the address is not aligned at cache line size
+      // boundary, a standard store instruction is triggered (instead of the BIS). For the
+      // latter, 8-byte alignment is necessary.
       Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
       cache_adr = new AndXNode(cache_adr, mask);
       transform_later(cache_adr);
--- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -90,16 +90,29 @@
 }
 
 Flag::Error AllocatePrefetchDistanceConstraintFunc(intx value, bool verbose) {
-  if (value < 0) {
+  if (value < 0 || value > 512) {  // accepted range is [0, 512]
     CommandLineError::print(verbose,
-                            "Unable to determine system-specific value for AllocatePrefetchDistance. "
-                            "Please provide appropriate value, if unsure, use 0 to disable prefetching\n");
+                            "AllocatePrefetchDistance (" INTX_FORMAT ") must be "
+                            "between 0 and %d\n",  // %d: the bound below is a plain int literal
+                            value, 512);           // report the rejected value, not the global flag
     return Flag::VIOLATES_CONSTRAINT;
   }
 
   return Flag::SUCCESS;
 }
 
+// With AllocatePrefetchStyle == 3 the step size must be a multiple of wordSize; otherwise any value passes here.
+Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
+  const bool misaligned = (AllocatePrefetchStyle == 3) && (value % wordSize != 0);
+  if (!misaligned) {
+    return Flag::SUCCESS;
+  }
+  CommandLineError::print(verbose,
+                          "AllocatePrefetchStepSize (" INTX_FORMAT ") must be multiple of %d\n",
+                          value, wordSize);
+  return Flag::VIOLATES_CONSTRAINT;
+}
+
 Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
   intx max_value = max_intx;
 #if defined(SPARC)
@@ -117,49 +130,6 @@
   return Flag::SUCCESS;
 }
 
-Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
-  intx max_value = 512;
-  if (value < 1 || value > max_value) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchStepSize (" INTX_FORMAT ") "
-                            "must be between 1 and %d\n",
-                            AllocatePrefetchStepSize,
-                            max_value);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  if (AllocatePrefetchDistance % AllocatePrefetchStepSize != 0) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchDistance (" INTX_FORMAT ") "
-                            "%% AllocatePrefetchStepSize (" INTX_FORMAT ") "
-                            "= " INTX_FORMAT " "
-                            "must be 0\n",
-                            AllocatePrefetchDistance, AllocatePrefetchStepSize,
-                            AllocatePrefetchDistance % AllocatePrefetchStepSize);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  /* The limit of 64 for the quotient of AllocatePrefetchDistance and AllocatePrefetchSize
-   * originates from the limit of 64 for AllocatePrefetchLines/AllocateInstancePrefetchLines.
-   * If AllocatePrefetchStyle == 2, the quotient from above is used in PhaseMacroExpand::prefetch_allocation()
-   * to determine the number of lines to prefetch. For other values of AllocatePrefetchStyle,
-   * AllocatePrefetchDistance and AllocatePrefetchSize is used. For consistency, all these
-   * quantities must have the same limit (64 in this case).
-   */
-  if (AllocatePrefetchDistance / AllocatePrefetchStepSize > 64) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchDistance (" INTX_FORMAT ") too large or "
-                            "AllocatePrefetchStepSize (" INTX_FORMAT ") too small; "
-                            "try decreasing/increasing values so that "
-                            "AllocatePrefetchDistance / AllocatePrefetchStepSize <= 64\n",
-                            AllocatePrefetchDistance, AllocatePrefetchStepSize,
-                            AllocatePrefetchDistance % AllocatePrefetchStepSize);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  return Flag::SUCCESS;
-}
-
 Flag::Error CompileThresholdConstraintFunc(intx value, bool verbose) {
   if (value < 0 || value > INT_MAX >> InvocationCounter::count_shift) {
     CommandLineError::print(verbose,
--- a/hotspot/src/share/vm/runtime/globals.hpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Apr 29 08:32:42 2016 +0200
@@ -2901,9 +2901,9 @@
                                                                             \
   product(intx,  AllocatePrefetchStyle, 1,                                  \
           "0 = no prefetch, "                                               \
-          "1 = prefetch instructions for each allocation, "                 \
+          "1 = generate prefetch instructions for each allocation, "        \
           "2 = use TLAB watermark to gate allocation prefetch, "            \
-          "3 = use BIS instruction on Sparc for allocation prefetch")       \
+          "3 = generate one prefetch instruction per cache line")           \
           range(0, 3)                                                       \
                                                                             \
   product(intx,  AllocatePrefetchDistance, -1,                              \
@@ -2926,8 +2926,8 @@
           constraint(AllocatePrefetchStepSizeConstraintFunc,AfterMemoryInit)\
                                                                             \
   product(intx,  AllocatePrefetchInstr, 0,                                  \
-          "Prefetch instruction to prefetch ahead of allocation pointer")   \
-          constraint(AllocatePrefetchInstrConstraintFunc, AfterErgo)        \
+          "Select instruction to prefetch ahead of allocation pointer")     \
+          constraint(AllocatePrefetchInstrConstraintFunc, AfterMemoryInit)  \
                                                                             \
   /* deoptimization */                                                      \
   develop(bool, TraceDeoptimization, false,                                 \
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -600,6 +600,7 @@
   nonstatic_field(ThreadLocalAllocBuffer,      _pf_top,                                       HeapWord*)                             \
   nonstatic_field(ThreadLocalAllocBuffer,      _desired_size,                                 size_t)                                \
   nonstatic_field(ThreadLocalAllocBuffer,      _refill_waste_limit,                           size_t)                                \
+     static_field(ThreadLocalAllocBuffer,      _reserve_for_allocation_prefetch,              int)                                   \
      static_field(ThreadLocalAllocBuffer,      _target_refills,                               unsigned)                              \
   nonstatic_field(ThreadLocalAllocBuffer,      _number_of_refills,                            unsigned)                              \
   nonstatic_field(ThreadLocalAllocBuffer,      _fast_refill_waste,                            unsigned)                              \
@@ -1318,7 +1319,6 @@
      static_field(Abstract_VM_Version,         _vm_minor_version,                             int)                                   \
      static_field(Abstract_VM_Version,         _vm_security_version,                          int)                                   \
      static_field(Abstract_VM_Version,         _vm_build_number,                              int)                                   \
-     static_field(Abstract_VM_Version,         _reserve_for_allocation_prefetch,              int)                                   \
                                                                                                                                      \
      static_field(JDK_Version,                 _current,                                      JDK_Version)                           \
   nonstatic_field(JDK_Version,                 _major,                                        unsigned char)                         \
--- a/hotspot/src/share/vm/runtime/vm_version.cpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/runtime/vm_version.cpp	Fri Apr 29 08:32:42 2016 +0200
@@ -43,7 +43,6 @@
 bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
 unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
 unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
-int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
 
 #ifndef HOTSPOT_VERSION_STRING
   #error HOTSPOT_VERSION_STRING must be defined
--- a/hotspot/src/share/vm/runtime/vm_version.hpp	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/src/share/vm/runtime/vm_version.hpp	Fri Apr 29 08:32:42 2016 +0200
@@ -57,7 +57,6 @@
   static int          _vm_build_number;
   static unsigned int _parallel_worker_threads;
   static bool         _parallel_worker_threads_initialized;
-  static int          _reserve_for_allocation_prefetch;
 
   static unsigned int nof_parallel_worker_threads(unsigned int num,
                                                   unsigned int dem,
@@ -139,12 +138,6 @@
     return _L1_data_cache_line_size;
   }
 
-  // Need a space at the end of TLAB for prefetch instructions
-  // which may fault when accessing memory outside of heap.
-  static int reserve_for_allocation_prefetch() {
-    return _reserve_for_allocation_prefetch;
-  }
-
   // ARCH specific policy for the BiasedLocking
   static bool use_biased_locking()  { return true; }
 
--- a/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Mon Apr 25 10:53:42 2016 +0200
+++ b/hotspot/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Fri Apr 29 08:32:42 2016 +0200
@@ -90,13 +90,6 @@
         excludeTestMaxRange("CICompilerCount");
 
         /*
-         * JDK-8153340
-         * Temporary exclude AllocatePrefetchDistance option from testing
-         */
-        excludeTestRange("AllocatePrefetchDistance");
-
-
-        /*
          * JDK-8136766
          * Temporarily remove ThreadStackSize from testing because Windows can set it to 0
          * (for default OS size) but other platforms insist it must be greater than 0