# HG changeset patch
# User michaelm
# Date 1567436377 -3600
# Node ID f571cc16478a4402b8f64dc134a962f9140762c0
# Parent  c4ec55644b4b40bb79556da310eac17716fc9a39# Parent  ca9e3b68a969822d027d7a6490eac568fedece21
Merge

diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp
--- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -34,6 +34,12 @@
 
 #define SUPPORTS_NATIVE_CX8
 
+// Aarch64 was not originally defined as multi-copy-atomic, but now is.
+// See: "Simplifying ARM Concurrency: Multicopy-atomic Axiomatic and
+// Operational Models for ARMv8"
+// So we could #define CPU_MULTI_COPY_ATOMIC but historically we have
+// not done so.
+
 // According to the ARMv8 ARM, "Concurrent modification and execution
 // of instructions can lead to the resulting instruction performing
 // any behavior that can be achieved by executing any sequence of
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/arm/globalDefinitions_arm.hpp
--- a/src/hotspot/cpu/arm/globalDefinitions_arm.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/arm/globalDefinitions_arm.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -45,6 +45,9 @@
 #define SUPPORTS_NATIVE_CX8
 #endif
 
+// arm32 is not specified as multi-copy-atomic,
+// so we must not #define CPU_MULTI_COPY_ATOMIC.
+
 #define STUBROUTINES_MD_HPP    "stubRoutines_arm.hpp"
 #define INTERP_MASM_MD_HPP     "interp_masm_arm.hpp"
 #define TEMPLATETABLE_MD_HPP   "templateTable_arm.hpp"
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp
--- a/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -41,8 +41,8 @@
 
 #define SUPPORTS_NATIVE_CX8
 
-// The PPC CPUs are NOT multiple-copy-atomic.
-#define CPU_NOT_MULTIPLE_COPY_ATOMIC
+// PPC64 is not specified as multi-copy-atomic,
+// so we must not #define CPU_MULTI_COPY_ATOMIC.
 
 // The expected size in bytes of a cache line, used to pad data structures.
 #define DEFAULT_CACHE_LINE_SIZE 128
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/s390/globalDefinitions_s390.hpp
--- a/src/hotspot/cpu/s390/globalDefinitions_s390.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/s390/globalDefinitions_s390.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -42,6 +42,8 @@
 
 #define SUPPORTS_NATIVE_CX8
 
+#define CPU_MULTI_COPY_ATOMIC
+
 // Indicates whether the C calling conventions require that
 // 32-bit integer argument values are extended to 64 bits.
 // This is the case on z/Architecture.
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
--- a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -36,6 +36,8 @@
 
 #define SUPPORTS_NATIVE_CX8
 
+#define CPU_MULTI_COPY_ATOMIC
+
 // The expected size in bytes of a cache line, used to pad data structures.
 #if defined(TIERED)
   // tiered, 64-bit, large machine
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/cpu/x86/globalDefinitions_x86.hpp
--- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -33,6 +33,8 @@
 
 #define SUPPORTS_NATIVE_CX8
 
+#define CPU_MULTI_COPY_ATOMIC
+
 // The expected size in bytes of a cache line, used to pad data structures.
 #if defined(TIERED)
   #ifdef _LP64
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/gc/shared/taskqueue.inline.hpp
--- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -207,7 +207,7 @@
   // Architectures with weak memory model require a barrier here
   // to guarantee that bottom is not older than age,
   // which is crucial for the correctness of the algorithm.
-#if !(defined SPARC || defined IA32 || defined AMD64)
+#ifndef CPU_MULTI_COPY_ATOMIC
   OrderAccess::fence();
 #endif
   uint localBot = OrderAccess::load_acquire(&_bottom);
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp	Mon Sep 02 15:59:37 2019 +0100
@@ -60,11 +60,9 @@
   }
 
   // Enable NUMA by default. While Shenandoah is not NUMA-aware, enabling NUMA makes
-  // storage allocation code NUMA-aware, and NUMA interleaving makes the storage
-  // allocated in consistent manner (interleaving) to minimize run-to-run variance.
+  // storage allocation code NUMA-aware.
   if (FLAG_IS_DEFAULT(UseNUMA)) {
     FLAG_SET_DEFAULT(UseNUMA, true);
-    FLAG_SET_DEFAULT(UseNUMAInterleaving, true);
   }
 
   // Set up default number of concurrent threads. We want to have cycles complete fast
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp
--- a/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp	Mon Sep 02 15:59:37 2019 +0100
@@ -230,8 +230,6 @@
 
 void BFSClosure::do_root(const oop* ref) {
   assert(ref != NULL, "invariant");
-  assert(is_aligned(ref, HeapWordSize), "invariant");
-  assert(*ref != NULL, "invariant");
   if (!_edge_queue->is_full()) {
     _edge_queue->add(NULL, ref);
   }
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp
--- a/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp	Mon Sep 02 15:59:37 2019 +0100
@@ -178,8 +178,7 @@
 
 void DFSClosure::do_root(const oop* ref) {
   assert(ref != NULL, "invariant");
-  assert(is_aligned(ref, HeapWordSize), "invariant");
-  const oop pointee = *ref;
+  const oop pointee = UnifiedOop::dereference(ref);
   assert(pointee != NULL, "invariant");
   closure_impl(ref, pointee);
 }
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp
--- a/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp	Mon Sep 02 15:59:37 2019 +0100
@@ -317,29 +317,28 @@
 };
 
 typedef CheckpointWriteOp<JfrCheckpointMspace::Type> WriteOperation;
-typedef MutexedWriteOp<WriteOperation> MutexedWriteOperation;
 typedef ReleaseOp<JfrCheckpointMspace> CheckpointReleaseOperation;
-typedef CompositeOperation<MutexedWriteOperation, CheckpointReleaseOperation> CheckpointWriteOperation;
 
-static size_t write_mspace_exclusive(JfrCheckpointMspace* mspace, JfrChunkWriter& chunkwriter) {
-  Thread* const thread = Thread::current();
+template <template <typename> class WriterHost, template <typename, typename> class CompositeOperation>
+static size_t write_mspace(JfrCheckpointMspace* mspace, JfrChunkWriter& chunkwriter) {
+  assert(mspace != NULL, "invariant");
   WriteOperation wo(chunkwriter);
-  MutexedWriteOperation mwo(wo);
-  CheckpointReleaseOperation cro(mspace, thread, false);
-  CheckpointWriteOperation cpwo(&mwo, &cro);
+  WriterHost<WriteOperation> wh(wo);
+  CheckpointReleaseOperation cro(mspace, Thread::current(), false);
+  CompositeOperation<WriterHost<WriteOperation>, CheckpointReleaseOperation> co(&wh, &cro);
   assert(mspace->is_full_empty(), "invariant");
-  process_free_list(cpwo, mspace);
+  process_free_list(co, mspace);
   return wo.processed();
 }
 
 size_t JfrCheckpointManager::write() {
-  const size_t processed = write_mspace_exclusive(_free_list_mspace, _chunkwriter);
+  const size_t processed = write_mspace<MutexedWriteOp, CompositeOperation>(_free_list_mspace, _chunkwriter);
   synchronize_epoch();
   return processed;
 }
 
 size_t JfrCheckpointManager::write_epoch_transition_mspace() {
-  return write_mspace_exclusive(_epoch_transition_mspace, _chunkwriter);
+  return write_mspace<ExclusiveOp, CompositeOperation>(_epoch_transition_mspace, _chunkwriter);
 }
 
 typedef DiscardOp<DefaultDiscarder<JfrBuffer> > DiscardOperation;
diff -r c4ec55644b4b -r f571cc16478a src/hotspot/share/utilities/globalDefinitions.hpp
--- a/src/hotspot/share/utilities/globalDefinitions.hpp	Mon Sep 02 15:58:44 2019 +0100
+++ b/src/hotspot/share/utilities/globalDefinitions.hpp	Mon Sep 02 15:59:37 2019 +0100
@@ -481,10 +481,13 @@
 // assure their ordering, instead of after volatile stores.
 // (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models"
 // by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge)
-#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
-const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true;
+#ifdef CPU_MULTI_COPY_ATOMIC
+// IRIW support is not needed on multi-copy-atomic CPUs.
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
 #else
-const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
+// Of all non-multi-copy-atomic architectures, only PPC64 supports IRIW at the moment.
+// Final decision is subject to JEP 188: Java Memory Model Update.
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = PPC64_ONLY(true) NOT_PPC64(false);
 #endif
 
 // The expected size in bytes of a cache line, used to pad data structures.