Merge jdk7-b88
author duke
Wed, 05 Jul 2017 17:09:16 +0200
changeset 5109 7077b95d42f6
parent 5108 d312c7b0a178 (diff)
parent 5073 4f2026dfd83d (current diff)
child 5110 482f63894bb1
child 5111 67916ae3220d
child 5116 ffde6191f84d
child 5118 2a19552b7697
child 5125 21bde52d7c5e
child 5128 0da399065c5d
child 5139 60b266b1fea0
child 5206 6bdda0396d9d
child 5208 a5368e5402f5
child 5210 8063e418d57c
child 5212 6bc51ee52f13
child 5220 1386e31a7fd0
child 5235 9261638fa59c
child 5238 d1077067d696
child 5240 3892e01609c6
Merge
--- a/.hgtags-top-repo	Wed Jul 05 17:08:50 2017 +0200
+++ b/.hgtags-top-repo	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 2f3ea057d1ad56cf3b269cdc4de2741411151982 jdk7-b84
 cf26288a114be67c39f2758959ce50b60f5ae330 jdk7-b85
 433a60a9c0bf1b26ee7e65cebaa89c541f497aed jdk7-b86
+6b1069f53fbc30663ccef49d78c31bb7d6967bde jdk7-b87
--- a/corba/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/corba/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 68c8961a82e4a3ad2a67991e5d834192a81eb4cd jdk7-b84
 c67a9df7bc0ca291f08f9a9cc05cb78ea15d25e6 jdk7-b85
 6253e28826d16cf1aecc39ce04c8de1f6bf2df5f jdk7-b86
+09a41111a401d327f65e453384d976a10154d9ea jdk7-b87
--- a/hotspot/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -85,3 +85,4 @@
 418bc80ce13995149eadc9eecbba21d7a9fa02ae hs17-b10
 bf823ef06b4f211e66988d76a2e2669be5c0820e jdk7-b86
 07226e9eab8f74b37346b32715f829a2ef2c3188 hs18-b01
+e7e7e36ccdb5d56edd47e5744351202d38f3b7ad jdk7-b87
--- a/hotspot/make/hotspot_version	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/make/hotspot_version	Wed Jul 05 17:09:16 2017 +0200
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=18
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=01
+HS_BUILD_NUMBER=02
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/hotspot/make/windows/build.bat	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/make/windows/build.bat	Wed Jul 05 17:09:16 2017 +0200
@@ -28,6 +28,9 @@
 REM Since we don't have uname and we could be cross-compiling,
 REM Use the compiler to determine which ARCH we are building
 REM 
+REM Note: Running this batch file from the Windows command shell requires
+REM that "grep" be accessible on the PATH. An MKS install does this.
+REM 
 cl 2>&1 | grep "IA-64" >NUL
 if %errorlevel% == 0 goto isia64
 cl 2>&1 | grep "AMD64" >NUL
@@ -57,11 +60,12 @@
 if "%1" == "product"   goto test1
 if "%1" == "debug"     goto test1
 if "%1" == "fastdebug" goto test1
+if "%1" == "tree"      goto test1
 goto usage
 
 :test1
 if "%2" == "core"      goto test2
-if "%2" == "kernel"   goto test2
+if "%2" == "kernel"    goto test2
 if "%2" == "compiler1" goto test2
 if "%2" == "compiler2" goto test2
 if "%2" == "tiered"    goto test2
@@ -70,6 +74,7 @@
 goto usage
 
 :test2
+if "%1" == "tree"      goto build_tree
 REM check_j2se_version
 REM jvmti.make requires J2SE 1.4.x or newer.
 REM If not found then fail fast.
@@ -93,6 +98,10 @@
 nmake -f %3/make/windows/build.make Variant=compiler2 WorkSpace=%3 BootStrapDir=%4 BuildUser="%USERNAME%" HOTSPOT_BUILD_VERSION=%5 ADLC_ONLY=1 %1
 goto end
 
+:build_tree
+nmake -f %3/make/windows/build.make Variant=%2 WorkSpace=%3 BootStrapDir=%4 BuildUser="%USERNAME%" HOTSPOT_BUILD_VERSION="%5" %1
+goto end
+
 :usage
 echo Usage: build flavor version workspace bootstrap_dir [build_id] [windbg_home]
 echo.
@@ -100,8 +109,10 @@
 echo flavor is "product", "debug" or "fastdebug",
 echo version is "core", "kernel", "compiler1", "compiler2", or "tiered",
 echo workspace is source directory without trailing slash, 
-echo bootstrap_dir is a full path to echo a JDK in which bin/java 
-echo   and bin/javac are present and working, and echo build_id is an 
+echo bootstrap_dir is a full path to a JDK in which bin/java 
+echo   and bin/javac are present and working, and build_id is an 
 echo   optional build identifier displayed by java -version
+exit /b 1
 
 :end
+exit /b %errorlevel%
--- a/hotspot/make/windows/build.make	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/make/windows/build.make	Wed Jul 05 17:09:16 2017 +0200
@@ -27,6 +27,9 @@
 # environment variables (Variant, WorkSpace, BootStrapDir, BuildUser, HOTSPOT_BUILD_VERSION)
 # are passed in as command line arguments.
 
+# Note: Running nmake or build.bat from the Windows command shell requires
+# that "sh" be accessible on the PATH. An MKS install does this.
+
 # SA components are built if BUILD_WIN_SA=1 is specified.
 # See notes in README. This produces files:
 #  1. sa-jdi.jar       - This is built before building jvm.dll
@@ -233,6 +236,12 @@
 	cd $(variantDir)
 	nmake -nologo -f $(WorkSpace)\make\windows\makefiles\top.make BUILD_FLAVOR=product DEVELOP=1 ARCH=$(ARCH)
 
+# target to create just the directory structure
+tree: checks $(variantDir) $(variantDir)\local.make sanity
+	mkdir $(variantDir)\product
+	mkdir $(variantDir)\debug
+	mkdir $(variantDir)\fastdebug
+
 sanity:
 	@ echo;
 	@ cd $(variantDir)
--- a/hotspot/make/windows/create.bat	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/make/windows/create.bat	Wed Jul 05 17:09:16 2017 +0200
@@ -36,6 +36,9 @@
 REM Since we don't have uname and we could be cross-compiling,
 REM Use the compiler to determine which ARCH we are building
 REM 
+REM Note: Running this batch file from the Windows command shell requires
+REM that "grep" be accessible on the PATH. An MKS install does this.
+REM 
 cl 2>&1 | grep "IA-64" >NUL
 if %errorlevel% == 0 goto isia64
 cl 2>&1 | grep "AMD64" >NUL
--- a/hotspot/make/windows/get_msc_ver.sh	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/make/windows/get_msc_ver.sh	Wed Jul 05 17:09:16 2017 +0200
@@ -22,6 +22,8 @@
 #  
 #
 
+set -e
+
 # This shell script echoes "MSC_VER=<munged version of cl>"
 # It ignores the micro version component.
 # Examples:
@@ -38,17 +40,20 @@
 # sh, and it has been found that sometimes `which sh` fails.
 
 if [ "x$HotSpotMksHome" != "x" ]; then
- MKS_HOME="$HotSpotMksHome"
+  TOOL_DIR="$HotSpotMksHome"
 else
- SH=`which sh`
- MKS_HOME=`dirname "$SH"`
+  # HotSpotMksHome is not set so use the directory that contains "sh".
+  # This works with both MKS and Cygwin.
+  SH=`which sh`
+  TOOL_DIR=`dirname "$SH"`
 fi
 
-HEAD="$MKS_HOME/head"
-ECHO="$MKS_HOME/echo"
-EXPR="$MKS_HOME/expr"
-CUT="$MKS_HOME/cut"
-SED="$MKS_HOME/sed"
+DIRNAME="$TOOL_DIR/dirname"
+HEAD="$TOOL_DIR/head"
+ECHO="$TOOL_DIR/echo"
+EXPR="$TOOL_DIR/expr"
+CUT="$TOOL_DIR/cut"
+SED="$TOOL_DIR/sed"
 
 if [ "x$FORCE_MSC_VER" != "x" ]; then
   echo "MSC_VER=$FORCE_MSC_VER"
@@ -70,7 +75,15 @@
 if [ "x$FORCE_LINK_VER" != "x" ]; then
   echo "LINK_VER=$FORCE_LINK_VER"
 else
-  LINK_VER_RAW=`link 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
+  # use the "link" command that is co-located with the "cl" command
+  cl_cmd=`which cl`
+  if [ "x$cl_cmd" != "x" ]; then
+    link_cmd=`$DIRNAME "$cl_cmd"`/link
+  else
+    # which can't find "cl" so just use which ever "link" we find
+    link_cmd="link"
+  fi
+  LINK_VER_RAW=`"$link_cmd" 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
   LINK_VER_MAJOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f1`
   LINK_VER_MINOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f2`
   LINK_VER_MICRO=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f3`
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -1065,7 +1065,7 @@
           __ movptr(rbx_temp, Address(rsi_array, elem_offset));
           __ movptr(Address(rax_argslot, slot_offset), rbx_temp);
           elem_offset += type2aelembytes(elem_type);
-          slot_offset += Interpreter::stackElementSize();
+           slot_offset += Interpreter::stackElementSize();
         }
       }
 
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -22,6 +22,8 @@
  *
  */
 
+# define __STDC_FORMAT_MACROS
+
 // do not include  precompiled  header file
 # include "incls/_os_linux.cpp.incl"
 
@@ -53,6 +55,8 @@
 # include <sys/ipc.h>
 # include <sys/shm.h>
 # include <link.h>
+# include <stdint.h>
+# include <inttypes.h>
 
 #define MAX_PATH    (2 * K)
 
@@ -2492,6 +2496,91 @@
     != MAP_FAILED;
 }
 
+// Linux uses a growable mapping for the stack, and if the mapping for
+// the stack guard pages is not removed when we detach a thread the
+// stack cannot grow beyond the pages where the stack guard was
+// mapped.  If at some point later in the process the stack expands to
+// that point, the Linux kernel cannot expand the stack any further
+// because the guard pages are in the way, and a segfault occurs.
+//
+// However, it's essential not to split the stack region by unmapping
+// a region (leaving a hole) that's already part of the stack mapping,
+// so if the stack mapping has already grown beyond the guard pages at
+// the time we create them, we have to truncate the stack mapping.
+// So, we need to know the extent of the stack mapping when
+// create_stack_guard_pages() is called.
+
+// Find the bounds of the stack mapping.  Return true for success.
+//
+// We only need this for stacks that are growable: at the time of
+// writing thread stacks don't use growable mappings (i.e. those
+// created with MAP_GROWSDOWN), and aren't marked "[stack]", so this
+// only applies to the main thread.
+static bool
+get_stack_bounds(uintptr_t *bottom, uintptr_t *top)
+{
+  FILE *f = fopen("/proc/self/maps", "r");
+  if (f == NULL)
+    return false;
+
+  while (!feof(f)) {
+    size_t dummy;
+    char *str = NULL;
+    ssize_t len = getline(&str, &dummy, f);
+    if (len == -1) {
+      fclose(f);
+      return false;
+    }
+
+    if (len > 0 && str[len-1] == '\n') {
+      str[len-1] = 0;
+      len--;
+    }
+
+    static const char *stack_str = "[stack]";
+    if (len > (ssize_t)strlen(stack_str)
+       && (strcmp(str + len - strlen(stack_str), stack_str) == 0)) {
+      if (sscanf(str, "%" SCNxPTR "-%" SCNxPTR, bottom, top) == 2) {
+        uintptr_t sp = (uintptr_t)__builtin_frame_address(0);
+        if (sp >= *bottom && sp <= *top) {
+          free(str);
+          fclose(f);
+          return true;
+        }
+      }
+    }
+    free(str);
+  }
+  fclose(f);
+  return false;
+}
+
+// If the (growable) stack mapping already extends beyond the point
+// where we're going to put our guard pages, truncate the mapping at
+// that point by munmap()ping it.  This ensures that when we later
+// munmap() the guard pages we don't leave a hole in the stack
+// mapping.
+bool os::create_stack_guard_pages(char* addr, size_t size) {
+  uintptr_t stack_extent, stack_base;
+  if (get_stack_bounds(&stack_extent, &stack_base)) {
+    if (stack_extent < (uintptr_t)addr)
+      ::munmap((void*)stack_extent, (uintptr_t)addr - stack_extent);
+  }
+
+  return os::commit_memory(addr, size);
+}
+
+// If this is a growable mapping, remove the guard pages entirely by
+// munmap()ping them.  If not, just call uncommit_memory().
+bool os::remove_stack_guard_pages(char* addr, size_t size) {
+  uintptr_t stack_extent, stack_base;
+  if (get_stack_bounds(&stack_extent, &stack_base)) {
+    return ::munmap(addr, size) == 0;
+  }
+
+  return os::uncommit_memory(addr, size);
+}
+
 static address _highest_vm_reserved_address = NULL;
 
 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
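The get_stack_bounds() logic above reduces to scanning /proc/self/maps for the mapping tagged "[stack]" and reading its hex bounds. A minimal standalone sketch of that parsing idea, simplified to a fixed-size line buffer and without the frame-address sanity check (illustration only, not the HotSpot code):

#define __STDC_FORMAT_MACROS
#include <cstdio>
#include <cstring>
#include <stdint.h>
#include <inttypes.h>

// Scan /proc/self/maps for the "[stack]" entry and parse its bounds.
static bool find_stack_bounds(uintptr_t* bottom, uintptr_t* top) {
  FILE* f = fopen("/proc/self/maps", "r");
  if (f == NULL) return false;
  char line[512];
  bool found = false;
  while (fgets(line, sizeof(line), f) != NULL) {
    if (strstr(line, "[stack]") != NULL &&
        sscanf(line, "%" SCNxPTR "-%" SCNxPTR, bottom, top) == 2) {
      found = true;
      break;
    }
  }
  fclose(f);
  return found;
}

int main() {
  uintptr_t bottom = 0, top = 0;
  if (find_stack_bounds(&bottom, &top)) {
    printf("stack: %" PRIxPTR "-%" PRIxPTR "\n", bottom, top);
  }
  return 0;
}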
--- a/hotspot/src/os/solaris/dtrace/hotspot.d	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/os/solaris/dtrace/hotspot.d	Wed Jul 05 17:09:16 2017 +0200
@@ -25,9 +25,20 @@
 provider hotspot {
   probe class__loaded(char*, uintptr_t, void*, uintptr_t);
   probe class__unloaded(char*, uintptr_t, void*, uintptr_t);
+  probe class__initialization__required(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__recursive(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__concurrent(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__erroneous(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__super__failed(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__clinit(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__error(char*, uintptr_t, void*, intptr_t, int);
+  probe class__initialization__end(char*, uintptr_t, void*, intptr_t, int);
   probe vm__init__begin();
   probe vm__init__end();
   probe vm__shutdown();
+  probe vmops__request(char*, uintptr_t, int);
+  probe vmops__begin(char*, uintptr_t, int);
+  probe vmops__end(char*, uintptr_t, int);
   probe gc__begin(uintptr_t);
   probe gc__end();
   probe mem__pool__gc__begin(
@@ -38,6 +49,12 @@
     uintptr_t, uintptr_t, uintptr_t, uintptr_t);
   probe thread__start(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
   probe thread__stop(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+  probe thread__sleep__begin(long long);
+  probe thread__sleep__end(int);
+  probe thread__yield();
+  probe thread__park__begin(uintptr_t, int, long long);
+  probe thread__park__end(uintptr_t);
+  probe thread__unpark(uintptr_t);
   probe method__compile__begin(
     char*, uintptr_t, char*, uintptr_t, char*, uintptr_t, char*, uintptr_t); 
   probe method__compile__end(
--- a/hotspot/src/os/solaris/vm/attachListener_solaris.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/os/solaris/vm/attachListener_solaris.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -668,13 +668,18 @@
     }
   }
 
-  if (strcmp(name, "ExtendedDTraceProbes") != 0) {
-    out->print_cr("flag '%s' cannot be changed", name);
-    return JNI_ERR;
+  if (strcmp(name, "ExtendedDTraceProbes") == 0) {
+    DTrace::set_extended_dprobes(flag);
+    return JNI_OK;
   }
 
-  DTrace::set_extended_dprobes(flag);
-  return JNI_OK;
+  if (strcmp(name, "DTraceMonitorProbes") == 0) {
+    DTrace::set_monitor_dprobes(flag);
+    return JNI_OK;
+  }
+
+  out->print_cr("flag '%s' cannot be changed", name);
+  return JNI_ERR;
 }
 
 void AttachListener::pd_detachall() {
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -2698,6 +2698,14 @@
   }
 }
 
+bool os::create_stack_guard_pages(char* addr, size_t size) {
+  return os::commit_memory(addr, size);
+}
+
+bool os::remove_stack_guard_pages(char* addr, size_t size) {
+  return os::uncommit_memory(addr, size);
+}
+
 // Change the page size in a given range.
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
   assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -2803,6 +2803,14 @@
   return VirtualFree(addr, 0, MEM_RELEASE) != 0;
 }
 
+bool os::create_stack_guard_pages(char* addr, size_t size) {
+  return os::commit_memory(addr, size);
+}
+
+bool os::remove_stack_guard_pages(char* addr, size_t size) {
+  return os::uncommit_memory(addr, size);
+}
+
 // Set protections specified
 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
                         bool is_committed) {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -760,7 +760,10 @@
   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  satb_mq_set.set_active_all_threads(true);
+  // This is the start of the marking cycle; we expect all
+  // threads to have SATB queues with active set to false.
+  satb_mq_set.set_active_all_threads(true, /* new active value */
+                                     false /* expected_active */);
 
   // update_g1_committed() will be called at the end of an evac pause
   // when marking is on. So, it's also called at the end of the
@@ -1079,7 +1082,11 @@
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
   } else {
     // We're done with marking.
-    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
+    // This is the end of the marking cycle; we expect all
+    // threads to have SATB queues with active set to true.
+    JavaThread::satb_mark_queue_set().set_active_all_threads(
+                                                  false, /* new active value */
+                                                  true /* expected_active */);
 
     if (VerifyDuringGC) {
       HandleMark hm;  // handle scope
@@ -2586,7 +2593,11 @@
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.abandon_partial_marking();
-  satb_mq_set.set_active_all_threads(false);
+  // This can be called either during or outside marking; we'll read
+  // the expected_active value from the SATB queue set.
+  satb_mq_set.set_active_all_threads(
+                                 false, /* new active value */
+                                 satb_mq_set.is_active() /* expected_active */);
 }
 
 static void print_ms_time_info(const char* prefix, const char* name,
@@ -3704,7 +3715,14 @@
         // enough to point to the next possible object header (the
         // bitmap knows by how much we need to move it as it knows its
         // granularity).
-        move_finger_to(_nextMarkBitMap->nextWord(_finger));
+        assert(_finger < _region_limit, "invariant");
+        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
+        // Check if bitmap iteration was aborted while scanning the last object
+        if (new_finger >= _region_limit) {
+            giveup_current_region();
+        } else {
+            move_finger_to(new_finger);
+        }
       }
     }
     // At this point we have either completed iterating over the
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -24,8 +24,8 @@
 
 class G1CollectedHeap;
 class CMTask;
-typedef GenericTaskQueue<oop> CMTaskQueue;
-typedef GenericTaskQueueSet<oop> CMTaskQueueSet;
+typedef GenericTaskQueue<oop>            CMTaskQueue;
+typedef GenericTaskQueueSet<CMTaskQueue> CMTaskQueueSet;
 
 // A generic CM bit map.  This is essentially a wrapper around the BitMap
 // class, with one bit per (1<<_shifter) HeapWords.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -2102,18 +2102,21 @@
 size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
   // Return the remaining space in the cur alloc region, but not less than
   // the min TLAB size.
-  // Also, no more than half the region size, since we can't allow tlabs to
-  // grow big enough to accomodate humongous objects.
-
-  // We need to story it locally, since it might change between when we
-  // test for NULL and when we use it later.
+
+  // Also, this value can be at most the humongous object threshold,
+  // since we can't allow tlabs to grow big enough to accommodate
+  // humongous objects.
+
+  // We need to store the cur alloc region locally, since it might change
+  // between when we test for NULL and when we use it later.
   ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  size_t max_tlab_size = _humongous_object_threshold_in_words * wordSize;
+
   if (cur_alloc_space == NULL) {
-    return HeapRegion::GrainBytes/2;
+    return max_tlab_size;
   } else {
-    return MAX2(MIN2(cur_alloc_space->free(),
-                     (size_t)(HeapRegion::GrainBytes/2)),
-                (size_t)MinTLABSize);
+    return MIN2(MAX2(cur_alloc_space->free(), (size_t)MinTLABSize),
+                max_tlab_size);
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -56,8 +56,8 @@
 #  define IF_G1_DETAILED_STATS(code)
 #endif
 
-typedef GenericTaskQueue<StarTask>    RefToScanQueue;
-typedef GenericTaskQueueSet<StarTask> RefToScanQueueSet;
+typedef GenericTaskQueue<StarTask>          RefToScanQueue;
+typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
 
 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
 typedef int CardIdx_t;     // needs to hold [ 0..CardsPerRegion )
@@ -1055,7 +1055,12 @@
 
   // Returns "true" iff the given word_size is "very large".
   static bool isHumongous(size_t word_size) {
-    return word_size >= _humongous_object_threshold_in_words;
+    // Note this has to be strictly greater-than as the TLABs
+    // are capped at the humongous threshold and we want to
+    // ensure that we don't try to allocate a TLAB as
+    // humongous and that we don't allocate a humongous
+    // object in a TLAB.
+    return word_size > _humongous_object_threshold_in_words;
   }
 
   // Update mod union table with the set of dirty cards.
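The two G1 changes above are coupled: unsafe_max_tlab_alloc() now caps TLAB sizes at the humongous threshold, and isHumongous() becomes strictly greater-than, so a TLAB of exactly the threshold size is never itself treated as humongous. A hedged sketch of the boundary invariant, using a made-up threshold (the real value is derived from the heap region size):

#include <algorithm>
#include <cassert>
#include <cstddef>

static const size_t threshold_words = 1024;   // assumed value, for illustration

static bool is_humongous(size_t word_size) {
  return word_size > threshold_words;         // strictly greater-than
}

// Clamp as in unsafe_max_tlab_alloc(): at least the minimum TLAB size,
// at most the humongous threshold.
static size_t max_tlab_words(size_t free_words, size_t min_tlab_words) {
  return std::min(std::max(free_words, min_tlab_words), threshold_words);
}

int main() {
  size_t tlab = max_tlab_words(/* free */ 4096, /* min */ 128);
  assert(tlab == threshold_words);        // capped at the threshold
  assert(!is_humongous(tlab));            // a max-size TLAB is not humongous
  assert(is_humongous(threshold_words + 1));
  return 0;
}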
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -101,6 +101,8 @@
 
   GenMarkSweep::_marking_stack =
     new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+  GenMarkSweep::_objarray_stack =
+    new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
 
   int size = SystemDictionary::number_of_classes() * 2;
   GenMarkSweep::_revisit_klass_stack =
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -35,7 +35,7 @@
 
 void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
   assert(pre_val->is_oop_or_null(true), "Error");
-  if (!JavaThread::satb_mark_queue_set().active()) return;
+  if (!JavaThread::satb_mark_queue_set().is_active()) return;
   Thread* thr = Thread::current();
   if (thr->is_Java_thread()) {
     JavaThread* jt = (JavaThread*)thr;
@@ -51,7 +51,7 @@
 G1SATBCardTableModRefBS::write_ref_field_pre_static(T* field,
                                                     oop new_val,
                                                     JavaThread* jt) {
-  if (!JavaThread::satb_mark_queue_set().active()) return;
+  if (!JavaThread::satb_mark_queue_set().is_active()) return;
   T heap_oop = oopDesc::load_heap_oop(field);
   if (!oopDesc::is_null(heap_oop)) {
     oop pre_val = oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -62,7 +62,7 @@
 
 template <class T> void
 G1SATBCardTableModRefBS::write_ref_array_pre_work(T* dst, int count) {
-  if (!JavaThread::satb_mark_queue_set().active()) return;
+  if (!JavaThread::satb_mark_queue_set().is_active()) return;
   T* elem_ptr = dst;
   for (int i = 0; i < count; i++, elem_ptr++) {
     T heap_oop = oopDesc::load_heap_oop(elem_ptr);
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -25,8 +25,8 @@
 # include "incls/_precompiled.incl"
 # include "incls/_ptrQueue.cpp.incl"
 
-PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) :
-  _qset(qset_), _buf(NULL), _index(0), _active(false),
+PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm, bool active) :
+  _qset(qset_), _buf(NULL), _index(0), _active(active),
   _perm(perm), _lock(NULL)
 {}
 
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -62,7 +62,7 @@
 public:
   // Initialize this queue to contain a null buffer, and be part of the
   // given PtrQueueSet.
-  PtrQueue(PtrQueueSet*, bool perm = false);
+  PtrQueue(PtrQueueSet*, bool perm = false, bool active = false);
   // Release any contained resources.
   void flush();
   // Calls flush() when destroyed.
@@ -101,6 +101,8 @@
     }
   }
 
+  bool is_active() { return _active; }
+
   static int byte_index_to_index(int ind) {
     assert((ind % oopSize) == 0, "Invariant.");
     return ind / oopSize;
@@ -257,7 +259,7 @@
   bool process_completed_buffers() { return _process_completed; }
   void set_process_completed(bool x) { _process_completed = x; }
 
-  bool active() { return _all_active; }
+  bool is_active() { return _all_active; }
 
   // Set the buffer size.  Should be called before any "enqueue" operation
   // can be called.  And should only be called once.
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -82,9 +82,57 @@
   t->satb_mark_queue().handle_zero_index();
 }
 
-void SATBMarkQueueSet::set_active_all_threads(bool b) {
+#ifdef ASSERT
+void SATBMarkQueueSet::dump_active_values(JavaThread* first,
+                                          bool expected_active) {
+  gclog_or_tty->print_cr("SATB queue active values for Java Threads");
+  gclog_or_tty->print_cr(" SATB queue set: active is %s",
+                         (is_active()) ? "TRUE" : "FALSE");
+  gclog_or_tty->print_cr(" expected_active is %s",
+                         (expected_active) ? "TRUE" : "FALSE");
+  for (JavaThread* t = first; t; t = t->next()) {
+    bool active = t->satb_mark_queue().is_active();
+    gclog_or_tty->print_cr("  thread %s, active is %s",
+                           t->name(), (active) ? "TRUE" : "FALSE");
+  }
+}
+#endif // ASSERT
+
+void SATBMarkQueueSet::set_active_all_threads(bool b,
+                                              bool expected_active) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  JavaThread* first = Threads::first();
+
+#ifdef ASSERT
+  if (_all_active != expected_active) {
+    dump_active_values(first, expected_active);
+
+    // I leave this here as a guarantee, instead of an assert, so
+    // that it will still be compiled in if we choose to uncomment
+    // the #ifdef ASSERT in a product build. The whole block is
+    // within an #ifdef ASSERT so the guarantee will not be compiled
+    // in a product build anyway.
+    guarantee(false,
+              "SATB queue set has an unexpected active value");
+  }
+#endif // ASSERT
   _all_active = b;
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+
+  for (JavaThread* t = first; t; t = t->next()) {
+#ifdef ASSERT
+    bool active = t->satb_mark_queue().is_active();
+    if (active != expected_active) {
+      dump_active_values(first, expected_active);
+
+      // I leave this here as a guarantee, instead of an assert, so
+      // that it will still be compiled in if we choose to uncomment
+      // the #ifdef ASSERT in a product build. The whole block is
+      // within an #ifdef ASSERT so the guarantee will not be compiled
+      // in a product build anyway.
+      guarantee(false,
+                "thread has an unexpected active value in its SATB queue");
+    }
+#endif // ASSERT
     t->satb_mark_queue().set_active(b);
   }
 }
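A minimal sketch of the expected_active protocol introduced above: the queue set and every per-thread queue must agree with the caller's expectation before the flag is flipped. The types below are illustrative stand-ins (C++11), not HotSpot's:

#include <cassert>
#include <vector>

struct Queue { bool active; };

struct QueueSet {
  bool all_active;
  std::vector<Queue*> queues;   // stand-in for iterating the Java threads

  void set_active_all(bool b, bool expected_active) {
    assert(all_active == expected_active);   // set-level invariant
    all_active = b;
    for (size_t i = 0; i < queues.size(); i++) {
      assert(queues[i]->active == expected_active);   // per-queue invariant
      queues[i]->active = b;
    }
  }
};

int main() {
  Queue q1 = { false }, q2 = { false };
  QueueSet set = { false, { &q1, &q2 } };
  set.set_active_all(true, false);   // start of marking cycle
  set.set_active_all(false, true);   // end of marking cycle
  return 0;
}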
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -29,8 +29,7 @@
 class ObjPtrQueue: public PtrQueue {
 public:
   ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
-    PtrQueue(qset_, perm)
-  {}
+    PtrQueue(qset_, perm, qset_->is_active()) { }
   // Apply the closure to all elements, and reset the index to make the
   // buffer empty.
   void apply_closure(ObjectClosure* cl);
@@ -55,6 +54,9 @@
   // is ignored.
   bool apply_closure_to_completed_buffer_work(bool par, int worker);
 
+#ifdef ASSERT
+  void dump_active_values(JavaThread* first, bool expected_active);
+#endif // ASSERT
 
 public:
   SATBMarkQueueSet();
@@ -65,9 +67,11 @@
 
   static void handle_zero_index_for_thread(JavaThread* t);
 
-  // Apply "set_active(b)" to all thread tloq's.  Should be called only
-  // with the world stopped.
-  void set_active_all_threads(bool b);
+  // Apply "set_active(b)" to all Java threads' SATB queues. It should be
+  // called only with the world stopped. The method will assert that the
+  // SATB queues of all threads it visits, as well as the SATB queue
+  // set itself, have an active value equal to expected_active.
+  void set_active_all_threads(bool b, bool expected_active);
 
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Wed Jul 05 17:09:16 2017 +0200
@@ -175,6 +175,7 @@
 psAdaptiveSizePolicy.hpp		adaptiveSizePolicy.hpp
 
 psCompactionManager.cpp                 gcTaskManager.hpp
+psCompactionManager.cpp                 objArrayKlass.inline.hpp
 psCompactionManager.cpp                 objectStartArray.hpp
 psCompactionManager.cpp                 oop.hpp
 psCompactionManager.cpp                 oop.inline.hpp
@@ -189,6 +190,9 @@
 psCompactionManager.hpp                 allocation.hpp
 psCompactionManager.hpp                 taskqueue.hpp
 
+psCompactionManager.inline.hpp		psCompactionManager.hpp
+psCompactionManager.inline.hpp		psParallelCompact.hpp
+
 psGCAdaptivePolicyCounters.hpp		gcAdaptivePolicyCounters.hpp
 psGCAdaptivePolicyCounters.hpp          gcPolicyCounters.hpp
 psGCAdaptivePolicyCounters.hpp          psAdaptiveSizePolicy.hpp
@@ -379,12 +383,12 @@
 pcTasks.cpp                             jniHandles.hpp
 pcTasks.cpp                             jvmtiExport.hpp
 pcTasks.cpp                             management.hpp
+pcTasks.cpp                             objArrayKlass.inline.hpp
 pcTasks.cpp                             psParallelCompact.hpp
 pcTasks.cpp                             pcTasks.hpp
 pcTasks.cpp                             oop.inline.hpp
 pcTasks.cpp                             oop.pcgc.inline.hpp
 pcTasks.cpp                             systemDictionary.hpp
-pcTasks.cpp                             taskqueue.hpp
 pcTasks.cpp                             thread.hpp
 pcTasks.cpp                             universe.hpp
 pcTasks.cpp                             vmThread.hpp
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -48,7 +48,7 @@
     _vm_thread->oops_do(&mark_and_push_closure, &mark_and_push_in_blobs);
 
   // Do the real work
-  cm->drain_marking_stacks(&mark_and_push_closure);
+  cm->follow_marking_stacks();
 }
 
 
@@ -118,7 +118,7 @@
   }
 
   // Do the real work
-  cm->drain_marking_stacks(&mark_and_push_closure);
+  cm->follow_marking_stacks();
   // cm->deallocate_stacks();
 }
 
@@ -196,17 +196,19 @@
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
 
   oop obj = NULL;
+  ObjArrayTask task;
   int random_seed = 17;
-  while(true) {
-    if (ParCompactionManager::steal(which, &random_seed, obj)) {
+  do {
+    while (ParCompactionManager::steal_objarray(which, &random_seed, task)) {
+      objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint();
+      k->oop_follow_contents(cm, task.obj(), task.index());
+      cm->follow_marking_stacks();
+    }
+    while (ParCompactionManager::steal(which, &random_seed, obj)) {
       obj->follow_contents(cm);
-      cm->drain_marking_stacks(&mark_and_push_closure);
-    } else {
-      if (terminator()->offer_termination()) {
-        break;
-      }
+      cm->follow_marking_stacks();
     }
-  }
+  } while (!terminator()->offer_termination());
 }
 
 //
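The revised steal loop above drains both steal sources (ObjArrayTask queues and oop queues) and offers termination only when both come up empty. A hedged sketch of that loop shape with single-threaded stand-ins (ParallelTaskTerminator and the real steal() calls are HotSpot's; everything below is invented for illustration):

#include <deque>

struct Task { int id; };

static std::deque<Task> objarray_work;   // stand-in for the ObjArrayTask queues
static std::deque<Task> oop_work;        // stand-in for the oop marking queues
static int idle_rounds = 0;

static bool steal(std::deque<Task>& q, Task& t) {
  if (q.empty()) return false;
  t = q.front(); q.pop_front();
  return true;
}

// Stand-in for ParallelTaskTerminator::offer_termination().
static bool offer_termination() { return ++idle_rounds > 1; }

static void do_work(const Task&) { /* follow contents, push more work */ }

int main() {
  objarray_work.push_back(Task{1});
  oop_work.push_back(Task{2});
  Task t;
  do {
    while (steal(objarray_work, t)) { do_work(t); idle_rounds = 0; }
    while (steal(oop_work, t))      { do_work(t); idle_rounds = 0; }
  } while (!offer_termination());
  return 0;
}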
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -28,6 +28,8 @@
 PSOldGen*            ParCompactionManager::_old_gen = NULL;
 ParCompactionManager**  ParCompactionManager::_manager_array = NULL;
 OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
+ParCompactionManager::ObjArrayTaskQueueSet*
+  ParCompactionManager::_objarray_queues = NULL;
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
 RegionTaskQueueSet*   ParCompactionManager::_region_array = NULL;
@@ -46,6 +48,11 @@
 
   // We want the overflow stack to be permanent
   _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true);
+
+  _objarray_queue.initialize();
+  _objarray_overflow_stack =
+    new (ResourceObj::C_HEAP) ObjArrayOverflowStack(10, true);
+
 #ifdef USE_RegionTaskQueueWithOverflow
   region_stack()->initialize();
 #else
@@ -69,6 +76,7 @@
 
 ParCompactionManager::~ParCompactionManager() {
   delete _overflow_stack;
+  delete _objarray_overflow_stack;
   delete _revisit_klass_stack;
   delete _revisit_mdo_stack;
   // _manager_array and _stack_array are statics
@@ -86,18 +94,21 @@
 
   assert(_manager_array == NULL, "Attempt to initialize twice");
   _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 );
-  guarantee(_manager_array != NULL, "Could not initialize promotion manager");
+  guarantee(_manager_array != NULL, "Could not allocate manager_array");
 
   _stack_array = new OopTaskQueueSet(parallel_gc_threads);
-  guarantee(_stack_array != NULL, "Count not initialize promotion manager");
+  guarantee(_stack_array != NULL, "Could not allocate stack_array");
+  _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
+  guarantee(_objarray_queues != NULL, "Could not allocate objarray_queues");
   _region_array = new RegionTaskQueueSet(parallel_gc_threads);
-  guarantee(_region_array != NULL, "Count not initialize promotion manager");
+  guarantee(_region_array != NULL, "Could not allocate region_array");
 
   // Create and register the ParCompactionManager(s) for the worker threads.
   for(uint i=0; i<parallel_gc_threads; i++) {
     _manager_array[i] = new ParCompactionManager();
     guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
     stack_array()->register_queue(i, _manager_array[i]->marking_stack());
+    _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_queue);
 #ifdef USE_RegionTaskQueueWithOverflow
     region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue());
 #else
@@ -203,36 +214,30 @@
   }
 }
 
-void ParCompactionManager::drain_marking_stacks(OopClosure* blk) {
-#ifdef ASSERT
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
-  MutableSpace* to_space = heap->young_gen()->to_space();
-  MutableSpace* old_space = heap->old_gen()->object_space();
-  MutableSpace* perm_space = heap->perm_gen()->object_space();
-#endif /* ASSERT */
-
-
+void ParCompactionManager::follow_marking_stacks() {
   do {
-
-    // Drain overflow stack first, so other threads can steal from
-    // claimed stack while we work.
-    while(!overflow_stack()->is_empty()) {
-      oop obj = overflow_stack()->pop();
+    // Drain the overflow stack first, to allow stealing from the marking stack.
+    oop obj;
+    while (!overflow_stack()->is_empty()) {
+      overflow_stack()->pop()->follow_contents(this);
+    }
+    while (marking_stack()->pop_local(obj)) {
       obj->follow_contents(this);
     }
 
-    oop obj;
-    // obj is a reference!!!
-    while (marking_stack()->pop_local(obj)) {
-      // It would be nice to assert about the type of objects we might
-      // pop, but they can come from anywhere, unfortunately.
-      obj->follow_contents(this);
+    // Process ObjArrays one at a time to avoid marking stack bloat.
+    ObjArrayTask task;
+    if (!_objarray_overflow_stack->is_empty()) {
+      task = _objarray_overflow_stack->pop();
+      objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint();
+      k->oop_follow_contents(this, task.obj(), task.index());
+    } else if (_objarray_queue.pop_local(task)) {
+      objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint();
+      k->oop_follow_contents(this, task.obj(), task.index());
     }
-  } while((marking_stack()->size() != 0) || (overflow_stack()->length() != 0));
+  } while (!marking_stacks_empty());
 
-  assert(marking_stack()->size() == 0, "Sanity");
-  assert(overflow_stack()->length() == 0, "Sanity");
+  assert(marking_stacks_empty(), "Sanity");
 }
 
 void ParCompactionManager::drain_region_overflow_stack() {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -22,18 +22,6 @@
  *
  */
 
-//
-// psPromotionManager is used by a single thread to manage object survival
-// during a scavenge. The promotion manager contains thread local data only.
-//
-// NOTE! Be carefull when allocating the stacks on cheap. If you are going
-// to use a promotion manager in more than one thread, the stacks MUST be
-// on cheap. This can lead to memory leaks, though, as they are not auto
-// deallocated.
-//
-// FIX ME FIX ME Add a destructor, and don't rely on the user to drain/flush/deallocate!
-//
-
 // Move to some global location
 #define HAS_BEEN_MOVED 0x1501d01d
 // End move to some global location
@@ -46,8 +34,6 @@
 class ParallelCompactData;
 class ParMarkBitMap;
 
-// Move to it's own file if this works out.
-
 class ParCompactionManager : public CHeapObj {
   friend class ParallelTaskTerminator;
   friend class ParMarkBitMap;
@@ -72,14 +58,27 @@
 // ------------------------  End don't putback if not needed
 
  private:
+  // 32-bit:  4K * 8 = 32KiB; 64-bit:  8K * 16 = 128KiB
+  #define OBJARRAY_QUEUE_SIZE (1 << NOT_LP64(12) LP64_ONLY(13))
+  typedef GenericTaskQueue<ObjArrayTask, OBJARRAY_QUEUE_SIZE> ObjArrayTaskQueue;
+  typedef GenericTaskQueueSet<ObjArrayTaskQueue> ObjArrayTaskQueueSet;
+  #undef OBJARRAY_QUEUE_SIZE
+
   static ParCompactionManager** _manager_array;
   static OopTaskQueueSet*       _stack_array;
+  static ObjArrayTaskQueueSet*  _objarray_queues;
   static ObjectStartArray*      _start_array;
   static RegionTaskQueueSet*    _region_array;
   static PSOldGen*              _old_gen;
 
+private:
   OopTaskQueue                  _marking_stack;
   GrowableArray<oop>*           _overflow_stack;
+
+  typedef GrowableArray<ObjArrayTask> ObjArrayOverflowStack;
+  ObjArrayTaskQueue             _objarray_queue;
+  ObjArrayOverflowStack*        _objarray_overflow_stack;
+
   // Is there a way to reuse the _marking_stack for the
   // saving empty regions?  For now just create a different
   // type of TaskQueue.
@@ -128,8 +127,8 @@
   // Pushes onto the region stack.  If the region stack is full,
   // pushes onto the region overflow stack.
   void region_stack_push(size_t region_index);
- public:
 
+public:
   Action action() { return _action; }
   void set_action(Action v) { _action = v; }
 
@@ -163,6 +162,8 @@
   // Get a oop for scanning.  If returns null, no oop were found.
   oop retrieve_for_scanning();
 
+  inline void push_objarray(oop obj, size_t index);
+
   // Save region for later processing.  Must not fail.
   void save_for_processing(size_t region_index);
   // Get a region for processing.  If returns null, no region were found.
@@ -175,12 +176,17 @@
     return stack_array()->steal(queue_num, seed, t);
   }
 
+  static bool steal_objarray(int queue_num, int* seed, ObjArrayTask& t) {
+    return _objarray_queues->steal(queue_num, seed, t);
+  }
+
   static bool steal(int queue_num, int* seed, RegionTask& t) {
     return region_array()->steal(queue_num, seed, t);
   }
 
-  // Process tasks remaining on any stack
-  void drain_marking_stacks(OopClosure *blk);
+  // Process tasks remaining on any marking stack
+  void follow_marking_stacks();
+  inline bool marking_stacks_empty() const;
 
   // Process tasks remaining on any stack
   void drain_region_stacks();
@@ -200,3 +206,8 @@
     "out of range manager_array access");
   return _manager_array[index];
 }
+
+bool ParCompactionManager::marking_stacks_empty() const {
+  return _marking_stack.size() == 0 && _overflow_stack->is_empty() &&
+    _objarray_queue.size() == 0 && _objarray_overflow_stack->is_empty();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+void ParCompactionManager::push_objarray(oop obj, size_t index)
+{
+  ObjArrayTask task(obj, index);
+  assert(task.is_valid(), "bad ObjArrayTask");
+  if (!_objarray_queue.push(task)) {
+    _objarray_overflow_stack->push(task);
+  }
+}
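push_objarray() tries the fixed-size GenericTaskQueue first and spills to the unbounded overflow stack only when the queue is full (the bounded queue can be stolen from, while the overflow stack stays thread-local). A minimal sketch of the push-with-overflow pattern, with stand-in types:

#include <cstddef>
#include <vector>

template <typename T, size_t N>
struct BoundedQueue {                  // stand-in for GenericTaskQueue<T, N>
  T buf[N];
  size_t count;
  BoundedQueue() : count(0) {}
  bool push(const T& t) {
    if (count == N) return false;      // full: caller must overflow
    buf[count++] = t;
    return true;
  }
};

struct ObjArrayTask { void* obj; size_t index; };

static BoundedQueue<ObjArrayTask, 4> queue;
static std::vector<ObjArrayTask> overflow;   // stand-in for the overflow stack

static void push_objarray(void* obj, size_t index) {
  ObjArrayTask task = { obj, index };
  if (!queue.push(task)) {
    overflow.push_back(task);          // bounded queue full: spill
  }
}

int main() {
  for (size_t i = 0; i < 6; i++) push_objarray(0, i);
  // Four tasks land in the bounded queue, two spill to the overflow stack.
  return 0;
}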
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -479,6 +479,7 @@
   _preserved_oop_stack = NULL;
 
   _marking_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+  _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
 
   int size = SystemDictionary::number_of_classes() * 2;
   _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
@@ -497,6 +498,7 @@
   }
 
   delete _marking_stack;
+  delete _objarray_stack;
   delete _revisit_klass_stack;
   delete _revisit_mdo_stack;
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -785,7 +785,7 @@
 void PSParallelCompact::AdjustPointerClosure::do_oop(oop* p)       { adjust_pointer(p, _is_root); }
 void PSParallelCompact::AdjustPointerClosure::do_oop(narrowOop* p) { adjust_pointer(p, _is_root); }
 
-void PSParallelCompact::FollowStackClosure::do_void() { follow_stack(_compaction_manager); }
+void PSParallelCompact::FollowStackClosure::do_void() { _compaction_manager->follow_marking_stacks(); }
 
 void PSParallelCompact::MarkAndPushClosure::do_oop(oop* p)       { mark_and_push(_compaction_manager, p); }
 void PSParallelCompact::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(_compaction_manager, p); }
@@ -2376,7 +2376,7 @@
   // Follow code cache roots.
   CodeCache::do_unloading(is_alive_closure(), &mark_and_push_closure,
                           purged_class);
-  follow_stack(cm); // Flush marking stack.
+  cm->follow_marking_stacks(); // Flush marking stack.
 
   // Update subklass/sibling/implementor links of live klasses
   // revisit_klass_stack is used in follow_weak_klass_links().
@@ -2389,8 +2389,7 @@
   SymbolTable::unlink(is_alive_closure());
   StringTable::unlink(is_alive_closure());
 
-  assert(cm->marking_stack()->size() == 0, "stack should be empty by now");
-  assert(cm->overflow_stack()->is_empty(), "stack should be empty by now");
+  assert(cm->marking_stacks_empty(), "marking stacks should be empty");
 }
 
 // This should be moved to the shared markSweep code!
@@ -2709,22 +2708,6 @@
   young_gen->move_and_update(cm);
 }
 
-
-void PSParallelCompact::follow_stack(ParCompactionManager* cm) {
-  while(!cm->overflow_stack()->is_empty()) {
-    oop obj = cm->overflow_stack()->pop();
-    obj->follow_contents(cm);
-  }
-
-  oop obj;
-  // obj is a reference!!!
-  while (cm->marking_stack()->pop_local(obj)) {
-    // It would be nice to assert about the type of objects we might
-    // pop, but they can come from anywhere, unfortunately.
-    obj->follow_contents(cm);
-  }
-}
-
 void
 PSParallelCompact::follow_weak_klass_links() {
   // All klasses on the revisit stack are marked at this point.
@@ -2745,7 +2728,7 @@
         &keep_alive_closure);
     }
     // revisit_klass_stack is cleared in reset()
-    follow_stack(cm);
+    cm->follow_marking_stacks();
   }
 }
 
@@ -2776,7 +2759,7 @@
       rms->at(j)->follow_weak_refs(is_alive_closure());
     }
     // revisit_mdo_stack is cleared in reset()
-    follow_stack(cm);
+    cm->follow_marking_stacks();
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -901,7 +901,6 @@
   // Mark live objects
   static void marking_phase(ParCompactionManager* cm,
                             bool maximum_heap_compaction);
-  static void follow_stack(ParCompactionManager* cm);
   static void follow_weak_klass_links();
   static void follow_mdo_weak_refs();
 
@@ -1276,7 +1275,7 @@
       }
     }
   }
-  follow_stack(cm);
+  cm->follow_marking_stacks();
 }
 
 template <class T>
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -25,8 +25,9 @@
 #include "incls/_precompiled.incl"
 #include "incls/_markSweep.cpp.incl"
 
-GrowableArray<oop>*     MarkSweep::_marking_stack       = NULL;
-GrowableArray<Klass*>*  MarkSweep::_revisit_klass_stack = NULL;
+GrowableArray<oop>*          MarkSweep::_marking_stack = NULL;
+GrowableArray<ObjArrayTask>* MarkSweep::_objarray_stack = NULL;
+GrowableArray<Klass*>*       MarkSweep::_revisit_klass_stack = NULL;
 GrowableArray<DataLayout*>*  MarkSweep::_revisit_mdo_stack = NULL;
 
 GrowableArray<oop>*     MarkSweep::_preserved_oop_stack = NULL;
@@ -104,11 +105,19 @@
 void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); }
 
 void MarkSweep::follow_stack() {
-  while (!_marking_stack->is_empty()) {
-    oop obj = _marking_stack->pop();
-    assert (obj->is_gc_marked(), "p must be marked");
-    obj->follow_contents();
-  }
+  do {
+    while (!_marking_stack->is_empty()) {
+      oop obj = _marking_stack->pop();
+      assert (obj->is_gc_marked(), "p must be marked");
+      obj->follow_contents();
+    }
+    // Process ObjArrays one at a time to avoid marking stack bloat.
+    if (!_objarray_stack->is_empty()) {
+      ObjArrayTask task = _objarray_stack->pop();
+      objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint();
+      k->oop_follow_contents(task.obj(), task.index());
+    }
+  } while (!_marking_stack->is_empty() || !_objarray_stack->is_empty());
 }
 
 MarkSweep::FollowStackClosure MarkSweep::follow_stack_closure;
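An ObjArrayTask carries an (object, start index) pair, so a large object array is scanned one slice per task, pushing a continuation task for the remainder; that bounded stride is what keeps the marking stack from bloating. A hedged sketch of the chunking idea with an invented chunk size and element type (the real stride and oop iteration live in objArrayKlass.inline.hpp):

#include <cstddef>
#include <cstdio>
#include <vector>

static const size_t CHUNK = 3;   // invented stride, for illustration only

struct ArrayTask { const std::vector<int>* array; size_t index; };

static std::vector<ArrayTask> objarray_stack;

static void mark(int elem) { printf("mark %d\n", elem); }

// Scan one bounded slice; push a continuation task for the rest.
static void follow_array_chunk(const ArrayTask& task) {
  const std::vector<int>& a = *task.array;
  size_t end = task.index + CHUNK < a.size() ? task.index + CHUNK : a.size();
  for (size_t i = task.index; i < end; i++) mark(a[i]);
  if (end < a.size()) {
    ArrayTask cont = { task.array, end };
    objarray_stack.push_back(cont);
  }
}

int main() {
  std::vector<int> big(8, 42);
  ArrayTask first = { &big, 0 };
  objarray_stack.push_back(first);
  while (!objarray_stack.empty()) {
    ArrayTask t = objarray_stack.back();
    objarray_stack.pop_back();
    follow_array_chunk(t);
  }
  return 0;
}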
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -110,8 +110,9 @@
   // Vars
   //
  protected:
-  // Traversal stack used during phase1
+  // Traversal stacks used during phase1
   static GrowableArray<oop>*             _marking_stack;
+  static GrowableArray<ObjArrayTask>*    _objarray_stack;
   // Stack for live klasses to revisit at end of marking phase
   static GrowableArray<Klass*>*          _revisit_klass_stack;
   // Set (stack) of MDO's to revisit at end of marking phase
@@ -188,6 +189,7 @@
   template <class T> static inline void mark_and_follow(T* p);
   // Check mark and maybe push on marking stack
   template <class T> static inline void mark_and_push(T* p);
+  static inline void push_objarray(oop obj, size_t index);
 
   static void follow_stack();   // Empty marking stack.
 
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -77,6 +77,12 @@
   }
 }
 
+void MarkSweep::push_objarray(oop obj, size_t index) {
+  ObjArrayTask task(obj, index);
+  assert(task.is_valid(), "bad ObjArrayTask");
+  _objarray_stack->push(task);
+}
+
 template <class T> inline void MarkSweep::adjust_pointer(T* p, bool isroot) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
--- a/hotspot/src/share/vm/includeDB_core	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_core	Wed Jul 05 17:09:16 2017 +0200
@@ -2026,6 +2026,7 @@
 
 instanceKlass.cpp                       collectedHeap.inline.hpp
 instanceKlass.cpp                       compileBroker.hpp
+instanceKlass.cpp                       dtrace.hpp
 instanceKlass.cpp                       fieldDescriptor.hpp
 instanceKlass.cpp                       genOopClosures.inline.hpp
 instanceKlass.cpp                       handles.inline.hpp
@@ -2485,6 +2486,7 @@
 jvm.cpp                                 collectedHeap.inline.hpp
 jvm.cpp                                 copy.hpp
 jvm.cpp                                 defaultStream.hpp
+jvm.cpp                                 dtrace.hpp
 jvm.cpp                                 dtraceJSDT.hpp
 jvm.cpp                                 events.hpp
 jvm.cpp                                 handles.inline.hpp
@@ -2726,8 +2728,10 @@
 
 markSweep.cpp                           compileBroker.hpp
 markSweep.cpp                           methodDataOop.hpp
+markSweep.cpp				objArrayKlass.inline.hpp
 
 markSweep.hpp                           collectedHeap.hpp
+markSweep.hpp				taskqueue.hpp
 
 memRegion.cpp                           globals.hpp
 memRegion.cpp                           memRegion.hpp
@@ -3057,8 +3061,10 @@
 objArrayKlass.cpp                       genOopClosures.inline.hpp
 objArrayKlass.cpp                       handles.inline.hpp
 objArrayKlass.cpp                       instanceKlass.hpp
+objArrayKlass.cpp                       markSweep.inline.hpp
 objArrayKlass.cpp                       mutexLocker.hpp
 objArrayKlass.cpp                       objArrayKlass.hpp
+objArrayKlass.cpp                       objArrayKlass.inline.hpp
 objArrayKlass.cpp                       objArrayKlassKlass.hpp
 objArrayKlass.cpp                       objArrayOop.hpp
 objArrayKlass.cpp                       oop.inline.hpp
@@ -3069,11 +3075,12 @@
 objArrayKlass.cpp                       universe.inline.hpp
 objArrayKlass.cpp                       vmSymbols.hpp
 
-
 objArrayKlass.hpp                       arrayKlass.hpp
 objArrayKlass.hpp                       instanceKlass.hpp
 objArrayKlass.hpp                       specialized_oop_closures.hpp
 
+objArrayKlass.inline.hpp		objArrayKlass.hpp
+
 objArrayKlassKlass.cpp                  collectedHeap.inline.hpp
 objArrayKlassKlass.cpp                  instanceKlass.hpp
 objArrayKlassKlass.cpp                  javaClasses.hpp
@@ -4099,6 +4106,7 @@
 task.hpp                                top.hpp
 
 taskqueue.cpp                           debug.hpp
+taskqueue.cpp				oop.inline.hpp
 taskqueue.cpp                           os.hpp
 taskqueue.cpp                           taskqueue.hpp
 taskqueue.cpp                           thread_<os_family>.inline.hpp
@@ -4452,6 +4460,7 @@
 
 unsafe.cpp                              allocation.inline.hpp
 unsafe.cpp                              copy.hpp
+unsafe.cpp                              dtrace.hpp
 unsafe.cpp                              globals.hpp
 unsafe.cpp                              interfaceSupport.hpp
 unsafe.cpp                              jni.h
@@ -4623,6 +4632,7 @@
 
 vmThread.cpp                            collectedHeap.hpp
 vmThread.cpp                            compileBroker.hpp
+vmThread.cpp                            dtrace.hpp
 vmThread.cpp                            events.hpp
 vmThread.cpp                            interfaceSupport.hpp
 vmThread.cpp                            methodOop.hpp
--- a/hotspot/src/share/vm/includeDB_gc_parallel	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/includeDB_gc_parallel	Wed Jul 05 17:09:16 2017 +0200
@@ -115,10 +115,14 @@
 objArrayKlass.cpp                       g1CollectedHeap.inline.hpp
 objArrayKlass.cpp                       g1OopClosures.inline.hpp
 objArrayKlass.cpp                       oop.pcgc.inline.hpp
+objArrayKlass.cpp                       psCompactionManager.hpp
 objArrayKlass.cpp                       psPromotionManager.inline.hpp
 objArrayKlass.cpp                       psScavenge.inline.hpp
 objArrayKlass.cpp                       parOopClosures.inline.hpp
 
+objArrayKlass.inline.hpp		psCompactionManager.inline.hpp
+objArrayKlass.inline.hpp		psParallelCompact.hpp
+
 oop.pcgc.inline.hpp                     parNewGeneration.hpp
 oop.pcgc.inline.hpp                     parallelScavengeHeap.hpp
 oop.pcgc.inline.hpp                     psCompactionManager.hpp
--- a/hotspot/src/share/vm/memory/genMarkSweep.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/memory/genMarkSweep.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -159,6 +159,7 @@
   _preserved_oop_stack = NULL;
 
   _marking_stack       = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
+  _objarray_stack      = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
 
   int size = SystemDictionary::number_of_classes() * 2;
   _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
@@ -194,7 +195,6 @@
 
 
 void GenMarkSweep::deallocate_stacks() {
-
   if (!UseG1GC) {
     GenCollectedHeap* gch = GenCollectedHeap::heap();
     gch->release_scratch();
@@ -208,6 +208,7 @@
   }
 
   delete _marking_stack;
+  delete _objarray_stack;
   delete _revisit_klass_stack;
   delete _revisit_mdo_stack;
 
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/memory/genOopClosures.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -28,10 +28,10 @@
 class CardTableModRefBS;
 class DefNewGeneration;
 
-template<class E> class GenericTaskQueue;
-typedef GenericTaskQueue<oop> OopTaskQueue;
-template<class E> class GenericTaskQueueSet;
-typedef GenericTaskQueueSet<oop> OopTaskQueueSet;
+template<class E, unsigned int N> class GenericTaskQueue;
+typedef GenericTaskQueue<oop, TASKQUEUE_SIZE> OopTaskQueue;
+template<class T> class GenericTaskQueueSet;
+typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet;
 
 // Closure for iterating roots from a particular generation
 // Note: all classes deriving from this MUST call this do_barrier
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -25,6 +25,58 @@
 # include "incls/_precompiled.incl"
 # include "incls/_instanceKlass.cpp.incl"
 
+#ifdef DTRACE_ENABLED
+
+HS_DTRACE_PROBE_DECL4(hotspot, class__initialization__required,
+  char*, intptr_t, oop, intptr_t);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__recursive,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__concurrent,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__erroneous,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__super__failed,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__clinit,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__error,
+  char*, intptr_t, oop, intptr_t, int);
+HS_DTRACE_PROBE_DECL5(hotspot, class__initialization__end,
+  char*, intptr_t, oop, intptr_t, int);
+
+#define DTRACE_CLASSINIT_PROBE(type, clss, thread_type)          \
+  {                                                              \
+    char* data = NULL;                                           \
+    int len = 0;                                                 \
+    symbolOop name = (clss)->name();                             \
+    if (name != NULL) {                                          \
+      data = (char*)name->bytes();                               \
+      len = name->utf8_length();                                 \
+    }                                                            \
+    HS_DTRACE_PROBE4(hotspot, class__initialization__##type,     \
+      data, len, (clss)->class_loader(), thread_type);           \
+  }
+
+#define DTRACE_CLASSINIT_PROBE_WAIT(type, clss, thread_type, wait) \
+  {                                                              \
+    char* data = NULL;                                           \
+    int len = 0;                                                 \
+    symbolOop name = (clss)->name();                             \
+    if (name != NULL) {                                          \
+      data = (char*)name->bytes();                               \
+      len = name->utf8_length();                                 \
+    }                                                            \
+    HS_DTRACE_PROBE5(hotspot, class__initialization__##type,     \
+      data, len, (clss)->class_loader(), thread_type, wait);     \
+  }
+
+#else //  ndef DTRACE_ENABLED
+
+#define DTRACE_CLASSINIT_PROBE(type, clss, thread_type)
+#define DTRACE_CLASSINIT_PROBE_WAIT(type, clss, thread_type, wait)
+
+#endif //  ndef DTRACE_ENABLED
+
 bool instanceKlass::should_be_initialized() const {
   return !is_initialized();
 }
@@ -292,6 +344,10 @@
   // A class could already be verified, since it has been reflected upon.
   this_oop->link_class(CHECK);
 
+  DTRACE_CLASSINIT_PROBE(required, instanceKlass::cast(this_oop()), -1);
+
+  bool wait = false;
+
   // refer to the JVM book page 47 for description of steps
   // Step 1
   { ObjectLocker ol(this_oop, THREAD);
@@ -303,19 +359,25 @@
     // we might end up throwing IE from link/symbol resolution sites
     // that aren't expected to throw.  This would wreak havoc.  See 6320309.
     while(this_oop->is_being_initialized() && !this_oop->is_reentrant_initialization(self)) {
+        wait = true;
       ol.waitUninterruptibly(CHECK);
     }
 
     // Step 3
-    if (this_oop->is_being_initialized() && this_oop->is_reentrant_initialization(self))
+    if (this_oop->is_being_initialized() && this_oop->is_reentrant_initialization(self)) {
+      DTRACE_CLASSINIT_PROBE_WAIT(recursive, instanceKlass::cast(this_oop()), -1, wait);
       return;
+    }
 
     // Step 4
-    if (this_oop->is_initialized())
+    if (this_oop->is_initialized()) {
+      DTRACE_CLASSINIT_PROBE_WAIT(concurrent, instanceKlass::cast(this_oop()), -1, wait);
       return;
+    }
 
     // Step 5
     if (this_oop->is_in_error_state()) {
+      DTRACE_CLASSINIT_PROBE_WAIT(erroneous, instanceKlass::cast(this_oop()), -1, wait);
       ResourceMark rm(THREAD);
       const char* desc = "Could not initialize class ";
       const char* className = this_oop->external_name();
@@ -348,6 +410,7 @@
         this_oop->set_initialization_state_and_notify(initialization_error, THREAD); // Locks object, set state, and notify all waiting threads
         CLEAR_PENDING_EXCEPTION;   // ignore any exception thrown, superclass initialization error is thrown below
       }
+      DTRACE_CLASSINIT_PROBE_WAIT(super__failed, instanceKlass::cast(this_oop()), -1, wait);
       THROW_OOP(e());
     }
   }
@@ -356,6 +419,7 @@
   {
     assert(THREAD->is_Java_thread(), "non-JavaThread in initialize_impl");
     JavaThread* jt = (JavaThread*)THREAD;
+    DTRACE_CLASSINIT_PROBE_WAIT(clinit, instanceKlass::cast(this_oop()), -1, wait);
     // Timer includes any side effects of class initialization (resolution,
     // etc), but not recursive entry into call_class_initializer().
     PerfClassTraceTime timer(ClassLoader::perf_class_init_time(),
@@ -383,6 +447,7 @@
       this_oop->set_initialization_state_and_notify(initialization_error, THREAD);
       CLEAR_PENDING_EXCEPTION;   // ignore any exception thrown, class initialization error is thrown below
     }
+    DTRACE_CLASSINIT_PROBE_WAIT(error, instanceKlass::cast(this_oop()), -1, wait);
     if (e->is_a(SystemDictionary::Error_klass())) {
       THROW_OOP(e());
     } else {
@@ -392,6 +457,7 @@
                 &args);
     }
   }
+  DTRACE_CLASSINIT_PROBE_WAIT(end, instanceKlass::cast(this_oop()), -1, wait);
 }
 
 
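The new probes are driven entirely by the macros declared at the top of the hunk. Mechanically expanding one call site shows what the preprocessor produces when DTRACE_ENABLED is defined (a sketch; k stands for instanceKlass::cast(this_oop())):

    {
      char* data = NULL;
      int len = 0;
      symbolOop name = (k)->name();
      if (name != NULL) {
        data = (char*)name->bytes();
        len = name->utf8_length();
      }
      HS_DTRACE_PROBE5(hotspot, class__initialization__recursive,
        data, len, (k)->class_loader(), -1, wait);
    }

Since DTrace maps double underscores in probe names to hyphens, these should surface to scripts as hotspot:::class-initialization-required, class-initialization-recursive, and so on.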
--- a/hotspot/src/share/vm/oops/objArrayKlass.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -314,24 +314,24 @@
 
 void objArrayKlass::oop_follow_contents(oop obj) {
   assert (obj->is_array(), "obj must be array");
-  objArrayOop a = objArrayOop(obj);
-  a->follow_header();
-  ObjArrayKlass_OOP_ITERATE( \
-    a, p, \
-    /* we call mark_and_follow here to avoid excessive marking stack usage */ \
-    MarkSweep::mark_and_follow(p))
+  objArrayOop(obj)->follow_header();
+  if (UseCompressedOops) {
+    objarray_follow_contents<narrowOop>(obj, 0);
+  } else {
+    objarray_follow_contents<oop>(obj, 0);
+  }
 }
 
 #ifndef SERIALGC
 void objArrayKlass::oop_follow_contents(ParCompactionManager* cm,
                                         oop obj) {
-  assert (obj->is_array(), "obj must be array");
-  objArrayOop a = objArrayOop(obj);
-  a->follow_header(cm);
-  ObjArrayKlass_OOP_ITERATE( \
-    a, p, \
-    /* we call mark_and_follow here to avoid excessive marking stack usage */ \
-    PSParallelCompact::mark_and_follow(cm, p))
+  assert(obj->is_array(), "obj must be array");
+  objArrayOop(obj)->follow_header(cm);
+  if (UseCompressedOops) {
+    objarray_follow_contents<narrowOop>(cm, obj, 0);
+  } else {
+    objarray_follow_contents<oop>(cm, obj, 0);
+  }
 }
 #endif // SERIALGC
 
--- a/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -91,10 +91,18 @@
 
   // Garbage collection
   void oop_follow_contents(oop obj);
+  inline void oop_follow_contents(oop obj, int index);
+  template <class T> inline void objarray_follow_contents(oop obj, int index);
+
   int  oop_adjust_pointers(oop obj);
 
   // Parallel Scavenge and Parallel Old
   PARALLEL_GC_DECLS
+#ifndef SERIALGC
+  inline void oop_follow_contents(ParCompactionManager* cm, oop obj, int index);
+  template <class T> inline void
+    objarray_follow_contents(ParCompactionManager* cm, oop obj, int index);
+#endif // !SERIALGC
 
   // Iterators
   int oop_oop_iterate(oop obj, OopClosure* blk) {
@@ -131,5 +139,4 @@
   void oop_verify_on(oop obj, outputStream* st);
   void oop_verify_old_oop(oop obj, oop* p, bool allow_dirty);
   void oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty);
-
 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+void objArrayKlass::oop_follow_contents(oop obj, int index) {
+  if (UseCompressedOops) {
+    objarray_follow_contents<narrowOop>(obj, index);
+  } else {
+    objarray_follow_contents<oop>(obj, index);
+  }
+}
+
+template <class T>
+void objArrayKlass::objarray_follow_contents(oop obj, int index) {
+  objArrayOop a = objArrayOop(obj);
+  const size_t len = size_t(a->length());
+  const size_t beg_index = size_t(index);
+  assert(beg_index < len || len == 0, "index too large");
+
+  const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride);
+  const size_t end_index = beg_index + stride;
+  T* const base = (T*)a->base();
+  T* const beg = base + beg_index;
+  T* const end = base + end_index;
+
+  // Push the non-NULL elements of the next stride on the marking stack.
+  for (T* e = beg; e < end; e++) {
+    MarkSweep::mark_and_push<T>(e);
+  }
+
+  if (end_index < len) {
+    MarkSweep::push_objarray(a, end_index); // Push the continuation.
+  }
+}
+
+#ifndef SERIALGC
+void objArrayKlass::oop_follow_contents(ParCompactionManager* cm, oop obj,
+                                        int index) {
+  if (UseCompressedOops) {
+    objarray_follow_contents<narrowOop>(cm, obj, index);
+  } else {
+    objarray_follow_contents<oop>(cm, obj, index);
+  }
+}
+
+template <class T>
+void objArrayKlass::objarray_follow_contents(ParCompactionManager* cm, oop obj,
+                                             int index) {
+  objArrayOop a = objArrayOop(obj);
+  const size_t len = size_t(a->length());
+  const size_t beg_index = size_t(index);
+  assert(beg_index < len || len == 0, "index too large");
+
+  const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride);
+  const size_t end_index = beg_index + stride;
+  T* const base = (T*)a->base();
+  T* const beg = base + beg_index;
+  T* const end = base + end_index;
+
+  // Push the non-NULL elements of the next stride on the marking stack.
+  for (T* e = beg; e < end; e++) {
+    PSParallelCompact::mark_and_push<T>(cm, e);
+  }
+
+  if (end_index < len) {
+    cm->push_objarray(a, end_index); // Push the continuation.
+  }
+}
+#endif // #ifndef SERIALGC
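The arithmetic in objarray_follow_contents is easiest to see with concrete numbers. A standalone sketch (std::min in place of MIN2; 1200 elements, 512-element stride) prints the chunks [0, 512), [512, 1024), [1024, 1200), with a continuation pushed after each non-final chunk:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t len = 1200;
      const std::size_t stride_max = 512;   // stands in for ObjArrayMarkingStride
      std::size_t beg_index = 0;
      while (beg_index < len) {
        const std::size_t stride = std::min(len - beg_index, stride_max);
        const std::size_t end_index = beg_index + stride;
        std::printf("scan [%lu, %lu)%s\n",
                    (unsigned long)beg_index, (unsigned long)end_index,
                    end_index < len ? ", push continuation" : "");
        beg_index = end_index;  // in the VM this step happens via push_objarray/pop
      }
      return 0;
    }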
--- a/hotspot/src/share/vm/prims/jvm.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/prims/jvm.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -26,6 +26,10 @@
 #include "incls/_jvm.cpp.incl"
 #include <errno.h>
 
+HS_DTRACE_PROBE_DECL1(hotspot, thread__sleep__begin, long long);
+HS_DTRACE_PROBE_DECL1(hotspot, thread__sleep__end, int);
+HS_DTRACE_PROBE_DECL0(hotspot, thread__yield);
+
 /*
   NOTE about use of any ctor or function call that can trigger a safepoint/GC:
   such ctors and calls MUST NOT come between an oop declaration/init and its
@@ -2762,6 +2766,7 @@
 JVM_ENTRY(void, JVM_Yield(JNIEnv *env, jclass threadClass))
   JVMWrapper("JVM_Yield");
   if (os::dont_yield()) return;
+  HS_DTRACE_PROBE0(hotspot, thread__yield);
   // When ConvertYieldToSleep is off (default), this matches the classic VM use of yield.
   // Critical for similar threading behaviour
   if (ConvertYieldToSleep) {
@@ -2787,6 +2792,8 @@
   // And set new thread state to SLEEPING.
   JavaThreadSleepState jtss(thread);
 
+  HS_DTRACE_PROBE1(hotspot, thread__sleep__begin, millis);
+
   if (millis == 0) {
     // When ConvertSleepToYield is on, this matches the classic VM implementation of
     // JVM_Sleep. Critical for similar threading behaviour (Win32)
@@ -2807,6 +2814,7 @@
       // An asynchronous exception (e.g., ThreadDeathException) could have been thrown on
       // us while we were sleeping. We do not overwrite those.
       if (!HAS_PENDING_EXCEPTION) {
+        HS_DTRACE_PROBE1(hotspot, thread__sleep__end, 1);
         // TODO-FIXME: THROW_MSG returns which means we will not call set_state()
         // to properly restore the thread state.  That's likely wrong.
         THROW_MSG(vmSymbols::java_lang_InterruptedException(), "sleep interrupted");
@@ -2814,6 +2822,7 @@
     }
     thread->osthread()->set_state(old_state);
   }
+  HS_DTRACE_PROBE1(hotspot, thread__sleep__end, 0);
 JVM_END
 
 JVM_ENTRY(jobject, JVM_CurrentThread(JNIEnv* env, jclass threadClass))
--- a/hotspot/src/share/vm/prims/unsafe.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/prims/unsafe.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -29,6 +29,10 @@
 #include "incls/_precompiled.incl"
 #include "incls/_unsafe.cpp.incl"
 
+HS_DTRACE_PROBE_DECL3(hotspot, thread__park__begin, uintptr_t, int, long long);
+HS_DTRACE_PROBE_DECL1(hotspot, thread__park__end, uintptr_t);
+HS_DTRACE_PROBE_DECL1(hotspot, thread__unpark, uintptr_t);
+
 #define MAX_OBJECT_SIZE \
   ( arrayOopDesc::header_size(T_DOUBLE) * HeapWordSize \
     + ((julong)max_jint * sizeof(double)) )
@@ -1083,8 +1087,10 @@
 
 UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, jlong time))
   UnsafeWrapper("Unsafe_Park");
+  HS_DTRACE_PROBE3(hotspot, thread__park__begin, thread->parker(), (int) isAbsolute, time);
   JavaThreadParkedState jtps(thread, time != 0);
   thread->parker()->park(isAbsolute != 0, time);
+  HS_DTRACE_PROBE1(hotspot, thread__park__end, thread->parker());
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread))
@@ -1116,6 +1122,7 @@
     }
   }
   if (p != NULL) {
+    HS_DTRACE_PROBE1(hotspot, thread__unpark, p);
     p->unpark();
   }
 UNSAFE_END
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -1346,9 +1346,7 @@
   }
 
   if (FLAG_IS_DEFAULT(MarkStackSize)) {
-    // Size as a multiple of TaskQueueSuper::N which is larger
-    // for 64-bit.
-    FLAG_SET_DEFAULT(MarkStackSize, 128 * TaskQueueSuper::total_size());
+    FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE);
   }
   if (PrintGCDetails && Verbose) {
     tty->print_cr("MarkStackSize: %uk  MarkStackSizeMax: %uk",
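In concrete terms, with TASKQUEUE_SIZE defined as 1<<14 on 32-bit builds and 1<<17 on 64-bit builds (see the globalDefinitions.hpp hunk below), the new MarkStackSize default of 128 * TASKQUEUE_SIZE works out as follows (a compile-time check with illustrative constant names):

    static const unsigned TASKQUEUE_SIZE_32 = 1u << 14;   // 16K entries
    static const unsigned TASKQUEUE_SIZE_64 = 1u << 17;   // 128K entries
    static_assert(128u * TASKQUEUE_SIZE_32 == (1u << 21), "32-bit default: 2M");
    static_assert(128u * TASKQUEUE_SIZE_64 == (1u << 24), "64-bit default: 16M");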
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -1795,6 +1795,10 @@
   product(uintx, PreserveMarkStackSize, 1024,                               \
           "Size for stack used in promotion failure handling")              \
                                                                             \
+  develop(uintx, ObjArrayMarkingStride, 512,                                \
+          "Number of ObjArray elements to push onto the marking stack "     \
+          "before pushing a continuation entry")                            \
+                                                                            \
   product_pd(bool, UseTLAB, "Use thread-local object allocation")           \
                                                                             \
   product_pd(bool, ResizeTLAB,                                              \
@@ -2289,6 +2293,10 @@
           "print safepoint statistics only when safepoint takes"            \
           " more than PrintSafepointSatisticsTimeout in millis")            \
                                                                             \
+  product(bool, TraceSafepointCleanupTime, false,                           \
+          "print the breakdown of cleanup tasks performed during"           \
+          " safepoint")                                                     \
+                                                                            \
   develop(bool, InlineAccessors, true,                                      \
           "inline accessor methods (get/set)")                              \
                                                                             \
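A note on the flag macros used above: product() flags such as TraceSafepointCleanupTime can be set on any build (for example, -XX:+TraceSafepointCleanupTime), while develop() flags such as ObjArrayMarkingStride are compile-time constants in product builds, so -XX:ObjArrayMarkingStride=1024 is only accepted by debug builds.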
--- a/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/os.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -218,6 +218,9 @@
 
   static bool   guard_memory(char* addr, size_t bytes);
   static bool   unguard_memory(char* addr, size_t bytes);
+  static bool   create_stack_guard_pages(char* addr, size_t bytes);
+  static bool   remove_stack_guard_pages(char* addr, size_t bytes);
+
   static char*  map_memory(int fd, const char* file_name, size_t file_offset,
                            char *addr, size_t bytes, bool read_only = false,
                            bool allow_exec = false);
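The new os:: hooks let platforms do more than flip page protections; per the test added below (6929067), the goal is that guard pages actually be removed when a thread detaches. A hypothetical POSIX-flavored sketch of the shape such hooks could take, using mprotect(2) only for illustration (the real per-platform implementations are not part of this hunk and differ, e.g. by mapping and unmapping the guard region):

    #include <cstddef>
    #include <sys/mman.h>

    // Hypothetical illustrations only -- not the actual os_linux/os_solaris code.
    static bool demo_create_stack_guard_pages(char* addr, std::size_t bytes) {
      return mprotect(addr, bytes, PROT_NONE) == 0;               // make pages trap
    }

    static bool demo_remove_stack_guard_pages(char* addr, std::size_t bytes) {
      return mprotect(addr, bytes, PROT_READ | PROT_WRITE) == 0;  // restore access
    }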
--- a/hotspot/src/share/vm/runtime/safepoint.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/safepoint.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -457,21 +457,21 @@
 // Various cleaning tasks that should be done periodically at safepoints
 void SafepointSynchronize::do_cleanup_tasks() {
   {
-    TraceTime t1("deflating idle monitors", TraceSafepoint);
+    TraceTime t1("deflating idle monitors", TraceSafepointCleanupTime);
     ObjectSynchronizer::deflate_idle_monitors();
   }
 
   {
-    TraceTime t2("updating inline caches", TraceSafepoint);
+    TraceTime t2("updating inline caches", TraceSafepointCleanupTime);
     InlineCacheBuffer::update_inline_caches();
   }
 
   if(UseCounterDecay && CounterDecay::is_decay_needed()) {
-    TraceTime t3("decaying counter", TraceSafepoint);
+    TraceTime t3("decaying counter", TraceSafepointCleanupTime);
     CounterDecay::decay();
   }
 
-  TraceTime t4("sweeping nmethods", TraceSafepoint);
+  TraceTime t4("sweeping nmethods", TraceSafepointCleanupTime);
   NMethodSweeper::sweep();
 }
 
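The cleanup timers above all follow the same scoped-timer idiom: construction records the start time, and the destructor prints the label and the elapsed time if the guard flag is set. A self-contained analogue (names and output format are illustrative, not TraceTime's):

    #include <cstdio>
    #include <ctime>

    class ScopedTimer {
      const char* _title;
      bool _active;
      std::clock_t _start;
     public:
      ScopedTimer(const char* title, bool active)
        : _title(title), _active(active), _start(std::clock()) {}
      ~ScopedTimer() {
        if (_active) {
          double secs = double(std::clock() - _start) / CLOCKS_PER_SEC;
          std::printf("[%s, %f secs]\n", _title, secs);
        }
      }
    };

    void demo(bool trace_cleanup_time) {
      ScopedTimer t("deflating idle monitors", trace_cleanup_time);
      // ... guarded work runs here; the timing line prints when t is destroyed.
    }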
--- a/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -2137,7 +2137,7 @@
   int allocate = os::allocate_stack_guard_pages();
   // warning("Guarding at " PTR_FORMAT " for len " SIZE_FORMAT "\n", low_addr, len);
 
-  if (allocate && !os::commit_memory((char *) low_addr, len)) {
+  if (allocate && !os::create_stack_guard_pages((char *) low_addr, len)) {
     warning("Attempt to allocate stack guard pages failed.");
     return;
   }
@@ -2158,7 +2158,7 @@
   size_t len = (StackYellowPages + StackRedPages) * os::vm_page_size();
 
   if (os::allocate_stack_guard_pages()) {
-    if (os::uncommit_memory((char *) low_addr, len)) {
+    if (os::remove_stack_guard_pages((char *) low_addr, len)) {
       _stack_guard_state = stack_guard_unused;
     } else {
       warning("Attempt to deallocate stack guard pages failed.");
--- a/hotspot/src/share/vm/runtime/vmThread.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/runtime/vmThread.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -25,6 +25,10 @@
 # include "incls/_precompiled.incl"
 # include "incls/_vmThread.cpp.incl"
 
+HS_DTRACE_PROBE_DECL3(hotspot, vmops__request, char *, uintptr_t, int);
+HS_DTRACE_PROBE_DECL3(hotspot, vmops__begin, char *, uintptr_t, int);
+HS_DTRACE_PROBE_DECL3(hotspot, vmops__end, char *, uintptr_t, int);
+
 // Dummy VM operation to act as first element in our circular double-linked list
 class VM_Dummy: public VM_Operation {
   VMOp_Type type() const { return VMOp_Dummy; }
@@ -132,6 +136,10 @@
 //-----------------------------------------------------------------
 // High-level interface
 bool VMOperationQueue::add(VM_Operation *op) {
+
+  HS_DTRACE_PROBE3(hotspot, vmops__request, op->name(), strlen(op->name()),
+                   op->evaluation_mode());
+
   // Encapsulates VM queue policy. Currently, that
   // only involves putting them on the right list
   if (op->evaluate_at_safepoint()) {
@@ -325,7 +333,11 @@
 
   {
     PerfTraceTime vm_op_timer(perf_accumulated_vm_operation_time());
+    HS_DTRACE_PROBE3(hotspot, vmops__begin, op->name(), strlen(op->name()),
+                     op->evaluation_mode());
     op->evaluate();
+    HS_DTRACE_PROBE3(hotspot, vmops__end, op->name(), strlen(op->name()),
+                     op->evaluation_mode());
   }
 
   // Last access of info in _cur_vm_operation!
--- a/hotspot/src/share/vm/services/dtraceAttacher.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/services/dtraceAttacher.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -135,4 +135,9 @@
   }
 }
 
+void DTrace::set_monitor_dprobes(bool flag) {
+  // explicit setting of DTraceMonitorProbes flag
+  set_bool_flag("DTraceMonitorProbes", flag);
+}
+
 #endif /* SOLARIS */
--- a/hotspot/src/share/vm/services/dtraceAttacher.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/services/dtraceAttacher.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -41,4 +41,6 @@
   static void detach_all_clients();
   // set ExtendedDTraceProbes flag
   static void set_extended_dprobes(bool value);
+  // set DTraceMonitorProbes flag
+  static void set_monitor_dprobes(bool value);
 };
--- a/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/services/management.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -1537,7 +1537,6 @@
     global->type = JMM_VMGLOBAL_TYPE_JSTRING;
   } else {
     global->type = JMM_VMGLOBAL_TYPE_UNKNOWN;
-    assert(false, "Unsupported VMGlobal Type");
     return false;
   }
 
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -827,6 +827,8 @@
 #define       badHeapWord       (::badHeapWordVal)
 #define       badJNIHandle      ((oop)::badJNIHandleVal)
 
+// Default TaskQueue size is 16K (32-bit) or 128K (64-bit)
+#define TASKQUEUE_SIZE (NOT_LP64(1<<14) LP64_ONLY(1<<17))
 
 //----------------------------------------------------------------------------------------------------
 // Utility functions for bitfield manipulations
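NOT_LP64(x) expands to x on 32-bit builds and to nothing on 64-bit builds, and LP64_ONLY(x) is the reverse, so TASKQUEUE_SIZE resolves to a single literal either way. An equivalent plain-preprocessor form (sketch; the demo name is not in the source):

    #ifdef _LP64
    #  define TASKQUEUE_SIZE_DEMO (1 << 17)   // 131072 entries on 64-bit
    #else
    #  define TASKQUEUE_SIZE_DEMO (1 << 14)   // 16384 entries on 32-bit
    #endif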
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp	Wed Jul 05 17:09:16 2017 +0200
@@ -31,10 +31,6 @@
 uint ParallelTaskTerminator::_total_peeks = 0;
 #endif
 
-bool TaskQueueSuper::peek() {
-  return _bottom != _age.top();
-}
-
 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
   const int a =      16807;
   const int m = 2147483647;
@@ -180,6 +176,13 @@
   }
 }
 
+#ifdef ASSERT
+bool ObjArrayTask::is_valid() const {
+  return _obj != NULL && _obj->is_objArray() && _index > 0 &&
+    _index < objArrayOop(_obj)->length();
+}
+#endif // ASSERT
+
 bool RegionTaskQueueWithOverflow::is_empty() {
   return (_region_queue.size() == 0) &&
          (_overflow_stack->length() == 0);
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Wed Jul 05 17:08:50 2017 +0200
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Wed Jul 05 17:09:16 2017 +0200
@@ -22,6 +22,7 @@
  *
  */
 
+template <unsigned int N>
 class TaskQueueSuper: public CHeapObj {
 protected:
   // Internal type for indexing the queue; also used for the tag.
@@ -30,10 +31,7 @@
   // The first free element after the last one pushed (mod N).
   volatile uint _bottom;
 
-  enum {
-    N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K
-    MOD_N_MASK = N - 1                   // To compute x mod N efficiently.
-  };
+  enum { MOD_N_MASK = N - 1 };
 
   class Age {
   public:
@@ -84,12 +82,12 @@
 
   // Returns a number in the range [0..N).  If the result is "N-1", it should be
   // interpreted as 0.
-  uint dirty_size(uint bot, uint top) {
+  uint dirty_size(uint bot, uint top) const {
     return (bot - top) & MOD_N_MASK;
   }
 
   // Returns the size corresponding to the given "bot" and "top".
-  uint size(uint bot, uint top) {
+  uint size(uint bot, uint top) const {
     uint sz = dirty_size(bot, top);
     // Has the queue "wrapped", so that bottom is less than top?  There's a
     // complicated special case here.  A pair of threads could perform pop_local
@@ -111,17 +109,17 @@
 public:
   TaskQueueSuper() : _bottom(0), _age() {}
 
-  // Return "true" if the TaskQueue contains any tasks.
-  bool peek();
+  // Return true if the TaskQueue contains any tasks.
+  bool peek() { return _bottom != _age.top(); }
 
   // Return an estimate of the number of elements in the queue.
   // The "careful" version admits the possibility of pop_local/pop_global
   // races.
-  uint size() {
+  uint size() const {
     return size(_bottom, _age.top());
   }
 
-  uint dirty_size() {
+  uint dirty_size() const {
     return dirty_size(_bottom, _age.top());
   }
 
@@ -132,19 +130,36 @@
 
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
-  uint max_elems() { return N - 2; }
+  uint max_elems() const { return N - 2; }
 
   // Total size of queue.
   static const uint total_size() { return N; }
 };
 
-template<class E> class GenericTaskQueue: public TaskQueueSuper {
+template<class E, unsigned int N = TASKQUEUE_SIZE>
+class GenericTaskQueue: public TaskQueueSuper<N> {
+protected:
+  typedef typename TaskQueueSuper<N>::Age Age;
+  typedef typename TaskQueueSuper<N>::idx_t idx_t;
+
+  using TaskQueueSuper<N>::_bottom;
+  using TaskQueueSuper<N>::_age;
+  using TaskQueueSuper<N>::increment_index;
+  using TaskQueueSuper<N>::decrement_index;
+  using TaskQueueSuper<N>::dirty_size;
+
+public:
+  using TaskQueueSuper<N>::max_elems;
+  using TaskQueueSuper<N>::size;
+
 private:
   // Slow paths for push, pop_local.  (pop_global has no fast path.)
   bool push_slow(E t, uint dirty_n_elems);
   bool pop_local_slow(uint localBot, Age oldAge);
 
 public:
+  typedef E element_type;
+
   // Initializes the queue to empty.
   GenericTaskQueue();
 
@@ -175,19 +190,19 @@
   volatile E* _elems;
 };
 
-template<class E>
-GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
+template<class E, unsigned int N>
+GenericTaskQueue<E, N>::GenericTaskQueue() {
   assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
 }
 
-template<class E>
-void GenericTaskQueue<E>::initialize() {
+template<class E, unsigned int N>
+void GenericTaskQueue<E, N>::initialize() {
   _elems = NEW_C_HEAP_ARRAY(E, N);
   guarantee(_elems != NULL, "Allocation failed.");
 }
 
-template<class E>
-void GenericTaskQueue<E>::oops_do(OopClosure* f) {
+template<class E, unsigned int N>
+void GenericTaskQueue<E, N>::oops_do(OopClosure* f) {
   // tty->print_cr("START OopTaskQueue::oops_do");
   uint iters = size();
   uint index = _bottom;
@@ -203,21 +218,21 @@
   // tty->print_cr("END OopTaskQueue::oops_do");
 }
 
-
-template<class E>
-bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
+template<class E, unsigned int N>
+bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) {
   if (dirty_n_elems == N - 1) {
     // Actually means 0, so do the push.
     uint localBot = _bottom;
-    _elems[localBot] = t;
+    // g++ complains if the volatile result of the assignment is unused.
+    const_cast<E&>(_elems[localBot] = t);
     OrderAccess::release_store(&_bottom, increment_index(localBot));
     return true;
   }
   return false;
 }
 
-template<class E>
-bool GenericTaskQueue<E>::
+template<class E, unsigned int N>
+bool GenericTaskQueue<E, N>::
 pop_local_slow(uint localBot, Age oldAge) {
   // This queue was observed to contain exactly one element; either this
   // thread will claim it, or a competing "pop_global".  In either case,
@@ -249,8 +264,8 @@
   return false;
 }
 
-template<class E>
-bool GenericTaskQueue<E>::pop_global(E& t) {
+template<class E, unsigned int N>
+bool GenericTaskQueue<E, N>::pop_global(E& t) {
   Age oldAge = _age.get();
   uint localBot = _bottom;
   uint n_elems = size(localBot, oldAge.top());
@@ -258,7 +273,7 @@
     return false;
   }
 
-  t = _elems[oldAge.top()];
+  const_cast<E&>(t = _elems[oldAge.top()]);
   Age newAge(oldAge);
   newAge.increment();
   Age resAge = _age.cmpxchg(newAge, oldAge);
@@ -269,8 +284,8 @@
   return resAge == oldAge;
 }
 
-template<class E>
-GenericTaskQueue<E>::~GenericTaskQueue() {
+template<class E, unsigned int N>
+GenericTaskQueue<E, N>::~GenericTaskQueue() {
   FREE_C_HEAP_ARRAY(E, _elems);
 }
 
@@ -283,16 +298,18 @@
   virtual bool peek() = 0;
 };
 
-template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper {
+template<class T>
+class GenericTaskQueueSet: public TaskQueueSetSuper {
 private:
   uint _n;
-  GenericTaskQueue<E>** _queues;
+  T** _queues;
 
 public:
+  typedef typename T::element_type E;
+
   GenericTaskQueueSet(int n) : _n(n) {
-    typedef GenericTaskQueue<E>* GenericTaskQueuePtr;
+    typedef T* GenericTaskQueuePtr;
     _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n);
-    guarantee(_queues != NULL, "Allocation failure.");
     for (int i = 0; i < n; i++) {
       _queues[i] = NULL;
     }
@@ -302,9 +319,9 @@
   bool steal_best_of_2(uint queue_num, int* seed, E& t);
   bool steal_best_of_all(uint queue_num, int* seed, E& t);
 
-  void register_queue(uint i, GenericTaskQueue<E>* q);
+  void register_queue(uint i, T* q);
 
-  GenericTaskQueue<E>* queue(uint n);
+  T* queue(uint n);
 
   // The thread with queue number "queue_num" (and whose random number seed
   // is at "seed") is trying to steal a task from some other queue.  (It
@@ -316,27 +333,27 @@
   bool peek();
 };
 
-template<class E>
-void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) {
+template<class T> void
+GenericTaskQueueSet<T>::register_queue(uint i, T* q) {
   assert(i < _n, "index out of range.");
   _queues[i] = q;
 }
 
-template<class E>
-GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) {
+template<class T> T*
+GenericTaskQueueSet<T>::queue(uint i) {
   return _queues[i];
 }
 
-template<class E>
-bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) {
+template<class T> bool
+GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) {
   for (uint i = 0; i < 2 * _n; i++)
     if (steal_best_of_2(queue_num, seed, t))
       return true;
   return false;
 }
 
-template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) {
+template<class T> bool
+GenericTaskQueueSet<T>::steal_best_of_all(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     int best_k;
     uint best_sz = 0;
@@ -359,8 +376,8 @@
   }
 }
 
-template<class E>
-bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) {
+template<class T> bool
+GenericTaskQueueSet<T>::steal_1_random(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k = queue_num;
     while (k == queue_num) k = randomParkAndMiller(seed) % _n;
@@ -375,8 +392,8 @@
   }
 }
 
-template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) {
+template<class T> bool
+GenericTaskQueueSet<T>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k1 = queue_num;
     while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
@@ -397,8 +414,8 @@
   }
 }
 
-template<class E>
-bool GenericTaskQueueSet<E>::peek() {
+template<class T>
+bool GenericTaskQueueSet<T>::peek() {
   // Try all the queues.
   for (uint j = 0; j < _n; j++) {
     if (_queues[j]->peek())
@@ -468,14 +485,16 @@
 #endif
 };
 
-template<class E> inline bool GenericTaskQueue<E>::push(E t) {
+template<class E, unsigned int N> inline bool
+GenericTaskQueue<E, N>::push(E t) {
   uint localBot = _bottom;
   assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
   idx_t top = _age.top();
   uint dirty_n_elems = dirty_size(localBot, top);
-  assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
+  assert(dirty_n_elems < N, "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
-    _elems[localBot] = t;
+    // g++ complains if the volatile result of the assignment is unused.
+    const_cast<E&>(_elems[localBot] = t);
     OrderAccess::release_store(&_bottom, increment_index(localBot));
     return true;
   } else {
@@ -483,7 +502,8 @@
   }
 }
 
-template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
+template<class E, unsigned int N> inline bool
+GenericTaskQueue<E, N>::pop_local(E& t) {
   uint localBot = _bottom;
   // This value cannot be N-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
@@ -497,7 +517,7 @@
   // This is necessary to prevent any read below from being reordered
   // before the store just above.
   OrderAccess::fence();
-  t = _elems[localBot];
+  const_cast<E&>(t = _elems[localBot]);
   // This is a second read of "age"; the "size()" above is the first.
   // If there's still at least one element in the queue, based on the
   // "_bottom" and "age" we've read, then there can be no interference with
@@ -514,17 +534,23 @@
 }
 
 typedef oop Task;
-typedef GenericTaskQueue<Task>         OopTaskQueue;
-typedef GenericTaskQueueSet<Task>      OopTaskQueueSet;
+typedef GenericTaskQueue<Task>            OopTaskQueue;
+typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet;
 
-
-#define COMPRESSED_OOP_MASK  1
+#ifdef _MSC_VER
+#pragma warning(push)
+// warning C4522: multiple assignment operators specified
+#pragma warning(disable:4522)
+#endif
 
 // This is a container class for either an oop* or a narrowOop*.
 // Both are pushed onto a task queue and the consumer will test is_narrow()
 // to determine which should be processed.
 class StarTask {
   void*  _holder;        // either union oop* or narrowOop*
+
+  enum { COMPRESSED_OOP_MASK = 1 };
+
  public:
   StarTask(narrowOop* p) {
     assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!");
@@ -540,20 +566,60 @@
     return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK);
   }
 
-  // Operators to preserve const/volatile in assignments required by gcc
-  void operator=(const volatile StarTask& t) volatile { _holder = t._holder; }
+  StarTask& operator=(const StarTask& t) {
+    _holder = t._holder;
+    return *this;
+  }
+  volatile StarTask& operator=(const volatile StarTask& t) volatile {
+    _holder = t._holder;
+    return *this;
+  }
 
   bool is_narrow() const {
     return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0);
   }
 };
 
-typedef GenericTaskQueue<StarTask>     OopStarTaskQueue;
-typedef GenericTaskQueueSet<StarTask>  OopStarTaskQueueSet;
+class ObjArrayTask {
+public:
+  ObjArrayTask(oop o = NULL, int idx = 0): _obj(o), _index(idx) { }
+  ObjArrayTask(oop o, size_t idx): _obj(o), _index(int(idx)) {
+    assert(idx <= size_t(max_jint), "too big");
+  }
+  ObjArrayTask(const ObjArrayTask& t): _obj(t._obj), _index(t._index) { }
+
+  ObjArrayTask& operator=(const ObjArrayTask& t) {
+    _obj = t._obj;
+    _index = t._index;
+    return *this;
+  }
+  volatile ObjArrayTask&
+  operator=(const volatile ObjArrayTask& t) volatile {
+    _obj = t._obj;
+    _index = t._index;
+    return *this;
+  }
+
+  inline oop obj()   const { return _obj; }
+  inline int index() const { return _index; }
+
+  DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid.
+
+private:
+  oop _obj;
+  int _index;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+typedef GenericTaskQueue<StarTask>            OopStarTaskQueue;
+typedef GenericTaskQueueSet<OopStarTaskQueue> OopStarTaskQueueSet;
 
 typedef size_t RegionTask;  // index for region
-typedef GenericTaskQueue<RegionTask>    RegionTaskQueue;
-typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet;
+typedef GenericTaskQueue<RegionTask>         RegionTaskQueue;
+typedef GenericTaskQueueSet<RegionTaskQueue> RegionTaskQueueSet;
 
 class RegionTaskQueueWithOverflow: public CHeapObj {
  protected:
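Taken together, the reworked templates are used roughly as follows inside the VM (a usage sketch based only on the declarations above, not a standalone program; worker_demo and next_index are illustrative):

    typedef GenericTaskQueue<ObjArrayTask>         ObjArrayTaskQueue;   // N defaults to TASKQUEUE_SIZE
    typedef GenericTaskQueueSet<ObjArrayTaskQueue> ObjArrayTaskQueueSet;

    void worker_demo(ObjArrayTaskQueueSet* qset, uint worker_id, int* seed) {
      ObjArrayTaskQueue* q = qset->queue(worker_id);  // registered earlier via register_queue
      ObjArrayTask task;
      // Drain the local queue, then try to steal from a sibling queue.
      while (q->pop_local(task) || qset->steal(worker_id, seed, task)) {
        // Process task.obj() starting at task.index(); a partially scanned
        // array would be re-pushed, e.g. q->push(ObjArrayTask(task.obj(), next_index)).
      }
    }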
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6929067/T.java	Wed Jul 05 17:09:16 2017 +0200
@@ -0,0 +1,12 @@
+public class T
+{
+  public static boolean foo(boolean bar)
+  {
+    return bar;
+  }
+
+  public static void printIt()
+  {
+    System.out.println("Hello");
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6929067/Test6929067.sh	Wed Jul 05 17:09:16 2017 +0200
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+##
+## @test Test6929067.sh
+## @bug 6929067
+## @summary Stack guard pages should be removed when thread is detached
+## @run shell Test6929067.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then TESTSRC=.
+fi
+
+if [ "${TESTJAVA}" = "" ]
+then
+  PARENT=`dirname \`which java\``
+  TESTJAVA=`dirname ${PARENT}`
+  echo "TESTJAVA not set, selecting " ${TESTJAVA}
+  echo "If this is incorrect, try setting the variable manually."
+fi
+
+BIT_FLAG=""
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  Linux)
+    NULL=/dev/null
+    PS=":"
+    FS="/"
+    ;;
+  SunOS | Windows_* )
+    NULL=NUL
+    PS=";"
+    FS="\\"
+    echo "Test passed; only valid for Linux"
+    exit 0;
+    ;;
+  * )
+    echo "Unrecognized system!"
+    exit 1;
+    ;;
+esac
+
+LD_LIBRARY_PATH=.:${TESTJAVA}/jre/lib/i386/client:/usr/openwin/lib:/usr/dt/lib:/usr/lib:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH
+
+THIS_DIR=`pwd`
+
+cp ${TESTSRC}${FS}invoke.c ${THIS_DIR}
+cp ${TESTSRC}${FS}T.java ${THIS_DIR}
+
+
+${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -fullversion
+
+${TESTJAVA}${FS}bin${FS}javac T.java
+
+gcc -o invoke -I${TESTJAVA}/include -I${TESTJAVA}/include/linux invoke.c ${TESTJAVA}/jre/lib/i386/client/libjvm.so
+./invoke
+exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6929067/invoke.c	Wed Jul 05 17:09:16 2017 +0200
@@ -0,0 +1,91 @@
+#include <assert.h>
+#include <jni.h>
+#include <alloca.h>
+#include <stdio.h>   /* printf is used in floobydust() below */
+
+#include <pthread.h>
+
+union env_union
+{
+  void *void_env;
+  JNIEnv *jni_env;
+};
+
+union env_union tmp;
+JNIEnv* env;
+JavaVM* jvm;
+JavaVMInitArgs vm_args;
+JavaVMOption options[1];
+jclass class_id;
+jmethodID method_id;
+jint result;
+
+long product(unsigned long n, unsigned long m) {
+    if (m == 1) {
+      return n;
+    } else {
+      int *p = alloca(sizeof (int));
+      *p = n;
+      return product (n, m-1) + *p;
+    }
+}
+
+void *
+floobydust (void *p)
+{
+  (*jvm)->AttachCurrentThread(jvm, &tmp.void_env, NULL);
+  env = tmp.jni_env;
+
+  class_id = (*env)->FindClass (env, "T");
+  assert (class_id);
+
+  method_id = (*env)->GetStaticMethodID (env, class_id, "printIt", "()V");
+  assert (method_id);
+
+  (*env)->CallStaticVoidMethod (env, class_id, method_id, NULL);
+
+  (*jvm)->DetachCurrentThread(jvm);
+
+  printf("%ld\n", product(5000,5000));
+
+  (*jvm)->AttachCurrentThread(jvm, &tmp.void_env, NULL);
+  env = tmp.jni_env;
+
+  class_id = (*env)->FindClass (env, "T");
+  assert (class_id);
+
+  method_id = (*env)->GetStaticMethodID (env, class_id, "printIt", "()V");
+  assert (method_id);
+
+  (*env)->CallStaticVoidMethod (env, class_id, method_id, NULL);
+
+  (*jvm)->DetachCurrentThread(jvm);
+
+  printf("%ld\n", product(5000,5000));
+
+  return NULL;
+}
+
+int
+main (int argc, const char** argv)
+{
+  options[0].optionString = "-Xss320k";
+
+  vm_args.version = JNI_VERSION_1_2;
+  vm_args.ignoreUnrecognized = JNI_TRUE;
+  vm_args.options = options;
+  vm_args.nOptions = 1;
+
+  result = JNI_CreateJavaVM (&jvm, &tmp.void_env, &vm_args);
+  assert (result >= 0);
+
+  env = tmp.jni_env;
+
+  floobydust (NULL);
+
+  pthread_t thr;
+  pthread_create (&thr, NULL, floobydust, NULL);
+  pthread_join (thr, NULL);
+
+  return 0;
+}
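The native half of the test appears designed to trip over leftover guard pages: product() recurses 5000 frames deep, calling alloca() in each frame, so after an attach/detach cycle it sweeps through the stack region where the JVM placed its guard pages, and -Xss320k keeps the stack small enough for the recursion to reach it. If DetachCurrentThread left those pages protected (the condition described in the @summary of bug 6929067), the recursion would fault; with the os::remove_stack_guard_pages change above, both printf calls should succeed.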
--- a/jaxp/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/jaxp/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 32c0cf01d555747918529a6ff9e06b0090c7a474 jdk7-b84
 6c0ccabb430dacdcd4479f8b197980d5da4eeb66 jdk7-b85
 81c0f115bbe5d3bcf59864465b5eca5538567c79 jdk7-b86
+8b493f1aa136d86de0885fcba15262c4fa2b1412 jdk7-b87
--- a/jaxws/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/jaxws/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 8bc02839eee4ef02cd1b50e87638874368a26535 jdk7-b84
 8424512588ff95362c1f1e5f11c6efd4e7f7db6e jdk7-b85
 512b0e924a5ae0c0b7ad326182cae0dc0e4d1aa8 jdk7-b86
+3febd6fab2ac8ffddbaf7bed00d11290262af153 jdk7-b87
--- a/jdk/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/jdk/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 7cb9388bb1a16365fa5118c5efa38b1cd58be40d jdk7-b84
 b396584a3e64988839cca21ea1f7fbdcc9248783 jdk7-b85
 eae6e9ab26064d9ba0e7665dd646a1fd2506fcc1 jdk7-b86
+2cafbbe9825e911a6ca6c17d9a18eb1f0bf0873c jdk7-b87
--- a/langtools/.hgtags	Wed Jul 05 17:08:50 2017 +0200
+++ b/langtools/.hgtags	Wed Jul 05 17:09:16 2017 +0200
@@ -61,3 +61,4 @@
 d9cd5b8286e44f3baf90da290cd295433e21c05a jdk7-b84
 136bfc67946219fb02ee223984540a4a9c5b209f jdk7-b85
 ef07347428f2198ae6b8144ac0b9086bbe39fd16 jdk7-b86
+409db93d19c002333980df5b797c6b965150c7a0 jdk7-b87