Merge
authorcl
Thu, 09 Sep 2010 14:27:59 -0700
changeset 6282 59327a28f6c0
parent 6281 58e262648b99 (current diff)
parent 6280 0415ecf319cf (diff)
child 6283 1730d0ec1d4b
Merge
hotspot/.hgtags
hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp
--- a/hotspot/.hgtags	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/.hgtags	Thu Sep 09 14:27:59 2010 -0700
@@ -115,4 +115,6 @@
 cc3fdfeb54b049f18edcf3463e6ab051d0b7b609 hs19-b05
 688a538aa65412178286ae2a6b0c00b6711e121b hs19-b06
 bf496cbe9b74dda5975a1559da7ecfdd313e509e jdk7-b107
+0000000000000000000000000000000000000000 hs19-b06
+6c43216df13513a0f96532aa06f213066c49e27b hs19-b06
 e44a93947ccbfce712b51725f313163606f15486 jdk7-b108
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -112,6 +112,11 @@
     }
   }
 
+#ifdef COMPILER2
+  // Currently not supported anywhere.
+  FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+#endif
+
   char buf[512];
   jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v8() ? ", has_v8" : ""),
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -482,6 +482,15 @@
     }
   }
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+#endif
+
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
@@ -520,6 +529,11 @@
     if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
       AllocatePrefetchDistance = 192;
       AllocatePrefetchLines = 4;
+#ifdef COMPILER2
+      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
     }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Thu Sep 09 14:27:59 2010 -0700
@@ -852,6 +852,39 @@
   }
 }
 
+static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                            int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit
+  if (cbuf) {
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x6E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+#ifndef PRODUCT
+  } else if (!do_size) {
+    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;
+}
+
+
+static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                                 int src_hi, int dst_hi, int size, outputStream* st ) {
+  // 32-bit
+  if (cbuf) {
+    emit_opcode(*cbuf, 0x66);
+    emit_opcode(*cbuf, 0x0F);
+    emit_opcode(*cbuf, 0x7E);
+    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+#ifndef PRODUCT
+  } else if (!do_size) {
+    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+  }
+  return 4;
+}
+
 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
   if( cbuf ) {
     emit_opcode(*cbuf, 0x8B );
@@ -947,6 +980,12 @@
   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 
+  // Check for integer reg-xmm reg copy
+  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit integer-float reg moves" );
+    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
   // --------------------------------------
   // Check for float reg-reg copy
   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
@@ -1018,6 +1057,13 @@
     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
   }
 
+  // Check for xmm reg-integer reg copy
+  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
+    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+            "no 64 bit float-integer reg moves" );
+    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+  }
+
   // Check for xmm store
   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Thu Sep 09 14:27:59 2010 -0700
@@ -1607,8 +1607,8 @@
           emit_opcode(*cbuf, 0x0F);
           emit_opcode(*cbuf, 0x7E);
           emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+                  Matcher::_regEncode[src_first] & 7,
+                  Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1637,8 +1637,8 @@
           emit_opcode(*cbuf, 0x0F);
           emit_opcode(*cbuf, 0x7E);
           emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+                  Matcher::_regEncode[src_first] & 7,
+                  Matcher::_regEncode[dst_first] & 7);
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
--- a/hotspot/src/cpu/zero/vm/bytecodeInterpreter_zero.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/zero/vm/bytecodeInterpreter_zero.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007 Red Hat, Inc.
+ * Copyright 2007, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -268,7 +268,7 @@
   return op1 - op2;
 }
 
-inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
+inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
   return ((juint) op1) >> (op2 & 0x1F);
 }
 
--- a/hotspot/src/cpu/zero/vm/javaFrameAnchor_zero.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/cpu/zero/vm/javaFrameAnchor_zero.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -82,6 +82,10 @@
     return _last_Java_fp;
   }
 
+  address last_Java_pc() const {
+    return _last_Java_pc;
+  }
+
   static ByteSize last_Java_fp_offset() {
     return byte_offset_of(JavaFrameAnchor, _last_Java_fp);
   }
--- a/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,8 +36,8 @@
 }
 
 inline void OrderAccess::release() {
-  jint* dummy = (jint*)&dummy;
-  __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+  jint* local_dummy = (jint*)&local_dummy;
+  __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
 }
 
 inline void OrderAccess::fence() {
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,16 +30,18 @@
 inline void OrderAccess::storeload()  { fence(); }
 
 inline void OrderAccess::acquire() {
-  volatile intptr_t dummy;
+  volatile intptr_t local_dummy;
 #ifdef AMD64
-  __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+  __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
 #else
-  __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+  __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
 #endif // AMD64
 }
 
 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }
 
 inline void OrderAccess::fence() {
--- a/hotspot/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/linux_zero/vm/os_linux_zero.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -435,22 +435,22 @@
   void _Copy_arrayof_conjoint_bytes(HeapWord* from,
                                     HeapWord* to,
                                     size_t    count) {
-    ShouldNotCallThis();
+    memmove(to, from, count);
   }
   void _Copy_arrayof_conjoint_jshorts(HeapWord* from,
                                       HeapWord* to,
                                       size_t    count) {
-    ShouldNotCallThis();
+    memmove(to, from, count * 2);
   }
   void _Copy_arrayof_conjoint_jints(HeapWord* from,
                                     HeapWord* to,
                                     size_t    count) {
-    ShouldNotCallThis();
+    memmove(to, from, count * 4);
   }
   void _Copy_arrayof_conjoint_jlongs(HeapWord* from,
                                      HeapWord* to,
                                      size_t    count) {
-    ShouldNotCallThis();
+    memmove(to, from, count * 8);
   }
 };
 
--- a/hotspot/src/os_cpu/linux_zero/vm/thread_linux_zero.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/linux_zero/vm/thread_linux_zero.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2007, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2009 Red Hat, Inc.
+ * Copyright 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,9 @@
  *
  */
 
-// This file is intentionally empty
+#include "incls/_precompiled.incl"
+#include "incls/_thread_linux_zero.cpp.incl"
 
-void JavaThread::cache_global_variables() { }
+void JavaThread::cache_global_variables() {
+  // nothing to do
+}
--- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,8 +42,8 @@
 }
 
 inline void OrderAccess::release() {
-  jint* dummy = (jint*)&dummy;
-  __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+  jint* local_dummy = (jint*)&local_dummy;
+  __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
 }
 
 inline void OrderAccess::fence() {
@@ -57,7 +57,9 @@
 }
 
 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }
 
 inline void OrderAccess::fence() {
--- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,9 @@
 }
 
 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }
 
 inline void OrderAccess::fence() {
@@ -53,11 +55,11 @@
 
 extern "C" {
   inline void _OrderAccess_acquire() {
-    volatile intptr_t dummy;
+    volatile intptr_t local_dummy;
 #ifdef AMD64
-    __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+    __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
 #else
-    __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+    __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
 #endif // AMD64
   }
   inline void _OrderAccess_fence() {
--- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,7 @@
 
 inline void OrderAccess::release() {
   // A volatile store has release semantics.
-  dummy = 0;
+  volatile jint local_dummy = 0;
 }
 
 inline void OrderAccess::fence() {
--- a/hotspot/src/share/vm/code/nmethod.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -433,6 +433,10 @@
   _unload_reported            = false;           // jvmti state
 
   NOT_PRODUCT(_has_debug_info = false);
+#ifdef ASSERT
+  _oops_are_stale             = false;
+#endif
+
   _oops_do_mark_link       = NULL;
   _jmethod_id              = NULL;
   _osr_link                = NULL;
@@ -1230,11 +1234,10 @@
 bool nmethod::make_not_entrant_or_zombie(unsigned int state) {
   assert(state == zombie || state == not_entrant, "must be zombie or not_entrant");
 
-  bool was_alive = false;
-
   // Make sure neither the nmethod nor the method is flushed in case of a safepoint in code below.
   nmethodLocker nml(this);
   methodHandle the_method(method());
+  No_Safepoint_Verifier nsv;
 
   {
     // If the method is already zombie there is nothing to do
@@ -1303,13 +1306,27 @@
   // state will be flushed later when the transition to zombie
   // happens or they get unloaded.
   if (state == zombie) {
-    // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event
-    // and it hasn't already been reported for this nmethod then report it now.
-    // (the event may have been reported earilier if the GC marked it for unloading).
-    post_compiled_method_unload();
+    {
+      // Flushing dependecies must be done before any possible
+      // safepoint can sneak in, otherwise the oops used by the
+      // dependency logic could have become stale.
+      MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+      flush_dependencies(NULL);
+    }
 
-    MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    flush_dependencies(NULL);
+    {
+      // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event
+      // and it hasn't already been reported for this nmethod then report it now.
+      // (the event may have been reported earilier if the GC marked it for unloading).
+      Pause_No_Safepoint_Verifier pnsv(&nsv);
+      post_compiled_method_unload();
+    }
+
+#ifdef ASSERT
+    // It's no longer safe to access the oops section since zombie
+    // nmethods aren't scanned for GC.
+    _oops_are_stale = true;
+#endif
   } else {
     assert(state == not_entrant, "other cases may need to be handled differently");
   }
--- a/hotspot/src/share/vm/code/nmethod.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -177,6 +177,10 @@
   // Protected by Patching_lock
   unsigned char _state;                      // {alive, not_entrant, zombie, unloaded)
 
+#ifdef ASSERT
+  bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
+#endif
+
   enum { alive        = 0,
          not_entrant  = 1, // uncommon trap has happened but activations may still exist
          zombie       = 2,
@@ -434,6 +438,7 @@
   oop*  oop_addr_at(int index) const {  // for GC
     // relocation indexes are biased by 1 (because 0 is reserved)
     assert(index > 0 && index <= oops_size(), "must be a valid non-zero index");
+    assert(!_oops_are_stale, "oops are stale");
     return &oops_begin()[index - 1];
   }
 
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1652,12 +1652,10 @@
 void CompileBroker::handle_full_code_cache() {
   UseInterpreter = true;
   if (UseCompiler || AlwaysCompileLoopMethods ) {
-    CompilerThread* thread = CompilerThread::current();
-    CompileLog* log = thread->log();
-    if (log != NULL) {
-      log->begin_elem("code_cache_full");
-      log->stamp();
-      log->end_elem();
+    if (xtty != NULL) {
+      xtty->begin_elem("code_cache_full");
+      xtty->stamp();
+      xtty->end_elem();
     }
     warning("CodeCache is full. Compiler has been disabled.");
     warning("Try increasing the code cache size using -XX:ReservedCodeCacheSize=");
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -402,6 +402,29 @@
   return res;
 }
 
+void LinearAllocBlock::print_on(outputStream* st) const {
+  st->print_cr(" LinearAllocBlock: ptr = " PTR_FORMAT ", word_size = " SIZE_FORMAT
+            ", refillsize = " SIZE_FORMAT ", allocation_size_limit = " SIZE_FORMAT,
+            _ptr, _word_size, _refillSize, _allocation_size_limit);
+}
+
+void CompactibleFreeListSpace::print_on(outputStream* st) const {
+  st->print_cr("COMPACTIBLE FREELIST SPACE");
+  st->print_cr(" Space:");
+  Space::print_on(st);
+
+  st->print_cr("promoInfo:");
+  _promoInfo.print_on(st);
+
+  st->print_cr("_smallLinearAllocBlock");
+  _smallLinearAllocBlock.print_on(st);
+
+  // dump_memory_block(_smallLinearAllocBlock->_ptr, 128);
+
+  st->print_cr(" _fitStrategy = %s, _adaptive_freelists = %s",
+               _fitStrategy?"true":"false", _adaptive_freelists?"true":"false");
+}
+
 void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st)
 const {
   reportIndexedFreeListStatistics();
@@ -557,13 +580,15 @@
 void CompactibleFreeListSpace::set_end(HeapWord* value) {
   HeapWord* prevEnd = end();
   assert(prevEnd != value, "unnecessary set_end call");
-  assert(prevEnd == NULL || value >= unallocated_block(), "New end is below unallocated block");
+  assert(prevEnd == NULL || !BlockOffsetArrayUseUnallocatedBlock || value >= unallocated_block(),
+        "New end is below unallocated block");
   _end = value;
   if (prevEnd != NULL) {
     // Resize the underlying block offset table.
     _bt.resize(pointer_delta(value, bottom()));
     if (value <= prevEnd) {
-      assert(value >= unallocated_block(), "New end is below unallocated block");
+      assert(!BlockOffsetArrayUseUnallocatedBlock || value >= unallocated_block(),
+             "New end is below unallocated block");
     } else {
       // Now, take this new chunk and add it to the free blocks.
       // Note that the BOT has not yet been updated for this block.
@@ -938,7 +963,6 @@
 
 size_t CompactibleFreeListSpace::block_size(const HeapWord* p) const {
   NOT_PRODUCT(verify_objects_initialized());
-  assert(MemRegion(bottom(), end()).contains(p), "p not in space");
   // This must be volatile, or else there is a danger that the compiler
   // will compile the code below into a sometimes-infinite loop, by keeping
   // the value read the first time in a register.
@@ -957,7 +981,7 @@
       // must read from what 'p' points to in each loop.
       klassOop k = ((volatile oopDesc*)p)->klass_or_null();
       if (k != NULL) {
-        assert(k->is_oop(true /* ignore mark word */), "Should really be klass oop.");
+        assert(k->is_oop(true /* ignore mark word */), "Should be klass oop");
         oop o = (oop)p;
         assert(o->is_parsable(), "Should be parsable");
         assert(o->is_oop(true /* ignore mark word */), "Should be an oop.");
@@ -1231,7 +1255,6 @@
         // satisfy the request.  This is different that
         // evm.
         // Don't record chunk off a LinAB?  smallSplitBirth(size);
-
     } else {
       // Raid the exact free lists larger than size, even if they are not
       // overpopulated.
@@ -1449,6 +1472,7 @@
     // Update BOT last so that other (parallel) GC threads see a consistent
     // view of the BOT and free blocks.
     // Above must occur before BOT is updated below.
+    OrderAccess::storestore();
     _bt.split_block(res, blk_size, size);  // adjust block offset table
   }
   return res;
@@ -1477,6 +1501,7 @@
     // Update BOT last so that other (parallel) GC threads see a consistent
     // view of the BOT and free blocks.
     // Above must occur before BOT is updated below.
+    OrderAccess::storestore();
     _bt.split_block(res, blk_size, size);  // adjust block offset table
     _bt.allocated(res, size);
   }
@@ -1856,6 +1881,8 @@
   ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
   // Above must occur before BOT is updated below.
   // adjust block offset table
+  OrderAccess::storestore();
+  assert(chunk->isFree() && ffc->isFree(), "Error");
   _bt.split_block((HeapWord*)chunk, chunk->size(), new_size);
   if (rem_size < SmallForDictionary) {
     bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
@@ -1911,8 +1938,7 @@
   // mark the "end" of the used space at the time of this call;
   // note, however, that promoted objects from this point
   // on are tracked in the _promoInfo below.
-  set_saved_mark_word(BlockOffsetArrayUseUnallocatedBlock ?
-                      unallocated_block() : end());
+  set_saved_mark_word(unallocated_block());
   // inform allocator that promotions should be tracked.
   assert(_promoInfo.noPromotions(), "_promoInfo inconsistency");
   _promoInfo.startTrackingPromotions();
@@ -2238,8 +2264,7 @@
 }
 
 void CompactibleFreeListSpace::print() const {
-  tty->print(" CompactibleFreeListSpace");
-  Space::print();
+  Space::print_on(tty);
 }
 
 void CompactibleFreeListSpace::prepare_for_verify() {
@@ -2253,18 +2278,28 @@
  private:
   const CompactibleFreeListSpace* _sp;
   const MemRegion                 _span;
+  HeapWord*                       _last_addr;
+  size_t                          _last_size;
+  bool                            _last_was_obj;
+  bool                            _last_was_live;
 
  public:
   VerifyAllBlksClosure(const CompactibleFreeListSpace* sp,
-    MemRegion span) :  _sp(sp), _span(span) { }
+    MemRegion span) :  _sp(sp), _span(span),
+                       _last_addr(NULL), _last_size(0),
+                       _last_was_obj(false), _last_was_live(false) { }
 
   virtual size_t do_blk(HeapWord* addr) {
     size_t res;
+    bool   was_obj  = false;
+    bool   was_live = false;
     if (_sp->block_is_obj(addr)) {
+      was_obj = true;
       oop p = oop(addr);
       guarantee(p->is_oop(), "Should be an oop");
       res = _sp->adjustObjectSize(p->size());
       if (_sp->obj_is_alive(addr)) {
+        was_live = true;
         p->verify();
       }
     } else {
@@ -2275,7 +2310,20 @@
                   "Chunk should be on a free list");
       }
     }
-    guarantee(res != 0, "Livelock: no rank reduction!");
+    if (res == 0) {
+      gclog_or_tty->print_cr("Livelock: no rank reduction!");
+      gclog_or_tty->print_cr(
+        " Current:  addr = " PTR_FORMAT ", size = " SIZE_FORMAT ", obj = %s, live = %s \n"
+        " Previous: addr = " PTR_FORMAT ", size = " SIZE_FORMAT ", obj = %s, live = %s \n",
+        addr,       res,        was_obj      ?"true":"false", was_live      ?"true":"false",
+        _last_addr, _last_size, _last_was_obj?"true":"false", _last_was_live?"true":"false");
+      _sp->print_on(gclog_or_tty);
+      guarantee(false, "Seppuku!");
+    }
+    _last_addr = addr;
+    _last_size = res;
+    _last_was_obj  = was_obj;
+    _last_was_live = was_live;
     return res;
   }
 };
@@ -2521,7 +2569,7 @@
 
 HeapWord* CFLS_LAB::alloc(size_t word_sz) {
   FreeChunk* res;
-  word_sz = _cfls->adjustObjectSize(word_sz);
+  guarantee(word_sz == _cfls->adjustObjectSize(word_sz), "Error");
   if (word_sz >=  CompactibleFreeListSpace::IndexSetSize) {
     // This locking manages sync with other large object allocations.
     MutexLockerEx x(_cfls->parDictionaryAllocLock(),
@@ -2667,12 +2715,12 @@
          (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
          (CMSSplitIndexedFreeListBlocks || k <= 1);
          k++, cur_sz = k * word_sz) {
-      FreeList* gfl = &_indexedFreeList[cur_sz];
       FreeList fl_for_cur_sz;  // Empty.
       fl_for_cur_sz.set_size(cur_sz);
       {
         MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
                         Mutex::_no_safepoint_check_flag);
+        FreeList* gfl = &_indexedFreeList[cur_sz];
         if (gfl->count() != 0) {
           // nn is the number of chunks of size cur_sz that
           // we'd need to split k-ways each, in order to create
@@ -2685,9 +2733,9 @@
             // we increment the split death count by the number of blocks
             // we just took from the cur_sz-size blocks list and which
             // we will be splitting below.
-            ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() +
+            ssize_t deaths = gfl->splitDeaths() +
                              fl_for_cur_sz.count();
-            _indexedFreeList[cur_sz].set_splitDeaths(deaths);
+            gfl->set_splitDeaths(deaths);
           }
         }
       }
@@ -2703,18 +2751,25 @@
             // access the main chunk sees it as a single free block until we
             // change it.
             size_t fc_size = fc->size();
+            assert(fc->isFree(), "Error");
             for (int i = k-1; i >= 0; i--) {
               FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
+              assert((i != 0) ||
+                        ((fc == ffc) && ffc->isFree() &&
+                         (ffc->size() == k*word_sz) && (fc_size == word_sz)),
+                        "Counting error");
               ffc->setSize(word_sz);
+              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
               ffc->linkNext(NULL);
-              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
               // Above must occur before BOT is updated below.
-              // splitting from the right, fc_size == (k - i + 1) * wordsize
-              _bt.mark_block((HeapWord*)ffc, word_sz);
+              OrderAccess::storestore();
+              // splitting from the right, fc_size == i * word_sz
+              _bt.mark_block((HeapWord*)ffc, word_sz, true /* reducing */);
               fc_size -= word_sz;
-              _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
+              assert(fc_size == i*word_sz, "Error");
+              _bt.verify_not_unallocated((HeapWord*)ffc, word_sz);
               _bt.verify_single_block((HeapWord*)fc, fc_size);
-              _bt.verify_single_block((HeapWord*)ffc, ffc->size());
+              _bt.verify_single_block((HeapWord*)ffc, word_sz);
               // Push this on "fl".
               fl->returnChunkAtHead(ffc);
             }
@@ -2744,7 +2799,7 @@
                                   _dictionary->minSize()),
                                   FreeBlockDictionary::atLeast);
       if (fc != NULL) {
-        _bt.allocated((HeapWord*)fc, fc->size());  // update _unallocated_blk
+        _bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */);  // update _unallocated_blk
         dictionary()->dictCensusUpdate(fc->size(),
                                        true /*split*/,
                                        false /*birth*/);
@@ -2754,8 +2809,10 @@
       }
     }
     if (fc == NULL) return;
+    // Otherwise, split up that block.
     assert((ssize_t)n >= 1, "Control point invariant");
-    // Otherwise, split up that block.
+    assert(fc->isFree(), "Error: should be a free block");
+    _bt.verify_single_block((HeapWord*)fc, fc->size());
     const size_t nn = fc->size() / word_sz;
     n = MIN2(nn, n);
     assert((ssize_t)n >= 1, "Control point invariant");
@@ -2773,6 +2830,7 @@
     // dictionary and return, leaving "fl" empty.
     if (n == 0) {
       returnChunkToDictionary(fc);
+      assert(fl->count() == 0, "We never allocated any blocks");
       return;
     }
 
@@ -2785,11 +2843,14 @@
       size_t prefix_size = n * word_sz;
       rem_fc = (FreeChunk*)((HeapWord*)fc + prefix_size);
       rem_fc->setSize(rem);
+      rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
       rem_fc->linkNext(NULL);
-      rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
       // Above must occur before BOT is updated below.
       assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
+      OrderAccess::storestore();
       _bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
+      assert(fc->isFree(), "Error");
+      fc->setSize(prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
         dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
@@ -2815,11 +2876,12 @@
   for (ssize_t i = n-1; i > 0; i--) {
     FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
     ffc->setSize(word_sz);
+    ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
     ffc->linkNext(NULL);
-    ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
     // Above must occur before BOT is updated below.
+    OrderAccess::storestore();
     // splitting from the right, fc_size == (n - i + 1) * wordsize
-    _bt.mark_block((HeapWord*)ffc, word_sz);
+    _bt.mark_block((HeapWord*)ffc, word_sz, true /* reducing */);
     fc_size -= word_sz;
     _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
     _bt.verify_single_block((HeapWord*)ffc, ffc->size());
@@ -2828,9 +2890,11 @@
     fl->returnChunkAtHead(ffc);
   }
   // First chunk
+  assert(fc->isFree() && fc->size() == n*word_sz, "Error: should still be a free block");
+  // The blocks above should show their new sizes before the first block below
   fc->setSize(word_sz);
+  fc->linkPrev(NULL);    // idempotent wrt free-ness, see assert above
   fc->linkNext(NULL);
-  fc->linkPrev(NULL);
   _bt.verify_not_unallocated((HeapWord*)fc, fc->size());
   _bt.verify_single_block((HeapWord*)fc, fc->size());
   fl->returnChunkAtHead(fc);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,8 @@
   size_t    _word_size;
   size_t    _refillSize;
   size_t    _allocation_size_limit;  // largest size that will be allocated
+
+  void print_on(outputStream* st) const;
 };
 
 // Concrete subclass of CompactibleSpace that implements
@@ -249,10 +251,14 @@
   size_t     numFreeBlocksInIndexedFreeLists() const;
   // Accessor
   HeapWord* unallocated_block() const {
-    HeapWord* ub = _bt.unallocated_block();
-    assert(ub >= bottom() &&
-           ub <= end(), "space invariant");
-    return ub;
+    if (BlockOffsetArrayUseUnallocatedBlock) {
+      HeapWord* ub = _bt.unallocated_block();
+      assert(ub >= bottom() &&
+             ub <= end(), "space invariant");
+      return ub;
+    } else {
+      return end();
+    }
   }
   void freed(HeapWord* start, size_t size) {
     _bt.freed(start, size);
@@ -476,6 +482,7 @@
 
   // Debugging support
   void print()                            const;
+  void print_on(outputStream* st)         const;
   void prepare_for_verify();
   void verify(bool allow_dirty)           const;
   void verifyFreeLists()                  const PRODUCT_RETURN;
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1019,7 +1019,7 @@
 }
 
 HeapWord* ConcurrentMarkSweepGeneration::have_lock_and_allocate(size_t size,
-                                                  bool   tlab) {
+                                                  bool   tlab /* ignored */) {
   assert_lock_strong(freelistLock());
   size_t adjustedSize = CompactibleFreeListSpace::adjustObjectSize(size);
   HeapWord* res = cmsSpace()->allocate(adjustedSize);
@@ -1032,6 +1032,11 @@
   // allowing the object to be blackened (and its references scanned)
   // either during a preclean phase or at the final checkpoint.
   if (res != NULL) {
+    // We may block here with an uninitialized object with
+    // its mark-bit or P-bits not yet set. Such objects need
+    // to be safely navigable by block_start().
+    assert(oop(res)->klass_or_null() == NULL, "Object should be uninitialized here.");
+    assert(!((FreeChunk*)res)->isFree(), "Error, block will look free but show wrong size");
     collector()->direct_allocated(res, adjustedSize);
     _direct_allocated_words += adjustedSize;
     // allocation counters
@@ -1061,8 +1066,14 @@
     // [see comments preceding SweepClosure::do_blk() below for details]
     // 1. need to mark the object as live so it isn't collected
     // 2. need to mark the 2nd bit to indicate the object may be uninitialized
-    // 3. need to mark the end of the object so sweeper can skip over it
-    //    if it's uninitialized when the sweeper reaches it.
+    // 3. need to mark the end of the object so marking, precleaning or sweeping
+    //    can skip over uninitialized or unparsable objects. An allocated
+    //    object is considered uninitialized for our purposes as long as
+    //    its klass word is NULL. (Unparsable objects are those which are
+    //    initialized in the sense just described, but whose sizes can still
+    //    not be correctly determined. Note that the class of unparsable objects
+    //    can only occur in the perm gen. All old gen objects are parsable
+    //    as soon as they are initialized.)
     _markBitMap.mark(start);          // object is live
     _markBitMap.mark(start + 1);      // object is potentially uninitialized?
     _markBitMap.mark(start + size - 1);
@@ -1088,7 +1099,13 @@
     // We don't need to mark the object as uninitialized (as
     // in direct_allocated above) because this is being done with the
     // world stopped and the object will be initialized by the
-    // time the sweeper gets to look at it.
+    // time the marking, precleaning or sweeping get to look at it.
+    // But see the code for copying objects into the CMS generation,
+    // where we need to ensure that concurrent readers of the
+    // block offset table are able to safely navigate a block that
+    // is in flux from being free to being allocated (and in
+    // transition while being copied into) and subsequently
+    // becoming a bona-fide object when the copy/promotion is complete.
     assert(SafepointSynchronize::is_at_safepoint(),
            "expect promotion only at safepoints");
 
@@ -1304,6 +1321,48 @@
   return collector()->allocation_limit_reached(space, top, word_sz);
 }
 
+// IMPORTANT: Notes on object size recognition in CMS.
+// ---------------------------------------------------
+// A block of storage in the CMS generation is always in
+// one of three states. A free block (FREE), an allocated
+// object (OBJECT) whose size() method reports the correct size,
+// and an intermediate state (TRANSIENT) in which its size cannot
+// be accurately determined.
+// STATE IDENTIFICATION:   (32 bit and 64 bit w/o COOPS)
+// -----------------------------------------------------
+// FREE:      klass_word & 1 == 1; mark_word holds block size
+//
+// OBJECT:    klass_word installed; klass_word != 0 && klass_word & 0 == 0;
+//            obj->size() computes correct size
+//            [Perm Gen objects needs to be "parsable" before they can be navigated]
+//
+// TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
+//
+// STATE IDENTIFICATION: (64 bit+COOPS)
+// ------------------------------------
+// FREE:      mark_word & CMS_FREE_BIT == 1; mark_word & ~CMS_FREE_BIT gives block_size
+//
+// OBJECT:    klass_word installed; klass_word != 0;
+//            obj->size() computes correct size
+//            [Perm Gen comment above continues to hold]
+//
+// TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
+//
+//
+// STATE TRANSITION DIAGRAM
+//
+//        mut / parnew                     mut  /  parnew
+// FREE --------------------> TRANSIENT ---------------------> OBJECT --|
+//  ^                                                                   |
+//  |------------------------ DEAD <------------------------------------|
+//         sweep                            mut
+//
+// While a block is in TRANSIENT state its size cannot be determined
+// so readers will either need to come back later or stall until
+// the size can be determined. Note that for the case of direct
+// allocation, P-bits, when available, may be used to determine the
+// size of an object that may not yet have been initialized.
+
 // Things to support parallel young-gen collection.
 oop
 ConcurrentMarkSweepGeneration::par_promote(int thread_num,
@@ -1331,33 +1390,39 @@
     }
   }
   assert(promoInfo->has_spooling_space(), "Control point invariant");
-  HeapWord* obj_ptr = ps->lab.alloc(word_sz);
+  const size_t alloc_sz = CompactibleFreeListSpace::adjustObjectSize(word_sz);
+  HeapWord* obj_ptr = ps->lab.alloc(alloc_sz);
   if (obj_ptr == NULL) {
-     obj_ptr = expand_and_par_lab_allocate(ps, word_sz);
+     obj_ptr = expand_and_par_lab_allocate(ps, alloc_sz);
      if (obj_ptr == NULL) {
        return NULL;
      }
   }
   oop obj = oop(obj_ptr);
+  OrderAccess::storestore();
   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
+  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  // IMPORTANT: See note on object initialization for CMS above.
   // Otherwise, copy the object.  Here we must be careful to insert the
   // klass pointer last, since this marks the block as an allocated object.
   // Except with compressed oops it's the mark word.
   HeapWord* old_ptr = (HeapWord*)old;
+  // Restore the mark word copied above.
+  obj->set_mark(m);
+  assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
+  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  OrderAccess::storestore();
+
+  if (UseCompressedOops) {
+    // Copy gap missed by (aligned) header size calculation below
+    obj->set_klass_gap(old->klass_gap());
+  }
   if (word_sz > (size_t)oopDesc::header_size()) {
     Copy::aligned_disjoint_words(old_ptr + oopDesc::header_size(),
                                  obj_ptr + oopDesc::header_size(),
                                  word_sz - oopDesc::header_size());
   }
 
-  if (UseCompressedOops) {
-    // Copy gap missed by (aligned) header size calculation above
-    obj->set_klass_gap(old->klass_gap());
-  }
-
-  // Restore the mark word copied above.
-  obj->set_mark(m);
-
   // Now we can track the promoted object, if necessary.  We take care
   // to delay the transition from uninitialized to full object
   // (i.e., insertion of klass pointer) until after, so that it
@@ -1365,18 +1430,22 @@
   if (promoInfo->tracking()) {
     promoInfo->track((PromotedObject*)obj, old->klass());
   }
+  assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
+  assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
+  assert(old->is_oop(), "Will use and dereference old klass ptr below");
 
   // Finally, install the klass pointer (this should be volatile).
+  OrderAccess::storestore();
   obj->set_klass(old->klass());
-
-  assert(old->is_oop(), "Will dereference klass ptr below");
+  // We should now be able to calculate the right size for this object
+  assert(obj->is_oop() && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
+
   collector()->promoted(true,          // parallel
                         obj_ptr, old->is_objArray(), word_sz);
 
   NOT_PRODUCT(
-    Atomic::inc(&_numObjectsPromoted);
-    Atomic::add((jint)CompactibleFreeListSpace::adjustObjectSize(obj->size()),
-                &_numWordsPromoted);
+    Atomic::inc_ptr(&_numObjectsPromoted);
+    Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
   )
 
   return obj;
@@ -1965,6 +2034,9 @@
                                             _intra_sweep_estimate.padded_average());
   }
 
+  {
+    TraceCMSMemoryManagerStats();
+  }
   GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
     ref_processor(), clear_all_soft_refs);
   #ifdef ASSERT
@@ -3415,6 +3487,7 @@
 void CMSCollector::checkpointRootsInitial(bool asynch) {
   assert(_collectorState == InitialMarking, "Wrong collector state");
   check_correct_thread_executing();
+  TraceCMSMemoryManagerStats tms(_collectorState);
   ReferenceProcessor* rp = ref_processor();
   SpecializationStats::clear();
   assert(_restart_addr == NULL, "Control point invariant");
@@ -4748,6 +4821,7 @@
   // world is stopped at this checkpoint
   assert(SafepointSynchronize::is_at_safepoint(),
          "world should be stopped");
+  TraceCMSMemoryManagerStats tms(_collectorState);
   verify_work_stacks_empty();
   verify_overflow_empty();
 
@@ -5849,6 +5923,8 @@
   verify_work_stacks_empty();
   verify_overflow_empty();
   increment_sweep_count();
+  TraceCMSMemoryManagerStats tms(_collectorState);
+
   _inter_sweep_timer.stop();
   _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
   size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
@@ -7861,14 +7937,20 @@
   FreeChunk* fc = (FreeChunk*)addr;
   size_t res;
 
-  // check if we are done sweepinrg
-  if (addr == _limit) { // we have swept up to the limit, do nothing more
+  // Check if we are done sweeping. Below we check "addr >= _limit" rather
+  // than "addr == _limit" because although _limit was a block boundary when
+  // we started the sweep, it may no longer be one because heap expansion
+  // may have caused us to coalesce the block ending at the address _limit
+  // with a newly expanded chunk (this happens when _limit was set to the
+  // previous _end of the space), so we may have stepped past _limit; see CR 6977970.
+  if (addr >= _limit) { // we have swept up to or past the limit, do nothing more
     assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
            "sweep _limit out of bounds");
+    assert(addr < _sp->end(), "addr out of bounds");
     // help the closure application finish
-    return pointer_delta(_sp->end(), _limit);
-  }
-  assert(addr <= _limit, "sweep invariant");
+    return pointer_delta(_sp->end(), addr);
+  }
+  assert(addr < _limit, "sweep invariant");
 
   // check if we should yield
   do_yield_check(addr);
@@ -9121,3 +9203,57 @@
   }
   return res;
 }
+
+TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase): TraceMemoryManagerStats() {
+
+  switch (phase) {
+    case CMSCollector::InitialMarking:
+      initialize(true  /* fullGC */ ,
+                 true  /* recordGCBeginTime */,
+                 true  /* recordPreGCUsage */,
+                 false /* recordPeakUsage */,
+                 false /* recordPostGCusage */,
+                 true  /* recordAccumulatedGCTime */,
+                 false /* recordGCEndTime */,
+                 false /* countCollection */  );
+      break;
+
+    case CMSCollector::FinalMarking:
+      initialize(true  /* fullGC */ ,
+                 false /* recordGCBeginTime */,
+                 false /* recordPreGCUsage */,
+                 false /* recordPeakUsage */,
+                 false /* recordPostGCusage */,
+                 true  /* recordAccumulatedGCTime */,
+                 false /* recordGCEndTime */,
+                 false /* countCollection */  );
+      break;
+
+    case CMSCollector::Sweeping:
+      initialize(true  /* fullGC */ ,
+                 false /* recordGCBeginTime */,
+                 false /* recordPreGCUsage */,
+                 true  /* recordPeakUsage */,
+                 true  /* recordPostGCusage */,
+                 false /* recordAccumulatedGCTime */,
+                 true  /* recordGCEndTime */,
+                 true  /* countCollection */  );
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// when bailing out of cms in concurrent mode failure
+TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(): TraceMemoryManagerStats() {
+  initialize(true /* fullGC */ ,
+             true /* recordGCBeginTime */,
+             true /* recordPreGCUsage */,
+             true /* recordPeakUsage */,
+             true /* recordPostGCusage */,
+             true /* recordAccumulatedGCTime */,
+             true /* recordGCEndTime */,
+             true /* countCollection */ );
+}
+
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -507,6 +507,7 @@
   friend class VM_CMS_Operation;
   friend class VM_CMS_Initial_Mark;
   friend class VM_CMS_Final_Remark;
+  friend class TraceCMSMemoryManagerStats;
 
  private:
   jlong _time_of_last_gc;
@@ -1009,10 +1010,10 @@
 
   // Non-product stat counters
   NOT_PRODUCT(
-    int _numObjectsPromoted;
-    int _numWordsPromoted;
-    int _numObjectsAllocated;
-    int _numWordsAllocated;
+    size_t _numObjectsPromoted;
+    size_t _numWordsPromoted;
+    size_t _numObjectsAllocated;
+    size_t _numWordsAllocated;
   )
 
   // Used for sizing decisions
@@ -1858,3 +1859,11 @@
     _dead_bit_map(dead_bit_map) {}
   size_t do_blk(HeapWord* addr);
 };
+
+class TraceCMSMemoryManagerStats : public TraceMemoryManagerStats {
+
+ public:
+  TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase);
+  TraceCMSMemoryManagerStats();
+};
+
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -110,15 +110,21 @@
   }
   void linkNext(FreeChunk* ptr) { _next = ptr; }
   void linkPrev(FreeChunk* ptr) {
-     LP64_ONLY(if (UseCompressedOops) _prev = ptr; else)
-     _prev = (FreeChunk*)((intptr_t)ptr | 0x1);
+    LP64_ONLY(if (UseCompressedOops) _prev = ptr; else)
+    _prev = (FreeChunk*)((intptr_t)ptr | 0x1);
   }
   void clearPrev()              { _prev = NULL; }
   void clearNext()              { _next = NULL; }
   void markNotFree() {
-   LP64_ONLY(if (UseCompressedOops) set_mark(markOopDesc::prototype());)
-   // Also set _prev to null
-   _prev = NULL;
+    // Set _prev (klass) to null before (if) clearing the mark word below
+    _prev = NULL;
+#ifdef _LP64
+    if (UseCompressedOops) {
+      OrderAccess::storestore();
+      set_mark(markOopDesc::prototype());
+    }
+#endif
+    assert(!isFree(), "Error");
   }
 
   // Return the address past the end of this chunk
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/promotionInfo.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -330,7 +330,7 @@
 void PromotionInfo::print_on(outputStream* st) const {
   SpoolBlock* curSpool = NULL;
   size_t i = 0;
-  st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
+  st->print_cr(" start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
                _firstIndex, _nextIndex);
   for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL;
        curSpool = curSpool->nextSpoolBlock) {
@@ -350,7 +350,7 @@
     st->print_cr(" free ");
     i++;
   }
-  st->print_cr(SIZE_FORMAT " header spooling blocks", i);
+  st->print_cr("  " SIZE_FORMAT " header spooling blocks", i);
 }
 
 void SpoolBlock::print_on(outputStream* st) const {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -339,7 +339,9 @@
   return res;
 }
 
-void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
+void ConcurrentG1Refine::clean_up_cache(int worker_i,
+                                        G1RemSet* g1rs,
+                                        DirtyCardQueue* into_cset_dcq) {
   assert(!use_cache(), "cache should be disabled");
   int start_idx;
 
@@ -353,7 +355,19 @@
       for (int i = start_idx; i < end_idx; i++) {
         jbyte* entry = _hot_cache[i];
         if (entry != NULL) {
-          g1rs->concurrentRefineOneCard(entry, worker_i);
+          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
+            // 'entry' contains references that point into the current
+            // collection set. We need to record 'entry' in the DCQS
+            // that's used for that purpose.
+            //
+            // The only time we care about recording cards that contain
+            // references that point into the collection set is during
+            // RSet updating while within an evacuation pause.
+            // In this case worker_i should be the id of a GC worker thread
+            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
+            assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id");
+            into_cset_dcq->enqueue(entry);
+          }
         }
       }
     }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -184,7 +184,7 @@
   jbyte* cache_insert(jbyte* card_ptr, bool* defer);
 
   // Process the cached entries.
-  void clean_up_cache(int worker_i, G1RemSet* g1rs);
+  void clean_up_cache(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq);
 
   // Set up for parallel processing of the cards in the hot cache
   void clear_hot_cache_claimed_index() {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -2586,9 +2586,6 @@
   double end_time = os::elapsedTime();
   double elapsed_time_ms = (end_time - start) * 1000.0;
   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
-  if (PrintGCDetails) {
-    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
-  }
 
   ClearMarksInHRClosure clr(nextMarkBitMap());
   g1h->collection_set_iterate(&clr);
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,13 +178,14 @@
 }
 
 bool DirtyCardQueueSet::
-apply_closure_to_completed_buffer_helper(int worker_i,
+apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl,
+                                         int worker_i,
                                          BufferNode* nd) {
   if (nd != NULL) {
     void **buf = BufferNode::make_buffer_from_node(nd);
     size_t index = nd->index();
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, buf,
+      DirtyCardQueue::apply_closure_to_buffer(cl, buf,
                                               index, _sz,
                                               true, worker_i);
     if (b) {
@@ -199,15 +200,22 @@
   }
 }
 
+bool DirtyCardQueueSet::apply_closure_to_completed_buffer(CardTableEntryClosure* cl,
+                                                          int worker_i,
+                                                          int stop_at,
+                                                          bool during_pause) {
+  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
+  BufferNode* nd = get_completed_buffer(stop_at);
+  bool res = apply_closure_to_completed_buffer_helper(cl, worker_i, nd);
+  if (res) Atomic::inc(&_processed_buffers_rs_thread);
+  return res;
+}
+
 bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
                                                           int stop_at,
-                                                          bool during_pause)
-{
-  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
-  BufferNode* nd = get_completed_buffer(stop_at);
-  bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
-  if (res) Atomic::inc(&_processed_buffers_rs_thread);
-  return res;
+                                                          bool during_pause) {
+  return apply_closure_to_completed_buffer(_closure, worker_i,
+                                           stop_at, during_pause);
 }
 
 void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
@@ -222,8 +230,8 @@
   }
 }
 
-void DirtyCardQueueSet::abandon_logs() {
-  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+// Deallocates any completed log buffers
+void DirtyCardQueueSet::clear() {
   BufferNode* buffers_to_delete = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
@@ -242,6 +250,12 @@
     buffers_to_delete = nd->next();
     deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   }
+
+}
+
+void DirtyCardQueueSet::abandon_logs() {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  clear();
   // Since abandon is done only at safepoints, we can safely manipulate
   // these queues.
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -123,7 +123,21 @@
                                          int stop_at = 0,
                                          bool during_pause = false);
 
-  bool apply_closure_to_completed_buffer_helper(int worker_i,
+  // If there exists some completed buffer, pop it, then apply the
+  // specified closure to all its elements, nulling out those elements
+  // processed.  If all elements are processed, returns "true".  If no
+  // completed buffers exist, returns false.  If a completed buffer exists,
+  // but is only partially completed before a "yield" happens, the
+  // partially completed buffer (with its processed elements set to NULL)
+  // is returned to the completed buffer set, and this call returns false.
+  bool apply_closure_to_completed_buffer(CardTableEntryClosure* cl,
+                                         int worker_i = 0,
+                                         int stop_at = 0,
+                                         bool during_pause = false);
+
+  // Helper routine for the above.
+  bool apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl,
+                                                int worker_i,
                                                 BufferNode* nd);
 
   BufferNode* get_completed_buffer(int stop_at);
@@ -136,6 +150,9 @@
     return &_shared_dirty_card_queue;
   }
 
+  // Deallocate any completed log buffers
+  void clear();
+
   // If a full collection is happening, reset partial logs, and ignore
   // completed ones: the full collection will make them all irrelevant.
   void abandon_logs();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -56,7 +56,12 @@
     _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
   {}
   bool do_card_ptr(jbyte* card_ptr, int worker_i) {
-    _g1rs->concurrentRefineOneCard(card_ptr, worker_i);
+    bool oops_into_cset = _g1rs->concurrentRefineOneCard(card_ptr, worker_i, false);
+    // This path is executed by the concurrent refine or mutator threads,
+    // concurrently, and so we do not care if card_ptr contains references
+    // that point into the collection set.
+    assert(!oops_into_cset, "should be");
+
     if (_concurrent && _sts->should_yield()) {
       // Caller will actually yield.
       return false;
@@ -1039,29 +1044,56 @@
   const size_t capacity_after_gc = capacity();
   const size_t free_after_gc = capacity_after_gc - used_after_gc;
 
+  // This is enforced in arguments.cpp.
+  assert(MinHeapFreeRatio <= MaxHeapFreeRatio,
+         "otherwise the code below doesn't make sense");
+
   // We don't have floating point command-line arguments
-  const double minimum_free_percentage = (double) MinHeapFreeRatio / 100;
+  const double minimum_free_percentage = (double) MinHeapFreeRatio / 100.0;
   const double maximum_used_percentage = 1.0 - minimum_free_percentage;
-  const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;
+  const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100.0;
   const double minimum_used_percentage = 1.0 - maximum_free_percentage;
 
-  size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage);
-  size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage);
-
-  // Don't shrink less than the initial size.
-  minimum_desired_capacity =
-    MAX2(minimum_desired_capacity,
-         collector_policy()->initial_heap_byte_size());
-  maximum_desired_capacity =
-    MAX2(maximum_desired_capacity,
-         collector_policy()->initial_heap_byte_size());
-
-  // We are failing here because minimum_desired_capacity is
-  assert(used_after_gc <= minimum_desired_capacity, "sanity check");
-  assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check");
+  const size_t min_heap_size = collector_policy()->min_heap_byte_size();
+  const size_t max_heap_size = collector_policy()->max_heap_byte_size();
+
+  // We have to be careful here as these two calculations can overflow
+  // 32-bit size_t's.
+  double used_after_gc_d = (double) used_after_gc;
+  double minimum_desired_capacity_d = used_after_gc_d / maximum_used_percentage;
+  double maximum_desired_capacity_d = used_after_gc_d / minimum_used_percentage;
+
+  // Let's make sure that they are both under the max heap size, which
+  // by default will make them fit into a size_t.
+  double desired_capacity_upper_bound = (double) max_heap_size;
+  minimum_desired_capacity_d = MIN2(minimum_desired_capacity_d,
+                                    desired_capacity_upper_bound);
+  maximum_desired_capacity_d = MIN2(maximum_desired_capacity_d,
+                                    desired_capacity_upper_bound);
+
+  // We can now safely turn them into size_t's.
+  size_t minimum_desired_capacity = (size_t) minimum_desired_capacity_d;
+  size_t maximum_desired_capacity = (size_t) maximum_desired_capacity_d;
+
+  // This assert only makes sense here, before we adjust them
+  // with respect to the min and max heap size.
+  assert(minimum_desired_capacity <= maximum_desired_capacity,
+         err_msg("minimum_desired_capacity = "SIZE_FORMAT", "
+                 "maximum_desired_capacity = "SIZE_FORMAT,
+                 minimum_desired_capacity, maximum_desired_capacity));
+
+  // Should not be greater than the heap max size. No need to adjust
+  // it with respect to the heap min size as it's a lower bound (i.e.,
+  // we'll try to make the capacity larger than it, not smaller).
+  minimum_desired_capacity = MIN2(minimum_desired_capacity, max_heap_size);
+  // Should not be less than the heap min size. No need to adjust it
+  // with respect to the heap max size as it's an upper bound (i.e.,
+  // we'll try to make the capacity smaller than it, not greater).
+  maximum_desired_capacity =  MAX2(maximum_desired_capacity, min_heap_size);
 
   if (PrintGC && Verbose) {
-    const double free_percentage = ((double)free_after_gc) / capacity();
+    const double free_percentage =
+      (double) free_after_gc / (double) capacity_after_gc;
     gclog_or_tty->print_cr("Computing new size after full GC ");
     gclog_or_tty->print_cr("  "
                            "  minimum_free_percentage: %6.2f",
@@ -1073,45 +1105,47 @@
                            "  capacity: %6.1fK"
                            "  minimum_desired_capacity: %6.1fK"
                            "  maximum_desired_capacity: %6.1fK",
-                           capacity() / (double) K,
-                           minimum_desired_capacity / (double) K,
-                           maximum_desired_capacity / (double) K);
+                           (double) capacity_after_gc / (double) K,
+                           (double) minimum_desired_capacity / (double) K,
+                           (double) maximum_desired_capacity / (double) K);
     gclog_or_tty->print_cr("  "
-                           "   free_after_gc   : %6.1fK"
-                           "   used_after_gc   : %6.1fK",
-                           free_after_gc / (double) K,
-                           used_after_gc / (double) K);
+                           "  free_after_gc: %6.1fK"
+                           "  used_after_gc: %6.1fK",
+                           (double) free_after_gc / (double) K,
+                           (double) used_after_gc / (double) K);
     gclog_or_tty->print_cr("  "
                            "   free_percentage: %6.2f",
                            free_percentage);
   }
-  if (capacity() < minimum_desired_capacity) {
+  if (capacity_after_gc < minimum_desired_capacity) {
     // Don't expand unless it's significant
     size_t expand_bytes = minimum_desired_capacity - capacity_after_gc;
     expand(expand_bytes);
     if (PrintGC && Verbose) {
-      gclog_or_tty->print_cr("    expanding:"
+      gclog_or_tty->print_cr("  "
+                             "  expanding:"
+                             "  max_heap_size: %6.1fK"
                              "  minimum_desired_capacity: %6.1fK"
                              "  expand_bytes: %6.1fK",
-                             minimum_desired_capacity / (double) K,
-                             expand_bytes / (double) K);
+                             (double) max_heap_size / (double) K,
+                             (double) minimum_desired_capacity / (double) K,
+                             (double) expand_bytes / (double) K);
     }
 
     // No expansion, now see if we want to shrink
-  } else if (capacity() > maximum_desired_capacity) {
+  } else if (capacity_after_gc > maximum_desired_capacity) {
     // Capacity too large, compute shrinking size
     size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity;
     shrink(shrink_bytes);
     if (PrintGC && Verbose) {
       gclog_or_tty->print_cr("  "
                              "  shrinking:"
-                             "  initSize: %.1fK"
-                             "  maximum_desired_capacity: %.1fK",
-                             collector_policy()->initial_heap_byte_size() / (double) K,
-                             maximum_desired_capacity / (double) K);
-      gclog_or_tty->print_cr("  "
-                             "  shrink_bytes: %.1fK",
-                             shrink_bytes / (double) K);
+                             "  min_heap_size: %6.1fK"
+                             "  maximum_desired_capacity: %6.1fK"
+                             "  shrink_bytes: %6.1fK",
+                             (double) min_heap_size / (double) K,
+                             (double) maximum_desired_capacity / (double) K,
+                             (double) shrink_bytes / (double) K);
     }
   }
 }
@@ -1322,6 +1356,7 @@
   SharedHeap(policy_),
   _g1_policy(policy_),
   _dirty_card_queue_set(false),
+  _into_cset_dirty_card_queue_set(false),
   _ref_processor(NULL),
   _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
@@ -1572,6 +1607,16 @@
                                       Shared_DirtyCardQ_lock,
                                       &JavaThread::dirty_card_queue_set());
   }
+
+  // Initialize the card queue set used to hold cards containing
+  // references into the collection set.
+  _into_cset_dirty_card_queue_set.initialize(DirtyCardQ_CBL_mon,
+                                             DirtyCardQ_FL_lock,
+                                             -1, // never trigger processing
+                                             -1, // no limit on length
+                                             Shared_DirtyCardQ_lock,
+                                             &JavaThread::dirty_card_queue_set());
+
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
@@ -1603,14 +1648,16 @@
   return _g1_committed.byte_size();
 }
 
-void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
+void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
+                                                 DirtyCardQueue* into_cset_dcq,
+                                                 bool concurrent,
                                                  int worker_i) {
   // Clean cards in the hot card cache
-  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set());
+  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set(), into_cset_dcq);
 
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   int n_completed_buffers = 0;
-  while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
+  while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
   g1_policy()->record_update_rs_processed_buffers(worker_i,
@@ -2147,9 +2194,12 @@
   }
 }
 
-HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) {
+HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
+  assert(!isHumongous(word_size),
+         err_msg("a TLAB should not be of humongous size, "
+                 "word_size = "SIZE_FORMAT, word_size));
   bool dummy;
-  return G1CollectedHeap::mem_allocate(size, false, true, &dummy);
+  return G1CollectedHeap::mem_allocate(word_size, false, true, &dummy);
 }
 
 bool G1CollectedHeap::allocs_are_zero_filled() {
@@ -2692,6 +2742,35 @@
   }
 };
 
+#if TASKQUEUE_STATS
+void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) {
+  st->print_raw_cr("GC Task Stats");
+  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
+  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
+}
+
+void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const {
+  print_taskqueue_stats_hdr(st);
+
+  TaskQueueStats totals;
+  const int n = MAX2(workers()->total_workers(), 1);
+  for (int i = 0; i < n; ++i) {
+    st->print("%3d ", i); task_queue(i)->stats.print(st); st->cr();
+    totals += task_queue(i)->stats;
+  }
+  st->print_raw("tot "); totals.print(st); st->cr();
+
+  DEBUG_ONLY(totals.verify());
+}
+
+void G1CollectedHeap::reset_taskqueue_stats() {
+  const int n = MAX2(workers()->total_workers(), 1);
+  for (int i = 0; i < n; ++i) {
+    task_queue(i)->stats.reset();
+  }
+}
+#endif // TASKQUEUE_STATS
+
 void
 G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
   if (GC_locker::check_active_before_gc()) {
@@ -2825,93 +2904,57 @@
       g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-      // Now choose the CS. We may abandon a pause if we find no
-      // region that will fit in the MMU pause.
-      bool abandoned = g1_policy()->choose_collection_set(target_pause_time_ms);
+      g1_policy()->choose_collection_set(target_pause_time_ms);
 
       // Nothing to do if we were unable to choose a collection set.
-      if (!abandoned) {
 #if G1_REM_SET_LOGGING
-        gclog_or_tty->print_cr("\nAfter pause, heap:");
-        print();
+      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      print();
 #endif
-        PrepareForRSScanningClosure prepare_for_rs_scan;
-        collection_set_iterate(&prepare_for_rs_scan);
-
-        setup_surviving_young_words();
-
-        // Set up the gc allocation regions.
-        get_gc_alloc_regions();
-
-        // Actually do the work...
-        evacuate_collection_set();
-
-        free_collection_set(g1_policy()->collection_set());
-        g1_policy()->clear_collection_set();
-
-        cleanup_surviving_young_words();
-
-        // Start a new incremental collection set for the next pause.
-        g1_policy()->start_incremental_cset_building();
-
-        // Clear the _cset_fast_test bitmap in anticipation of adding
-        // regions to the incremental collection set for the next
-        // evacuation pause.
-        clear_cset_fast_test();
-
-        if (g1_policy()->in_young_gc_mode()) {
-          _young_list->reset_sampled_info();
-
-          // Don't check the whole heap at this point as the
-          // GC alloc regions from this pause have been tagged
-          // as survivors and moved on to the survivor list.
-          // Survivor regions will fail the !is_young() check.
-          assert(check_young_list_empty(false /* check_heap */),
-              "young list should be empty");
+      PrepareForRSScanningClosure prepare_for_rs_scan;
+      collection_set_iterate(&prepare_for_rs_scan);
+
+      setup_surviving_young_words();
+
+      // Set up the gc allocation regions.
+      get_gc_alloc_regions();
+
+      // Actually do the work...
+      evacuate_collection_set();
+
+      free_collection_set(g1_policy()->collection_set());
+      g1_policy()->clear_collection_set();
+
+      cleanup_surviving_young_words();
+
+      // Start a new incremental collection set for the next pause.
+      g1_policy()->start_incremental_cset_building();
+
+      // Clear the _cset_fast_test bitmap in anticipation of adding
+      // regions to the incremental collection set for the next
+      // evacuation pause.
+      clear_cset_fast_test();
+
+      if (g1_policy()->in_young_gc_mode()) {
+        _young_list->reset_sampled_info();
+
+        // Don't check the whole heap at this point as the
+        // GC alloc regions from this pause have been tagged
+        // as survivors and moved on to the survivor list.
+        // Survivor regions will fail the !is_young() check.
+        assert(check_young_list_empty(false /* check_heap */),
+               "young list should be empty");
 
 #if YOUNG_LIST_VERBOSE
-          gclog_or_tty->print_cr("Before recording survivors.\nYoung List:");
-          _young_list->print();
+        gclog_or_tty->print_cr("Before recording survivors.\nYoung List:");
+        _young_list->print();
 #endif // YOUNG_LIST_VERBOSE
 
-          g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
                                           _young_list->first_survivor_region(),
                                           _young_list->last_survivor_region());
 
-          _young_list->reset_auxilary_lists();
-        }
-      } else {
-        // We have abandoned the current collection. This can only happen
-        // if we're not doing young or partially young collections, and
-        // we didn't find an old region that we're able to collect within
-        // the allowed time.
-
-        assert(g1_policy()->collection_set() == NULL, "should be");
-        assert(_young_list->length() == 0, "because it should be");
-
-        // This should be a no-op.
-        abandon_collection_set(g1_policy()->inc_cset_head());
-
-        g1_policy()->clear_incremental_cset();
-        g1_policy()->stop_incremental_cset_building();
-
-        // Start a new incremental collection set for the next pause.
-        g1_policy()->start_incremental_cset_building();
-
-        // Clear the _cset_fast_test bitmap in anticipation of adding
-        // regions to the incremental collection set for the next
-        // evacuation pause.
-        clear_cset_fast_test();
-
-        // This looks confusing, because the DPT should really be empty
-        // at this point -- since we have not done any collection work,
-        // there should not be any derived pointers in the table to update;
-        // however, there is some additional state in the DPT which is
-        // reset at the end of the (null) "gc" here via the following call.
-        // A better approach might be to split off that state resetting work
-        // into a separate method that asserts that the DPT is empty and call
-        // that here. That is deferred for now.
-        COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+        _young_list->reset_auxilary_lists();
       }
 
       if (evacuation_failed()) {
@@ -2945,7 +2988,7 @@
       double end_time_sec = os::elapsedTime();
       double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
       g1_policy()->record_pause_time_ms(pause_time_ms);
-      g1_policy()->record_collection_pause_end(abandoned);
+      g1_policy()->record_collection_pause_end();
 
       assert(regions_accounted_for(), "Region leakage.");
 
@@ -2988,6 +3031,9 @@
     }
   }
 
+  TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
+  TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
+
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
@@ -3346,25 +3392,6 @@
   }
 };
 
-class UpdateRSetImmediate : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem_set;
-public:
-  UpdateRSetImmediate(G1CollectedHeap* g1) :
-    _g1(g1), _g1_rem_set(g1->g1_rem_set()) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) {
-      _g1_rem_set->par_write_ref(_from, p, 0);
-    }
-  }
-};
-
 class UpdateRSetDeferred : public OopsInHeapRegionClosure {
 private:
   G1CollectedHeap* _g1;
@@ -3389,8 +3416,6 @@
   }
 };
 
-
-
 class RemoveSelfPointerClosure: public ObjectClosure {
 private:
   G1CollectedHeap* _g1;
@@ -3453,7 +3478,7 @@
 };
 
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h);
+  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
   DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
   UpdateRSetDeferred deferred_update(_g1h, &dcq);
   OopsInHeapRegionClosure *cl;
@@ -3583,7 +3608,7 @@
   if (!r->evacuation_failed()) {
     r->set_evacuation_failed(true);
     if (G1PrintHeapRegions) {
-      gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" "
+      gclog_or_tty->print("overflow in heap region "PTR_FORMAT" "
                           "["PTR_FORMAT","PTR_FORMAT")\n",
                           r, r->bottom(), r->end());
     }
@@ -3617,6 +3642,10 @@
 
 HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
                                                   size_t word_size) {
+  assert(!isHumongous(word_size),
+         err_msg("we should not be seeing humongous allocation requests "
+                 "during GC, word_size = "SIZE_FORMAT, word_size));
+
   HeapRegion* alloc_region = _gc_alloc_regions[purpose];
   // let the caller handle alloc failure
   if (alloc_region == NULL) return NULL;
@@ -3649,6 +3678,10 @@
                                          HeapRegion*    alloc_region,
                                          bool           par,
                                          size_t         word_size) {
+  assert(!isHumongous(word_size),
+         err_msg("we should not be seeing humongous allocation requests "
+                 "during GC, word_size = "SIZE_FORMAT, word_size));
+
   HeapWord* block = NULL;
   // In the parallel case, a previous thread to obtain the lock may have
   // already assigned a new gc_alloc_region.
@@ -3754,10 +3787,6 @@
     _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
-#if G1_DETAILED_STATS
-    _pushes(0), _pops(0), _steals(0),
-    _steal_attempts(0),  _overflow_pushes(0),
-#endif
     _strong_roots_time(0), _term_time(0),
     _alloc_buffer_waste(0), _undo_waste(0)
 {
@@ -3777,14 +3806,41 @@
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
   memset(_surviving_young_words, 0, real_length * sizeof(size_t));
 
-  _overflowed_refs = new OverflowQueue(10);
-
   _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
   _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
 
   _start = os::elapsedTime();
 }
 
+void
+G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- -------termination-------"
+                   " ------waste (KiB)------");
+  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts"
+                   "  total   alloc    undo");
+  st->print_raw_cr("--- --------- --------- ------ --------- ------ --------"
+                   " ------- ------- -------");
+}
+
+void
+G1ParScanThreadState::print_termination_stats(int i,
+                                              outputStream* const st) const
+{
+  const double elapsed_ms = elapsed_time() * 1000.0;
+  const double s_roots_ms = strong_roots_time() * 1000.0;
+  const double term_ms    = term_time() * 1000.0;
+  st->print_cr("%3d %9.2f %9.2f %6.2f "
+               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
+               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
+               i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
+               term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
+               (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K,
+               alloc_buffer_waste() * HeapWordSize / K,
+               undo_waste() * HeapWordSize / K);
+}
+
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
@@ -3991,12 +4047,9 @@
     G1ParScanThreadState* pss = par_scan_state();
     while (true) {
       pss->trim_queue();
-      IF_G1_DETAILED_STATS(pss->note_steal_attempt());
 
       StarTask stolen_task;
       if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) {
-        IF_G1_DETAILED_STATS(pss->note_steal());
-
         // slightly paranoid tests; I'm trying to catch potential
         // problems before we go into push_on_queue to know where the
         // problem is coming from
@@ -4115,35 +4168,9 @@
     // Clean up any par-expanded rem sets.
     HeapRegionRemSet::par_cleanup();
 
-    MutexLocker x(stats_lock());
     if (ParallelGCVerbose) {
-      gclog_or_tty->print("Thread %d complete:\n", i);
-#if G1_DETAILED_STATS
-      gclog_or_tty->print("  Pushes: %7d    Pops: %7d   Overflows: %7d   Steals %7d (in %d attempts)\n",
-                          pss.pushes(),
-                          pss.pops(),
-                          pss.overflow_pushes(),
-                          pss.steals(),
-                          pss.steal_attempts());
-#endif
-      double elapsed      = pss.elapsed();
-      double strong_roots = pss.strong_roots_time();
-      double term         = pss.term_time();
-      gclog_or_tty->print("  Elapsed: %7.2f ms.\n"
-                          "    Strong roots: %7.2f ms (%6.2f%%)\n"
-                          "    Termination:  %7.2f ms (%6.2f%%) "
-                                                 "(in "SIZE_FORMAT" entries)\n",
-                          elapsed * 1000.0,
-                          strong_roots * 1000.0, (strong_roots*100.0/elapsed),
-                          term * 1000.0, (term*100.0/elapsed),
-                          pss.term_attempts());
-      size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste();
-      gclog_or_tty->print("  Waste: %8dK\n"
-                 "    Alloc Buffer: %8dK\n"
-                 "    Undo: %8dK\n",
-                 (total_waste * HeapWordSize) / K,
-                 (pss.alloc_buffer_waste() * HeapWordSize) / K,
-                 (pss.undo_waste() * HeapWordSize) / K);
+      MutexLocker x(stats_lock());
+      pss.print_termination_stats(i);
     }
 
     assert(pss.refs_to_scan() == 0, "Task queue should be empty");
@@ -4260,6 +4287,7 @@
   if (ParallelGCThreads > 0) {
     // The individual threads will set their evac-failure closures.
     StrongRootsScope srs(this);
+    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
@@ -4293,7 +4321,7 @@
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
-      gclog_or_tty->print(" (evacuation failed)");
+      gclog_or_tty->print(" (to-space overflow)");
     } else if (PrintGC) {
       gclog_or_tty->print("--");
     }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -46,17 +46,7 @@
 class ConcurrentG1Refine;
 class ConcurrentZFThread;
 
-// If want to accumulate detailed statistics on work queues
-// turn this on.
-#define G1_DETAILED_STATS 0
-
-#if G1_DETAILED_STATS
-#  define IF_G1_DETAILED_STATS(code) code
-#else
-#  define IF_G1_DETAILED_STATS(code)
-#endif
-
-typedef GenericTaskQueue<StarTask>          RefToScanQueue;
+typedef OverflowTaskQueue<StarTask>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
 
 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
@@ -471,6 +461,12 @@
   virtual void shrink(size_t expand_bytes);
   void shrink_helper(size_t expand_bytes);
 
+  #if TASKQUEUE_STATS
+  static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const;
+  void reset_taskqueue_stats();
+  #endif // TASKQUEUE_STATS
+
   // Do an incremental collection: identify a collection set, and evacuate
   // its live objects elsewhere.
   virtual void do_collection_pause();
@@ -505,6 +501,12 @@
   // A function to check the consistency of dirty card logs.
   void check_ct_logs_at_safepoint();
 
+  // A DirtyCardQueueSet that is used to hold cards that contain
+  // references into the current collection set. This is used to
+  // update the remembered sets of the regions in the collection
+  // set in the event of an evacuation failure.
+  DirtyCardQueueSet _into_cset_dirty_card_queue_set;
+
   // After a collection pause, make the regions in the CS into free
   // regions.
   void free_collection_set(HeapRegion* cs_head);
@@ -656,11 +658,18 @@
 public:
   void set_refine_cte_cl_concurrency(bool concurrent);
 
-  RefToScanQueue *task_queue(int i);
+  RefToScanQueue *task_queue(int i) const;
 
   // A set of cards where updates happened during the GC
   DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
 
+  // A DirtyCardQueueSet that is used to hold cards that contain
+  // references into the current collection set. This is used to
+  // update the remembered sets of the regions in the collection
+  // set in the event of an evacuation failure.
+  DirtyCardQueueSet& into_cset_dirty_card_queue_set()
+        { return _into_cset_dirty_card_queue_set; }
+
   // Create a G1CollectedHeap with the specified policy.
   // Must call the initialize method afterwards.
   // May not return if something goes wrong.
@@ -715,7 +724,9 @@
     OrderAccess::fence();
   }
 
-  void iterate_dirty_card_closure(bool concurrent, int worker_i);
+  void iterate_dirty_card_closure(CardTableEntryClosure* cl,
+                                  DirtyCardQueue* into_cset_dcq,
+                                  bool concurrent, int worker_i);
 
   // The shared block offset table array.
   G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; }
@@ -1021,7 +1032,7 @@
   virtual bool supports_tlab_allocation() const;
   virtual size_t tlab_capacity(Thread* thr) const;
   virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
-  virtual HeapWord* allocate_new_tlab(size_t size);
+  virtual HeapWord* allocate_new_tlab(size_t word_size);
 
   // Can a compiler initialize a new object without store barriers?
   // This permission only extends from the creation of a new object
@@ -1564,9 +1575,6 @@
   CardTableModRefBS* _ct_bs;
   G1RemSet* _g1_rem;
 
-  typedef GrowableArray<StarTask> OverflowQueue;
-  OverflowQueue* _overflowed_refs;
-
   G1ParGCAllocBuffer  _surviving_alloc_buffer;
   G1ParGCAllocBuffer  _tenured_alloc_buffer;
   G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
@@ -1583,10 +1591,6 @@
   int _queue_num;
 
   size_t _term_attempts;
-#if G1_DETAILED_STATS
-  int _pushes, _pops, _steals, _steal_attempts;
-  int _overflow_pushes;
-#endif
 
   double _start;
   double _start_strong_roots;
@@ -1600,7 +1604,7 @@
   // this points into the array, as we use the first few entries for padding
   size_t* _surviving_young_words;
 
-#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
 
   void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
 
@@ -1635,15 +1639,14 @@
   }
 
   RefToScanQueue*   refs()            { return _refs;             }
-  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
   ageTable*         age_table()       { return &_age_table;       }
 
   G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return _alloc_buffers[purpose];
   }
 
-  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
-  size_t undo_waste()                            { return _undo_waste; }
+  size_t alloc_buffer_waste() const              { return _alloc_buffer_waste; }
+  size_t undo_waste() const                      { return _undo_waste; }
 
   template <class T> void push_on_queue(T* ref) {
     assert(ref != NULL, "invariant");
@@ -1656,12 +1659,7 @@
       assert(_g1h->obj_in_cs(p), "Should be in CS");
     }
 #endif
-    if (!refs()->push(ref)) {
-      overflowed_refs()->push(ref);
-      IF_G1_DETAILED_STATS(note_overflow_push());
-    } else {
-      IF_G1_DETAILED_STATS(note_push());
-    }
+    refs()->push(ref);
   }
 
   void pop_from_queue(StarTask& ref) {
@@ -1672,7 +1670,6 @@
              _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
                                                      : oopDesc::load_decode_heap_oop((oop*)ref)),
               "invariant");
-      IF_G1_DETAILED_STATS(note_pop());
     } else {
       StarTask null_task;
       ref = null_task;
@@ -1680,7 +1677,8 @@
   }
 
   void pop_from_overflow_queue(StarTask& ref) {
-    StarTask new_ref = overflowed_refs()->pop();
+    StarTask new_ref;
+    refs()->pop_overflow(new_ref);
     assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
     assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
     assert(has_partial_array_mask((oop*)new_ref) ||
@@ -1690,8 +1688,8 @@
     ref = new_ref;
   }
 
-  int refs_to_scan()                             { return refs()->size();                 }
-  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+  int refs_to_scan()            { return refs()->size(); }
+  int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); }
 
   template <class T> void update_rs(HeapRegion* from, T* p, int tid) {
     if (G1DeferredRSUpdate) {
@@ -1760,30 +1758,16 @@
   int* hash_seed() { return &_hash_seed; }
   int  queue_num() { return _queue_num; }
 
-  size_t term_attempts()   { return _term_attempts; }
+  size_t term_attempts() const  { return _term_attempts; }
   void note_term_attempt() { _term_attempts++; }
 
-#if G1_DETAILED_STATS
-  int pushes()          { return _pushes; }
-  int pops()            { return _pops; }
-  int steals()          { return _steals; }
-  int steal_attempts()  { return _steal_attempts; }
-  int overflow_pushes() { return _overflow_pushes; }
-
-  void note_push()          { _pushes++; }
-  void note_pop()           { _pops++; }
-  void note_steal()         { _steals++; }
-  void note_steal_attempt() { _steal_attempts++; }
-  void note_overflow_push() { _overflow_pushes++; }
-#endif
-
   void start_strong_roots() {
     _start_strong_roots = os::elapsedTime();
   }
   void end_strong_roots() {
     _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
   }
-  double strong_roots_time() { return _strong_roots_time; }
+  double strong_roots_time() const { return _strong_roots_time; }
 
   void start_term_time() {
     note_term_attempt();
@@ -1792,12 +1776,17 @@
   void end_term_time() {
     _term_time += (os::elapsedTime() - _start_term);
   }
-  double term_time() { return _term_time; }
+  double term_time() const { return _term_time; }
 
-  double elapsed() {
+  double elapsed_time() const {
     return os::elapsedTime() - _start;
   }
 
+  static void
+    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
+  void
+    print_termination_stats(int i, outputStream* const st = gclog_or_tty) const;
+
   size_t* surviving_young_words() {
     // We add on to hide entry 0 which accumulates surviving words for
     // age -1 regions (i.e. non-young ones)
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -57,8 +57,9 @@
   assert( SafepointSynchronize::is_at_safepoint() ||
           Heap_lock->owned_by_self(), "pre-condition of the call" );
 
-  if (_cur_alloc_region != NULL) {
-
+  // All humongous allocation requests should go through the slow path in
+  // attempt_allocation_slow().
+  if (!isHumongous(word_size) && _cur_alloc_region != NULL) {
     // If this allocation causes a region to become non empty,
     // then we need to update our free_regions count.
 
@@ -69,23 +70,23 @@
     } else {
       res = _cur_alloc_region->allocate(word_size);
     }
-  }
-  if (res != NULL) {
-    if (!SafepointSynchronize::is_at_safepoint()) {
-      assert( Heap_lock->owned_by_self(), "invariant" );
-      Heap_lock->unlock();
+
+    if (res != NULL) {
+      if (!SafepointSynchronize::is_at_safepoint()) {
+        assert( Heap_lock->owned_by_self(), "invariant" );
+        Heap_lock->unlock();
+      }
+      return res;
     }
-    return res;
   }
   // attempt_allocation_slow will also unlock the heap lock when appropriate.
   return attempt_allocation_slow(word_size, permit_collection_pause);
 }
 
-inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
+inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const {
   return _task_queues->queue(i);
 }
 
-
 inline  bool G1CollectedHeap::isMarkedPrev(oop obj) const {
   return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -88,7 +88,6 @@
   _all_mod_union_times_ms(new NumberSeq()),
 
   _summary(new Summary()),
-  _abandoned_summary(new AbandonedSummary()),
 
 #ifndef PRODUCT
   _cur_clear_ct_time_ms(0.0),
@@ -238,7 +237,6 @@
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
 
   _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
-  _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads];
 
   _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
 
@@ -842,7 +840,6 @@
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
-    _par_last_scan_new_refs_times_ms[i] = -1234.0;
     _par_last_obj_copy_times_ms[i] = -1234.0;
     _par_last_termination_times_ms[i] = -1234.0;
     _par_last_termination_attempts[i] = -1234.0;
@@ -1126,7 +1123,7 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
+void G1CollectorPolicy::record_collection_pause_end() {
   double end_time_sec = os::elapsedTime();
   double elapsed_ms = _last_pause_time_ms;
   bool parallel = ParallelGCThreads > 0;
@@ -1136,7 +1133,7 @@
   size_t cur_used_bytes = _g1->used();
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
-  bool update_stats = !abandoned && !_g1->evacuation_failed();
+  bool update_stats = !_g1->evacuation_failed();
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1275,12 +1272,7 @@
     gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   }
 
-  PauseSummary* summary;
-  if (abandoned) {
-    summary = _abandoned_summary;
-  } else {
-    summary = _summary;
-  }
+  PauseSummary* summary = _summary;
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
   double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
@@ -1348,61 +1340,58 @@
 
   double other_time_ms = elapsed_ms;
 
-  if (!abandoned) {
-    if (_satb_drain_time_set)
-      other_time_ms -= _cur_satb_drain_time_ms;
-
-    if (parallel)
-      other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
-    else
-      other_time_ms -=
-        update_rs_time +
-        ext_root_scan_time + mark_stack_scan_time +
-        scan_rs_time + obj_copy_time;
+  if (_satb_drain_time_set) {
+    other_time_ms -= _cur_satb_drain_time_ms;
+  }
+
+  if (parallel) {
+    other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
+  } else {
+    other_time_ms -=
+      update_rs_time +
+      ext_root_scan_time + mark_stack_scan_time +
+      scan_rs_time + obj_copy_time;
   }
 
   if (PrintGCDetails) {
-    gclog_or_tty->print_cr("%s%s, %1.8lf secs]",
-                           abandoned ? " (abandoned)" : "",
+    gclog_or_tty->print_cr("%s, %1.8lf secs]",
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (!abandoned) {
-      if (_satb_drain_time_set) {
-        print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-      }
-      if (_last_satb_drain_processed_buffers >= 0) {
-        print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
-      }
-      if (parallel) {
-        print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
-        print_par_stats(2, "GC Worker Start Time",
-                        _par_last_gc_worker_start_times_ms, false);
-        print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-        print_par_sizes(3, "Processed Buffers",
-                        _par_last_update_rs_processed_buffers, true);
-        print_par_stats(2, "Ext Root Scanning",
-                        _par_last_ext_root_scan_times_ms);
-        print_par_stats(2, "Mark Stack Scanning",
-                        _par_last_mark_stack_scan_times_ms);
-        print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
-        print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
-        print_par_stats(2, "Termination", _par_last_termination_times_ms);
-        print_par_sizes(3, "Termination Attempts",
-                        _par_last_termination_attempts, true);
-        print_par_stats(2, "GC Worker End Time",
-                        _par_last_gc_worker_end_times_ms, false);
-        print_stats(2, "Other", parallel_other_time);
-        print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
-      } else {
-        print_stats(1, "Update RS", update_rs_time);
-        print_stats(2, "Processed Buffers",
-                    (int)update_rs_processed_buffers);
-        print_stats(1, "Ext Root Scanning", ext_root_scan_time);
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
-        print_stats(1, "Scan RS", scan_rs_time);
-        print_stats(1, "Object Copying", obj_copy_time);
-      }
+    if (_satb_drain_time_set) {
+      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
+    }
+    if (_last_satb_drain_processed_buffers >= 0) {
+      print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
+    }
+    if (parallel) {
+      print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
+      print_par_stats(2, "GC Worker Start Time",
+                      _par_last_gc_worker_start_times_ms, false);
+      print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
+      print_par_sizes(3, "Processed Buffers",
+                      _par_last_update_rs_processed_buffers, true);
+      print_par_stats(2, "Ext Root Scanning",
+                      _par_last_ext_root_scan_times_ms);
+      print_par_stats(2, "Mark Stack Scanning",
+                      _par_last_mark_stack_scan_times_ms);
+      print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
+      print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
+      print_par_stats(2, "Termination", _par_last_termination_times_ms);
+      print_par_sizes(3, "Termination Attempts",
+                      _par_last_termination_attempts, true);
+      print_par_stats(2, "GC Worker End Time",
+                      _par_last_gc_worker_end_times_ms, false);
+      print_stats(2, "Other", parallel_other_time);
+      print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+    } else {
+      print_stats(1, "Update RS", update_rs_time);
+      print_stats(2, "Processed Buffers",
+                  (int)update_rs_processed_buffers);
+      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
+      print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+      print_stats(1, "Scan RS", scan_rs_time);
+      print_stats(1, "Object Copying", obj_copy_time);
     }
 #ifndef PRODUCT
     print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
@@ -2178,33 +2167,27 @@
     print_summary(1, "Other", summary->get_other_seq());
     {
       NumberSeq calc_other_times_ms;
-      if (body_summary != NULL) {
-        // not abandoned
-        if (parallel) {
-          // parallel
-          NumberSeq* other_parts[] = {
-            body_summary->get_satb_drain_seq(),
-            body_summary->get_parallel_seq(),
-            body_summary->get_clear_ct_seq()
-          };
-          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                          3, other_parts);
-        } else {
-          // serial
-          NumberSeq* other_parts[] = {
-            body_summary->get_satb_drain_seq(),
-            body_summary->get_update_rs_seq(),
-            body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
-            body_summary->get_scan_rs_seq(),
-            body_summary->get_obj_copy_seq()
-          };
-          calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                          7, other_parts);
-        }
+      if (parallel) {
+        // parallel
+        NumberSeq* other_parts[] = {
+          body_summary->get_satb_drain_seq(),
+          body_summary->get_parallel_seq(),
+          body_summary->get_clear_ct_seq()
+        };
+        calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                        3, other_parts);
       } else {
-        // abandoned
-        calc_other_times_ms = NumberSeq();
+        // serial
+        NumberSeq* other_parts[] = {
+          body_summary->get_satb_drain_seq(),
+          body_summary->get_update_rs_seq(),
+          body_summary->get_ext_root_scan_seq(),
+          body_summary->get_mark_stack_scan_seq(),
+          body_summary->get_scan_rs_seq(),
+          body_summary->get_obj_copy_seq()
+        };
+        calc_other_times_ms = NumberSeq(summary->get_total_seq(),
+                                        7, other_parts);
       }
       check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
     }
@@ -2215,20 +2198,6 @@
   gclog_or_tty->print_cr("");
 }
 
-void
-G1CollectorPolicy::print_abandoned_summary(PauseSummary* summary) const {
-  bool printed = false;
-  if (summary->get_total_seq()->num() > 0) {
-    printed = true;
-    print_summary(summary);
-  }
-  if (!printed) {
-    print_indent(0);
-    gclog_or_tty->print_cr("none");
-    gclog_or_tty->print_cr("");
-  }
-}
-
 void G1CollectorPolicy::print_tracing_info() const {
   if (TraceGen0Time) {
     gclog_or_tty->print_cr("ALL PAUSES");
@@ -2242,9 +2211,6 @@
     gclog_or_tty->print_cr("EVACUATION PAUSES");
     print_summary(_summary);
 
-    gclog_or_tty->print_cr("ABANDONED PAUSES");
-    print_abandoned_summary(_abandoned_summary);
-
     gclog_or_tty->print_cr("MISC");
     print_summary_sd(0, "Stop World", _all_stop_world_times_ms);
     print_summary_sd(0, "Yields", _all_yield_times_ms);
@@ -2870,19 +2836,12 @@
 }
 #endif // !PRODUCT
 
-bool
+void
 G1CollectorPolicy_BestRegionsFirst::choose_collection_set(
                                                   double target_pause_time_ms) {
   // Set this here - in case we're not doing young collections.
   double non_young_start_time_sec = os::elapsedTime();
 
-  // The result that this routine will return. This will be set to
-  // false if:
-  // * we're doing a young or partially young collection and we
-  //   have added the youg regions to collection set, or
-  // * we add old regions to the collection set.
-  bool abandon_collection = true;
-
   start_recording_regions();
 
   guarantee(target_pause_time_ms > 0.0,
@@ -2986,10 +2945,6 @@
     }
 
     assert(_inc_cset_size == _g1->young_list()->length(), "Invariant");
-    if (_inc_cset_size > 0) {
-      assert(_collection_set != NULL, "Invariant");
-      abandon_collection = false;
-    }
 
     double young_end_time_sec = os::elapsedTime();
     _recorded_young_cset_choice_time_ms =
@@ -3011,10 +2966,6 @@
     NumberSeq seq;
     double avg_prediction = 100000000000000000.0; // something very large
 
-    // Save the current size of the collection set to detect
-    // if we actually added any old regions.
-    size_t n_young_regions = _collection_set_size;
-
     do {
       hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
                                                       avg_prediction);
@@ -3041,12 +2992,6 @@
     if (!adaptive_young_list_length() &&
         _collection_set_size < _young_list_fixed_length)
       _should_revert_to_full_young_gcs  = true;
-
-    if (_collection_set_size > n_young_regions) {
-      // We actually added old regions to the collection set
-      // so we are not abandoning this collection.
-      abandon_collection = false;
-    }
   }
 
 choose_collection_set_end:
@@ -3059,19 +3004,6 @@
   double non_young_end_time_sec = os::elapsedTime();
   _recorded_non_young_cset_choice_time_ms =
     (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
-
-  // Here we are supposed to return whether the pause should be
-  // abandoned or not (i.e., whether the collection set is empty or
-  // not). However, this introduces a subtle issue when a pause is
-  // initiated explicitly with System.gc() and
-  // +ExplicitGCInvokesConcurrent (see Comment #2 in CR 6944166), it's
-  // supposed to start a marking cycle, and it's abandoned. So, by
-  // returning false here we are telling the caller never to consider
-  // a pause to be abandoned. We'll actually remove all the code
-  // associated with abandoned pauses as part of CR 6963209, but we are
-  // just disabling them this way for the moment to avoid increasing
-  // further the amount of changes for CR 6944166.
-  return false;
 }
 
 void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() {
@@ -3086,7 +3018,7 @@
 }
 
 void G1CollectorPolicy_BestRegionsFirst::
-record_collection_pause_end(bool abandoned) {
-  G1CollectorPolicy::record_collection_pause_end(abandoned);
+record_collection_pause_end() {
+  G1CollectorPolicy::record_collection_pause_end();
   assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -63,8 +63,6 @@
     define_num_seq(mark_stack_scan)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
-    define_num_seq(scan_new_refs) // Only for temp use; added to
-                                  // in parallel case.
     define_num_seq(obj_copy)
     define_num_seq(termination) // parallel only
     define_num_seq(parallel_other) // parallel only
@@ -78,9 +76,6 @@
   virtual MainBodySummary*    main_body_summary()    { return this; }
 };
 
-class AbandonedSummary: public PauseSummary {
-};
-
 class G1CollectorPolicy: public CollectorPolicy {
 protected:
   // The number of pauses during the execution.
@@ -150,7 +145,6 @@
   TruncatedSeq* _concurrent_mark_cleanup_times_ms;
 
   Summary*           _summary;
-  AbandonedSummary*  _abandoned_summary;
 
   NumberSeq* _all_pause_times_ms;
   NumberSeq* _all_full_gc_times_ms;
@@ -177,7 +171,6 @@
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
-  double* _par_last_scan_new_refs_times_ms;
   double* _par_last_obj_copy_times_ms;
   double* _par_last_termination_times_ms;
   double* _par_last_termination_attempts;
@@ -576,7 +569,6 @@
                          NumberSeq* calc_other_times_ms) const;
 
   void print_summary (PauseSummary* stats) const;
-  void print_abandoned_summary(PauseSummary* summary) const;
 
   void print_summary (int level, const char* str, NumberSeq* seq) const;
   void print_summary_sd (int level, const char* str, NumberSeq* seq) const;
@@ -889,7 +881,7 @@
   virtual void record_collection_pause_end_CH_strong_roots();
   virtual void record_collection_pause_end_G1_strong_roots();
 
-  virtual void record_collection_pause_end(bool abandoned);
+  virtual void record_collection_pause_end();
 
   // Record the fact that a full collection occurred.
   virtual void record_full_collection_start();
@@ -933,14 +925,6 @@
     _par_last_scan_rs_times_ms[thread] = ms;
   }
 
-  void record_scan_new_refs_time(int thread, double ms) {
-    _par_last_scan_new_refs_times_ms[thread] = ms;
-  }
-
-  double get_scan_new_refs_time(int thread) {
-    return _par_last_scan_new_refs_times_ms[thread];
-  }
-
   void reset_obj_copy_time(int thread) {
     _par_last_obj_copy_times_ms[thread] = 0.0;
   }
@@ -1010,7 +994,7 @@
   // Choose a new collection set.  Marks the chosen regions as being
   // "in_collection_set", and links them together.  The head and number of
   // the collection set are available via access methods.
-  virtual bool choose_collection_set(double target_pause_time_ms) = 0;
+  virtual void choose_collection_set(double target_pause_time_ms) = 0;
 
   // The head of the list (via "next_in_collection_set()") representing the
   // current collection set.
@@ -1267,7 +1251,7 @@
   // If the estimated is less then desirable, resize if possible.
   void expand_if_possible(size_t numRegions);
 
-  virtual bool choose_collection_set(double target_pause_time_ms);
+  virtual void choose_collection_set(double target_pause_time_ms);
   virtual void record_collection_pause_start(double start_time_sec,
                                              size_t start_used);
   virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
@@ -1278,7 +1262,7 @@
   G1CollectorPolicy_BestRegionsFirst() {
     _collectionSetChooser = new CollectionSetChooser();
   }
-  void record_collection_pause_end(bool abandoned);
+  void record_collection_pause_end();
   bool should_do_collection_pause(size_t word_size);
   // This is not needed any more, after the CSet choosing code was
   // changed to use the pause prediction work. But let's leave the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,8 @@
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
 #if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
-    _dcto_cl->incr_count();
+    if (_dcto_cl != NULL)
+      _dcto_cl->incr_count();
 #endif
   }
 }
@@ -113,7 +114,10 @@
     if (_g1->in_cset_fast_test(obj)) {
       Prefetch::write(obj->mark_addr(), 0);
       Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // Place on the references queue
       _par_scan_state->push_on_queue(p);
     }
   }
 }
+
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -122,23 +122,24 @@
 HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
   : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
-    _par_traversal_in_progress(false), _new_refs(NULL),
+    _traversal_in_progress(false),
+    _cset_rs_update_cl(NULL),
     _cards_scanned(NULL), _total_cards_scanned(0)
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
+  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
   for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
+    _cset_rs_update_cl[i] = NULL;
   }
 }
 
 HRInto_G1RemSet::~HRInto_G1RemSet() {
   delete _seq_task;
   for (uint i = 0; i < n_workers(); i++) {
-    delete _new_refs[i];
+    assert(_cset_rs_update_cl[i] == NULL, "it should be");
   }
-  FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
+  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
 }
 
 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -306,12 +307,45 @@
   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
 }
 
-void HRInto_G1RemSet::updateRS(int worker_i) {
-  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+// Closure used for updating RSets and recording references that
+// point into the collection set. Only called during an
+// evacuation pause.
+
+class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
+  G1RemSet* _g1rs;
+  DirtyCardQueue* _into_cset_dcq;
+public:
+  RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
+                                              DirtyCardQueue* into_cset_dcq) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
+  {}
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    // The only time we care about recording cards that
+    // contain references that point into the collection set
+    // is during RSet updating within an evacuation pause.
+    // In this case worker_i should be the id of a GC worker thread.
+    assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
+    assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "should be a GC worker");
 
+    if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
+      // 'card_ptr' contains references that point into the collection
+      // set. We need to record the card in the DCQS
+      // (G1CollectedHeap::into_cset_dirty_card_queue_set())
+      // that's used for that purpose.
+      //
+      // Enqueue the card
+      _into_cset_dcq->enqueue(card_ptr);
+    }
+    return true;
+  }
+};
+
+void HRInto_G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) {
   double start = os::elapsedTime();
-  // Apply the appropriate closure to all remaining log entries.
-  _g1->iterate_dirty_card_closure(false, worker_i);
+  // Apply the given closure to all remaining log entries.
+  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
+  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
+
   // Now there should be no dirty cards.
   if (G1RSLogCheckCardTable) {
     CountNonCleanMemRegionClosure cl(_g1);
@@ -405,33 +439,6 @@
   }
 };
 
-template <class T> void
-HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
-                                    int worker_i) {
-  double scan_new_refs_start_sec = os::elapsedTime();
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
-  for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
-    T* p = (T*) _new_refs[worker_i]->at(i);
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    // *p was in the collection set when p was pushed on "_new_refs", but
-    // another thread may have processed this location from an RS, so it
-    // might not point into the CS any longer.  If so, it's obviously been
-    // processed, and we don't need to do anything further.
-    if (g1h->obj_in_cs(obj)) {
-      HeapRegion* r = g1h->heap_region_containing(p);
-
-      DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
-      oc->set_region(r);
-      // If "p" has already been processed concurrently, this is
-      // idempotent.
-      oc->do_oop(p);
-    }
-  }
-  double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0;
-  _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms);
-}
-
 void HRInto_G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
@@ -457,32 +464,48 @@
     count_cl.print_histo();
   }
 
-  if (ParallelGCThreads > 0) {
-    // The two flags below were introduced temporarily to serialize
-    // the updating and scanning of remembered sets. There are some
-    // race conditions when these two operations are done in parallel
-    // and they are causing failures. When we resolve said race
-    // conditions, we'll revert back to parallel remembered set
-    // updating and scanning. See CRs 6677707 and 6677708.
-    if (G1UseParallelRSetUpdating || (worker_i == 0)) {
-      updateRS(worker_i);
-      scanNewRefsRS(oc, worker_i);
-    } else {
-      _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
-      _g1p->record_update_rs_time(worker_i, 0.0);
-      _g1p->record_scan_new_refs_time(worker_i, 0.0);
-    }
-    if (G1UseParallelRSetScanning || (worker_i == 0)) {
-      scanRS(oc, worker_i);
-    } else {
-      _g1p->record_scan_rs_time(worker_i, 0.0);
-    }
+  // We cache the value of 'oc' closure into the appropriate slot in the
+  // _cset_rs_update_cl for this worker
+  assert(worker_i < (int)n_workers(), "sanity");
+  _cset_rs_update_cl[worker_i] = oc;
+
+  // A DirtyCardQueue that is used to hold cards containing references
+  // that point into the collection set. This DCQ is associated with a
+  // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
+  // circumstances (i.e. the pause successfully completes), these cards
+  // are just discarded (there's no need to update the RSets of regions
+  // that were in the collection set - after the pause these regions
+  // are wholly 'free' of live objects. In the event of an evacuation
+  // failure the cards/buffers in this queue set are:
+  // * passed to the DirtyCardQueueSet that is used to manage deferred
+  //   RSet updates, or
+  // * scanned for references that point into the collection set
+  //   and the RSet of the corresponding region in the collection set
+  //   is updated immediately.
+  DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
+
+  assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
+
+  // The two flags below were introduced temporarily to serialize
+  // the updating and scanning of remembered sets. There are some
+  // race conditions when these two operations are done in parallel
+  // and they are causing failures. When we resolve said race
+  // conditions, we'll revert back to parallel remembered set
+  // updating and scanning. See CRs 6677707 and 6677708.
+  if (G1UseParallelRSetUpdating || (worker_i == 0)) {
+    updateRS(&into_cset_dcq, worker_i);
   } else {
-    assert(worker_i == 0, "invariant");
-    updateRS(0);
-    scanNewRefsRS(oc, 0);
-    scanRS(oc, 0);
+    _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
+    _g1p->record_update_rs_time(worker_i, 0.0);
   }
+  if (G1UseParallelRSetScanning || (worker_i == 0)) {
+    scanRS(oc, worker_i);
+  } else {
+    _g1p->record_scan_rs_time(worker_i, 0.0);
+  }
+
+  // We now clear the cached values of _cset_rs_update_cl for this worker
+  _cset_rs_update_cl[worker_i] = NULL;
 }
 
 void HRInto_G1RemSet::
@@ -497,9 +520,9 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
 
-  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+  assert(!_traversal_in_progress, "Invariant between iterations.");
+  set_traversal(true);
   if (ParallelGCThreads > 0) {
-    set_par_traversal(true);
     _seq_task->set_par_threads((int)n_workers());
   }
   guarantee( _cards_scanned == NULL, "invariant" );
@@ -519,49 +542,65 @@
   }
 };
 
-class UpdateRSetOopsIntoCSImmediate : public OopClosure {
-  G1CollectedHeap* _g1;
-public:
-  UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
-    if (to->in_collection_set()) {
-      to->rem_set()->add_reference(p, 0);
-    }
-  }
-};
-
-class UpdateRSetOopsIntoCSDeferred : public OopClosure {
+// This closure, applied to a DirtyCardQueueSet, is used to immediately
+// update the RSets for the regions in the CSet. For each card it iterates
+// through the oops which coincide with that card. It scans the reference
+// fields in each oop; when it finds an oop that points into the collection
+// set, the RSet for the region containing the referenced object is updated.
+// Note: _par_traversal_in_progress in the G1RemSet must be FALSE; otherwise
+// the UpdateRSetImmediate closure will cause cards to be enqueued on to
+// the DCQS that we're iterating over, causing an infinite loop.
+class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure {
   G1CollectedHeap* _g1;
   CardTableModRefBS* _ct_bs;
-  DirtyCardQueue* _dcq;
 public:
-  UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    oop obj = oopDesc::load_decode_heap_oop(p);
-    if (_g1->obj_in_cs(obj)) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
+  UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1,
+                                          CardTableModRefBS* bs):
+    _g1(g1), _ct_bs(bs)
+  { }
+
+  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
+    // Construct the region representing the card.
+    HeapWord* start = _ct_bs->addr_for(card_ptr);
+    // And find the region containing it.
+    HeapRegion* r = _g1->heap_region_containing(start);
+    assert(r != NULL, "unexpected null");
+
+    // Scan oops in the card looking for references into the collection set
+    HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
+    MemRegion scanRegion(start, end);
+
+    UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set());
+    FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl);
+    FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl);
+
+    // We can pass false as the "filter_young" parameter here as:
+    // * we should be in a STW pause,
+    // * the DCQS to which this closure is applied is used to hold
+    //   references that point into the collection set from the prior
+    //   RSet updating,
+    // * the post-write barrier shouldn't be logging updates to young
+    //   regions (but there is a situation where this can happen - see
+    //   the comment in HRInto_G1RemSet::concurrentRefineOneCard below -
+    //   that should not be applicable here), and
+    // * during actual RSet updating, the filtering of cards in young
+    //   regions in HeapRegion::oops_on_card_seq_iterate_careful is
+    //   employed.
+    // As a result, when this closure is applied to "refs into cset"
+    // DCQS, we shouldn't see any cards in young regions.
+    update_rs_cl.set_region(r);
+    HeapWord* stop_point =
+      r->oops_on_card_seq_iterate_careful(scanRegion,
+                                        &filter_then_update_rs_cset_oop_cl,
+                                        false /* filter_young */);
+
+    // Since this is performed in the event of an evacuation failure, we
+    // we shouldn't see a non-null stop point
+    assert(stop_point == NULL, "saw an unallocated region");
+    return true;
   }
 };
 
-template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
-  for (size_t i = 0; i < n_workers(); i++) {
-    for (int j = 0; j < _new_refs[i]->length(); j++) {
-      T* p = (T*) _new_refs[i]->at(j);
-      cl->do_oop(p);
-    }
-  }
-}
-
 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   guarantee( _cards_scanned != NULL, "invariant" );
   _total_cards_scanned = 0;
@@ -580,27 +619,42 @@
   // Set all cards back to clean.
   _g1->cleanUpCardTable();
 
-  if (ParallelGCThreads > 0) {
-    set_par_traversal(false);
-  }
+  set_traversal(false);
+
+  DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set();
+  int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();
 
   if (_g1->evacuation_failed()) {
-    // Restore remembered sets for the regions pointing into
-    // the collection set.
+    // Restore remembered sets for the regions pointing into the collection set.
+
     if (G1DeferredRSUpdate) {
-      DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
-      UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
-      new_refs_iterate(&deferred_update);
+      // If deferred RS updates are enabled then we just need to transfer
+      // the completed buffers from (a) the DirtyCardQueueSet used to hold
+      // cards that contain references that point into the collection set
+      // to (b) the DCQS used to hold the deferred RS updates
+      _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
     } else {
-      UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
-      new_refs_iterate(&immediate_update);
+
+      CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set();
+      UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs);
+
+      int n_completed_buffers = 0;
+      while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate,
+                                                    0, 0, true)) {
+        n_completed_buffers++;
+      }
+      assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
     }
   }
-  for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i]->clear();
-  }
 
-  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+  // Free any completed buffers in the DirtyCardQueueSet used to hold cards
+  // which contain references that point into the collection.
+  _g1->into_cset_dirty_card_queue_set().clear();
+  assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0,
+         "all buffers should be freed");
+  _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers();
+
+  assert(!_traversal_in_progress, "Invariant between iterations.");
 }
 
 class UpdateRSObjectClosure: public ObjectClosure {
@@ -652,7 +706,43 @@
 
 static IntHistogram out_of_histo(50, 50);
 
-void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) {
+class TriggerClosure : public OopClosure {
+  bool _trigger;
+public:
+  TriggerClosure() : _trigger(false) { }
+  bool value() const { return _trigger; }
+  template <class T> void do_oop_nv(T* p) { _trigger = true; }
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class InvokeIfNotTriggeredClosure: public OopClosure {
+  TriggerClosure* _t;
+  OopClosure* _oc;
+public:
+  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
+    _t(t), _oc(oc) { }
+  template <class T> void do_oop_nv(T* p) {
+    if (!_t->value()) _oc->do_oop(p);
+  }
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class Mux2Closure : public OopClosure {
+  OopClosure* _c1;
+  OopClosure* _c2;
+public:
+  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
+  template <class T> void do_oop_nv(T* p) {
+    _c1->do_oop(p); _c2->do_oop(p);
+  }
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+bool HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
+                                                   bool check_for_refs_into_cset) {
   // Construct the region representing the card.
   HeapWord* start = _ct_bs->addr_for(card_ptr);
   // And find the region containing it.
@@ -669,7 +759,16 @@
 
   UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
   update_rs_oop_cl.set_from(r);
-  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  TriggerClosure trigger_cl;
+  FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
+  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
+  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
+
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
+                        (check_for_refs_into_cset ?
+                                (OopClosure*)&mux :
+                                (OopClosure*)&update_rs_oop_cl));
 
   // Undirty the card.
   *card_ptr = CardTableModRefBS::clean_card_val();
@@ -717,11 +816,18 @@
     out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
     _conc_refine_cards++;
   }
+
+  return trigger_cl.value();
 }
 
-void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
+bool HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                              bool check_for_refs_into_cset) {
   // If the card is no longer dirty, nothing to do.
-  if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
+  if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+    // No need to return that this card contains refs that point
+    // into the collection set.
+    return false;
+  }
 
   // Construct the region representing the card.
   HeapWord* start = _ct_bs->addr_for(card_ptr);
@@ -729,7 +835,9 @@
   HeapRegion* r = _g1->heap_region_containing(start);
   if (r == NULL) {
     guarantee(_g1->is_in_permanent(start), "Or else where?");
-    return;  // Not in the G1 heap (might be in perm, for example.)
+    // Again no need to return that this card contains refs that
+    // point into the collection set.
+    return false;  // Not in the G1 heap (might be in perm, for example.)
   }
   // Why do we have to check here whether a card is on a young region,
   // given that we dirty young regions and, as a result, the
@@ -743,7 +851,7 @@
   // and it doesn't happen often, but it can happen. So, the extra
   // check below filters out those cards.
   if (r->is_young()) {
-    return;
+    return false;
   }
   // While we are processing RSet buffers during the collection, we
   // actually don't want to scan any cards on the collection set,
@@ -756,7 +864,7 @@
   // however, that if evacuation fails, we have to scan any objects
   // that were not moved and create any missing entries.
   if (r->in_collection_set()) {
-    return;
+    return false;
   }
 
   // Should we defer processing the card?
@@ -797,8 +905,14 @@
   //                  cache.
   //                  Immediately process res; no need to process card_ptr.
 
+
   jbyte* res = card_ptr;
   bool defer = false;
+
+  // This gets set to true if the card being refined has references
+  // that point into the collection set.
+  bool oops_into_cset = false;
+
   if (_cg1r->use_cache()) {
     jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
     if (res != NULL && (res != card_ptr || defer)) {
@@ -815,14 +929,31 @@
         // Process card pointer we get back from the hot card cache. This
         // will check whether the region containing the card is young
         // _after_ checking that the region has been allocated from.
-        concurrentRefineOneCard_impl(res, worker_i);
+        oops_into_cset = concurrentRefineOneCard_impl(res, worker_i,
+                                                      false /* check_for_refs_into_cset */);
+        // The above call to concurrentRefineOneCard_impl is only
+        // performed if the hot card cache is enabled. This cache is
+        // disabled during an evacuation pause - which is the only
+        // time when we need know if the card contains references
+        // that point into the collection set. Also when the hot card
+        // cache is enabled, this code is executed by the concurrent
+        // refine threads - rather than the GC worker threads - and
+        // concurrentRefineOneCard_impl will return false.
+        assert(!oops_into_cset, "should not see true here");
       }
     }
   }
 
   if (!defer) {
-    concurrentRefineOneCard_impl(card_ptr, worker_i);
+    oops_into_cset =
+      concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset);
+    // We should only be detecting that the card contains references
+    // that point into the collection set if the current thread is
+    // a GC worker thread.
+    assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(),
+           "invalid result at non safepoint");
   }
+  return oops_into_cset;
 }
 
 class HRRSStatsIter: public HeapRegionClosure {
@@ -920,6 +1051,7 @@
 
   }
 }
+
 void HRInto_G1RemSet::prepare_for_verify() {
   if (G1HRRSFlushLogBuffersOnVerify &&
       (VerifyBeforeGC || VerifyAfterGC)
@@ -932,7 +1064,9 @@
     }
     bool cg1r_use_cache = _cg1r->use_cache();
     _cg1r->set_use_cache(false);
-    updateRS(0);
+    DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
+    updateRS(&into_cset_dcq, 0);
+    _g1->into_cset_dirty_card_queue_set().clear();
     _cg1r->set_use_cache(cg1r_use_cache);
 
     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,7 +83,13 @@
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
   // being done at a safepoint.)
-  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+  // With some implementations of this routine, when check_for_refs_into_cset
+  // is true, a true result may be returned if the given card contains oops
+  // that have references into the current collection set.
+  virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                       bool check_for_refs_into_cset) {
+    return false;
+  }
 
   // Print any relevant summary info.
   virtual void print_summary_info() {}
@@ -142,24 +148,22 @@
   size_t*             _cards_scanned;
   size_t              _total_cards_scanned;
 
-  // _par_traversal_in_progress is "true" iff a parallel traversal is in
-  // progress.  If so, then cards added to remembered sets should also have
-  // their references into the collection summarized in "_new_refs".
-  bool _par_traversal_in_progress;
-  void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
-  GrowableArray<OopOrNarrowOopStar>** _new_refs;
-  template <class T> void new_refs_iterate_work(OopClosure* cl);
-  void new_refs_iterate(OopClosure* cl) {
-    if (UseCompressedOops) {
-      new_refs_iterate_work<narrowOop>(cl);
-    } else {
-      new_refs_iterate_work<oop>(cl);
-    }
-  }
+  // _traversal_in_progress is "true" iff a traversal is in progress.
+
+  bool _traversal_in_progress;
+  void set_traversal(bool b) { _traversal_in_progress = b; }
+
+  // Used for caching the closure that is responsible for scanning
+  // references into the collection set.
+  OopsInHeapRegionClosure** _cset_rs_update_cl;
 
   // The routine that performs the actual work of refining a dirty
   // card.
-  void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i);
+  // If check_for_refs_into_refs is true then a true result is returned
+  // if the card contains oops that have references into the current
+  // collection set.
+  bool concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
+                                    bool check_for_refs_into_cset);
 
 protected:
   template <class T> void write_ref_nv(HeapRegion* from, T* p);
@@ -188,7 +192,7 @@
       scanNewRefsRS_work<oop>(oc, worker_i);
     }
   }
-  void updateRS(int worker_i);
+  void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);
   HeapRegion* calculateStartRegion(int i);
 
   HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
@@ -219,7 +223,11 @@
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
                  int worker_num, int claim_val);
 
-  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
+  // If check_for_refs_into_cset is true then a true result is returned
+  // if the card contains oops that have references into the current
+  // collection set.
+  virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
+                                       bool check_for_refs_into_cset);
 
   virtual void print_summary_info();
   virtual void prepare_for_verify();
@@ -265,3 +273,16 @@
   //  bool idempotent() { return true; }
   bool apply_to_weak_ref_discovered_field() { return true; }
 };
+
+class UpdateRSetImmediate: public OopsInHeapRegionClosure {
+private:
+  G1RemSet* _g1_rem_set;
+
+  template <class T> void do_oop_work(T* p);
+public:
+  UpdateRSetImmediate(G1RemSet* rs) :
+    _g1_rem_set(rs) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+};
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,19 +56,25 @@
     assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
   }
 #endif // ASSERT
-  assert(from == NULL || from->is_in_reserved(p),
-         "p is not in from");
+
+  assert(from == NULL || from->is_in_reserved(p), "p is not in from");
+
   HeapRegion* to = _g1->heap_region_containing(obj);
   // The test below could be optimized by applying a bit op to to and from.
   if (to != NULL && from != NULL && from != to) {
-    // There is a tricky infinite loop if we keep pushing
-    // self forwarding pointers onto our _new_refs list.
-    // The _par_traversal_in_progress flag is true during the collection pause,
-    // false during the evacuation failure handing.
-    if (_par_traversal_in_progress &&
+    // The _traversal_in_progress flag is true during the collection pause,
+    // false during the evacuation failure handling. This should avoid a
+    // potential loop if we were to add the card containing 'p' to the DCQS
+    // that's used to regenerate the remembered sets for the collection set,
+    // in the event of an evacuation failure, here. The UpdateRSImmediate
+    // closure will eventally call this routine.
+    if (_traversal_in_progress &&
         to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push((void*)p);
-      // Deferred updates to the Cset are either discarded (in the normal case),
+
+      assert(_cset_rs_update_cl[tid] != NULL, "should have been set already");
+      _cset_rs_update_cl[tid]->do_oop(p);
+
+      // Deferred updates to the CSet are either discarded (in the normal case),
       // or processed (if an evacuation failure occurs) at the end
       // of the collection.
       // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do().
@@ -89,3 +95,12 @@
   assert(_from != NULL, "from region must be non-NULL");
   _rs->par_write_ref(_from, p, _worker_i);
 }
+
+template <class T> inline void UpdateRSetImmediate::do_oop_work(T* p) {
+  assert(_from->is_in_reserved(p), "paranoia");
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) {
+    _g1_rem_set->par_write_ref(_from, p, 0);
+  }
+}
+
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -683,6 +683,8 @@
     return NULL;
   }
 
+  assert(!is_young(), "check value of filter_young");
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
   HeapWord* cur = block_start(mr.start());
@@ -788,8 +790,18 @@
   int objs = 0;
   int blocks = 0;
   VerifyLiveClosure vl_cl(g1, use_prev_marking);
+  bool is_humongous = isHumongous();
+  size_t object_num = 0;
   while (p < top()) {
     size_t size = oop(p)->size();
+    if (is_humongous != g1->isHumongous(size)) {
+      gclog_or_tty->print_cr("obj "PTR_FORMAT" is of %shumongous size ("
+                             SIZE_FORMAT" words) in a %shumongous region",
+                             p, g1->isHumongous(size) ? "" : "non-",
+                             size, is_humongous ? "" : "non-");
+       *failures = true;
+    }
+    object_num += 1;
     if (blocks == BLOCK_SAMPLE_INTERVAL) {
       HeapWord* res = block_start_const(p + (size/2));
       if (p != res) {
@@ -855,6 +867,13 @@
     }
   }
 
+  if (is_humongous && object_num > 1) {
+    gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is humongous "
+                           "but has "SIZE_FORMAT", objects",
+                           bottom(), end(), object_num);
+    *failures = true;
+  }
+
   if (p != top()) {
     gclog_or_tty->print_cr("end of last object "PTR_FORMAT" "
                            "does not match top "PTR_FORMAT, p, top());
--- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -424,7 +424,7 @@
 
 
 SparsePRT::SparsePRT(HeapRegion* hr) :
-  _expanded(false), _next_expanded(NULL)
+  _hr(hr), _expanded(false), _next_expanded(NULL)
 {
   _cur = new RSHashTable(InitialCapacity);
   _next = _cur;
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@
 binaryTreeDictionary.hpp                freeBlockDictionary.hpp
 binaryTreeDictionary.hpp                freeList.hpp
 
+blockOffsetTable.inline.hpp             concurrentMarkSweepGeneration.hpp
+
 cmsAdaptiveSizePolicy.cpp		cmsAdaptiveSizePolicy.hpp
 cmsAdaptiveSizePolicy.cpp		defNewGeneration.hpp
 cmsAdaptiveSizePolicy.cpp		gcStats.hpp
@@ -85,7 +87,7 @@
 cmsOopClosures.inline.hpp               cmsOopClosures.hpp
 cmsOopClosures.inline.hpp               concurrentMarkSweepGeneration.hpp
 
-cmsPermGen.cpp                          blockOffsetTable.hpp
+cmsPermGen.cpp                          blockOffsetTable.inline.hpp
 cmsPermGen.cpp                          cSpaceCounters.hpp
 cmsPermGen.cpp                          cmsPermGen.hpp
 cmsPermGen.cpp                          collectedHeap.inline.hpp
@@ -121,6 +123,7 @@
 compactibleFreeListSpace.cpp            vmThread.hpp
 
 compactibleFreeListSpace.hpp            binaryTreeDictionary.hpp
+compactibleFreeListSpace.hpp            blockOffsetTable.inline.hpp
 compactibleFreeListSpace.hpp            freeList.hpp
 compactibleFreeListSpace.hpp            promotionInfo.hpp
 compactibleFreeListSpace.hpp            space.hpp
@@ -149,6 +152,7 @@
 concurrentMarkSweepGeneration.cpp       iterator.hpp
 concurrentMarkSweepGeneration.cpp       java.hpp
 concurrentMarkSweepGeneration.cpp       jvmtiExport.hpp
+concurrentMarkSweepGeneration.cpp       memoryService.hpp
 concurrentMarkSweepGeneration.cpp       oop.inline.hpp
 concurrentMarkSweepGeneration.cpp       parNewGeneration.hpp
 concurrentMarkSweepGeneration.cpp       referencePolicy.hpp
@@ -165,6 +169,7 @@
 concurrentMarkSweepGeneration.hpp       gcStats.hpp
 concurrentMarkSweepGeneration.hpp       generation.hpp
 concurrentMarkSweepGeneration.hpp       generationCounters.hpp
+concurrentMarkSweepGeneration.hpp       memoryService.hpp
 concurrentMarkSweepGeneration.hpp       mutexLocker.hpp
 concurrentMarkSweepGeneration.hpp       taskqueue.hpp
 concurrentMarkSweepGeneration.hpp       virtualspace.hpp
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -241,6 +241,7 @@
 
 g1MMUTracker.hpp			debug.hpp
 g1MMUTracker.hpp			allocation.hpp
+
 g1RemSet.cpp				bufferingOopClosure.hpp
 g1RemSet.cpp				concurrentG1Refine.hpp
 g1RemSet.cpp				concurrentG1RefineThread.hpp
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge	Thu Sep 09 14:27:59 2010 -0700
@@ -330,7 +330,6 @@
 psPromotionManager.cpp                  psScavenge.inline.hpp
 
 psPromotionManager.hpp                  allocation.hpp
-psPromotionManager.hpp                  prefetchQueue.hpp
 psPromotionManager.hpp                  psPromotionLAB.hpp
 psPromotionManager.hpp                  taskqueue.hpp
 
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -51,9 +51,14 @@
   _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
   _promotion_failure_size(0),
-  _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
   _strong_roots_time(0.0), _term_time(0.0)
 {
+  #if TASKQUEUE_STATS
+  _term_attempts = 0;
+  _overflow_refills = 0;
+  _overflow_refill_objs = 0;
+  #endif // TASKQUEUE_STATS
+
   _survivor_chunk_array =
     (ChunkArray*) old_gen()->get_data_recorder(thread_num());
   _hash_seed = 17;  // Might want to take time-based random value.
@@ -100,7 +105,6 @@
     // Push remainder.
     bool ok = work_queue()->push(old);
     assert(ok, "just popped, push must be okay");
-    note_push();
   } else {
     // Restore length so that it can be used if there
     // is a promotion failure and forwarding pointers
@@ -126,7 +130,6 @@
     while (queue->size() > (juint)max_size) {
       oop obj_to_scan;
       if (queue->pop_local(obj_to_scan)) {
-        note_pop();
         if ((HeapWord *)obj_to_scan < young_old_boundary()) {
           if (obj_to_scan->is_objArray() &&
               obj_to_scan->is_forwarded() &&
@@ -271,20 +274,28 @@
                         GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
+
+  ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
+
   inline ParScanThreadState& thread_state(int i);
-  int pushes() { return _pushes; }
-  int pops()   { return _pops; }
-  int steals() { return _steals; }
+
   void reset(bool promotion_failed);
   void flush();
+
+  #if TASKQUEUE_STATS
+  static void
+    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_termination_stats(outputStream* const st = gclog_or_tty);
+  static void
+    print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
+  void reset_stats();
+  #endif // TASKQUEUE_STATS
+
 private:
   ParallelTaskTerminator& _term;
   ParNewGeneration&       _gen;
   Generation&             _next_gen;
-  // staticstics
-  int _pushes;
-  int _pops;
-  int _steals;
 };
 
 
@@ -294,8 +305,7 @@
   GrowableArray<oop>** overflow_stack_set_,
   size_t desired_plab_sz, ParallelTaskTerminator& term)
   : ResourceArray(sizeof(ParScanThreadState), num_threads),
-    _gen(gen), _next_gen(old_gen), _term(term),
-    _pushes(0), _pops(0), _steals(0)
+    _gen(gen), _next_gen(old_gen), _term(term)
 {
   assert(num_threads > 0, "sanity check!");
   // Initialize states.
@@ -323,6 +333,82 @@
   }
 }
 
+#if TASKQUEUE_STATS
+void
+ParScanThreadState::reset_stats()
+{
+  taskqueue_stats().reset();
+  _term_attempts = 0;
+  _overflow_refills = 0;
+  _overflow_refill_objs = 0;
+}
+
+void ParScanThreadStateSet::reset_stats()
+{
+  for (int i = 0; i < length(); ++i) {
+    thread_state(i).reset_stats();
+  }
+}
+
+void
+ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- "
+                   "-------termination-------");
+  st->print_raw_cr("thr     ms        ms       %   "
+                   "    ms       %   attempts");
+  st->print_raw_cr("--- --------- --------- ------ "
+                   "--------- ------ --------");
+}
+
+void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
+{
+  print_termination_stats_hdr(st);
+
+  for (int i = 0; i < length(); ++i) {
+    const ParScanThreadState & pss = thread_state(i);
+    const double elapsed_ms = pss.elapsed_time() * 1000.0;
+    const double s_roots_ms = pss.strong_roots_time() * 1000.0;
+    const double term_ms = pss.term_time() * 1000.0;
+    st->print_cr("%3d %9.2f %9.2f %6.2f "
+                 "%9.2f %6.2f " SIZE_FORMAT_W(8),
+                 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
+                 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
+  }
+}
+
+// Print stats related to work queue activity.
+void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Task Stats");
+  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
+  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
+}
+
+void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
+{
+  print_taskqueue_stats_hdr(st);
+
+  TaskQueueStats totals;
+  for (int i = 0; i < length(); ++i) {
+    const ParScanThreadState & pss = thread_state(i);
+    const TaskQueueStats & stats = pss.taskqueue_stats();
+    st->print("%3d ", i); stats.print(st); st->cr();
+    totals += stats;
+
+    if (pss.overflow_refills() > 0) {
+      st->print_cr("    " SIZE_FORMAT_W(10) " overflow refills    "
+                   SIZE_FORMAT_W(10) " overflow objects",
+                   pss.overflow_refills(), pss.overflow_refill_objs());
+    }
+  }
+  st->print("tot "); totals.print(st); st->cr();
+
+  DEBUG_ONLY(totals.verify());
+}
+#endif // TASKQUEUE_STATS
+
 void ParScanThreadStateSet::flush()
 {
   // Work in this loop should be kept as lightweight as
@@ -346,42 +432,8 @@
     // Inform old gen that we're done.
     _next_gen.par_promote_alloc_done(i);
     _next_gen.par_oop_since_save_marks_iterate_done(i);
+  }
 
-    // Flush stats related to work queue activity (push/pop/steal)
-    // This could conceivably become a bottleneck; if so, we'll put the
-    // stat's gathering under the flag.
-    if (PAR_STATS_ENABLED) {
-      _pushes += par_scan_state.pushes();
-      _pops   += par_scan_state.pops();
-      _steals += par_scan_state.steals();
-      if (ParallelGCVerbose) {
-        gclog_or_tty->print("Thread %d complete:\n"
-                            "  Pushes: %7d    Pops: %7d    Steals %7d (in %d attempts)\n",
-                            i, par_scan_state.pushes(), par_scan_state.pops(),
-                            par_scan_state.steals(), par_scan_state.steal_attempts());
-        if (par_scan_state.overflow_pushes() > 0 ||
-            par_scan_state.overflow_refills() > 0) {
-          gclog_or_tty->print("  Overflow pushes: %7d    "
-                              "Overflow refills: %7d for %d objs.\n",
-                              par_scan_state.overflow_pushes(),
-                              par_scan_state.overflow_refills(),
-                              par_scan_state.overflow_refill_objs());
-        }
-
-        double elapsed = par_scan_state.elapsed();
-        double strong_roots = par_scan_state.strong_roots_time();
-        double term = par_scan_state.term_time();
-        gclog_or_tty->print(
-                            "  Elapsed: %7.2f ms.\n"
-                            "    Strong roots: %7.2f ms (%6.2f%%)\n"
-                            "    Termination:  %7.2f ms (%6.2f%%) (in %d entries)\n",
-                           elapsed * 1000.0,
-                           strong_roots * 1000.0, (strong_roots*100.0/elapsed),
-                           term * 1000.0, (term*100.0/elapsed),
-                           par_scan_state.term_attempts());
-      }
-    }
-  }
   if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
     // We need to call this even when ResizeOldPLAB is disabled
     // so as to avoid breaking some asserts. While we may be able
@@ -456,15 +508,12 @@
     // We have no local work, attempt to steal from other threads.
 
     // attempt to steal work from promoted.
-    par_scan_state()->note_steal_attempt();
     if (task_queues()->steal(par_scan_state()->thread_num(),
                              par_scan_state()->hash_seed(),
                              obj_to_scan)) {
-      par_scan_state()->note_steal();
       bool res = work_q->push(obj_to_scan);
       assert(res, "Empty queue should have room for a push.");
 
-      par_scan_state()->note_push();
       //   if successful, goto Start.
       continue;
 
@@ -842,17 +891,6 @@
   }
   thread_state_set.reset(promotion_failed());
 
-  if (PAR_STATS_ENABLED && ParallelGCVerbose) {
-    gclog_or_tty->print("Thread totals:\n"
-               "  Pushes: %7d    Pops: %7d    Steals %7d (sum = %7d).\n",
-               thread_state_set.pushes(), thread_state_set.pops(),
-               thread_state_set.steals(),
-               thread_state_set.pops()+thread_state_set.steals());
-  }
-  assert(thread_state_set.pushes() == thread_state_set.pops()
-                                    + thread_state_set.steals(),
-         "Or else the queues are leaky.");
-
   // Process (weak) reference objects found during scavenge.
   ReferenceProcessor* rp = ref_processor();
   IsAliveClosure is_alive(this);
@@ -932,6 +970,11 @@
     gch->print_heap_change(gch_prev_used);
   }
 
+  if (PrintGCDetails && ParallelGCVerbose) {
+    TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
+    TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
+  }
+
   if (UseAdaptiveSizePolicy) {
     size_policy->minor_collection_end(gch->gc_cause());
     size_policy->avg_survived()->sample(from()->used());
@@ -1104,9 +1147,8 @@
         gclog_or_tty->print("queue overflow!\n");
       }
       push_on_overflow_list(old, par_scan_state);
-      par_scan_state->note_overflow_push();
+      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
     }
-    par_scan_state->note_push();
 
     return new_obj;
   }
@@ -1227,9 +1269,8 @@
     if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
       push_on_overflow_list(old, par_scan_state);
-      par_scan_state->note_overflow_push();
+      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
     }
-    par_scan_state->note_push();
 
     return new_obj;
   }
@@ -1466,7 +1507,7 @@
     cur = next;
     n++;
   }
-  par_scan_state->note_overflow_refill(n);
+  TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n));
 #ifndef PRODUCT
   assert(_num_par_pushes >= n, "Too many pops?");
   Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -36,9 +36,6 @@
 typedef Padded<OopTaskQueue> ObjToScanQueue;
 typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
 
-// Enable this to get push/pop/steal stats.
-const int PAR_STATS_ENABLED = 0;
-
 class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
  private:
   ParScanWeakRefClosure* _par_cl;
@@ -94,8 +91,11 @@
 
   bool _to_space_full;
 
-  int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
-  int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
+#if TASKQUEUE_STATS
+  size_t _term_attempts;
+  size_t _overflow_refills;
+  size_t _overflow_refill_objs;
+#endif // TASKQUEUE_STATS
 
   // Stats for promotion failure
   size_t _promotion_failure_size;
@@ -181,45 +181,38 @@
   }
   void print_and_clear_promotion_failure_size();
 
-  int pushes() { return _pushes; }
-  int pops()   { return _pops; }
-  int steals() { return _steals; }
-  int steal_attempts() { return _steal_attempts; }
-  int term_attempts()  { return _term_attempts; }
-  int overflow_pushes() { return _overflow_pushes; }
-  int overflow_refills() { return _overflow_refills; }
-  int overflow_refill_objs() { return _overflow_refill_objs; }
+#if TASKQUEUE_STATS
+  TaskQueueStats & taskqueue_stats() const { return _work_queue->stats; }
+
+  size_t term_attempts() const             { return _term_attempts; }
+  size_t overflow_refills() const          { return _overflow_refills; }
+  size_t overflow_refill_objs() const      { return _overflow_refill_objs; }
 
-  void note_push()  { if (PAR_STATS_ENABLED) _pushes++; }
-  void note_pop()   { if (PAR_STATS_ENABLED) _pops++; }
-  void note_steal() { if (PAR_STATS_ENABLED) _steals++; }
-  void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; }
-  void note_term_attempt()  { if (PAR_STATS_ENABLED) _term_attempts++; }
-  void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; }
-  void note_overflow_refill(int objs) {
-    if (PAR_STATS_ENABLED) {
-      _overflow_refills++;
-      _overflow_refill_objs += objs;
-    }
+  void note_term_attempt()                 { ++_term_attempts; }
+  void note_overflow_refill(size_t objs)   {
+    ++_overflow_refills; _overflow_refill_objs += objs;
   }
 
+  void reset_stats();
+#endif // TASKQUEUE_STATS
+
   void start_strong_roots() {
     _start_strong_roots = os::elapsedTime();
   }
   void end_strong_roots() {
     _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
   }
-  double strong_roots_time() { return _strong_roots_time; }
+  double strong_roots_time() const { return _strong_roots_time; }
   void start_term_time() {
-    note_term_attempt();
+    TASKQUEUE_STATS_ONLY(note_term_attempt());
     _start_term = os::elapsedTime();
   }
   void end_term_time() {
     _term_time += (os::elapsedTime() - _start_term);
   }
-  double term_time() { return _term_time; }
+  double term_time() const { return _term_time; }
 
-  double elapsed() {
+  double elapsed_time() const {
     return os::elapsedTime() - _start;
   }
 };
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -123,7 +123,6 @@
   assert(start_array != NULL && sp != NULL && pm != NULL, "Sanity");
   assert(start_array->covered_region().contains(sp->used_region()),
          "ObjectStartArray does not cover space");
-  bool depth_first = pm->depth_first();
 
   if (sp->not_empty()) {
     oop* sp_top = (oop*)space_top;
@@ -201,21 +200,12 @@
           *first_nonclean_card++ = clean_card;
         }
         // scan oops in objects
-        // hoisted the if (depth_first) check out of the loop
-        if (depth_first){
-          do {
-            oop(bottom_obj)->push_contents(pm);
-            bottom_obj += oop(bottom_obj)->size();
-            assert(bottom_obj <= sp_top, "just checking");
-          } while (bottom_obj < top);
-          pm->drain_stacks_cond_depth();
-        } else {
-          do {
-            oop(bottom_obj)->copy_contents(pm);
-            bottom_obj += oop(bottom_obj)->size();
-            assert(bottom_obj <= sp_top, "just checking");
-          } while (bottom_obj < top);
-        }
+        do {
+          oop(bottom_obj)->push_contents(pm);
+          bottom_obj += oop(bottom_obj)->size();
+          assert(bottom_obj <= sp_top, "just checking");
+        } while (bottom_obj < top);
+        pm->drain_stacks_cond_depth();
         // remember top oop* scanned
         prev_top = top;
       }
@@ -230,7 +220,6 @@
                                                     uint stripe_number) {
   int ssize = 128; // Naked constant!  Work unit = 64k.
   int dirty_card_count = 0;
-  bool depth_first = pm->depth_first();
 
   oop* sp_top = (oop*)space_top;
   jbyte* start_card = byte_for(sp->bottom());
@@ -363,43 +352,22 @@
         const int interval = PrefetchScanIntervalInBytes;
         // scan all objects in the range
         if (interval != 0) {
-          // hoisted the if (depth_first) check out of the loop
-          if (depth_first) {
-            while (p < to) {
-              Prefetch::write(p, interval);
-              oop m = oop(p);
-              assert(m->is_oop_or_null(), "check for header");
-              m->push_contents(pm);
-              p += m->size();
-            }
-            pm->drain_stacks_cond_depth();
-          } else {
-            while (p < to) {
-              Prefetch::write(p, interval);
-              oop m = oop(p);
-              assert(m->is_oop_or_null(), "check for header");
-              m->copy_contents(pm);
-              p += m->size();
-            }
+          while (p < to) {
+            Prefetch::write(p, interval);
+            oop m = oop(p);
+            assert(m->is_oop_or_null(), "check for header");
+            m->push_contents(pm);
+            p += m->size();
           }
+          pm->drain_stacks_cond_depth();
         } else {
-          // hoisted the if (depth_first) check out of the loop
-          if (depth_first) {
-            while (p < to) {
-              oop m = oop(p);
-              assert(m->is_oop_or_null(), "check for header");
-              m->push_contents(pm);
-              p += m->size();
-            }
-            pm->drain_stacks_cond_depth();
-          } else {
-            while (p < to) {
-              oop m = oop(p);
-              assert(m->is_oop_or_null(), "check for header");
-              m->copy_contents(pm);
-              p += m->size();
-            }
+          while (p < to) {
+            oop m = oop(p);
+            assert(m->is_oop_or_null(), "check for header");
+            m->push_contents(pm);
+            p += m->size();
           }
+          pm->drain_stacks_cond_depth();
         }
         last_scanned = p;
       }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-//
-// PrefetchQueue is a FIFO queue of variable length (currently 8).
-//
-// We need to examine the performance penalty of variable lengths.
-// We may also want to split this into cpu dependent bits.
-//
-
-const int PREFETCH_QUEUE_SIZE  = 8;
-
-class PrefetchQueue : public CHeapObj {
- private:
-  void* _prefetch_queue[PREFETCH_QUEUE_SIZE];
-  uint  _prefetch_index;
-
- public:
-  int length() { return PREFETCH_QUEUE_SIZE; }
-
-  inline void clear() {
-    for(int i=0; i<PREFETCH_QUEUE_SIZE; i++) {
-      _prefetch_queue[i] = NULL;
-    }
-    _prefetch_index = 0;
-  }
-
-  template <class T> inline void* push_and_pop(T* p) {
-    oop o = oopDesc::load_decode_heap_oop_not_null(p);
-    Prefetch::write(o->mark_addr(), 0);
-    // This prefetch is intended to make sure the size field of array
-    // oops is in cache. It assumes the the object layout is
-    // mark -> klass -> size, and that mark and klass are heapword
-    // sized. If this should change, this prefetch will need updating!
-    Prefetch::write(o->mark_addr() + (HeapWordSize*2), 0);
-    _prefetch_queue[_prefetch_index++] = p;
-    _prefetch_index &= (PREFETCH_QUEUE_SIZE-1);
-    return _prefetch_queue[_prefetch_index];
-  }
-
-  // Stores a NULL pointer in the pop'd location.
-  inline void* pop() {
-    _prefetch_queue[_prefetch_index++] = NULL;
-    _prefetch_index &= (PREFETCH_QUEUE_SIZE-1);
-    return _prefetch_queue[_prefetch_index];
-  }
-};
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -27,7 +27,6 @@
 
 PSPromotionManager**         PSPromotionManager::_manager_array = NULL;
 OopStarTaskQueueSet*         PSPromotionManager::_stack_array_depth = NULL;
-OopTaskQueueSet*             PSPromotionManager::_stack_array_breadth = NULL;
 PSOldGen*                    PSPromotionManager::_old_gen = NULL;
 MutableSpace*                PSPromotionManager::_young_space = NULL;
 
@@ -42,23 +41,14 @@
   _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1 );
   guarantee(_manager_array != NULL, "Could not initialize promotion manager");
 
-  if (UseDepthFirstScavengeOrder) {
-    _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads);
-    guarantee(_stack_array_depth != NULL, "Count not initialize promotion manager");
-  } else {
-    _stack_array_breadth = new OopTaskQueueSet(ParallelGCThreads);
-    guarantee(_stack_array_breadth != NULL, "Count not initialize promotion manager");
-  }
+  _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads);
+  guarantee(_stack_array_depth != NULL, "Cound not initialize promotion manager");
 
   // Create and register the PSPromotionManager(s) for the worker threads.
   for(uint i=0; i<ParallelGCThreads; i++) {
     _manager_array[i] = new PSPromotionManager();
     guarantee(_manager_array[i] != NULL, "Could not create PSPromotionManager");
-    if (UseDepthFirstScavengeOrder) {
-      stack_array_depth()->register_queue(i, _manager_array[i]->claimed_stack_depth());
-    } else {
-      stack_array_breadth()->register_queue(i, _manager_array[i]->claimed_stack_breadth());
-    }
+    stack_array_depth()->register_queue(i, _manager_array[i]->claimed_stack_depth());
   }
 
   // The VMThread gets its own PSPromotionManager, which is not available
@@ -93,11 +83,7 @@
   TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats());
   for (uint i = 0; i < ParallelGCThreads + 1; i++) {
     PSPromotionManager* manager = manager_array(i);
-    if (UseDepthFirstScavengeOrder) {
-      assert(manager->claimed_stack_depth()->is_empty(), "should be empty");
-    } else {
-      assert(manager->claimed_stack_breadth()->is_empty(), "should be empty");
-    }
+    assert(manager->claimed_stack_depth()->is_empty(), "should be empty");
     manager->flush_labs();
   }
 }
@@ -105,10 +91,8 @@
 #if TASKQUEUE_STATS
 void
 PSPromotionManager::print_taskqueue_stats(uint i) const {
-  const TaskQueueStats& stats = depth_first() ?
-    _claimed_stack_depth.stats : _claimed_stack_breadth.stats;
   tty->print("%3u ", i);
-  stats.print();
+  _claimed_stack_depth.stats.print();
   tty->cr();
 }
 
@@ -128,8 +112,7 @@
 
 void
 PSPromotionManager::print_stats() {
-  const bool df = UseDepthFirstScavengeOrder;
-  tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth",
+  tty->print_cr("== GC Tasks Stats, GC %3d",
                 Universe::heap()->total_collections());
 
   tty->print("thr "); TaskQueueStats::print_header(1); tty->cr();
@@ -147,9 +130,7 @@
 
 void
 PSPromotionManager::reset_stats() {
-  TaskQueueStats& stats = depth_first() ?
-    claimed_stack_depth()->stats : claimed_stack_breadth()->stats;
-  stats.reset();
+  claimed_stack_depth()->stats.reset();
   _masked_pushes = _masked_steals = 0;
   _arrays_chunked = _array_chunks_processed = 0;
 }
@@ -158,19 +139,13 @@
 PSPromotionManager::PSPromotionManager() {
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
-  _depth_first = UseDepthFirstScavengeOrder;
 
   // We set the old lab's start array.
   _old_lab.set_start_array(old_gen()->start_array());
 
   uint queue_size;
-  if (depth_first()) {
-    claimed_stack_depth()->initialize();
-    queue_size = claimed_stack_depth()->max_elems();
-  } else {
-    claimed_stack_breadth()->initialize();
-    queue_size = claimed_stack_breadth()->max_elems();
-  }
+  claimed_stack_depth()->initialize();
+  queue_size = claimed_stack_depth()->max_elems();
 
   _totally_drain = (ParallelGCThreads == 1) || (GCDrainStackTargetSize == 0);
   if (_totally_drain) {
@@ -205,14 +180,11 @@
   _old_lab.initialize(MemRegion(lab_base, (size_t)0));
   _old_gen_is_full = false;
 
-  _prefetch_queue.clear();
-
   TASKQUEUE_STATS_ONLY(reset_stats());
 }
 
 
 void PSPromotionManager::drain_stacks_depth(bool totally_drain) {
-  assert(depth_first(), "invariant");
   assert(claimed_stack_depth()->overflow_stack() != NULL, "invariant");
   totally_drain = totally_drain || _totally_drain;
 
@@ -250,50 +222,6 @@
   assert(tq->overflow_empty(), "Sanity");
 }
 
-void PSPromotionManager::drain_stacks_breadth(bool totally_drain) {
-  assert(!depth_first(), "invariant");
-  assert(claimed_stack_breadth()->overflow_stack() != NULL, "invariant");
-  totally_drain = totally_drain || _totally_drain;
-
-#ifdef ASSERT
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
-  MutableSpace* to_space = heap->young_gen()->to_space();
-  MutableSpace* old_space = heap->old_gen()->object_space();
-  MutableSpace* perm_space = heap->perm_gen()->object_space();
-#endif /* ASSERT */
-
-  OverflowTaskQueue<oop>* const tq = claimed_stack_breadth();
-  do {
-    oop obj;
-
-    // Drain overflow stack first, so other threads can steal from
-    // claimed stack while we work.
-    while (tq->pop_overflow(obj)) {
-      obj->copy_contents(this);
-    }
-
-    if (totally_drain) {
-      while (tq->pop_local(obj)) {
-        obj->copy_contents(this);
-      }
-    } else {
-      while (tq->size() > _target_stack_size && tq->pop_local(obj)) {
-        obj->copy_contents(this);
-      }
-    }
-
-    // If we could not find any other work, flush the prefetch queue
-    if (tq->is_empty()) {
-      flush_prefetch_queue();
-    }
-  } while (totally_drain && !tq->taskqueue_empty() || !tq->overflow_empty());
-
-  assert(!totally_drain || tq->taskqueue_empty(), "Sanity");
-  assert(totally_drain || tq->size() <= _target_stack_size, "Sanity");
-  assert(tq->overflow_empty(), "Sanity");
-}
-
 void PSPromotionManager::flush_labs() {
   assert(stacks_empty(), "Attempt to flush lab with live stack");
 
@@ -319,7 +247,7 @@
 // performance.
 //
 
-oop PSPromotionManager::copy_to_survivor_space(oop o, bool depth_first) {
+oop PSPromotionManager::copy_to_survivor_space(oop o) {
   assert(PSScavenge::should_scavenge(&o), "Sanity");
 
   oop new_obj = NULL;
@@ -423,24 +351,20 @@
         assert(young_space()->contains(new_obj), "Attempt to push non-promoted obj");
       }
 
-      if (depth_first) {
-        // Do the size comparison first with new_obj_size, which we
-        // already have. Hopefully, only a few objects are larger than
-        // _min_array_size_for_chunking, and most of them will be arrays.
-        // So, the is->objArray() test would be very infrequent.
-        if (new_obj_size > _min_array_size_for_chunking &&
-            new_obj->is_objArray() &&
-            PSChunkLargeArrays) {
-          // we'll chunk it
-          oop* const masked_o = mask_chunked_array_oop(o);
-          push_depth(masked_o);
-          TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
-        } else {
-          // we'll just push its contents
-          new_obj->push_contents(this);
-        }
+      // Do the size comparison first with new_obj_size, which we
+      // already have. Hopefully, only a few objects are larger than
+      // _min_array_size_for_chunking, and most of them will be arrays.
+      // So, the is->objArray() test would be very infrequent.
+      if (new_obj_size > _min_array_size_for_chunking &&
+          new_obj->is_objArray() &&
+          PSChunkLargeArrays) {
+        // we'll chunk it
+        oop* const masked_o = mask_chunked_array_oop(o);
+        push_depth(masked_o);
+        TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
       } else {
-        push_breadth(new_obj);
+        // we'll just push its contents
+        new_obj->push_contents(this);
       }
     }  else {
       // We lost, someone else "owns" this object
@@ -537,13 +461,7 @@
     // We won any races, we "own" this object.
     assert(obj == obj->forwardee(), "Sanity");
 
-    if (depth_first()) {
-      obj->push_contents(this);
-    } else {
-      // Don't bother incrementing the age, just push
-      // onto the claimed_stack..
-      push_breadth(obj);
-    }
+    obj->push_contents(this);
 
     // Save the mark if needed
     PSScavenge::oop_promotion_failed(obj, obj_mark);
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -48,7 +48,6 @@
  private:
   static PSPromotionManager**         _manager_array;
   static OopStarTaskQueueSet*         _stack_array_depth;
-  static OopTaskQueueSet*             _stack_array_breadth;
   static PSOldGen*                    _old_gen;
   static MutableSpace*                _young_space;
 
@@ -69,12 +68,10 @@
   PSOldPromotionLAB                   _old_lab;
   bool                                _young_gen_is_full;
   bool                                _old_gen_is_full;
-  PrefetchQueue                       _prefetch_queue;
 
   OopStarTaskQueue                    _claimed_stack_depth;
   OverflowTaskQueue<oop>              _claimed_stack_breadth;
 
-  bool                                _depth_first;
   bool                                _totally_drain;
   uint                                _target_stack_size;
 
@@ -87,7 +84,6 @@
 
   inline static PSPromotionManager* manager_array(int index);
   template <class T> inline void claim_or_forward_internal_depth(T* p);
-  template <class T> inline void claim_or_forward_internal_breadth(T* p);
 
   // On the task queues we push reference locations as well as
   // partially-scanned arrays (in the latter case, we push an oop to
@@ -136,19 +132,11 @@
   void process_array_chunk(oop old);
 
   template <class T> void push_depth(T* p) {
-    assert(depth_first(), "pre-condition");
     claimed_stack_depth()->push(p);
   }
 
-  void push_breadth(oop o) {
-    assert(!depth_first(), "pre-condition");
-    claimed_stack_breadth()->push(o);
-  }
-
  protected:
   static OopStarTaskQueueSet* stack_array_depth()   { return _stack_array_depth; }
-  static OopTaskQueueSet*     stack_array_breadth() { return _stack_array_breadth; }
-
  public:
   // Static
   static void initialize();
@@ -163,19 +151,12 @@
     return stack_array_depth()->steal(queue_num, seed, t);
   }
 
-  static bool steal_breadth(int queue_num, int* seed, oop& t) {
-    return stack_array_breadth()->steal(queue_num, seed, t);
-  }
-
   PSPromotionManager();
 
   // Accessors
   OopStarTaskQueue* claimed_stack_depth() {
     return &_claimed_stack_depth;
   }
-  OverflowTaskQueue<oop>* claimed_stack_breadth() {
-    return &_claimed_stack_breadth;
-  }
 
   bool young_gen_is_full()             { return _young_gen_is_full; }
 
@@ -183,18 +164,14 @@
   void set_old_gen_is_full(bool state) { _old_gen_is_full = state; }
 
   // Promotion methods
-  oop copy_to_survivor_space(oop o, bool depth_first);
+  oop copy_to_survivor_space(oop o);
   oop oop_promotion_failed(oop obj, markOop obj_mark);
 
   void reset();
 
   void flush_labs();
   void drain_stacks(bool totally_drain) {
-    if (depth_first()) {
-      drain_stacks_depth(totally_drain);
-    } else {
-      drain_stacks_breadth(totally_drain);
-    }
+    drain_stacks_depth(totally_drain);
   }
  public:
   void drain_stacks_cond_depth() {
@@ -203,22 +180,14 @@
     }
   }
   void drain_stacks_depth(bool totally_drain);
-  void drain_stacks_breadth(bool totally_drain);
 
-  bool depth_first() const {
-    return _depth_first;
-  }
   bool stacks_empty() {
-    return depth_first() ?
-      claimed_stack_depth()->is_empty() :
-      claimed_stack_breadth()->is_empty();
+    return claimed_stack_depth()->is_empty();
   }
 
   inline void process_popped_location_depth(StarTask p);
 
-  inline void flush_prefetch_queue();
   template <class T> inline void claim_or_forward_depth(T* p);
-  template <class T> inline void claim_or_forward_breadth(T* p);
 
   TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);)
 };
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -46,32 +46,7 @@
 }
 
 template <class T>
-inline void PSPromotionManager::claim_or_forward_internal_breadth(T* p) {
-  if (p != NULL) { // XXX: error if p != NULL here
-    oop o = oopDesc::load_decode_heap_oop_not_null(p);
-    if (o->is_forwarded()) {
-      o = o->forwardee();
-    } else {
-      o = copy_to_survivor_space(o, false);
-    }
-    // Card mark
-    if (PSScavenge::is_obj_in_young((HeapWord*) o)) {
-      PSScavenge::card_table()->inline_write_ref_field_gc(p, o);
-    }
-    oopDesc::encode_store_heap_oop_not_null(p, o);
-  }
-}
-
-inline void PSPromotionManager::flush_prefetch_queue() {
-  assert(!depth_first(), "invariant");
-  for (int i = 0; i < _prefetch_queue.length(); i++) {
-    claim_or_forward_internal_breadth((oop*)_prefetch_queue.pop());
-  }
-}
-
-template <class T>
 inline void PSPromotionManager::claim_or_forward_depth(T* p) {
-  assert(depth_first(), "invariant");
   assert(PSScavenge::should_scavenge(p, true), "revisiting object?");
   assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap,
          "Sanity");
@@ -80,36 +55,6 @@
   claim_or_forward_internal_depth(p);
 }
 
-template <class T>
-inline void PSPromotionManager::claim_or_forward_breadth(T* p) {
-  assert(!depth_first(), "invariant");
-  assert(PSScavenge::should_scavenge(p, true), "revisiting object?");
-  assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap,
-         "Sanity");
-  assert(Universe::heap()->is_in(p), "pointer outside heap");
-
-  if (UsePrefetchQueue) {
-    claim_or_forward_internal_breadth((T*)_prefetch_queue.push_and_pop(p));
-  } else {
-    // This option is used for testing.  The use of the prefetch
-    // queue can delay the processing of the objects and thus
-    // change the order of object scans.  For example, remembered
-    // set updates are typically the clearing of the remembered
-    // set (the cards) followed by updates of the remembered set
-    // for young-to-old pointers.  In a situation where there
-    // is an error in the sequence of clearing and updating
-    // (e.g. clear card A, update card A, erroneously clear
-    // card A again) the error can be obscured by a delay
-    // in the update due to the use of the prefetch queue
-    // (e.g., clear card A, erroneously clear card A again,
-    // update card A that was pushed into the prefetch queue
-    // and thus delayed until after the erronous clear).  The
-    // length of the delay is random depending on the objects
-    // in the queue and the delay can be zero.
-    claim_or_forward_internal_breadth(p);
-  }
-}
-
 inline void PSPromotionManager::process_popped_location_depth(StarTask p) {
   if (is_oop_masked(p)) {
     assert(PSChunkLargeArrays, "invariant");
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -157,10 +157,8 @@
     q->enqueue(new PSRefProcTaskProxy(task, i));
   }
   ParallelTaskTerminator terminator(
-    ParallelScavengeHeap::gc_task_manager()->workers(),
-    UseDepthFirstScavengeOrder ?
-        (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth()
-      : (TaskQueueSetSuper*) PSPromotionManager::stack_array_breadth());
+                 ParallelScavengeHeap::gc_task_manager()->workers(),
+                 (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
   if (task.marks_oops_alive() && ParallelGCThreads > 1) {
     for (uint j=0; j<ParallelGCThreads; j++) {
       q->enqueue(new StealTask(&terminator));
@@ -375,10 +373,8 @@
       q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache));
 
       ParallelTaskTerminator terminator(
-        gc_task_manager()->workers(),
-        promotion_manager->depth_first() ?
-            (TaskQueueSetSuper*) promotion_manager->stack_array_depth()
-          : (TaskQueueSetSuper*) promotion_manager->stack_array_breadth());
+                  gc_task_manager()->workers(),
+                  (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
       if (ParallelGCThreads>1) {
         for (uint j=0; j<ParallelGCThreads; j++) {
           q->enqueue(new StealTask(&terminator));
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -65,7 +65,7 @@
   oop o = oopDesc::load_decode_heap_oop_not_null(p);
   oop new_obj = o->is_forwarded()
         ? o->forwardee()
-        : pm->copy_to_survivor_space(o, pm->depth_first());
+        : pm->copy_to_survivor_space(o);
   oopDesc::encode_store_heap_oop_not_null(p, new_obj);
 
   // We cannot mark without test, as some code passes us pointers
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -144,29 +144,15 @@
             "stacks should be empty at this point");
 
   int random_seed = 17;
-  if (pm->depth_first()) {
-    while(true) {
-      StarTask p;
-      if (PSPromotionManager::steal_depth(which, &random_seed, p)) {
-        TASKQUEUE_STATS_ONLY(pm->record_steal(p));
-        pm->process_popped_location_depth(p);
-        pm->drain_stacks_depth(true);
-      } else {
-        if (terminator()->offer_termination()) {
-          break;
-        }
-      }
-    }
-  } else {
-    while(true) {
-      oop obj;
-      if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) {
-        obj->copy_contents(pm);
-        pm->drain_stacks_breadth(true);
-      } else {
-        if (terminator()->offer_termination()) {
-          break;
-        }
+  while(true) {
+    StarTask p;
+    if (PSPromotionManager::steal_depth(which, &random_seed, p)) {
+      TASKQUEUE_STATS_ONLY(pm->record_steal(p));
+      pm->process_popped_location_depth(p);
+      pm->drain_stacks_depth(true);
+    } else {
+      if (terminator()->offer_termination()) {
+        break;
       }
     }
   }
--- a/hotspot/src/share/vm/includeDB_core	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/includeDB_core	Thu Sep 09 14:27:59 2010 -0700
@@ -225,7 +225,6 @@
 arrayOop.cpp                            symbolOop.hpp
 
 arrayOop.hpp                            oop.hpp
-arrayOop.hpp                            universe.hpp
 arrayOop.hpp                            universe.inline.hpp
 
 assembler.cpp                           assembler.hpp
@@ -236,7 +235,6 @@
 assembler.cpp                           os.hpp
 
 assembler.hpp                           allocation.hpp
-assembler.hpp                           allocation.inline.hpp
 assembler.hpp                           debug.hpp
 assembler.hpp                           growableArray.hpp
 assembler.hpp                           oopRecorder.hpp
@@ -330,7 +328,7 @@
 blockOffsetTable.cpp                    iterator.hpp
 blockOffsetTable.cpp                    java.hpp
 blockOffsetTable.cpp                    oop.inline.hpp
-blockOffsetTable.cpp                    space.hpp
+blockOffsetTable.cpp                    space.inline.hpp
 blockOffsetTable.cpp                    universe.hpp
 
 blockOffsetTable.hpp                    globalDefinitions.hpp
@@ -338,6 +336,7 @@
 blockOffsetTable.hpp                    virtualspace.hpp
 
 blockOffsetTable.inline.hpp             blockOffsetTable.hpp
+blockOffsetTable.inline.hpp             safepoint.hpp
 blockOffsetTable.inline.hpp             space.hpp
 
 bytecode.cpp                            bytecode.hpp
@@ -1807,7 +1806,7 @@
 generateOopMap.hpp                      universe.inline.hpp
 
 generation.cpp                          allocation.inline.hpp
-generation.cpp                          blockOffsetTable.hpp
+generation.cpp                          blockOffsetTable.inline.hpp
 generation.cpp                          cardTableRS.hpp
 generation.cpp                          collectedHeap.inline.hpp
 generation.cpp                          copy.hpp
@@ -3436,7 +3435,7 @@
 perfMemory_<os_family>.cpp              resourceArea.hpp
 perfMemory_<os_family>.cpp              vmSymbols.hpp
 
-permGen.cpp                             blockOffsetTable.hpp
+permGen.cpp                             blockOffsetTable.inline.hpp
 permGen.cpp                             cSpaceCounters.hpp
 permGen.cpp                             collectedHeap.inline.hpp
 permGen.cpp                             compactPermGen.hpp
@@ -3805,7 +3804,7 @@
 sizes.hpp                               allocation.hpp
 sizes.hpp                               globalDefinitions.hpp
 
-space.cpp                               blockOffsetTable.hpp
+space.cpp                               blockOffsetTable.inline.hpp
 space.cpp                               copy.hpp
 space.cpp                               defNewGeneration.hpp
 space.cpp                               genCollectedHeap.hpp
@@ -3835,7 +3834,6 @@
 space.hpp                               watermark.hpp
 space.hpp                               workgroup.hpp
 
-space.inline.hpp                        blockOffsetTable.inline.hpp
 space.inline.hpp                        collectedHeap.hpp
 space.inline.hpp                        safepoint.hpp
 space.inline.hpp                        space.hpp
--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -421,7 +421,9 @@
 #ifdef ASSERT
   if (istate->_msg != initialize) {
     assert(abs(istate->_stack_base - istate->_stack_limit) == (istate->_method->max_stack() + 1), "bad stack limit");
-  IA32_ONLY(assert(istate->_stack_limit == istate->_thread->last_Java_sp() + 1, "wrong"));
+#ifndef SHARK
+    IA32_ONLY(assert(istate->_stack_limit == istate->_thread->last_Java_sp() + 1, "wrong"));
+#endif // !SHARK
   }
   // Verify linkages.
   interpreterState l = istate;
--- a/hotspot/src/share/vm/memory/allocation.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/memory/allocation.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -58,7 +58,7 @@
 void ResourceObj::operator delete(void* p) {
   assert(((ResourceObj *)p)->allocated_on_C_heap(),
          "delete only allowed for C_HEAP objects");
-  DEBUG_ONLY(((ResourceObj *)p)->_allocation = badHeapOopVal;)
+  DEBUG_ONLY(((ResourceObj *)p)->_allocation = (uintptr_t) badHeapOopVal;)
   FreeHeap(p);
 }
 
@@ -104,7 +104,7 @@
 ResourceObj::~ResourceObj() {
     // allocated_on_C_heap() also checks that encoded (in _allocation) address == this.
     if (!allocated_on_C_heap()) {  // ResourceObj::delete() zaps _allocation for C_heap.
-      _allocation = badHeapOopVal; // zap type
+      _allocation = (uintptr_t) badHeapOopVal; // zap type
     }
 }
 #endif // ASSERT
--- a/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -103,13 +103,13 @@
 //////////////////////////////////////////////////////////////////////
 
 BlockOffsetArray::BlockOffsetArray(BlockOffsetSharedArray* array,
-                                   MemRegion mr, bool init_to_zero) :
+                                   MemRegion mr, bool init_to_zero_) :
   BlockOffsetTable(mr.start(), mr.end()),
-  _array(array),
-  _init_to_zero(init_to_zero)
+  _array(array)
 {
   assert(_bottom <= _end, "arguments out of order");
-  if (!_init_to_zero) {
+  set_init_to_zero(init_to_zero_);
+  if (!init_to_zero_) {
     // initialize cards to point back to mr.start()
     set_remainder_to_point_to_start(mr.start() + N_words, mr.end());
     _array->set_offset_array(0, 0);  // set first card to 0
@@ -121,8 +121,9 @@
 // a right-open interval: [start, end)
 void
 BlockOffsetArray::
-set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) {
+set_remainder_to_point_to_start(HeapWord* start, HeapWord* end, bool reducing) {
 
+  check_reducing_assertion(reducing);
   if (start >= end) {
     // The start address is equal to the end address (or to
     // the right of the end address) so there are not cards
@@ -167,7 +168,7 @@
   size_t end_card = _array->index_for(end-1);
   assert(start ==_array->address_for_index(start_card), "Precondition");
   assert(end ==_array->address_for_index(end_card)+N_words, "Precondition");
-  set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval
+  set_remainder_to_point_to_start_incl(start_card, end_card, reducing); // closed interval
 }
 
 
@@ -175,7 +176,9 @@
 // a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start()
 // above.
 void
-BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) {
+BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card, bool reducing) {
+
+  check_reducing_assertion(reducing);
   if (start_card > end_card) {
     return;
   }
@@ -191,11 +194,11 @@
     size_t reach = start_card - 1 + (power_to_cards_back(i+1) - 1);
     offset = N_words + i;
     if (reach >= end_card) {
-      _array->set_offset_array(start_card_for_region, end_card, offset);
+      _array->set_offset_array(start_card_for_region, end_card, offset, reducing);
       start_card_for_region = reach + 1;
       break;
     }
-    _array->set_offset_array(start_card_for_region, reach, offset);
+    _array->set_offset_array(start_card_for_region, reach, offset, reducing);
     start_card_for_region = reach + 1;
   }
   assert(start_card_for_region > end_card, "Sanity check");
@@ -211,8 +214,10 @@
     return;
   }
   guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card");
+  u_char last_entry = N_words;
   for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
     u_char entry = _array->offset_array(c);
+    guarantee(entry >= last_entry, "Monotonicity");
     if (c - start_card > power_to_cards_back(1)) {
       guarantee(entry > N_words, "Should be in logarithmic region");
     }
@@ -220,11 +225,13 @@
     size_t landing_card = c - backskip;
     guarantee(landing_card >= (start_card - 1), "Inv");
     if (landing_card >= start_card) {
-      guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
+      guarantee(_array->offset_array(landing_card) <= entry, "Monotonicity");
     } else {
-      guarantee(landing_card == start_card - 1, "Tautology");
+      guarantee(landing_card == (start_card - 1), "Tautology");
+      // Note that N_words is the maximum offset value
       guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
     }
+    last_entry = entry;  // remember for monotonicity test
   }
 }
 
@@ -243,7 +250,7 @@
 void
 BlockOffsetArray::do_block_internal(HeapWord* blk_start,
                                     HeapWord* blk_end,
-                                    Action action) {
+                                    Action action, bool reducing) {
   assert(Universe::heap()->is_in_reserved(blk_start),
          "reference must be into the heap");
   assert(Universe::heap()->is_in_reserved(blk_end-1),
@@ -275,18 +282,18 @@
     switch (action) {
       case Action_mark: {
         if (init_to_zero()) {
-          _array->set_offset_array(start_index, boundary, blk_start);
+          _array->set_offset_array(start_index, boundary, blk_start, reducing);
           break;
         } // Else fall through to the next case
       }
       case Action_single: {
-        _array->set_offset_array(start_index, boundary, blk_start);
+        _array->set_offset_array(start_index, boundary, blk_start, reducing);
         // We have finished marking the "offset card". We need to now
         // mark the subsequent cards that this blk spans.
         if (start_index < end_index) {
           HeapWord* rem_st = _array->address_for_index(start_index) + N_words;
           HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
-          set_remainder_to_point_to_start(rem_st, rem_end);
+          set_remainder_to_point_to_start(rem_st, rem_end, reducing);
         }
         break;
       }
@@ -395,7 +402,7 @@
   // Indices for starts of prefix block and suffix block.
   size_t pref_index = _array->index_for(pref_addr);
   if (_array->address_for_index(pref_index) != pref_addr) {
-    // pref_addr deos not begin pref_index
+    // pref_addr does not begin pref_index
     pref_index++;
   }
 
@@ -430,18 +437,18 @@
   if (num_suff_cards > 0) {
     HeapWord* boundary = _array->address_for_index(suff_index);
     // Set the offset card for suffix block
-    _array->set_offset_array(suff_index, boundary, suff_addr);
+    _array->set_offset_array(suff_index, boundary, suff_addr, true /* reducing */);
     // Change any further cards that need changing in the suffix
     if (num_pref_cards > 0) {
       if (num_pref_cards >= num_suff_cards) {
         // Unilaterally fix all of the suffix cards: closed card
         // index interval in args below.
-        set_remainder_to_point_to_start_incl(suff_index + 1, end_index - 1);
+        set_remainder_to_point_to_start_incl(suff_index + 1, end_index - 1, true /* reducing */);
       } else {
         // Unilaterally fix the first (num_pref_cards - 1) following
         // the "offset card" in the suffix block.
         set_remainder_to_point_to_start_incl(suff_index + 1,
-          suff_index + num_pref_cards - 1);
+          suff_index + num_pref_cards - 1, true /* reducing */);
         // Fix the appropriate cards in the remainder of the
         // suffix block -- these are the last num_pref_cards
         // cards in each power block of the "new" range plumbed
@@ -461,7 +468,7 @@
             // is non-null.
             if (left_index <= right_index) {
               _array->set_offset_array(left_index, right_index,
-                                     N_words + i - 1);
+                                     N_words + i - 1, true /* reducing */);
             } else {
               more = false; // we are done
             }
@@ -482,7 +489,7 @@
             more  = false;
           }
           assert(left_index <= right_index, "Error");
-          _array->set_offset_array(left_index, right_index, N_words + i - 1);
+          _array->set_offset_array(left_index, right_index, N_words + i - 1, true /* reducing */);
           i++;
         }
       }
@@ -501,14 +508,13 @@
 // any cards subsequent to the first one.
 void
 BlockOffsetArrayNonContigSpace::mark_block(HeapWord* blk_start,
-                                           HeapWord* blk_end) {
-  do_block_internal(blk_start, blk_end, Action_mark);
+                                           HeapWord* blk_end, bool reducing) {
+  do_block_internal(blk_start, blk_end, Action_mark, reducing);
 }
 
 HeapWord* BlockOffsetArrayNonContigSpace::block_start_unsafe(
   const void* addr) const {
   assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
-
   assert(_bottom <= addr && addr < _end,
          "addr must be covered by this Array");
   // Must read this exactly once because it can be modified by parallel
@@ -542,9 +548,10 @@
     debug_only(HeapWord* last = q);   // for debugging
     q = n;
     n += _sp->block_size(n);
+    assert(n > q, err_msg("Looping at: " INTPTR_FORMAT, n));
   }
-  assert(q <= addr, "wrong order for current and arg");
-  assert(addr <= n, "wrong order for arg and next");
+  assert(q <= addr, err_msg("wrong order for current (" INTPTR_FORMAT ") <= arg (" INTPTR_FORMAT ")", q, addr));
+  assert(addr <= n, err_msg("wrong order for arg (" INTPTR_FORMAT ") <= next (" INTPTR_FORMAT ")", addr, n));
   return q;
 }
 
@@ -727,9 +734,8 @@
   _next_offset_index = end_index + 1;
   // Calculate _next_offset_threshold this way because end_index
   // may be the last valid index in the covered region.
-  _next_offset_threshold = _array->address_for_index(end_index) +
-    N_words;
-  assert(_next_offset_threshold >= blk_end, "Incorrent offset threshold");
+  _next_offset_threshold = _array->address_for_index(end_index) + N_words;
+  assert(_next_offset_threshold >= blk_end, "Incorrect offset threshold");
 
 #ifdef ASSERT
   // The offset can be 0 if the block starts on a boundary.  That
--- a/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -107,6 +107,8 @@
     N_words = 1 << LogN_words
   };
 
+  bool _init_to_zero;
+
   // The reserved region covered by the shared array.
   MemRegion _reserved;
 
@@ -125,17 +127,28 @@
     assert(index < _vs.committed_size(), "index out of range");
     return _offset_array[index];
   }
-  void set_offset_array(size_t index, u_char offset) {
+  // An assertion-checking helper method for the set_offset_array() methods below.
+  void check_reducing_assertion(bool reducing);
+
+  void set_offset_array(size_t index, u_char offset, bool reducing = false) {
+    check_reducing_assertion(reducing);
     assert(index < _vs.committed_size(), "index out of range");
+    assert(!reducing || _offset_array[index] >= offset, "Not reducing");
     _offset_array[index] = offset;
   }
-  void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
+
+  void set_offset_array(size_t index, HeapWord* high, HeapWord* low, bool reducing = false) {
+    check_reducing_assertion(reducing);
     assert(index < _vs.committed_size(), "index out of range");
     assert(high >= low, "addresses out of order");
     assert(pointer_delta(high, low) <= N_words, "offset too large");
+    assert(!reducing || _offset_array[index] >=  (u_char)pointer_delta(high, low),
+           "Not reducing");
     _offset_array[index] = (u_char)pointer_delta(high, low);
   }
-  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset, bool reducing = false) {
+    check_reducing_assertion(reducing);
     assert(index_for(right - 1) < _vs.committed_size(),
            "right address out of range");
     assert(left  < right, "Heap addresses out of order");
@@ -150,12 +163,15 @@
       size_t i = index_for(left);
       const size_t end = i + num_cards;
       for (; i < end; i++) {
+        // Elided until CR 6977974 is fixed properly.
+        // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
         _offset_array[i] = offset;
       }
     }
   }
 
-  void set_offset_array(size_t left, size_t right, u_char offset) {
+  void set_offset_array(size_t left, size_t right, u_char offset, bool reducing = false) {
+    check_reducing_assertion(reducing);
     assert(right < _vs.committed_size(), "right address out of range");
     assert(left  <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
@@ -169,6 +185,8 @@
       size_t i = left;
       const size_t end = i + num_cards;
       for (; i < end; i++) {
+        // Elided until CR 6977974 is fixed properly.
+        // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
         _offset_array[i] = offset;
       }
     }
@@ -212,6 +230,11 @@
 
   void set_bottom(HeapWord* new_bottom);
 
+  // Whether entries should be initialized to zero. Used currently only for
+  // error checking.
+  void set_init_to_zero(bool val) { _init_to_zero = val; }
+  bool init_to_zero() { return _init_to_zero; }
+
   // Updates all the BlockOffsetArray's sharing this shared array to
   // reflect the current "top"'s of their spaces.
   void update_offset_arrays();   // Not yet implemented!
@@ -285,17 +308,23 @@
   // initialized to point backwards to the beginning of the covered region.
   bool _init_to_zero;
 
+  // An assertion-checking helper method for the set_remainder*() methods below.
+  void check_reducing_assertion(bool reducing) { _array->check_reducing_assertion(reducing); }
+
   // Sets the entries
   // corresponding to the cards starting at "start" and ending at "end"
   // to point back to the card before "start": the interval [start, end)
-  // is right-open.
-  void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end);
+  // is right-open. The last parameter, reducing, indicates whether the
+  // updates to individual entries always reduce the entry from a higher
+  // to a lower value. (For example this would hold true during a temporal
+  // regime during which only block splits were updating the BOT.
+  void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end, bool reducing = false);
   // Same as above, except that the args here are a card _index_ interval
   // that is closed: [start_index, end_index]
-  void set_remainder_to_point_to_start_incl(size_t start, size_t end);
+  void set_remainder_to_point_to_start_incl(size_t start, size_t end, bool reducing = false);
 
   // A helper function for BOT adjustment/verification work
-  void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action);
+  void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action, bool reducing = false);
 
  public:
   // The space may not have its bottom and top set yet, which is why the
@@ -303,7 +332,7 @@
   // elements of the array are initialized to zero.  Otherwise, they are
   // initialized to point backwards to the beginning.
   BlockOffsetArray(BlockOffsetSharedArray* array, MemRegion mr,
-                   bool init_to_zero);
+                   bool init_to_zero_);
 
   // Note: this ought to be part of the constructor, but that would require
   // "this" to be passed as a parameter to a member constructor for
@@ -358,6 +387,12 @@
   // If true, initialize array slots with no allocated blocks to zero.
   // Otherwise, make them point back to the front.
   bool init_to_zero() { return _init_to_zero; }
+  // Corresponding setter
+  void set_init_to_zero(bool val) {
+    _init_to_zero = val;
+    assert(_array != NULL, "_array should be non-NULL");
+    _array->set_init_to_zero(val);
+  }
 
   // Debugging
   // Return the index of the last entry in the "active" region.
@@ -424,16 +459,16 @@
   // of BOT is touched. It is assumed (and verified in the
   // non-product VM) that the remaining cards of the block
   // are correct.
-  void mark_block(HeapWord* blk_start, HeapWord* blk_end);
-  void mark_block(HeapWord* blk, size_t size) {
-    mark_block(blk, blk + size);
+  void mark_block(HeapWord* blk_start, HeapWord* blk_end, bool reducing = false);
+  void mark_block(HeapWord* blk, size_t size, bool reducing = false) {
+    mark_block(blk, blk + size, reducing);
   }
 
   // Adjust _unallocated_block to indicate that a particular
   // block has been newly allocated or freed. It is assumed (and
   // verified in the non-product VM) that the BOT is correct for
   // the given block.
-  void allocated(HeapWord* blk_start, HeapWord* blk_end) {
+  void allocated(HeapWord* blk_start, HeapWord* blk_end, bool reducing = false) {
     // Verify that the BOT shows [blk, blk + blk_size) to be one block.
     verify_single_block(blk_start, blk_end);
     if (BlockOffsetArrayUseUnallocatedBlock) {
@@ -441,14 +476,12 @@
     }
   }
 
-  void allocated(HeapWord* blk, size_t size) {
-    allocated(blk, blk + size);
+  void allocated(HeapWord* blk, size_t size, bool reducing = false) {
+    allocated(blk, blk + size, reducing);
   }
 
   void freed(HeapWord* blk_start, HeapWord* blk_end);
-  void freed(HeapWord* blk, size_t size) {
-    freed(blk, blk + size);
-  }
+  void freed(HeapWord* blk, size_t size);
 
   HeapWord* block_start_unsafe(const void* addr) const;
 
@@ -456,7 +489,6 @@
   // start of the block that contains the given address.
   HeapWord* block_start_careful(const void* addr) const;
 
-
   // Verification & debugging: ensure that the offset table reflects
   // the fact that the block [blk_start, blk_end) or [blk, blk + size)
   // is a single block of storage. NOTE: can't const this because of
--- a/hotspot/src/share/vm/memory/blockOffsetTable.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/memory/blockOffsetTable.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2002, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,10 +55,22 @@
   return result;
 }
 
+inline void BlockOffsetSharedArray::check_reducing_assertion(bool reducing) {
+    assert(reducing || !SafepointSynchronize::is_at_safepoint() || init_to_zero() ||
+           Thread::current()->is_VM_thread() ||
+           Thread::current()->is_ConcurrentGC_thread() ||
+           ((!Thread::current()->is_ConcurrentGC_thread()) &&
+            ParGCRareEvent_lock->owned_by_self()), "Crack");
+}
 
 //////////////////////////////////////////////////////////////////////////
 // BlockOffsetArrayNonContigSpace inlines
 //////////////////////////////////////////////////////////////////////////
+inline void BlockOffsetArrayNonContigSpace::freed(HeapWord* blk,
+                                                  size_t size) {
+  freed(blk, blk + size);
+}
+
 inline void BlockOffsetArrayNonContigSpace::freed(HeapWord* blk_start,
                                                   HeapWord* blk_end) {
   // Verify that the BOT shows [blk_start, blk_end) to be one block.
--- a/hotspot/src/share/vm/oops/arrayKlassKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/arrayKlassKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -108,10 +108,6 @@
 }
 
 #ifndef SERIALGC
-void arrayKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->blueprint()->oop_is_arrayKlass(),"must be an array klass");
-}
-
 void arrayKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->blueprint()->oop_is_arrayKlass(),"must be an array klass");
 }
--- a/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -120,10 +120,6 @@
 }
 
 #ifndef SERIALGC
-void compiledICHolderKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_compiledICHolder(), "must be compiledICHolder");
-}
-
 void compiledICHolderKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_compiledICHolder(), "must be compiledICHolder");
 }
--- a/hotspot/src/share/vm/oops/constMethodKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/constMethodKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -157,10 +157,6 @@
 }
 
 #ifndef SERIALGC
-void constMethodKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_constMethod(), "should be constMethod");
-}
-
 void constMethodKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_constMethod(), "should be constMethod");
 }
--- a/hotspot/src/share/vm/oops/constantPoolKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/constantPoolKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -268,21 +268,6 @@
   return cp->object_size();
 }
 
-void constantPoolKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_constantPool(), "should be constant pool");
-  constantPoolOop cp = (constantPoolOop) obj;
-  if (AnonymousClasses && cp->has_pseudo_string() && cp->tags() != NULL) {
-    oop* base = (oop*)cp->base();
-    for (int i = 0; i < cp->length(); ++i, ++base) {
-      if (cp->tag_at(i).is_string()) {
-        if (PSScavenge::should_scavenge(base)) {
-          pm->claim_or_forward_breadth(base);
-        }
-      }
-    }
-  }
-}
-
 void constantPoolKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_constantPool(), "should be constant pool");
   constantPoolOop cp = (constantPoolOop) obj;
--- a/hotspot/src/share/vm/oops/cpCacheKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/cpCacheKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -166,29 +166,6 @@
 }
 
 #ifndef SERIALGC
-void constantPoolCacheKlass::oop_copy_contents(PSPromotionManager* pm,
-                                               oop obj) {
-  assert(obj->is_constantPoolCache(), "should be constant pool");
-  if (EnableInvokeDynamic) {
-    constantPoolCacheOop cache = (constantPoolCacheOop)obj;
-    // during a scavenge, it is safe to inspect my pool, since it is perm
-    constantPoolOop pool = cache->constant_pool();
-    assert(pool->is_constantPool(), "should be constant pool");
-    if (pool->has_invokedynamic()) {
-      for (int i = 0; i < cache->length(); i++) {
-        ConstantPoolCacheEntry* e = cache->entry_at(i);
-        oop* p = (oop*)&e->_f1;
-        if (e->is_secondary_entry()) {
-          if (PSScavenge::should_scavenge(p))
-            pm->claim_or_forward_breadth(p);
-          assert(!(e->is_vfinal() && PSScavenge::should_scavenge((oop*)&e->_f2)),
-                 "no live oops here");
-        }
-      }
-    }
-  }
-}
-
 void constantPoolCacheKlass::oop_push_contents(PSPromotionManager* pm,
                                                oop obj) {
   assert(obj->is_constantPoolCache(), "should be constant pool");
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1809,18 +1809,7 @@
 }
 
 #ifndef SERIALGC
-void instanceKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(!pm->depth_first(), "invariant");
-  InstanceKlass_OOP_MAP_REVERSE_ITERATE( \
-    obj, \
-    if (PSScavenge::should_scavenge(p)) { \
-      pm->claim_or_forward_breadth(p); \
-    }, \
-    assert_nothing )
-}
-
 void instanceKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
-  assert(pm->depth_first(), "invariant");
   InstanceKlass_OOP_MAP_REVERSE_ITERATE( \
     obj, \
     if (PSScavenge::should_scavenge(p)) { \
@@ -1846,18 +1835,7 @@
   return size_helper();
 }
 
-void instanceKlass::copy_static_fields(PSPromotionManager* pm) {
-  assert(!pm->depth_first(), "invariant");
-  InstanceKlass_OOP_ITERATE( \
-    start_of_static_fields(), static_oop_field_size(), \
-    if (PSScavenge::should_scavenge(p)) { \
-      pm->claim_or_forward_breadth(p); \
-    }, \
-    assert_nothing )
-}
-
 void instanceKlass::push_static_fields(PSPromotionManager* pm) {
-  assert(pm->depth_first(), "invariant");
   InstanceKlass_OOP_ITERATE( \
     start_of_static_fields(), static_oop_field_size(), \
     if (PSScavenge::should_scavenge(p)) { \
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -711,7 +711,6 @@
 
 #ifndef SERIALGC
   // Parallel Scavenge
-  void copy_static_fields(PSPromotionManager* pm);
   void push_static_fields(PSPromotionManager* pm);
 
   // Parallel Old
--- a/hotspot/src/share/vm/oops/instanceKlassKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlassKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -292,41 +292,7 @@
 }
 
 #ifndef SERIALGC
-void instanceKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(!pm->depth_first(), "invariant");
-  instanceKlass* ik = instanceKlass::cast(klassOop(obj));
-  ik->copy_static_fields(pm);
-
-  oop* loader_addr = ik->adr_class_loader();
-  if (PSScavenge::should_scavenge(loader_addr)) {
-    pm->claim_or_forward_breadth(loader_addr);
-  }
-
-  oop* pd_addr = ik->adr_protection_domain();
-  if (PSScavenge::should_scavenge(pd_addr)) {
-    pm->claim_or_forward_breadth(pd_addr);
-  }
-
-  oop* hk_addr = ik->adr_host_klass();
-  if (PSScavenge::should_scavenge(hk_addr)) {
-    pm->claim_or_forward_breadth(hk_addr);
-  }
-
-  oop* sg_addr = ik->adr_signers();
-  if (PSScavenge::should_scavenge(sg_addr)) {
-    pm->claim_or_forward_breadth(sg_addr);
-  }
-
-  oop* bsm_addr = ik->adr_bootstrap_method();
-  if (PSScavenge::should_scavenge(bsm_addr)) {
-    pm->claim_or_forward_breadth(bsm_addr);
-  }
-
-  klassKlass::oop_copy_contents(pm, obj);
-}
-
 void instanceKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
-  assert(pm->depth_first(), "invariant");
   instanceKlass* ik = instanceKlass::cast(klassOop(obj));
   ik->push_static_fields(pm);
 
@@ -355,7 +321,7 @@
     pm->claim_or_forward_depth(bsm_addr);
   }
 
-  klassKlass::oop_copy_contents(pm, obj);
+  klassKlass::oop_push_contents(pm, obj);
 }
 
 int instanceKlassKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) {
--- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -273,41 +273,8 @@
 
 #ifndef SERIALGC
 template <class T>
-void specialized_oop_copy_contents(instanceRefKlass *ref,
-                                   PSPromotionManager* pm, oop obj) {
-  assert(!pm->depth_first(), "invariant");
-  T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
-  if (PSScavenge::should_scavenge(referent_addr)) {
-    ReferenceProcessor* rp = PSScavenge::reference_processor();
-    if (rp->discover_reference(obj, ref->reference_type())) {
-      // reference already enqueued, referent and next will be traversed later
-      ref->instanceKlass::oop_copy_contents(pm, obj);
-      return;
-    } else {
-      // treat referent as normal oop
-      pm->claim_or_forward_breadth(referent_addr);
-    }
-  }
-  // treat next as normal oop
-  T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
-  if (PSScavenge::should_scavenge(next_addr)) {
-    pm->claim_or_forward_breadth(next_addr);
-  }
-  ref->instanceKlass::oop_copy_contents(pm, obj);
-}
-
-void instanceRefKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  if (UseCompressedOops) {
-    specialized_oop_copy_contents<narrowOop>(this, pm, obj);
-  } else {
-    specialized_oop_copy_contents<oop>(this, pm, obj);
-  }
-}
-
-template <class T>
 void specialized_oop_push_contents(instanceRefKlass *ref,
                                    PSPromotionManager* pm, oop obj) {
-  assert(pm->depth_first(), "invariant");
   T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
   if (PSScavenge::should_scavenge(referent_addr)) {
     ReferenceProcessor* rp = PSScavenge::reference_processor();
--- a/hotspot/src/share/vm/oops/klassKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/klassKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -161,9 +161,6 @@
 }
 
 #ifndef SERIALGC
-void klassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-}
-
 void klassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
 }
 
--- a/hotspot/src/share/vm/oops/klassPS.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/klassPS.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -28,7 +28,6 @@
 
 #ifndef SERIALGC
 #define PARALLEL_GC_DECLS \
-  virtual void oop_copy_contents(PSPromotionManager* pm, oop obj);          \
   virtual void oop_push_contents(PSPromotionManager* pm, oop obj);          \
   /* Parallel Old GC support                                                \
                                                                             \
@@ -43,7 +42,6 @@
 
 // Pure virtual version for klass.hpp
 #define PARALLEL_GC_DECLS_PV \
-  virtual void oop_copy_contents(PSPromotionManager* pm, oop obj) = 0;      \
   virtual void oop_push_contents(PSPromotionManager* pm, oop obj) = 0;      \
   virtual void oop_follow_contents(ParCompactionManager* cm, oop obj) = 0;  \
   virtual int  oop_update_pointers(ParCompactionManager* cm, oop obj) = 0;  \
--- a/hotspot/src/share/vm/oops/methodDataKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/methodDataKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -154,13 +154,6 @@
 
 
 #ifndef SERIALGC
-void methodDataKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert (obj->is_methodData(), "object must be method data");
-  methodDataOop m = methodDataOop(obj);
-  // This should never point into the young gen.
-  assert(!PSScavenge::should_scavenge(m->adr_method()), "Sanity");
-}
-
 void methodDataKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert (obj->is_methodData(), "object must be method data");
   methodDataOop m = methodDataOop(obj);
--- a/hotspot/src/share/vm/oops/methodKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/methodKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -184,10 +184,6 @@
 }
 
 #ifndef SERIALGC
-void methodKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_method(), "should be method");
-}
-
 void methodKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_method(), "should be method");
 }
--- a/hotspot/src/share/vm/oops/objArrayKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -426,18 +426,7 @@
 }
 
 #ifndef SERIALGC
-void objArrayKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(!pm->depth_first(), "invariant");
-  assert(obj->is_objArray(), "obj must be obj array");
-  ObjArrayKlass_OOP_ITERATE( \
-    objArrayOop(obj), p, \
-    if (PSScavenge::should_scavenge(p)) { \
-      pm->claim_or_forward_breadth(p); \
-    })
-}
-
 void objArrayKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
-  assert(pm->depth_first(), "invariant");
   assert(obj->is_objArray(), "obj must be obj array");
   ObjArrayKlass_OOP_ITERATE( \
     objArrayOop(obj), p, \
--- a/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -229,10 +229,6 @@
 }
 
 #ifndef SERIALGC
-void objArrayKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->blueprint()->oop_is_objArrayKlass(),"must be an obj array klass");
-}
-
 void objArrayKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->blueprint()->oop_is_objArrayKlass(),"must be an obj array klass");
 }
--- a/hotspot/src/share/vm/oops/oop.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/oop.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -306,7 +306,6 @@
 
 #ifndef SERIALGC
   // Parallel Scavenge
-  void copy_contents(PSPromotionManager* pm);
   void push_contents(PSPromotionManager* pm);
 
   // Parallel Old
--- a/hotspot/src/share/vm/oops/oop.psgc.inline.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/oop.psgc.inline.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -24,15 +24,6 @@
 
 // ParallelScavengeHeap methods
 
-inline void oopDesc::copy_contents(PSPromotionManager* pm) {
-  Klass* klass = blueprint();
-  if (!klass->oop_is_typeArray()) {
-    // It might contain oops beyond the header, so take the virtual call.
-    klass->oop_copy_contents(pm, this);
-  }
-  // Else skip it.  The typeArrayKlass in the header never needs scavenging.
-}
-
 inline void oopDesc::push_contents(PSPromotionManager* pm) {
   Klass* klass = blueprint();
   if (!klass->oop_is_typeArray()) {
--- a/hotspot/src/share/vm/oops/symbolKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/symbolKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -184,10 +184,6 @@
 
 
 #ifndef SERIALGC
-void symbolKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_symbol(), "should be symbol");
-}
-
 void symbolKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_symbol(), "should be symbol");
 }
--- a/hotspot/src/share/vm/oops/typeArrayKlass.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/oops/typeArrayKlass.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -228,10 +228,6 @@
 }
 
 #ifndef SERIALGC
-void typeArrayKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
-  assert(obj->is_typeArray(),"must be a type array");
-}
-
 void typeArrayKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_typeArray(),"must be a type array");
 }
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -178,6 +178,9 @@
   product(bool, ReduceBulkZeroing, true,                                    \
           "When bulk-initializing, try to avoid needless zeroing")          \
                                                                             \
+  product(bool, UseFPUForSpilling, false,                                   \
+          "Spill integer registers to FPU instead of stack when possible")  \
+                                                                            \
   develop_pd(intx, RegisterCostAreaRatio,                                   \
           "Spill selection in reg allocator: scale area by (X/64K) before " \
           "adding cost")                                                    \
--- a/hotspot/src/share/vm/opto/coalesce.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/opto/coalesce.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -780,6 +780,14 @@
   // Number of bits free
   uint rm_size = rm.Size();
 
+  if (UseFPUForSpilling && rm.is_AllStack() ) {
+    // Don't coalesce when frequency difference is large
+    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
+    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
+    if (src_def_b->_freq > 10*dst_b->_freq )
+      return false;
+  }
+
   // If we can use any stack slot, then effective size is infinite
   if( rm.is_AllStack() ) rm_size += 1000000;
   // Incompatible masks, no way to coalesce
--- a/hotspot/src/share/vm/opto/matcher.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -456,6 +456,23 @@
   *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
    idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
 
+   if (UseFPUForSpilling) {
+     // This mask logic assumes that the spill operations are
+     // symmetric and that the registers involved are the same size.
+     // On sparc for instance we may have to use 64 bit moves will
+     // kill 2 registers when used with F0-F31.
+     idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+     idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+#ifdef _LP64
+     idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+#else
+     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+#endif
+   }
+
   // Make up debug masks.  Any spill slot plus callee-save registers.
   // Caller-save registers are assumed to be trashable by the various
   // inline-cache fixup routines.
--- a/hotspot/src/share/vm/opto/reg_split.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/opto/reg_split.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -975,6 +975,19 @@
               insidx++;  // Reset iterator to skip USE side split
               continue;
             }
+
+            if (UseFPUForSpilling && n->is_Call() && !uup && !dup ) {
+              // The use at the call can force the def down so insert
+              // a split before the use to allow the def more freedom.
+              maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+              // If it wasn't split bail
+              if (!maxlrg) {
+                return 0;
+              }
+              insidx++;  // Reset iterator to skip USE side split
+              continue;
+            }
+
             // Here is the logic chart which describes USE Splitting:
             // 0 = false or DOWN, 1 = true or UP
             //
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -184,6 +184,8 @@
   { "DefaultMaxRAM",       JDK_Version::jdk_update(6,18), JDK_Version::jdk(7) },
   { "DefaultInitialRAMFraction",
                            JDK_Version::jdk_update(6,18), JDK_Version::jdk(7) },
+  { "UseDepthFirstScavengeOrder",
+                           JDK_Version::jdk_update(6,22), JDK_Version::jdk(7) },
   { NULL, JDK_Version(0), JDK_Version(0) }
 };
 
@@ -3003,10 +3005,6 @@
     CommandLineFlags::printSetFlags();
   }
 
-  if (PrintFlagsFinal) {
-    CommandLineFlags::printFlags();
-  }
-
   // Apply CPU specific policy for the BiasedLocking
   if (UseBiasedLocking) {
     if (!VM_Version::use_biased_locking() &&
--- a/hotspot/src/share/vm/runtime/frame.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/frame.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -215,17 +215,15 @@
   return !nm->is_at_poll_return(pc());
 }
 
-void frame::deoptimize(JavaThread* thread, bool thread_is_known_safe) {
-// Schedule deoptimization of an nmethod activation with this frame.
-
-  // Store the original pc before an patch (or request to self-deopt)
-  // in the published location of the frame.
-
+void frame::deoptimize(JavaThread* thread) {
+  // Schedule deoptimization of an nmethod activation with this frame.
   assert(_cb != NULL && _cb->is_nmethod(), "must be");
   nmethod* nm = (nmethod*)_cb;
 
   // This is a fix for register window patching race
-  if (NeedsDeoptSuspend && !thread_is_known_safe) {
+  if (NeedsDeoptSuspend && Thread::current() != thread) {
+    assert(SafepointSynchronize::is_at_safepoint(),
+           "patching other threads for deopt may only occur at a safepoint");
 
     // It is possible especially with DeoptimizeALot/DeoptimizeRandom that
     // we could see the frame again and ask for it to be deoptimized since
@@ -248,7 +246,11 @@
     // whether to spin or block. It isn't worth it. Just treat it like
     // native and be done with it.
     //
-    JavaThreadState state = thread->thread_state();
+    // Examine the state of the thread at the start of safepoint since
+    // threads that were in native at the start of the safepoint could
+    // come to a halt during the safepoint, changing the current value
+    // of the safepoint_state.
+    JavaThreadState state = thread->safepoint_state()->orig_thread_state();
     if (state == _thread_in_native || state == _thread_in_native_trans) {
       // Since we are at a safepoint the target thread will stop itself
       // before it can return to java as long as we remain at the safepoint.
--- a/hotspot/src/share/vm/runtime/frame.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/frame.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -174,7 +174,7 @@
   address  sender_pc() const;
 
   // Support for deoptimization
-  void deoptimize(JavaThread* thread, bool thread_is_known_safe = false);
+  void deoptimize(JavaThread* thread);
 
   // The frame's original SP, before any extension by an interpreted callee;
   // used for packing debug info into vframeArray objects and vframeArray lookup.
--- a/hotspot/src/share/vm/runtime/globals.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1712,7 +1712,7 @@
   develop(bool, VerifyBlockOffsetArray, false,                              \
           "Do (expensive!) block offset array verification")                \
                                                                             \
-  product(bool, BlockOffsetArrayUseUnallocatedBlock, trueInDebug,           \
+  product(bool, BlockOffsetArrayUseUnallocatedBlock, false,                 \
           "Maintain _unallocated_block in BlockOffsetArray"                 \
           " (currently applicable only to CMS collector)")                  \
                                                                             \
@@ -3092,10 +3092,6 @@
                                                                             \
   product(intx, SafepointSpinBeforeYield, 2000,  "(Unstable)")              \
                                                                             \
-  product(bool, UseDepthFirstScavengeOrder, true,                           \
-          "true: the scavenge order will be depth-first, "                  \
-          "false: the scavenge order will be breadth-first")                \
-                                                                            \
   product(bool, PSChunkLargeArrays, true,                                   \
           "true: process large arrays in chunks")                           \
                                                                             \
--- a/hotspot/src/share/vm/runtime/init.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/init.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -128,6 +128,12 @@
     Universe::verify();   // make sure we're starting with a clean slate
   }
 
+  // All the flags that get adjusted by VM_Version_init and os::init_2
+  // have been set so dump the flags now.
+  if (PrintFlagsFinal) {
+    CommandLineFlags::printFlags();
+  }
+
   return JNI_OK;
 }
 
--- a/hotspot/src/share/vm/runtime/orderAccess.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,8 +25,6 @@
 # include "incls/_precompiled.incl"
 # include "incls/_orderAccess.cpp.incl"
 
-volatile intptr_t OrderAccess::dummy = 0;
-
 void OrderAccess::StubRoutines_fence() {
   // Use a stub if it exists.  It may not exist during bootstrap so do
   // nothing in that case but assert if no fence code exists after threads have been created
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,6 +166,12 @@
 // and release must include a sequence point, usually via a volatile memory
 // access.  Other ways to guarantee a sequence point are, e.g., use of
 // indirect calls and linux's __asm__ volatile.
+// Note: as of 6973570, we have replaced the originally static "dummy" field
+// (see above) by a volatile store to the stack. All of the versions of the
+// compilers that we currently use (SunStudio, gcc and VC++) respect the
+// semantics of volatile here. If you build HotSpot using other
+// compilers, you may need to verify that no compiler reordering occurs
+// across the sequence point respresented by the volatile access.
 //
 //
 //                os::is_MP Considered Redundant
@@ -297,10 +303,6 @@
   static void     release_store_ptr_fence(volatile intptr_t* p, intptr_t v);
   static void     release_store_ptr_fence(volatile void*     p, void*    v);
 
-  // In order to force a memory access, implementations may
-  // need a volatile externally visible dummy variable.
-  static volatile intptr_t dummy;
-
  private:
   // This is a helper that invokes the StubRoutines::fence_entry()
   // routine if it exists, It should only be used by platforms that
--- a/hotspot/src/share/vm/runtime/safepoint.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/safepoint.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -782,6 +782,9 @@
 
   JavaThreadState state = _thread->thread_state();
 
+  // Save the state at the start of safepoint processing.
+  _orig_thread_state = state;
+
   // Check for a thread that is suspended. Note that thread resume tries
   // to grab the Threads_lock which we own here, so a thread cannot be
   // resumed during safepoint synchronization.
--- a/hotspot/src/share/vm/runtime/safepoint.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/safepoint.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -185,6 +185,7 @@
 
   JavaThread *                   _thread;
   volatile suspend_type          _type;
+  JavaThreadState                _orig_thread_state;
 
 
  public:
@@ -199,6 +200,7 @@
   JavaThread*  thread() const         { return _thread; }
   suspend_type type() const           { return _type; }
   bool         is_running() const     { return (_type==_running); }
+  JavaThreadState orig_thread_state() const { return _orig_thread_state; }
 
   // Support for safepoint timeout (debugging)
   bool has_called_back() const                   { return _has_called_back; }
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -2493,15 +2493,13 @@
   }
 
   // Must unlock before calling set_code
+
   // Install the generated code.
   if (nm != NULL) {
     method->set_code(method, nm);
     nm->post_compiled_method_load_event();
   } else {
     // CodeCache is full, disable compilation
-    // Ought to log this but compile log is only per compile thread
-    // and we're some non descript Java thread.
-    MutexUnlocker mu(AdapterHandlerLibrary_lock);
     CompileBroker::handle_full_code_cache();
   }
   return nm;
--- a/hotspot/src/share/vm/runtime/thread.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -2110,8 +2110,7 @@
     }
     if (f.id() == thread->must_deopt_id()) {
       thread->clear_must_deopt_id();
-      // Since we know we're safe to deopt the current state is a safe state
-      f.deoptimize(thread, true);
+      f.deoptimize(thread);
     } else {
       fatal("missed deoptimization!");
     }
--- a/hotspot/src/share/vm/services/management.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/services/management.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -1900,16 +1900,15 @@
 
   // Get the GCMemoryManager
   GCMemoryManager* mgr = get_gc_memory_manager_from_jobject(obj, CHECK);
-  if (mgr->last_gc_stat() == NULL) {
-    gc_stat->gc_index = 0;
-    return;
-  }
 
   // Make a copy of the last GC statistics
   // GC may occur while constructing the last GC information
   int num_pools = MemoryService::num_memory_pools();
   GCStatInfo* stat = new GCStatInfo(num_pools);
-  stat->copy_stat(mgr->last_gc_stat());
+  if (mgr->get_last_gc_stat(stat) == 0) {
+    gc_stat->gc_index = 0;
+    return;
+  }
 
   gc_stat->gc_index = stat->gc_index();
   gc_stat->start_time = Management::ticks_to_ms(stat->start_time());
--- a/hotspot/src/share/vm/services/memoryManager.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/services/memoryManager.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -166,17 +166,6 @@
   FREE_C_HEAP_ARRAY(MemoryUsage*, _after_gc_usage_array);
 }
 
-void GCStatInfo::copy_stat(GCStatInfo* stat) {
-  set_index(stat->gc_index());
-  set_start_time(stat->start_time());
-  set_end_time(stat->end_time());
-  assert(_usage_array_size == stat->usage_array_size(), "Must have same array size");
-  for (int i = 0; i < _usage_array_size; i++) {
-    set_before_gc_usage(i, stat->before_gc_usage_for_pool(i));
-    set_after_gc_usage(i, stat->after_gc_usage_for_pool(i));
-  }
-}
-
 void GCStatInfo::set_gc_usage(int pool_index, MemoryUsage usage, bool before_gc) {
   MemoryUsage* gc_usage_array;
   if (before_gc) {
@@ -187,67 +176,129 @@
   gc_usage_array[pool_index] = usage;
 }
 
+void GCStatInfo::clear() {
+  _index = 0;
+  _start_time = 0L;
+  _end_time = 0L;
+  size_t len = _usage_array_size * sizeof(MemoryUsage);
+  memset(_before_gc_usage_array, 0, len);
+  memset(_after_gc_usage_array, 0, len);
+}
+
+
 GCMemoryManager::GCMemoryManager() : MemoryManager() {
   _num_collections = 0;
   _last_gc_stat = NULL;
+  _last_gc_lock = new Mutex(Mutex::leaf, "_last_gc_lock", true);
+  _current_gc_stat = NULL;
   _num_gc_threads = 1;
 }
 
 GCMemoryManager::~GCMemoryManager() {
   delete _last_gc_stat;
+  delete _last_gc_lock;
+  delete _current_gc_stat;
 }
 
 void GCMemoryManager::initialize_gc_stat_info() {
   assert(MemoryService::num_memory_pools() > 0, "should have one or more memory pools");
   _last_gc_stat = new GCStatInfo(MemoryService::num_memory_pools());
+  _current_gc_stat = new GCStatInfo(MemoryService::num_memory_pools());
+  // tracking concurrent collections we need two objects: one to update, and one to
+  // hold the publicly available "last (completed) gc" information.
 }
 
-void GCMemoryManager::gc_begin() {
-  assert(_last_gc_stat != NULL, "Just checking");
-  _accumulated_timer.start();
-  _num_collections++;
-  _last_gc_stat->set_index(_num_collections);
-  _last_gc_stat->set_start_time(Management::timestamp());
+void GCMemoryManager::gc_begin(bool recordGCBeginTime, bool recordPreGCUsage,
+                               bool recordAccumulatedGCTime) {
+  assert(_last_gc_stat != NULL && _current_gc_stat != NULL, "Just checking");
+  if (recordAccumulatedGCTime) {
+    _accumulated_timer.start();
+  }
+  // _num_collections now increases in gc_end, to count completed collections
+  if (recordGCBeginTime) {
+    _current_gc_stat->set_index(_num_collections+1);
+    _current_gc_stat->set_start_time(Management::timestamp());
+  }
 
-  // Keep memory usage of all memory pools
-  for (int i = 0; i < MemoryService::num_memory_pools(); i++) {
-    MemoryPool* pool = MemoryService::get_memory_pool(i);
-    MemoryUsage usage = pool->get_memory_usage();
-    _last_gc_stat->set_before_gc_usage(i, usage);
-    HS_DTRACE_PROBE8(hotspot, mem__pool__gc__begin,
-      name(), strlen(name()),
-      pool->name(), strlen(pool->name()),
-      usage.init_size(), usage.used(),
-      usage.committed(), usage.max_size());
+  if (recordPreGCUsage) {
+    // Keep memory usage of all memory pools
+    for (int i = 0; i < MemoryService::num_memory_pools(); i++) {
+      MemoryPool* pool = MemoryService::get_memory_pool(i);
+      MemoryUsage usage = pool->get_memory_usage();
+      _current_gc_stat->set_before_gc_usage(i, usage);
+      HS_DTRACE_PROBE8(hotspot, mem__pool__gc__begin,
+        name(), strlen(name()),
+        pool->name(), strlen(pool->name()),
+        usage.init_size(), usage.used(),
+        usage.committed(), usage.max_size());
+    }
   }
 }
 
-void GCMemoryManager::gc_end() {
-  _accumulated_timer.stop();
-  _last_gc_stat->set_end_time(Management::timestamp());
-
-  int i;
-  // keep the last gc statistics for all memory pools
-  for (i = 0; i < MemoryService::num_memory_pools(); i++) {
-    MemoryPool* pool = MemoryService::get_memory_pool(i);
-    MemoryUsage usage = pool->get_memory_usage();
-
-    HS_DTRACE_PROBE8(hotspot, mem__pool__gc__end,
-      name(), strlen(name()),
-      pool->name(), strlen(pool->name()),
-      usage.init_size(), usage.used(),
-      usage.committed(), usage.max_size());
-
-    _last_gc_stat->set_after_gc_usage(i, usage);
+// A collector MUST, even if it does not complete for some reason,
+// make a TraceMemoryManagerStats object where countCollection is true,
+// to ensure the current gc stat is placed in _last_gc_stat.
+void GCMemoryManager::gc_end(bool recordPostGCUsage,
+                             bool recordAccumulatedGCTime,
+                             bool recordGCEndTime, bool countCollection) {
+  if (recordAccumulatedGCTime) {
+    _accumulated_timer.stop();
+  }
+  if (recordGCEndTime) {
+    _current_gc_stat->set_end_time(Management::timestamp());
   }
 
-  // Set last collection usage of the memory pools managed by this collector
-  for (i = 0; i < num_memory_pools(); i++) {
-    MemoryPool* pool = get_memory_pool(i);
-    MemoryUsage usage = pool->get_memory_usage();
+  if (recordPostGCUsage) {
+    int i;
+    // keep the last gc statistics for all memory pools
+    for (i = 0; i < MemoryService::num_memory_pools(); i++) {
+      MemoryPool* pool = MemoryService::get_memory_pool(i);
+      MemoryUsage usage = pool->get_memory_usage();
+
+      HS_DTRACE_PROBE8(hotspot, mem__pool__gc__end,
+        name(), strlen(name()),
+        pool->name(), strlen(pool->name()),
+        usage.init_size(), usage.used(),
+        usage.committed(), usage.max_size());
+
+      _current_gc_stat->set_after_gc_usage(i, usage);
+    }
 
-    // Compare with GC usage threshold
-    pool->set_last_collection_usage(usage);
-    LowMemoryDetector::detect_after_gc_memory(pool);
+    // Set last collection usage of the memory pools managed by this collector
+    for (i = 0; i < num_memory_pools(); i++) {
+      MemoryPool* pool = get_memory_pool(i);
+      MemoryUsage usage = pool->get_memory_usage();
+
+      // Compare with GC usage threshold
+      pool->set_last_collection_usage(usage);
+      LowMemoryDetector::detect_after_gc_memory(pool);
+    }
+  }
+  if (countCollection) {
+    _num_collections++;
+    // alternately update two objects making one public when complete
+    {
+      MutexLockerEx ml(_last_gc_lock, Mutex::_no_safepoint_check_flag);
+      GCStatInfo *tmp = _last_gc_stat;
+      _last_gc_stat = _current_gc_stat;
+      _current_gc_stat = tmp;
+      // reset the current stat for diagnosability purposes
+      _current_gc_stat->clear();
+    }
   }
 }
+
+size_t GCMemoryManager::get_last_gc_stat(GCStatInfo* dest) {
+  MutexLockerEx ml(_last_gc_lock, Mutex::_no_safepoint_check_flag);
+  if (_last_gc_stat->gc_index() != 0) {
+    dest->set_index(_last_gc_stat->gc_index());
+    dest->set_start_time(_last_gc_stat->start_time());
+    dest->set_end_time(_last_gc_stat->end_time());
+    assert(dest->usage_array_size() == _last_gc_stat->usage_array_size(),
+           "Must have same array size");
+    size_t len = dest->usage_array_size() * sizeof(MemoryUsage);
+    memcpy(dest->before_gc_usage_array(), _last_gc_stat->before_gc_usage_array(), len);
+    memcpy(dest->after_gc_usage_array(), _last_gc_stat->after_gc_usage_array(), len);
+  }
+  return _last_gc_stat->gc_index();
+}
--- a/hotspot/src/share/vm/services/memoryManager.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/services/memoryManager.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -131,6 +131,9 @@
     return _after_gc_usage_array[pool_index];
   }
 
+  MemoryUsage* before_gc_usage_array() { return _before_gc_usage_array; }
+  MemoryUsage* after_gc_usage_array()  { return _after_gc_usage_array; }
+
   void set_index(size_t index)    { _index = index; }
   void set_start_time(jlong time) { _start_time = time; }
   void set_end_time(jlong time)   { _end_time = time; }
@@ -143,7 +146,7 @@
     set_gc_usage(pool_index, usage, false /* after gc */);
   }
 
-  void copy_stat(GCStatInfo* stat);
+  void clear();
 };
 
 class GCMemoryManager : public MemoryManager {
@@ -153,6 +156,8 @@
   elapsedTimer _accumulated_timer;
   elapsedTimer _gc_timer;         // for measuring every GC duration
   GCStatInfo*  _last_gc_stat;
+  Mutex*       _last_gc_lock;
+  GCStatInfo*  _current_gc_stat;
   int          _num_gc_threads;
 public:
   GCMemoryManager();
@@ -166,11 +171,16 @@
   int    num_gc_threads()               { return _num_gc_threads; }
   void   set_num_gc_threads(int count)  { _num_gc_threads = count; }
 
-  void   gc_begin();
-  void   gc_end();
+  void   gc_begin(bool recordGCBeginTime, bool recordPreGCUsage,
+                  bool recordAccumulatedGCTime);
+  void   gc_end(bool recordPostGCUsage, bool recordAccumulatedGCTime,
+                bool recordGCEndTime, bool countCollection);
 
   void        reset_gc_stat()   { _num_collections = 0; _accumulated_timer.reset(); }
-  GCStatInfo* last_gc_stat()    { return _last_gc_stat; }
+
+  // Copy out _last_gc_stat to the given destination, returning
+  // the collection count. Zero signifies no gc has taken place.
+  size_t get_last_gc_stat(GCStatInfo* dest);
 
   virtual MemoryManager::Name kind() = 0;
 };
--- a/hotspot/src/share/vm/services/memoryService.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/services/memoryService.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -509,7 +509,10 @@
   }
 }
 
-void MemoryService::gc_begin(bool fullGC) {
+void MemoryService::gc_begin(bool fullGC, bool recordGCBeginTime,
+                             bool recordAccumulatedGCTime,
+                             bool recordPreGCUsage, bool recordPeakUsage) {
+
   GCMemoryManager* mgr;
   if (fullGC) {
     mgr = _major_gc_manager;
@@ -517,16 +520,21 @@
     mgr = _minor_gc_manager;
   }
   assert(mgr->is_gc_memory_manager(), "Sanity check");
-  mgr->gc_begin();
+  mgr->gc_begin(recordGCBeginTime, recordPreGCUsage, recordAccumulatedGCTime);
 
   // Track the peak memory usage when GC begins
-  for (int i = 0; i < _pools_list->length(); i++) {
-    MemoryPool* pool = _pools_list->at(i);
-    pool->record_peak_memory_usage();
+  if (recordPeakUsage) {
+    for (int i = 0; i < _pools_list->length(); i++) {
+      MemoryPool* pool = _pools_list->at(i);
+      pool->record_peak_memory_usage();
+    }
   }
 }
 
-void MemoryService::gc_end(bool fullGC) {
+void MemoryService::gc_end(bool fullGC, bool recordPostGCUsage,
+                           bool recordAccumulatedGCTime,
+                           bool recordGCEndTime, bool countCollection) {
+
   GCMemoryManager* mgr;
   if (fullGC) {
     mgr = (GCMemoryManager*) _major_gc_manager;
@@ -536,7 +544,8 @@
   assert(mgr->is_gc_memory_manager(), "Sanity check");
 
   // register the GC end statistics and memory usage
-  mgr->gc_end();
+  mgr->gc_end(recordPostGCUsage, recordAccumulatedGCTime, recordGCEndTime,
+              countCollection);
 }
 
 void MemoryService::oops_do(OopClosure* f) {
@@ -585,12 +594,12 @@
   return obj;
 }
 //
-// GC manager type depends on the type of Generation. Depending the space
-// availablity and vm option the gc uses major gc manager or minor gc
+// GC manager type depends on the type of Generation. Depending on the space
+// availablity and vm options the gc uses major gc manager or minor gc
 // manager or both. The type of gc manager depends on the generation kind.
-// For DefNew, ParNew and ASParNew generation doing scavange gc uses minor
-// gc manager (so _fullGC is set to false ) and for other generation kind
-// DOing mark-sweep-compact uses major gc manager (so _fullGC is set
+// For DefNew, ParNew and ASParNew generation doing scavenge gc uses minor
+// gc manager (so _fullGC is set to false ) and for other generation kinds
+// doing mark-sweep-compact uses major gc manager (so _fullGC is set
 // to true).
 TraceMemoryManagerStats::TraceMemoryManagerStats(Generation::Name kind) {
   switch (kind) {
@@ -611,13 +620,48 @@
     default:
       assert(false, "Unrecognized gc generation kind.");
   }
-  MemoryService::gc_begin(_fullGC);
+  // this has to be called in a stop the world pause and represent
+  // an entire gc pause, start to finish:
+  initialize(_fullGC, true, true, true, true, true, true, true);
+}
+TraceMemoryManagerStats::TraceMemoryManagerStats(bool fullGC,
+                                                 bool recordGCBeginTime,
+                                                 bool recordPreGCUsage,
+                                                 bool recordPeakUsage,
+                                                 bool recordPostGCUsage,
+                                                 bool recordAccumulatedGCTime,
+                                                 bool recordGCEndTime,
+                                                 bool countCollection) {
+  initialize(fullGC, recordGCBeginTime, recordPreGCUsage, recordPeakUsage,
+             recordPostGCUsage, recordAccumulatedGCTime, recordGCEndTime,
+             countCollection);
 }
-TraceMemoryManagerStats::TraceMemoryManagerStats(bool fullGC) {
+
+// for a subclass to create then initialize an instance before invoking
+// the MemoryService
+void TraceMemoryManagerStats::initialize(bool fullGC,
+                                         bool recordGCBeginTime,
+                                         bool recordPreGCUsage,
+                                         bool recordPeakUsage,
+                                         bool recordPostGCUsage,
+                                         bool recordAccumulatedGCTime,
+                                         bool recordGCEndTime,
+                                         bool countCollection) {
   _fullGC = fullGC;
-  MemoryService::gc_begin(_fullGC);
+  _recordGCBeginTime = recordGCBeginTime;
+  _recordPreGCUsage = recordPreGCUsage;
+  _recordPeakUsage = recordPeakUsage;
+  _recordPostGCUsage = recordPostGCUsage;
+  _recordAccumulatedGCTime = recordAccumulatedGCTime;
+  _recordGCEndTime = recordGCEndTime;
+  _countCollection = countCollection;
+
+  MemoryService::gc_begin(_fullGC, _recordGCBeginTime, _recordAccumulatedGCTime,
+                          _recordPreGCUsage, _recordPeakUsage);
 }
 
 TraceMemoryManagerStats::~TraceMemoryManagerStats() {
-  MemoryService::gc_end(_fullGC);
+  MemoryService::gc_end(_fullGC, _recordPostGCUsage, _recordAccumulatedGCTime,
+                        _recordGCEndTime, _countCollection);
 }
+
--- a/hotspot/src/share/vm/services/memoryService.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/services/memoryService.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -149,8 +149,13 @@
   }
   static void track_memory_pool_usage(MemoryPool* pool);
 
-  static void gc_begin(bool fullGC);
-  static void gc_end(bool fullGC);
+  static void gc_begin(bool fullGC, bool recordGCBeginTime,
+                       bool recordAccumulatedGCTime,
+                       bool recordPreGCUsage, bool recordPeakUsage);
+  static void gc_end(bool fullGC, bool recordPostGCUsage,
+                     bool recordAccumulatedGCTime,
+                     bool recordGCEndTime, bool countCollection);
+
 
   static void oops_do(OopClosure* f);
 
@@ -164,8 +169,34 @@
 class TraceMemoryManagerStats : public StackObj {
 private:
   bool         _fullGC;
+  bool         _recordGCBeginTime;
+  bool         _recordPreGCUsage;
+  bool         _recordPeakUsage;
+  bool         _recordPostGCUsage;
+  bool         _recordAccumulatedGCTime;
+  bool         _recordGCEndTime;
+  bool         _countCollection;
+
 public:
-  TraceMemoryManagerStats(bool fullGC);
+  TraceMemoryManagerStats() {}
+  TraceMemoryManagerStats(bool fullGC,
+                          bool recordGCBeginTime = true,
+                          bool recordPreGCUsage = true,
+                          bool recordPeakUsage = true,
+                          bool recordPostGCUsage = true,
+                          bool recordAccumulatedGCTime = true,
+                          bool recordGCEndTime = true,
+                          bool countCollection = true);
+
+  void initialize(bool fullGC,
+                  bool recordGCBeginTime,
+                  bool recordPreGCUsage,
+                  bool recordPeakUsage,
+                  bool recordPostGCUsage,
+                  bool recordAccumulatedGCTime,
+                  bool recordGCEndTime,
+                  bool countCollection);
+
   TraceMemoryManagerStats(Generation::Name kind);
   ~TraceMemoryManagerStats();
 };
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp	Thu Sep 09 14:27:59 2010 -0700
@@ -36,6 +36,14 @@
   "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
 };
 
+TaskQueueStats & TaskQueueStats::operator +=(const TaskQueueStats & addend)
+{
+  for (unsigned int i = 0; i < last_stat_id; ++i) {
+    _stats[i] += addend._stats[i];
+  }
+  return *this;
+}
+
 void TaskQueueStats::print_header(unsigned int line, outputStream* const stream,
                                   unsigned int width)
 {
@@ -71,6 +79,29 @@
   }
   #undef FMT
 }
+
+#ifdef ASSERT
+// Invariants which should hold after a TaskQueue has been emptied and is
+// quiescent; they do not hold at arbitrary times.
+void TaskQueueStats::verify() const
+{
+  assert(get(push) == get(pop) + get(steal),
+         err_msg("push=" SIZE_FORMAT " pop=" SIZE_FORMAT " steal=" SIZE_FORMAT,
+                 get(push), get(pop), get(steal)));
+  assert(get(pop_slow) <= get(pop),
+         err_msg("pop_slow=" SIZE_FORMAT " pop=" SIZE_FORMAT,
+                 get(pop_slow), get(pop)));
+  assert(get(steal) <= get(steal_attempt),
+         err_msg("steal=" SIZE_FORMAT " steal_attempt=" SIZE_FORMAT,
+                 get(steal), get(steal_attempt)));
+  assert(get(overflow) == 0 || get(push) != 0,
+         err_msg("overflow=" SIZE_FORMAT " push=" SIZE_FORMAT,
+                 get(overflow), get(push)));
+  assert(get(overflow_max_len) == 0 || get(overflow) != 0,
+         err_msg("overflow_max_len=" SIZE_FORMAT " overflow=" SIZE_FORMAT,
+                 get(overflow_max_len), get(overflow)));
+}
+#endif // ASSERT
 #endif // TASKQUEUE_STATS
 
 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Fri Sep 03 12:49:53 2010 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Thu Sep 09 14:27:59 2010 -0700
@@ -59,15 +59,21 @@
   inline void record_steal(bool success);
   inline void record_overflow(size_t new_length);
 
+  TaskQueueStats & operator +=(const TaskQueueStats & addend);
+
   inline size_t get(StatId id) const { return _stats[id]; }
   inline const size_t* get() const   { return _stats; }
 
   inline void reset();
 
+  // Print the specified line of the header (does not include a line separator).
   static void print_header(unsigned int line, outputStream* const stream = tty,
                            unsigned int width = 10);
+  // Print the statistics (does not include a line separator).
   void print(outputStream* const stream = tty, unsigned int width = 10) const;
 
+  DEBUG_ONLY(void verify() const;)
+
 private:
   size_t                    _stats[last_stat_id];
   static const char * const _names[last_stat_id];
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6894807/IsInstanceTest.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,44 @@
+/*
+ * @test
+ * @bug 6894807
+ * @summary No ClassCastException for HashAttributeSet constructors if run with -Xcomp
+ * @compile IsInstanceTest.java
+ * @run shell Test6894807.sh
+*/
+
+public class IsInstanceTest {
+
+    public static void main(String[] args) {
+        BaseInterface baseInterfaceImpl = new BaseInterfaceImpl();
+        for (int i = 0; i < 100000; i++) {
+            if (isInstanceOf(baseInterfaceImpl, ExtendedInterface.class)) {
+                System.out.println("Failed at index:" + i);
+                System.out.println("Arch: "+System.getProperty("os.arch", "")+
+                                   " OS: "+System.getProperty("os.name", "")+
+                                   " OSV: "+System.getProperty("os.version", "")+
+                                   " Cores: "+Runtime.getRuntime().availableProcessors()+
+                                   " JVM: "+System.getProperty("java.version", "")+" "+System.getProperty("sun.arch.data.model", ""));
+                break;
+            }
+        }
+        System.out.println("Done!");
+    }
+
+    public static boolean isInstanceOf(BaseInterface baseInterfaceImpl, Class... baseInterfaceClasses) {
+        for (Class baseInterfaceClass : baseInterfaceClasses) {
+            if (baseInterfaceClass.isInstance(baseInterfaceImpl)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private interface BaseInterface {
+    }
+
+    private interface ExtendedInterface extends BaseInterface {
+    }
+
+    private static class BaseInterfaceImpl implements BaseInterface {
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/6894807/Test6894807.sh	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+if [ "${TESTSRC}" = "" ]
+then TESTSRC=.
+fi
+
+if [ "${TESTJAVA}" = "" ]
+then
+  PARENT=`dirname \`which java\``
+  TESTJAVA=`dirname ${PARENT}`
+  echo "TESTJAVA not set, selecting " ${TESTJAVA}
+  echo "If this is incorrect, try setting the variable manually."
+fi
+
+if [ "${TESTCLASSES}" = "" ]
+then
+  echo "TESTCLASSES not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+
+BIT_FLAG=""
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  SunOS | Linux )
+    NULL=/dev/null
+    PS=":"
+    FS="/"
+    ## for solaris, linux it's HOME
+    FILE_LOCATION=$HOME
+    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
+    then
+        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT | grep -v '^#'`
+    fi
+    ;;
+  Windows_* )
+    NULL=NUL
+    PS=";"
+    FS="\\"
+    ;;
+  * )
+    echo "Unrecognized system!"
+    exit 1;
+    ;;
+esac
+
+JEMMYPATH=${CPAPPEND}
+CLASSPATH=.${PS}${TESTCLASSES}${PS}${JEMMYPATH} ; export CLASSPATH
+
+THIS_DIR=`pwd`
+
+${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -version
+
+${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -server IsInstanceTest > test.out 2>&1
+
+cat test.out
+
+grep "Failed at index" test.out
+
+if [ $? = 0 ]
+then
+    echo "Test Failed"
+    exit 1
+else
+    echo "Test Passed"
+    exit 0
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/gc/6581734/Test6581734.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Test6581734.java
+ * @bug 6581734
+ * @summary CMS Old Gen's collection usage is zero after GC which is incorrect
+ * @run main/othervm -Xmx512m -verbose:gc -XX:+UseConcMarkSweepGC Test6581734
+ *
+ */
+import java.util.*;
+import java.lang.management.*;
+
+// 6581734 states that memory pool usage via the mbean is wrong
+// for CMS (zero, even after a collection).
+//
+// 6580448 states that the collection count similarly is wrong
+// (stays at zero for CMS collections)
+// -- closed as dup of 6581734 as the same fix resolves both.
+
+
+public class Test6581734 {
+
+    private String poolName = "CMS";
+    private String collectorName = "ConcurrentMarkSweep";
+
+    public static void main(String [] args) {
+
+        Test6581734 t = null;
+        if (args.length==2) {
+            t = new Test6581734(args[0], args[1]);
+        } else {
+            System.out.println("Defaulting to monitor CMS pool and collector.");
+            t = new Test6581734();
+        }
+        t.run();
+    }
+
+    public Test6581734(String pool, String collector) {
+        poolName = pool;
+        collectorName = collector;
+    }
+
+    public Test6581734() {
+    }
+
+    public void run() {
+        // Use some memory, enough that we expect collections should
+        // have happened.
+        // Must run with options to ensure no stop the world full GC,
+        // but e.g. at least one CMS cycle.
+        allocationWork(300*1024*1024);
+        System.out.println("Done allocationWork");
+
+        // Verify some non-zero results are stored.
+        List<MemoryPoolMXBean> pools = ManagementFactory.getMemoryPoolMXBeans();
+        int poolsFound = 0;
+        int poolsWithStats = 0;
+        for (int i=0; i<pools.size(); i++) {
+            MemoryPoolMXBean pool = pools.get(i);
+            String name = pool.getName();
+            System.out.println("found pool: " + name);
+
+            if (name.contains(poolName)) {
+                long usage = pool.getCollectionUsage().getUsed();
+                System.out.println(name + ": usage after GC = " + usage);
+                poolsFound++;
+                if (usage > 0) {
+                    poolsWithStats++;
+                }
+            }
+        }
+        if (poolsFound == 0) {
+            throw new RuntimeException("No matching memory pools found: test with -XX:+UseConcMarkSweepGC");
+        }
+
+        List<GarbageCollectorMXBean> collectors = ManagementFactory.getGarbageCollectorMXBeans();
+        int collectorsFound = 0;
+        int collectorsWithTime= 0;
+        for (int i=0; i<collectors.size(); i++) {
+            GarbageCollectorMXBean collector = collectors.get(i);
+            String name = collector.getName();
+            System.out.println("found collector: " + name);
+            if (name.contains(collectorName)) {
+                collectorsFound++;
+                System.out.println(name + ": collection count = "
+                                   + collector.getCollectionCount());
+                System.out.println(name + ": collection time  = "
+                                   + collector.getCollectionTime());
+                if (collector.getCollectionCount() <= 0) {
+                    throw new RuntimeException("collection count <= 0");
+                }
+                if (collector.getCollectionTime() > 0) {
+                    collectorsWithTime++;
+                }
+            }
+        }
+        // verify:
+        if (poolsWithStats < poolsFound) {
+            throw new RuntimeException("pools found with zero stats");
+        }
+
+        if (collectorsWithTime<collectorsFound) {
+            throw new RuntimeException("collectors found with zero time";
+        }
+        System.out.println("Test passed.");
+    }
+
+    public void allocationWork(long target) {
+
+        long sizeAllocated = 0;
+        List list = new LinkedList();
+        long delay = 50;
+        long count = 0;
+
+        while (sizeAllocated < target) {
+            int size = 1024*1024;
+            byte [] alloc = new byte[size];
+            if (count % 2 == 0) {
+                list.add(alloc);
+                sizeAllocated+=size;
+                System.out.print(".");
+            }
+            try { Thread.sleep(delay); } catch (InterruptedException ie) { }
+            count++;
+        }
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/IFace.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,6 @@
+// A simple interface, to allow an unknown foreign call from a class
+// loaded with LOADER1 to a class loaded with LOADER2.
+public interface IFace {
+  public many_loader[] gen();
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/Loader2.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,52 @@
+import java.io.ByteArrayInputStream;
+import java.io.FileInputStream;
+public class Loader2 extends ClassLoader {
+  int _recur;
+  public void print( String msg ) {
+    for( int i=0; i<_recur; i++ )
+      System.out.print("  ");
+    System.out.println(">>Loader2>> "+msg);
+  }
+
+  protected Class findClass2(String name) throws ClassNotFoundException {
+    print("Fetching the implementation of "+name);
+    int old = _recur;
+    try {
+      FileInputStream fi = new FileInputStream(name+".impl2");
+      byte result[] = new byte[fi.available()];
+      fi.read(result);
+
+      print("DefineClass1 on "+name);
+      _recur++;
+      Class clazz = defineClass(name, result, 0, result.length);
+      _recur = old;
+      print("Returning newly loaded class.");
+      return clazz;
+    } catch (Exception e) {
+      _recur = old;
+      print("Not found on disk.");
+      // If we caught an exception, either the class was not found or
+      // it was unreadable by our process.
+      return null;
+      //throw new ClassNotFoundException(e.toString());
+    }
+  }
+
+  protected synchronized Class loadClass(String name, boolean resolve) throws ClassNotFoundException  {
+    // Attempt a disk load first
+    Class c = findClass2(name);
+    if( c == null ) {
+      // check if the class has already been loaded
+      print("Checking for prior loaded class "+name);
+      c = findLoadedClass(name);
+      print("Letting super-loader load "+name);
+      int old = _recur;
+      _recur++;
+      c = super.loadClass(name, false);
+      _recur=old;
+    }
+    if (resolve) { print("Resolving class "+name); resolveClass(c); }
+    print("Returning clazz "+c.getClassLoader()+":"+name);
+    return c;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/Test6626217.sh	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,101 @@
+#   
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
+#
+
+#
+# @test @(#)Test6626217.sh
+# @bug 6626217
+# @summary Loader-constraint table allows arrays instead of only the base-classes
+# @run shell Test6626217.sh
+#
+
+if [ "${TESTSRC}" = "" ]
+  then TESTSRC=.
+fi
+
+if [ "${TESTJAVA}" = "" ]
+then
+  PARENT=`dirname \`which java\``
+  TESTJAVA=`dirname ${PARENT}`
+  echo "TESTJAVA not set, selecting " ${TESTJAVA}
+  echo "If this is incorrect, try setting the variable manually."
+fi
+
+if [ "${TESTCLASSES}" = "" ]
+then
+  echo "TESTCLASSES not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+
+BIT_FLAG=""
+
+# set platform-dependent variables
+OS=`uname -s`
+case "$OS" in
+  SunOS | Linux )
+    NULL=/dev/null
+    PS=":"
+    FS="/"
+    RM=/bin/rm
+    CP=/bin/cp
+    MV=/bin/mv
+    ## for solaris, linux it's HOME
+    FILE_LOCATION=$HOME
+    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" ]
+    then
+        BIT_FLAG=`cat ${FILE_LOCATION}${FS}JDK64BIT`
+    fi
+    ;;
+  Windows_* )
+    NULL=NUL
+    PS=";"
+    FS="\\"
+    RM=rm
+    CP=cp
+    MV=mv
+    ;;
+  * )
+    echo "Unrecognized system!"
+    exit 1;
+    ;;
+esac
+
+JEMMYPATH=${CPAPPEND}
+CLASSPATH=.${PS}${TESTCLASSES}${PS}${JEMMYPATH} ; export CLASSPATH
+
+THIS_DIR=`pwd`
+
+JAVA=${TESTJAVA}${FS}bin${FS}java
+JAVAC=${TESTJAVA}${FS}bin${FS}javac
+
+${JAVA} ${BIT_FLAG} -version
+
+# Current directory is scratch directory, copy all the test source there
+# (for the subsequent moves to work).
+${CP} ${TESTSRC}${FS}*  ${THIS_DIR}
+
+# A Clean Compile: this line will probably fail within jtreg as have a clean dir:
+${RM} -f *.class *.impl many_loader.java
+
+# Compile all the usual suspects, including the default 'many_loader'
+${CP} many_loader1.java.foo many_loader.java
+${JAVAC} -source 1.4 -target 1.4 -Xlint *.java
+
+# Rename the class files, so the custom loader (and not the system loader) will find it
+${MV} from_loader2.class from_loader2.impl2
+
+# Compile the next version of 'many_loader'
+${MV} many_loader.class many_loader.impl1
+${CP} many_loader2.java.foo many_loader.java
+${JAVAC} -source 1.4 -target 1.4 -Xlint many_loader.java
+
+# Rename the class file, so the custom loader (and not the system loader) will find it
+${MV} many_loader.class many_loader.impl2
+${MV} many_loader.impl1 many_loader.class
+${RM} many_loader.java
+
+${JAVA} ${BIT_FLAG} -Xverify -Xint -cp . bug_21227 >test.out 2>&1
+grep "violates loader constraints" test.out
+exit $?
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/You_Have_Been_P0wned.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,11 @@
+
+// I can cast any old thing I want to this type object:
+public class You_Have_Been_P0wned {
+  // Make a bunch of int-fields so I can peek & poke it
+  int _a;
+  int _b;
+  int _c;
+  int _d;
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/bug_21227.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,61 @@
+
+import java.lang.reflect.*;
+import java.security.*;
+
+abstract public class bug_21227 {
+
+  // Jam anything you want in here, it will be cast to a You_Have_Been_P0wned
+  public static Object _p0wnee;
+
+  public static void main(String argv[]) throws ClassNotFoundException, InstantiationException, IllegalAccessException {
+    System.out.println("Warmup");
+
+    // Make a Class 'many_loader' under the default loader
+    bug_21227 bug = new many_loader();
+
+    // Some classes under a new Loader, LOADER2, including another version of 'many_loader'
+    ClassLoader LOADER2 = new Loader2();
+    Class clazz2 = LOADER2.loadClass("from_loader2");
+    IFace iface = (IFace)clazz2.newInstance();
+
+    // Set the victim, a String of length 6
+    String s = "victim";
+    _p0wnee = s;
+
+    // Go cast '_p0wnee' to type You_Have_Been_P0wned
+    many_loader[] x2 = bug.make(iface);
+
+    many_loader b = x2[0];
+
+    // Make it clear that the runtime type many_loader (what we get from the
+    // array X2) varies from the static type of many_loader.
+    Class cl1 = b.getClass();
+    ClassLoader ld1 = cl1.getClassLoader();
+    Class cl2 = many_loader.class;
+    ClassLoader ld2 = cl2.getClassLoader();
+    System.out.println("bug.make()  "+ld1+":"+cl1);
+    System.out.println("many_loader "+ld2+":"+cl2);
+
+    // Read the victims guts out
+    You_Have_Been_P0wned q = b._p0wnee;
+    System.out.println("q._a = 0x"+Integer.toHexString(q._a));
+    System.out.println("q._b = 0x"+Integer.toHexString(q._b));
+    System.out.println("q._c = 0x"+Integer.toHexString(q._c));
+    System.out.println("q._d = 0x"+Integer.toHexString(q._d));
+
+    System.out.println("I will now crash the VM:");
+    // On 32-bit HotSpot Java6 this sets the victim String length shorter, then crashes the VM
+    //q._c = 3;
+    q._a = -1;
+
+    System.out.println(s);
+
+  }
+
+  // I need to compile (hence call in a loop) a function which returns a value
+  // loaded from classloader other than the system one.  The point of this
+  // call is to give me an abstract 'hook' into a function loaded with a
+  // foreign loader.
+  public abstract many_loader[] make( IFace iface ); // abstract factory
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/from_loader2.java	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,9 @@
+// A simple class to extend an abstract class and get loaded with different
+// loaders.  This class is loaded via LOADER2.
+public class from_loader2 implements IFace {
+  public many_loader[] gen() {
+    many_loader[] x = new many_loader[1];
+    x[0] = new many_loader();
+    return x;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/many_loader1.java.foo	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,23 @@
+// A simple class to extend an abstract class and get loaded with different
+// loaders.  This class is loaded via LOADER1.  A similar named class will
+// be loaded via LOADER2.
+public class many_loader extends bug_21227 {
+  public You_Have_Been_P0wned _p0wnee;
+
+  // I need to compile (hence call in a loop) a function which returns a value
+  // loaded from classloader other than the system one.  The point of this
+  // call is to give me an abstract 'hook' into a function loaded with a
+  // foreign loader.
+
+  // The original 'make(boolean)' returns a bug_21227.  The VM will inject a
+  // synthetic method to up-cast the returned 'from_loader1' into a
+  // 'bug_21227'.
+  public many_loader[] make( IFace iface ) { 
+    // This function needs to return a value known to be loaded from LOADER2.
+    // Since I need to use a yet different loader, I need to make an unknown
+    // foreign call.  In this case I'll be using an interface to make the
+    // unknown call, with but a single implementor so the compiler can do the
+    // upcast statically.
+    return iface==null ? null : iface.gen(); 
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/runtime/6626217/many_loader2.java.foo	Thu Sep 09 14:27:59 2010 -0700
@@ -0,0 +1,19 @@
+// A simple class to extend an abstract class and get loaded with different
+// loaders.  This class is loaded via LOADER2.  A similar named class will
+// be loaded via LOADER1.
+public class many_loader extends bug_21227 {
+  final Object _ref_to_be_p0wned;
+
+  many_loader() {
+    _ref_to_be_p0wned = bug_21227._p0wnee;
+    System.out.println("Gonna hack this thing: " + _ref_to_be_p0wned.toString() );
+  }
+
+  // I need to compile (hence call in a loop) a function which returns a value
+  // loaded from classloader other than the system one.  The point of this
+  // call is to give me an abstract 'hook' into a function loaded with a
+  // foreign loader.
+  public many_loader[] make( IFace iface ) { 
+    throw new Error("do not call me");
+  }
+}