Merge
author jwilhelm
date Thu, 16 Oct 2014 15:59:39 +0000
changeset 27156 c35391bc4c57
parent 27148 a4b542d56e01 (current diff)
parent 27155 9059987242ca (diff)
child 27170 d9327404f387
files hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -131,7 +131,10 @@
   storage->set_mapping_changed_listener(&_listener);
 }
 
-void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
+void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
+  if (zero_filled) {
+    return;
+  }
   // We need to clear the bitmap on commit, removing any existing information.
   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
   _bm->clearRange(mr);
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -127,7 +127,7 @@
 
   void set_bitmap(CMBitMap* bm) { _bm = bm; }
 
-  virtual void on_commit(uint start_idx, size_t num_regions);
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
 };
 
 class CMBitMap : public CMBitMapRO {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -109,7 +109,7 @@
 
 class G1BlockOffsetSharedArrayMappingChangedListener : public G1MappingChangedListener {
  public:
-  virtual void on_commit(uint start_idx, size_t num_regions) {
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
     // Nothing to do. The BOT is hard-wired to be part of the HeapRegion, and we cannot
     // retrieve it here since this would cause firing of several asserts. The code
     // executed after commit of a region already needs to do some re-initialization of
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -33,7 +33,10 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
-void G1CardCountsMappingChangedListener::on_commit(uint start_idx, size_t num_regions) {
+void G1CardCountsMappingChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
+  if (zero_filled) {
+    return;
+  }
   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_idx), num_regions * HeapRegion::GrainWords);
   _counts->clear_range(mr);
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CardCounts.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -42,7 +42,7 @@
  public:
   void set_cardcounts(G1CardCounts* counts) { _counts = counts; }
 
-  virtual void on_commit(uint start_idx, size_t num_regions);
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
 };
 
 // Table to track the number of times a card has been refined. Once
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -389,7 +389,9 @@
   OtherRegionsTable::invalidate(start_idx, num_regions);
 }
 
-void G1RegionMappingChangedListener::on_commit(uint start_idx, size_t num_regions) {
+void G1RegionMappingChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
+  // The from card cache is not the memory that is actually committed. So we cannot
+  // take advantage of the zero_filled parameter.
   reset_from_card_cache(start_idx, num_regions);
 }
 
@@ -3610,7 +3612,7 @@
                                                                   cl.candidate_humongous());
   _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
 
-  if (_has_humongous_reclaim_candidates) {
+  if (_has_humongous_reclaim_candidates || G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
     clear_humongous_is_live_table();
   }
 }
@@ -4096,7 +4098,7 @@
     _hrm.verify_optional();
     verify_region_sets_optional();
 
-    TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
+    TASKQUEUE_STATS_ONLY(if (PrintTaskqueue) print_taskqueue_stats());
     TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
 
     print_heap_after_gc();
@@ -4666,7 +4668,7 @@
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
-      if (ParallelGCVerbose) {
+      if (PrintTerminationStats) {
         MutexLocker x(stats_lock());
         pss.print_termination_stats(worker_id);
       }
@@ -5760,7 +5762,7 @@
 
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       // The individual threads will set their evac-failure closures.
-      if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
+      if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
       // These tasks use ShareHeap::_process_strong_tasks
       assert(UseDynamicNumberOfGCThreads ||
              workers()->active_workers() == workers()->total_workers(),
@@ -6272,9 +6274,10 @@
         g1h->humongous_region_is_always_live(region_idx)) {
 
       if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
-        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+        gclog_or_tty->print_cr("Live humongous %d region %d size "SIZE_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
                                r->is_humongous(),
                                region_idx,
+                               obj->size()*HeapWordSize,
                                r->rem_set()->occupied(),
                                r->rem_set()->strong_code_roots_list_length(),
                                next_bitmap->isMarked(r->bottom()),
@@ -6291,8 +6294,9 @@
                       r->bottom()));
 
     if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
-      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+      gclog_or_tty->print_cr("Reclaim humongous region %d size "SIZE_FORMAT" start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
                              r->is_humongous(),
+                             obj->size()*HeapWordSize,
                              r->bottom(),
                              region_idx,
                              r->region_num(),
@@ -6331,7 +6335,8 @@
 void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
   assert_at_safepoint(true);
 
-  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC ||
+      (!_has_humongous_reclaim_candidates && !G1TraceReclaimDeadHumongousObjectsAtYoungGC)) {
     g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
     return;
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -172,7 +172,7 @@
  private:
   void reset_from_card_cache(uint start_idx, size_t num_regions);
  public:
-  virtual void on_commit(uint start_idx, size_t num_regions);
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
 };
 
 class G1CollectedHeap : public SharedHeap {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -69,7 +69,7 @@
   virtual void commit_regions(uintptr_t start_idx, size_t num_regions) {
     _storage.commit(start_idx * _pages_per_region, num_regions * _pages_per_region);
     _commit_map.set_range(start_idx, start_idx + num_regions);
-    fire_on_commit(start_idx, num_regions);
+    fire_on_commit(start_idx, num_regions, true);
   }
 
   virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions) {
@@ -115,12 +115,14 @@
       assert(!_commit_map.at(i), err_msg("Trying to commit storage at region "INTPTR_FORMAT" that is already committed", i));
       uintptr_t idx = region_idx_to_page_idx(i);
       uint old_refcount = _refcounts.get_by_index(idx);
+      bool zero_filled = false;
       if (old_refcount == 0) {
         _storage.commit(idx, 1);
+        zero_filled = true;
       }
       _refcounts.set_by_index(idx, old_refcount + 1);
       _commit_map.set_bit(i);
-      fire_on_commit(i, 1);
+      fire_on_commit(i, 1, zero_filled);
     }
   }
 
@@ -139,9 +141,9 @@
   }
 };
 
-void G1RegionToSpaceMapper::fire_on_commit(uint start_idx, size_t num_regions) {
+void G1RegionToSpaceMapper::fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
   if (_listener != NULL) {
-    _listener->on_commit(start_idx, num_regions);
+    _listener->on_commit(start_idx, num_regions, zero_filled);
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -33,7 +33,9 @@
  public:
   // Fired after commit of the memory, i.e. the memory this listener is registered
   // for can be accessed.
-  virtual void on_commit(uint start_idx, size_t num_regions) = 0;
+  // The zero_filled parameter indicates that the memory can be considered as filled
+  // with zero bytes when this method is called.
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled) = 0;
 };
 
 // Maps region based commit/uncommit requests to the underlying page sized virtual
@@ -51,7 +53,7 @@
 
   G1RegionToSpaceMapper(ReservedSpace rs, size_t commit_granularity, size_t region_granularity, MemoryType type);
 
-  void fire_on_commit(uint start_idx, size_t num_regions);
+  void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled);
  public:
   MemRegion reserved() { return _storage.reserved(); }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -125,7 +125,8 @@
 }
 #endif
 
-void G1SATBCardTableLoggingModRefBSChangedListener::on_commit(uint start_idx, size_t num_regions) {
+void G1SATBCardTableLoggingModRefBSChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
+  // The default value for a clean card on the card table is -1, so we cannot take advantage of the zero_filled parameter.
   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_idx), num_regions * HeapRegion::GrainWords);
   _card_table->clear(mr);
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -136,7 +136,7 @@
 
   void set_card_table(G1SATBCardTableLoggingModRefBS* card_table) { _card_table = card_table; }
 
-  virtual void on_commit(uint start_idx, size_t num_regions);
+  virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
 };
 
 // Adds card-table logging to the post-barrier.
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -1065,10 +1065,8 @@
     gch->print_heap_change(gch_prev_used);
   }
 
-  if (PrintGCDetails && ParallelGCVerbose) {
-    TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
-    TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
-  }
+  TASKQUEUE_STATS_ONLY(if (PrintTerminationStats) thread_state_set.print_termination_stats());
+  TASKQUEUE_STATS_ONLY(if (PrintTaskqueue) thread_state_set.print_taskqueue_stats());
 
   if (UseAdaptiveSizePolicy) {
     size_policy->minor_collection_end(gch->gc_cause());
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -91,7 +91,7 @@
 bool PSPromotionManager::post_scavenge(YoungGCTracer& gc_tracer) {
   bool promotion_failure_occurred = false;
 
-  TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats());
+  TASKQUEUE_STATS_ONLY(if (PrintTaskqueue) print_taskqueue_stats());
   for (uint i = 0; i < ParallelGCThreads + 1; i++) {
     PSPromotionManager* manager = manager_array(i);
     assert(manager->claimed_stack_depth()->is_empty(), "should be empty");
@@ -106,16 +106,9 @@
 
 #if TASKQUEUE_STATS
 void
-PSPromotionManager::print_taskqueue_stats(uint i) const {
-  tty->print("%3u ", i);
-  _claimed_stack_depth.stats.print();
-  tty->cr();
-}
-
-void
-PSPromotionManager::print_local_stats(uint i) const {
+PSPromotionManager::print_local_stats(outputStream* const out, uint i) const {
   #define FMT " " SIZE_FORMAT_W(10)
-  tty->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals,
+  out->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals,
                 _arrays_chunked, _array_chunks_processed);
   #undef FMT
 }
@@ -127,20 +120,24 @@
 };
 
 void
-PSPromotionManager::print_stats() {
-  tty->print_cr("== GC Tasks Stats, GC %3d",
+PSPromotionManager::print_taskqueue_stats(outputStream* const out) {
+  out->print_cr("== GC Tasks Stats, GC %3d",
                 Universe::heap()->total_collections());
 
-  tty->print("thr "); TaskQueueStats::print_header(1); tty->cr();
-  tty->print("--- "); TaskQueueStats::print_header(2); tty->cr();
+  TaskQueueStats totals;
+  out->print("thr "); TaskQueueStats::print_header(1, out); out->cr();
+  out->print("--- "); TaskQueueStats::print_header(2, out); out->cr();
   for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
-    manager_array(i)->print_taskqueue_stats(i);
+    TaskQueueStats& next = manager_array(i)->_claimed_stack_depth.stats;
+    out->print("%3d ", i); next.print(out); out->cr();
+    totals += next;
   }
+  out->print("tot "); totals.print(out); out->cr();
 
   const uint hlines = sizeof(pm_stats_hdr) / sizeof(pm_stats_hdr[0]);
-  for (uint i = 0; i < hlines; ++i) tty->print_cr("%s", pm_stats_hdr[i]);
+  for (uint i = 0; i < hlines; ++i) out->print_cr("%s", pm_stats_hdr[i]);
   for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
-    manager_array(i)->print_local_stats(i);
+    manager_array(i)->print_local_stats(out, i);
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -68,9 +68,8 @@
   size_t                              _arrays_chunked;
   size_t                              _array_chunks_processed;
 
-  void print_taskqueue_stats(uint i) const;
-  void print_local_stats(uint i) const;
-  static void print_stats();
+  void print_local_stats(outputStream* const out, uint i) const;
+  static void print_taskqueue_stats(outputStream* const out = gclog_or_tty);
 
   void reset_stats();
 #endif // TASKQUEUE_STATS
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Thu Oct 16 15:59:39 2014 +0000
@@ -3826,6 +3826,110 @@
   // Final sync IdealKit and GraphKit.
   final_sync(ideal);
 }
+/*
+ * Determine if the G1 pre-barrier can be removed. The pre-barrier is
+ * required by SATB to make sure all objects live at the start of the
+ * marking are kept alive; therefore all reference updates need to record
+ * the previous reference that is about to be overwritten.
+ *
+ * If the previous value is NULL there is no need to save the old value.
+ * References that are NULL are filtered during runtime by the barrier
+ * code to avoid unnecessary queuing.
+ *
+ * However, in the case of newly allocated objects it might be possible to
+ * prove at compile time that the reference about to be overwritten is
+ * NULL and avoid adding the barrier code completely.
+ *
+ * The compiler needs to determine that the object in which a field is about
+ * to be written is newly allocated, and that no prior store to the same field
+ * has happened since the allocation.
+ *
+ * Returns true if the pre-barrier can be removed.
+ */
+bool GraphKit::g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr,
+                                         BasicType bt, uint adr_idx) {
+  intptr_t offset = 0;
+  Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+  AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
+
+  if (offset == Type::OffsetBot) {
+    return false; // cannot unalias unless there are precise offsets
+  }
+
+  if (alloc == NULL) {
+    return false; // No allocation found
+  }
+
+  intptr_t size_in_bytes = type2aelembytes(bt);
+
+  Node* mem = memory(adr_idx); // start searching here...
+
+  for (int cnt = 0; cnt < 50; cnt++) {
+
+    if (mem->is_Store()) {
+
+      Node* st_adr = mem->in(MemNode::Address);
+      intptr_t st_offset = 0;
+      Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
+
+      if (st_base == NULL) {
+        break; // inscrutable pointer
+      }
+
+      // We have found a store with the same base and offset as ours, so break.
+      if (st_base == base && st_offset == offset) {
+        break;
+      }
+
+      if (st_offset != offset && st_offset != Type::OffsetBot) {
+        const int MAX_STORE = BytesPerLong;
+        if (st_offset >= offset + size_in_bytes ||
+            st_offset <= offset - MAX_STORE ||
+            st_offset <= offset - mem->as_Store()->memory_size()) {
+          // Success:  The offsets are provably independent.
+          // (You may ask, why not just test st_offset != offset and be done?
+          // The answer is that stores of different sizes can co-exist
+          // in the same sequence of RawMem effects.  We sometimes initialize
+          // a whole 'tile' of array elements with a single jint or jlong.)
+          mem = mem->in(MemNode::Memory);
+          continue; // advance through independent store memory
+        }
+      }
+
+      if (st_base != base
+          && MemNode::detect_ptr_independence(base, alloc, st_base,
+                                              AllocateNode::Ideal_allocation(st_base, phase),
+                                              phase)) {
+        // Success:  The bases are provably independent.
+        mem = mem->in(MemNode::Memory);
+        continue; // advance through independent store memory
+      }
+    } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+
+      InitializeNode* st_init = mem->in(0)->as_Initialize();
+      AllocateNode* st_alloc = st_init->allocation();
+
+      // Make sure that we are looking at the same allocation site.
+      // The alloc variable is guaranteed not to be null here due to the earlier check.
+      if (alloc == st_alloc) {
+        // Check that the initialization is storing NULL, i.e. that no previous store
+        // has been captured that directly writes a reference into the field.
+        Node* captured_store = st_init->find_captured_store(offset,
+                                                            type2aelembytes(T_OBJECT),
+                                                            phase);
+        if (captured_store == NULL || captured_store == st_init->zero_memory()) {
+          return true;
+        }
+      }
+    }
+
+    // Unless there is an explicit 'continue', we must bail out here,
+    // because 'mem' is an inscrutable memory state (e.g., a call).
+    break;
+  }
+
+  return false;
+}
 
 // G1 pre/post barriers
 void GraphKit::g1_write_barrier_pre(bool do_load,
@@ -3846,6 +3950,12 @@
     assert(adr != NULL, "where are loading from?");
     assert(pre_val == NULL, "loaded already?");
     assert(val_type != NULL, "need a type");
+
+    if (use_ReduceInitialCardMarks()
+        && g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) {
+      return;
+    }
+
   } else {
     // In this case both val_type and alias_idx are unused.
     assert(pre_val != NULL, "must be loaded already");
@@ -3927,6 +4037,65 @@
   final_sync(ideal);
 }
 
+/*
+ * G1, like any GC with a Young Generation, requires a way to keep track of
+ * references from the Old Generation to the Young Generation to make sure all
+ * live objects are found. G1 also needs to keep track of object references
+ * between different regions to enable evacuation of old regions, which is done
+ * as part of mixed collections. References are tracked in remembered sets,
+ * which are continuously updated as references are written, with the help of
+ * the post-barrier.
+ *
+ * To reduce the number of updates to the remembered set, the post-barrier
+ * filters out updates to fields in objects located in the Young Generation,
+ * updates within the same region as the referenced object, stores of NULL,
+ * and stores whose card is already marked as dirty by an earlier write.
+ *
+ * Under certain circumstances it is possible to avoid generating the
+ * post-barrier completely, if it can be proven at compile time that the
+ * object is newly allocated and that no safepoint exists between the
+ * allocation and the store.
+ *
+ * In the case of a slow allocation, the allocation code must handle the
+ * barrier as part of the allocation when the allocated object is not located
+ * in the nursery; this happens for humongous objects. This is similar to how
+ * CMS handles this case, see the comments for the methods
+ * CollectedHeap::new_store_pre_barrier and OptoRuntime::new_store_pre_barrier.
+ * A deferred card mark is required for these objects and is handled in the
+ * above mentioned methods.
+ *
+ * Returns true if the post-barrier can be removed.
+ */
+bool GraphKit::g1_can_remove_post_barrier(PhaseTransform* phase, Node* store,
+                                          Node* adr) {
+  intptr_t      offset = 0;
+  Node*         base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+  AllocateNode* alloc  = AllocateNode::Ideal_allocation(base, phase);
+
+  if (offset == Type::OffsetBot) {
+    return false; // cannot unalias unless there are precise offsets
+  }
+
+  if (alloc == NULL) {
+     return false; // No allocation found
+  }
+
+  // Start search from Store node
+  Node* mem = store->in(MemNode::Control);
+  if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+
+    InitializeNode* st_init = mem->in(0)->as_Initialize();
+    AllocateNode*  st_alloc = st_init->allocation();
+
+    // Make sure we are looking at the same allocation
+    if (alloc == st_alloc) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 //
 // Update the card table and add card address to the queue
 //
@@ -3979,6 +4148,20 @@
     return;
   }
 
+  if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) {
+    // We can skip marks on a freshly-allocated object in Eden.
+    // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps,
+    // upon a slow-path allocation, so as to make this card-mark
+    // elision safe.
+    return;
+  }
+
+  if (use_ReduceInitialCardMarks()
+      && g1_can_remove_post_barrier(&_gvn, oop_store, adr)) {
+    return;
+  }
+
   if (!use_precise) {
     // All card marks for a (non-array) instance are in one place:
     adr = obj;
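
For orientation, here is a minimal Java sketch (hypothetical class and field names, not part of this changeset) of the code shape the two new checks target: a reference store into a freshly allocated object, where the previous field value is provably NULL and no safepoint can occur between the allocation and the store. Whether the barriers are actually elided also depends on -XX:+ReduceInitialCardMarks and on how the surrounding code is compiled.

// Hypothetical example; names are illustrative only.
class BarrierElisionExample {
    static final class Node {
        Node next; // reference field written right after allocation
    }

    static Node cons(Node tail) {
        Node n = new Node(); // freshly allocated: all reference fields are known to be NULL
        n.next = tail;       // previous value of n.next is provably NULL -> pre-barrier removable;
                             // no safepoint between allocation and store -> post-barrier removable
        return n;
    }

    static void relink(Node a, Node b) {
        a.next = b;          // 'a' may be an old, already published object: both barriers stay
    }
}
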
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -771,6 +771,10 @@
                     Node* index, Node* index_adr,
                     Node* buffer, const TypeFunc* tf);
 
+  bool g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, BasicType bt, uint adr_idx);
+
+  bool g1_can_remove_post_barrier(PhaseTransform* phase, Node* store, Node* adr);
+
   public:
   // Helper function to round double arguments before a call
   void round_double_arguments(ciMethod* dest_method);
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Oct 16 15:59:39 2014 +0000
@@ -1533,8 +1533,11 @@
   product(bool, UseParNewGC, false,                                         \
           "Use parallel threads in the new generation")                     \
                                                                             \
-  product(bool, ParallelGCVerbose, false,                                   \
-          "Verbose output for parallel gc")                                 \
+  product(bool, PrintTaskqueue, false,                                      \
+          "Print taskqueue statistics for parallel collectors")             \
+                                                                            \
+  product(bool, PrintTerminationStats, false,                               \
+          "Print termination statistics for parallel collectors")           \
                                                                             \
   product(uintx, ParallelGCBufferWastePct, 10,                              \
           "Wasted fraction of parallel allocation buffer")                  \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/gc/g1/TestG1TraceReclaimDeadHumongousObjectsAtYoungGC.java	Thu Oct 16 15:59:39 2014 +0000
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestG1TraceReclaimDeadHumongousObjectsAtYoungGC
+ * @bug 8058801
+ * @summary Ensure that the output for G1TraceReclaimDeadHumongousObjectsAtYoungGC
+ * includes the expected messages.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import java.util.LinkedList;
+
+public class TestG1TraceReclaimDeadHumongousObjectsAtYoungGC {
+  public static void main(String[] args) throws Exception {
+    testGCLogs();
+    testHumongousObjectGCLogs();
+  }
+
+  private static void testGCLogs() throws Exception {
+
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xms128M",
+                                               "-Xmx128M",
+                                               "-Xmn16M",
+                                               "-XX:G1HeapRegionSize=1M",
+                                               "-XX:+PrintGC",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               "-XX:+G1TraceReclaimDeadHumongousObjectsAtYoungGC",
+                                               GCTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+    // As G1ReclaimDeadHumongousObjectsAtYoungGC is enabled by default, the logs below should be displayed.
+    // GCTest does not allocate humongous objects, so the values should be zero.
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total: 0]");
+    output.shouldContain("[Humongous Candidate: 0]");
+    output.shouldContain("[Humongous Reclaimed: 0]");
+
+    output.shouldHaveExitValue(0);
+  }
+
+  private static void testHumongousObjectGCLogs() throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xms128M",
+                                               "-Xmx128M",
+                                               "-Xmn16M",
+                                               "-XX:G1HeapRegionSize=1M",
+                                               "-XX:+PrintGC",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               "-XX:+G1TraceReclaimDeadHumongousObjectsAtYoungGC",
+                                               GCWithHumongousObjectTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+    // As G1ReclaimDeadHumongousObjectsAtYoungGC is enabled by default, the logs below should be displayed.
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total");
+    output.shouldContain("[Humongous Candidate");
+    output.shouldContain("[Humongous Reclaimed");
+
+    // As G1TraceReclaimDeadHumongousObjectsAtYoungGC is enabled and GCWithHumongousObjectTest allocates humongous objects,
+    // these logs should be displayed.
+    output.shouldContain("Live humongous");
+    output.shouldContain("Reclaim humongous region");
+    output.shouldHaveExitValue(0);
+  }
+
+  static class GCTest {
+    private static byte[] garbage;
+
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      System.out.println("Done");
+    }
+  }
+
+  static class GCWithHumongousObjectTest {
+
+    public static final int M = 1024*1024;
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+    // A large object referenced by a static.
+    static int[] filler = new int[10 * M];
+
+    public static void genGarbage() {
+      for (int i = 0; i < 32*1024; i++) {
+        garbageList.add(new int[100]);
+      }
+      garbageList.clear();
+    }
+
+    public static void main(String[] args) {
+
+      int[] large = new int[M];
+      Object ref = large;
+
+      System.out.println("Creating garbage");
+      for (int i = 0; i < 100; i++) {
+        // A large object that will be reclaimed eagerly.
+        large = new int[6*M];
+        genGarbage();
+        // Make sure that the compiler cannot completely remove
+        // the allocation of the large object until here.
+        System.out.println(large);
+      }
+
+      // Keep the reference to the first object alive.
+      System.out.println(ref);
+      System.out.println("Done");
+    }
+  }
+}
--- a/hotspot/test/gc/g1/TestSummarizeRSetStatsThreads.java	Wed Oct 15 14:00:41 2014 +0200
+++ b/hotspot/test/gc/g1/TestSummarizeRSetStatsThreads.java	Thu Oct 16 15:59:39 2014 +0000
@@ -53,8 +53,8 @@
 
     // a zero in refinement thread numbers indicates that the value in ParallelGCThreads should be used.
     // Additionally use at least one thread.
-    int expectedNumRefinementThreads = refinementThreads == 0 ? workerThreads : refinementThreads;
-    expectedNumRefinementThreads = Math.max(1, expectedNumRefinementThreads);
+    int expectedNumRefinementThreads = refinementThreads;
+
     // create the pattern made up of n copies of a floating point number pattern
     String numberPattern = String.format("%0" + expectedNumRefinementThreads + "d", 0)
       .replace("0", "\\s+\\d+\\.\\d+");
@@ -73,9 +73,9 @@
       return;
     }
     // different valid combinations of number of refinement and gc worker threads
-    runTest(0, 0);
-    runTest(0, 5);
-    runTest(5, 0);
+    runTest(1, 1);
+    runTest(1, 5);
+    runTest(5, 1);
     runTest(10, 10);
     runTest(1, 2);
     runTest(4, 3);