Merge
authorkamg
Fri, 06 May 2011 11:25:16 -0400
changeset 9433 474a6834bc11
parent 9432 004656c45380 (current diff)
parent 9426 b1ae0063e63a (diff)
child 9450 80b5a03ca4c1
Merge
hotspot/src/share/vm/runtime/arguments.cpp
hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/os/linux/vm/globals_linux.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/os/linux/vm/globals_linux.hpp	Fri May 06 11:25:16 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
 // Defines Linux-specific default values. The flags are available on all
 // platforms, but they may have different default values on other platforms.
 //
-define_pd_global(bool, UseLargePages, false);
+define_pd_global(bool, UseLargePages, true);
 define_pd_global(bool, UseLargePagesIndividualAllocation, false);
 define_pd_global(bool, UseOSErrorReporting, false);
 define_pd_global(bool, UseThreadPriorities, true) ;
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Fri May 06 11:25:16 2011 -0400
@@ -2914,16 +2914,21 @@
 
 static size_t _large_page_size = 0;
 
-bool os::large_page_init() {
+void os::large_page_init() {
   if (!UseLargePages) {
     UseHugeTLBFS = false;
     UseSHM = false;
-    return false;
+    return;
   }
 
   if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
-    // Our user has not expressed a preference, so we'll try both.
-    UseHugeTLBFS = UseSHM = true;
+    // If UseLargePages is specified on the command line try both methods,
+    // if it's default, then try only HugeTLBFS.
+    if (FLAG_IS_DEFAULT(UseLargePages)) {
+      UseHugeTLBFS = true;
+    } else {
+      UseHugeTLBFS = UseSHM = true;
+    }
   }
 
   if (LargePageSizeInBytes) {
@@ -2978,7 +2983,6 @@
     _page_sizes[1] = default_page_size;
     _page_sizes[2] = 0;
   }
-
   UseHugeTLBFS = UseHugeTLBFS &&
                  Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
 
@@ -2988,12 +2992,6 @@
   UseLargePages = UseHugeTLBFS || UseSHM;
 
   set_coredump_filter();
-
-  // Large page support is available on 2.6 or newer kernel, some vendors
-  // (e.g. Redhat) have backported it to their 2.4 based distributions.
-  // We optimistically assume the support is available. If later it turns out
-  // not true, VM will automatically switch to use regular page size.
-  return true;
 }
 
 #ifndef SHM_HUGETLB
@@ -4118,7 +4116,7 @@
 #endif
   }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // initialize suspend/resume support - must do this before signal_sets_init()
   if (SR_initialize() != 0) {
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp	Fri May 06 11:25:16 2011 -0400
@@ -3336,11 +3336,11 @@
   return true;
 }
 
-bool os::large_page_init() {
+void os::large_page_init() {
   if (!UseLargePages) {
     UseISM = false;
     UseMPSS = false;
-    return false;
+    return;
   }
 
   // print a warning if any large page related flag is specified on command line
@@ -3361,7 +3361,6 @@
             Solaris::mpss_sanity_check(warn_on_failure, &_large_page_size);
 
   UseLargePages = UseISM || UseMPSS;
-  return UseLargePages;
 }
 
 bool os::Solaris::set_mpss_range(caddr_t start, size_t bytes, size_t align) {
@@ -4992,7 +4991,7 @@
 #endif
 }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // Check minimum allowable stack size for thread creation and to initialize
   // the java system classes, including StackOverflowError - depends on page
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Fri May 06 11:25:16 2011 -0400
@@ -2762,8 +2762,8 @@
   _hToken = NULL;
 }
 
-bool os::large_page_init() {
-  if (!UseLargePages) return false;
+void os::large_page_init() {
+  if (!UseLargePages) return;
 
   // print a warning if any large page related flag is specified on command line
   bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) ||
@@ -2808,7 +2808,7 @@
   }
 
   cleanup_after_large_page_init();
-  return success;
+  UseLargePages = success;
 }
 
 // On win32, one cannot release just a part of reserved memory, it's an
@@ -3561,7 +3561,7 @@
 #endif
 }
 
-  FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
+  os::large_page_init();
 
   // Setup Windows Exceptions
 
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri May 06 11:25:16 2011 -0400
@@ -826,6 +826,14 @@
 void ConcurrentMark::checkpointRootsInitialPost() {
   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 
+  // If we force an overflow during remark, the remark operation will
+  // actually abort and we'll restart concurrent marking. If we always
+  // force an oveflow during remark we'll never actually complete the
+  // marking phase. So, we initilize this here, at the start of the
+  // cycle, so that at the remaining overflow number will decrease at
+  // every remark and we'll eventually not need to cause one.
+  force_overflow_stw()->init();
+
   // For each region note start of marking.
   NoteStartOfMarkHRClosure startcl;
   g1h->heap_region_iterate(&startcl);
@@ -893,27 +901,37 @@
 }
 
 /*
-   Notice that in the next two methods, we actually leave the STS
-   during the barrier sync and join it immediately afterwards. If we
-   do not do this, this then the following deadlock can occur: one
-   thread could be in the barrier sync code, waiting for the other
-   thread to also sync up, whereas another one could be trying to
-   yield, while also waiting for the other threads to sync up too.
-
-   Because the thread that does the sync barrier has left the STS, it
-   is possible to be suspended for a Full GC or an evacuation pause
-   could occur. This is actually safe, since the entering the sync
-   barrier is one of the last things do_marking_step() does, and it
-   doesn't manipulate any data structures afterwards.
-*/
+ * Notice that in the next two methods, we actually leave the STS
+ * during the barrier sync and join it immediately afterwards. If we
+ * do not do this, the following deadlock can occur: one thread could
+ * be in the barrier sync code, waiting for the other thread to also
+ * sync up, whereas another one could be trying to yield, while also
+ * waiting for the other threads to sync up too.
+ *
+ * Note, however, that this code is also used during remark and in
+ * this case we should not attempt to leave / enter the STS, otherwise
+ * we'll either hit an asseert (debug / fastdebug) or deadlock
+ * (product). So we should only leave / enter the STS if we are
+ * operating concurrently.
+ *
+ * Because the thread that does the sync barrier has left the STS, it
+ * is possible to be suspended for a Full GC or an evacuation pause
+ * could occur. This is actually safe, since the entering the sync
+ * barrier is one of the last things do_marking_step() does, and it
+ * doesn't manipulate any data structures afterwards.
+ */
 
 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 
-  ConcurrentGCThread::stsLeave();
+  if (concurrent()) {
+    ConcurrentGCThread::stsLeave();
+  }
   _first_overflow_barrier_sync.enter();
-  ConcurrentGCThread::stsJoin();
+  if (concurrent()) {
+    ConcurrentGCThread::stsJoin();
+  }
   // at this point everyone should have synced up and not be doing any
   // more work
 
@@ -923,7 +941,12 @@
   // let task 0 do this
   if (task_num == 0) {
     // task 0 is responsible for clearing the global data structures
-    clear_marking_state();
+    // We should be here because of an overflow. During STW we should
+    // not clear the overflow flag since we rely on it being true when
+    // we exit this method to abort the pause and restart concurent
+    // marking.
+    clear_marking_state(concurrent() /* clear_overflow */);
+    force_overflow()->update();
 
     if (PrintGC) {
       gclog_or_tty->date_stamp(PrintGCDateStamps);
@@ -940,15 +963,45 @@
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 
-  ConcurrentGCThread::stsLeave();
+  if (concurrent()) {
+    ConcurrentGCThread::stsLeave();
+  }
   _second_overflow_barrier_sync.enter();
-  ConcurrentGCThread::stsJoin();
+  if (concurrent()) {
+    ConcurrentGCThread::stsJoin();
+  }
   // at this point everything should be re-initialised and ready to go
 
   if (verbose_low())
     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 }
 
+#ifndef PRODUCT
+void ForceOverflowSettings::init() {
+  _num_remaining = G1ConcMarkForceOverflow;
+  _force = false;
+  update();
+}
+
+void ForceOverflowSettings::update() {
+  if (_num_remaining > 0) {
+    _num_remaining -= 1;
+    _force = true;
+  } else {
+    _force = false;
+  }
+}
+
+bool ForceOverflowSettings::should_force() {
+  if (_force) {
+    _force = false;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif // !PRODUCT
+
 void ConcurrentMark::grayRoot(oop p) {
   HeapWord* addr = (HeapWord*) p;
   // We can't really check against _heap_start and _heap_end, since it
@@ -1117,6 +1170,7 @@
   _restart_for_overflow = false;
 
   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  force_overflow_conc()->init();
   set_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
@@ -1845,7 +1899,7 @@
   while (!_cleanup_list.is_empty()) {
     HeapRegion* hr = _cleanup_list.remove_head();
     assert(hr != NULL, "the list was not empty");
-    hr->rem_set()->clear();
+    hr->par_clear();
     tmp_free_list.add_as_tail(hr);
 
     // Instead of adding one region at a time to the secondary_free_list,
@@ -2703,12 +2757,16 @@
 
 }
 
-void ConcurrentMark::clear_marking_state() {
+void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
   _markStack.clear_overflow();
   _regionStack.setEmpty();
   _regionStack.clear_overflow();
-  clear_has_overflown();
+  if (clear_overflow) {
+    clear_has_overflown();
+  } else {
+    assert(has_overflown(), "pre-condition");
+  }
   _finger = _heap_start;
 
   for (int i = 0; i < (int)_max_task_num; ++i) {
@@ -4279,6 +4337,15 @@
     }
   }
 
+  // If we are about to wrap up and go into termination, check if we
+  // should raise the overflow flag.
+  if (do_termination && !has_aborted()) {
+    if (_cm->force_overflow()->should_force()) {
+      _cm->set_has_overflown();
+      regular_clock_call();
+    }
+  }
+
   // We still haven't aborted. Now, let's try to get into the
   // termination protocol.
   if (do_termination && !has_aborted()) {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri May 06 11:25:16 2011 -0400
@@ -316,6 +316,19 @@
   void setEmpty()   { _index = 0; clear_overflow(); }
 };
 
+class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
+private:
+#ifndef PRODUCT
+  uintx _num_remaining;
+  bool _force;
+#endif // !defined(PRODUCT)
+
+public:
+  void init() PRODUCT_RETURN;
+  void update() PRODUCT_RETURN;
+  bool should_force() PRODUCT_RETURN_( return false; );
+};
+
 // this will enable a variety of different statistics per GC task
 #define _MARKING_STATS_       0
 // this will enable the higher verbose levels
@@ -462,6 +475,9 @@
 
   WorkGang* _parallel_workers;
 
+  ForceOverflowSettings _force_overflow_conc;
+  ForceOverflowSettings _force_overflow_stw;
+
   void weakRefsWork(bool clear_all_soft_refs);
 
   void swapMarkBitMaps();
@@ -470,7 +486,7 @@
   // task local ones; should be called during initial mark.
   void reset();
   // It resets all the marking data structures.
-  void clear_marking_state();
+  void clear_marking_state(bool clear_overflow = true);
 
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
@@ -547,6 +563,22 @@
   void enter_first_sync_barrier(int task_num);
   void enter_second_sync_barrier(int task_num);
 
+  ForceOverflowSettings* force_overflow_conc() {
+    return &_force_overflow_conc;
+  }
+
+  ForceOverflowSettings* force_overflow_stw() {
+    return &_force_overflow_stw;
+  }
+
+  ForceOverflowSettings* force_overflow() {
+    if (concurrent()) {
+      return force_overflow_conc();
+    } else {
+      return force_overflow_stw();
+    }
+  }
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri May 06 11:25:16 2011 -0400
@@ -3975,6 +3975,9 @@
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
                                                oop old) {
+  assert(obj_in_cs(old),
+         err_msg("obj: "PTR_FORMAT" should still be in the CSet",
+                 (HeapWord*) old));
   markOop m = old->mark();
   oop forward_ptr = old->forward_to_atomic(old);
   if (forward_ptr == NULL) {
@@ -3997,7 +4000,13 @@
     }
     return old;
   } else {
-    // Someone else had a place to copy it.
+    // Forward-to-self failed. Either someone else managed to allocate
+    // space for this object (old != forward_ptr) or they beat us in
+    // self-forwarding it (old == forward_ptr).
+    assert(old == forward_ptr || !obj_in_cs(forward_ptr),
+           err_msg("obj: "PTR_FORMAT" forwarded to: "PTR_FORMAT" "
+                   "should not be in the CSet",
+                   (HeapWord*) old, (HeapWord*) forward_ptr));
     return forward_ptr;
   }
 }
@@ -4308,11 +4317,10 @@
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop(heap_oop);
-    assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(obj)),
-           "shouldn't still be in the CSet if evacuation didn't fail.");
     HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr))
+    if (_g1->is_in_g1_reserved(addr)) {
       _cm->grayRoot(oop(addr));
+    }
   }
 }
 
@@ -4961,36 +4969,45 @@
 
 #ifndef PRODUCT
 class G1VerifyCardTableCleanup: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CardTableModRefBS* _ct_bs;
 public:
-  G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs)
-    : _ct_bs(ct_bs) { }
+  G1VerifyCardTableCleanup(G1CollectedHeap* g1h, CardTableModRefBS* ct_bs)
+    : _g1h(g1h), _ct_bs(ct_bs) { }
   virtual bool doHeapRegion(HeapRegion* r) {
-    MemRegion mr(r->bottom(), r->end());
     if (r->is_survivor()) {
-      _ct_bs->verify_dirty_region(mr);
+      _g1h->verify_dirty_region(r);
     } else {
-      _ct_bs->verify_clean_region(mr);
+      _g1h->verify_not_dirty_region(r);
     }
     return false;
   }
 };
 
+void G1CollectedHeap::verify_not_dirty_region(HeapRegion* hr) {
+  // All of the region should be clean.
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set();
+  MemRegion mr(hr->bottom(), hr->end());
+  ct_bs->verify_not_dirty_region(mr);
+}
+
+void G1CollectedHeap::verify_dirty_region(HeapRegion* hr) {
+  // We cannot guarantee that [bottom(),end()] is dirty.  Threads
+  // dirty allocated blocks as they allocate them. The thread that
+  // retires each region and replaces it with a new one will do a
+  // maximal allocation to fill in [pre_dummy_top(),end()] but will
+  // not dirty that area (one less thing to have to do while holding
+  // a lock). So we can only verify that [bottom(),pre_dummy_top()]
+  // is dirty.
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
+  MemRegion mr(hr->bottom(), hr->pre_dummy_top());
+  ct_bs->verify_dirty_region(mr);
+}
+
 void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
-  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
   for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) {
-    // We cannot guarantee that [bottom(),end()] is dirty.  Threads
-    // dirty allocated blocks as they allocate them. The thread that
-    // retires each region and replaces it with a new one will do a
-    // maximal allocation to fill in [pre_dummy_top(),end()] but will
-    // not dirty that area (one less thing to have to do while holding
-    // a lock). So we can only verify that [bottom(),pre_dummy_top()]
-    // is dirty. Also note that verify_dirty_region() requires
-    // mr.start() and mr.end() to be card aligned and pre_dummy_top()
-    // is not guaranteed to be.
-    MemRegion mr(hr->bottom(),
-                 ct_bs->align_to_card_boundary(hr->pre_dummy_top()));
-    ct_bs->verify_dirty_region(mr);
+    verify_dirty_region(hr);
   }
 }
 
@@ -5033,7 +5050,7 @@
   g1_policy()->record_clear_ct_time( elapsed * 1000.0);
 #ifndef PRODUCT
   if (G1VerifyCTCleanup || VerifyAfterGC) {
-    G1VerifyCardTableCleanup cleanup_verifier(ct_bs);
+    G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
     heap_region_iterate(&cleanup_verifier);
   }
 #endif
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri May 06 11:25:16 2011 -0400
@@ -970,6 +970,8 @@
   // The number of regions available for "regular" expansion.
   size_t expansion_regions() { return _expansion_regions; }
 
+  void verify_not_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
+  void verify_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
   void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
   void verify_dirty_young_regions() PRODUCT_RETURN;
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri May 06 11:25:16 2011 -0400
@@ -157,7 +157,6 @@
   void set_try_claimed() { _try_claimed = true; }
 
   void scanCard(size_t index, HeapRegion *r) {
-    _cards_done++;
     DirtyCardToOopClosure* cl =
       r->new_dcto_closure(_oc,
                          CardTableModRefBS::Precise,
@@ -168,17 +167,14 @@
     HeapWord* card_start = _bot_shared->address_for_index(index);
     HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
     Space *sp = SharedHeap::heap()->space_containing(card_start);
-    MemRegion sm_region;
-    if (ParallelGCThreads > 0) {
-      // first find the used area
-      sm_region = sp->used_region_at_save_marks();
-    } else {
-      // The closure is not idempotent.  We shouldn't look at objects
-      // allocated during the GC.
-      sm_region = sp->used_region_at_save_marks();
-    }
+    MemRegion sm_region = sp->used_region_at_save_marks();
     MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
-    if (!mr.is_empty()) {
+    if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
+      // We make the card as "claimed" lazily (so races are possible
+      // but they're benign), which reduces the number of duplicate
+      // scans (the rsets of the regions in the cset can intersect).
+      _ct_bs->set_card_claimed(index);
+      _cards_done++;
       cl->do_MemRegion(mr);
     }
   }
@@ -199,6 +195,9 @@
     HeapRegionRemSet* hrrs = r->rem_set();
     if (hrrs->iter_is_complete()) return false; // All done.
     if (!_try_claimed && !hrrs->claim_iter()) return false;
+    // If we ever free the collection set concurrently, we should also
+    // clear the card table concurrently therefore we won't need to
+    // add regions of the collection set to the dirty cards region.
     _g1h->push_dirty_cards_region(r);
     // If we didn't return above, then
     //   _try_claimed || r->claim_iter()
@@ -230,15 +229,10 @@
         _g1h->push_dirty_cards_region(card_region);
       }
 
-       // If the card is dirty, then we will scan it during updateRS.
-      if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
-        // We make the card as "claimed" lazily (so races are possible but they're benign),
-        // which reduces the number of duplicate scans (the rsets of the regions in the cset
-        // can intersect).
-        if (!_ct_bs->is_card_claimed(card_index)) {
-          _ct_bs->set_card_claimed(card_index);
-          scanCard(card_index, card_region);
-        }
+      // If the card is dirty, then we will scan it during updateRS.
+      if (!card_region->in_collection_set() &&
+          !_ct_bs->is_card_dirty(card_index)) {
+        scanCard(card_index, card_region);
       }
     }
     if (!_try_claimed) {
@@ -246,8 +240,6 @@
     }
     return false;
   }
-  // Set all cards back to clean.
-  void cleanup() {_g1h->cleanUpCardTable();}
   size_t cards_done() { return _cards_done;}
   size_t cards_looked_up() { return _cards;}
 };
@@ -566,8 +558,9 @@
     update_rs_cl.set_region(r);
     HeapWord* stop_point =
       r->oops_on_card_seq_iterate_careful(scanRegion,
-                                        &filter_then_update_rs_cset_oop_cl,
-                                        false /* filter_young */);
+                                          &filter_then_update_rs_cset_oop_cl,
+                                          false /* filter_young */,
+                                          NULL  /* card_ptr */);
 
     // Since this is performed in the event of an evacuation failure, we
     // we shouldn't see a non-null stop point
@@ -735,12 +728,6 @@
                                 (OopClosure*)&mux :
                                 (OopClosure*)&update_rs_oop_cl));
 
-  // Undirty the card.
-  *card_ptr = CardTableModRefBS::clean_card_val();
-  // We must complete this write before we do any of the reads below.
-  OrderAccess::storeload();
-  // And process it, being careful of unallocated portions of TLAB's.
-
   // The region for the current card may be a young region. The
   // current card may have been a card that was evicted from the
   // card cache. When the card was inserted into the cache, we had
@@ -749,7 +736,7 @@
   // and tagged as young.
   //
   // We wish to filter out cards for such a region but the current
-  // thread, if we're running conucrrently, may "see" the young type
+  // thread, if we're running concurrently, may "see" the young type
   // change at any time (so an earlier "is_young" check may pass or
   // fail arbitrarily). We tell the iteration code to perform this
   // filtering when it has been determined that there has been an actual
@@ -759,7 +746,8 @@
   HeapWord* stop_point =
     r->oops_on_card_seq_iterate_careful(dirtyRegion,
                                         &filter_then_update_rs_oop_cl,
-                                        filter_young);
+                                        filter_young,
+                                        card_ptr);
 
   // If stop_point is non-null, then we encountered an unallocated region
   // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri May 06 11:25:16 2011 -0400
@@ -311,7 +311,11 @@
                                                                             \
   develop(bool, G1ExitOnExpansionFailure, false,                            \
           "Raise a fatal VM exit out of memory failure in the event "       \
-          " that heap expansion fails due to running out of swap.")
+          " that heap expansion fails due to running out of swap.")         \
+                                                                            \
+  develop(uintx, G1ConcMarkForceOverflow, 0,                                \
+          "The number of times we'll force an overflow during "             \
+          "concurrent marking")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri May 06 11:25:16 2011 -0400
@@ -376,6 +376,17 @@
   if (clear_space) clear(SpaceDecorator::Mangle);
 }
 
+void HeapRegion::par_clear() {
+  assert(used() == 0, "the region should have been already cleared");
+  assert(capacity() == (size_t) HeapRegion::GrainBytes,
+         "should be back to normal");
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->clear();
+  CardTableModRefBS* ct_bs =
+                   (CardTableModRefBS*)G1CollectedHeap::heap()->barrier_set();
+  ct_bs->clear(MemRegion(bottom(), end()));
+}
+
 // <PREDICTION>
 void HeapRegion::calc_gc_efficiency() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -600,7 +611,15 @@
 HeapRegion::
 oops_on_card_seq_iterate_careful(MemRegion mr,
                                  FilterOutOfRegionClosure* cl,
-                                 bool filter_young) {
+                                 bool filter_young,
+                                 jbyte* card_ptr) {
+  // Currently, we should only have to clean the card if filter_young
+  // is true and vice versa.
+  if (filter_young) {
+    assert(card_ptr != NULL, "pre-condition");
+  } else {
+    assert(card_ptr == NULL, "pre-condition");
+  }
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
   // If we're within a stop-world GC, then we might look at a card in a
@@ -626,6 +645,15 @@
 
   assert(!is_young(), "check value of filter_young");
 
+  // We can only clean the card here, after we make the decision that
+  // the card is not young. And we only clean the card if we have been
+  // asked to (i.e., card_ptr != NULL).
+  if (card_ptr != NULL) {
+    *card_ptr = CardTableModRefBS::clean_card_val();
+    // We must complete this write before we do any of the reads below.
+    OrderAccess::storeload();
+  }
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
   HeapWord* cur = block_start(mr.start());
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri May 06 11:25:16 2011 -0400
@@ -584,6 +584,7 @@
 
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space);
+  void par_clear();
 
   void initialize(MemRegion mr, bool clear_space, bool mangle_space);
 
@@ -802,12 +803,16 @@
   HeapWord*
   object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
 
-  // In this version - if filter_young is true and the region
-  // is a young region then we skip the iteration.
+  // filter_young: if true and the region is a young region then we
+  // skip the iteration.
+  // card_ptr: if not NULL, and we decide that the card is not young
+  // and we iterate over it, we'll clean the card before we start the
+  // iteration.
   HeapWord*
   oops_on_card_seq_iterate_careful(MemRegion mr,
                                    FilterOutOfRegionClosure* cl,
-                                   bool filter_young);
+                                   bool filter_young,
+                                   jbyte* card_ptr);
 
   // A version of block start that is guaranteed to find *some* block
   // boundary at or before "p", but does not object iteration, and may
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Fri May 06 11:25:16 2011 -0400
@@ -224,6 +224,12 @@
   const size_t alignment = virtual_space()->alignment();
   size_t aligned_bytes  = align_size_up(bytes, alignment);
   size_t aligned_expand_bytes = align_size_up(MinHeapDeltaBytes, alignment);
+
+  if (UseNUMA) {
+    // With NUMA we use round-robin page allocation for the old gen. Expand by at least
+    // providing a page per lgroup. Alignment is larger or equal to the page size.
+    aligned_expand_bytes = MAX2(aligned_expand_bytes, alignment * os::numa_get_groups_num());
+  }
   if (aligned_bytes == 0){
     // The alignment caused the number of bytes to wrap.  An expand_by(0) will
     // return true with the implication that and expansion was done when it
--- a/hotspot/src/share/vm/interpreter/linkResolver.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/interpreter/linkResolver.cpp	Fri May 06 11:25:16 2011 -0400
@@ -327,6 +327,7 @@
 
   // 1. check if klass is not interface
   if (resolved_klass->is_interface()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected", Klass::cast(resolved_klass())->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -413,6 +414,7 @@
 
  // check if klass is interface
   if (!resolved_klass->is_interface()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", Klass::cast(resolved_klass())->external_name());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@@ -534,6 +536,7 @@
 
   // check for errors
   if (is_static != fd.is_static()) {
+    ResourceMark rm(THREAD);
     char msg[200];
     jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? "static" : "non-static", Klass::cast(resolved_klass())->external_name(), fd.name()->as_C_string());
     THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), msg);
@@ -631,6 +634,7 @@
 
   // check if static
   if (!resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expected static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                       resolved_method->name(),
@@ -671,6 +675,7 @@
 
   // check if not static
   if (resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf),
                  "Expecting non-static method %s",
@@ -717,6 +722,7 @@
 
   // check if not static
   if (sel_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                                                                              resolved_method->name(),
@@ -757,6 +763,7 @@
 
   // check if not static
   if (resolved_method->is_static()) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
                                                                                                              resolved_method->name(),
@@ -873,6 +880,7 @@
 
   // check if receiver klass implements the resolved interface
   if (!recv_klass->is_subtype_of(resolved_klass())) {
+    ResourceMark rm(THREAD);
     char buf[200];
     jio_snprintf(buf, sizeof(buf), "Class %s does not implement the requested interface %s",
                  (Klass::cast(recv_klass()))->external_name(),
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp	Fri May 06 11:25:16 2011 -0400
@@ -652,43 +652,37 @@
 }
 
 #ifndef PRODUCT
-class GuaranteeNotModClosure: public MemRegionClosure {
-  CardTableModRefBS* _ct;
-public:
-  GuaranteeNotModClosure(CardTableModRefBS* ct) : _ct(ct) {}
-  void do_MemRegion(MemRegion mr) {
-    jbyte* entry = _ct->byte_for(mr.start());
-    guarantee(*entry != CardTableModRefBS::clean_card,
-              "Dirty card in region that should be clean");
+void CardTableModRefBS::verify_region(MemRegion mr,
+                                      jbyte val, bool val_equals) {
+  jbyte* start    = byte_for(mr.start());
+  jbyte* end      = byte_for(mr.last());
+  bool   failures = false;
+  for (jbyte* curr = start; curr <= end; ++curr) {
+    jbyte curr_val = *curr;
+    bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
+    if (failed) {
+      if (!failures) {
+        tty->cr();
+        tty->print_cr("== CT verification failed: ["PTR_FORMAT","PTR_FORMAT"]");
+        tty->print_cr("==   %sexpecting value: %d",
+                      (val_equals) ? "" : "not ", val);
+        failures = true;
+      }
+      tty->print_cr("==   card "PTR_FORMAT" ["PTR_FORMAT","PTR_FORMAT"], "
+                    "val: %d", curr, addr_for(curr),
+                    (HeapWord*) (((size_t) addr_for(curr)) + card_size),
+                    (int) curr_val);
+    }
   }
-};
-
-void CardTableModRefBS::verify_clean_region(MemRegion mr) {
-  GuaranteeNotModClosure blk(this);
-  non_clean_card_iterate_serial(mr, &blk);
+  guarantee(!failures, "there should not have been any failures");
 }
 
-// To verify a MemRegion is entirely dirty this closure is passed to
-// dirty_card_iterate. If the region is dirty do_MemRegion will be
-// invoked only once with a MemRegion equal to the one being
-// verified.
-class GuaranteeDirtyClosure: public MemRegionClosure {
-  CardTableModRefBS* _ct;
-  MemRegion _mr;
-  bool _result;
-public:
-  GuaranteeDirtyClosure(CardTableModRefBS* ct, MemRegion mr)
-    : _ct(ct), _mr(mr), _result(false) {}
-  void do_MemRegion(MemRegion mr) {
-    _result = _mr.equals(mr);
-  }
-  bool result() const { return _result; }
-};
+void CardTableModRefBS::verify_not_dirty_region(MemRegion mr) {
+  verify_region(mr, dirty_card, false /* val_equals */);
+}
 
 void CardTableModRefBS::verify_dirty_region(MemRegion mr) {
-  GuaranteeDirtyClosure blk(this, mr);
-  dirty_card_iterate(mr, &blk);
-  guarantee(blk.result(), "Non-dirty cards in region that should be dirty");
+  verify_region(mr, dirty_card, true /* val_equals */);
 }
 #endif
 
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp	Fri May 06 11:25:16 2011 -0400
@@ -475,7 +475,10 @@
   void verify();
   void verify_guard();
 
-  void verify_clean_region(MemRegion mr) PRODUCT_RETURN;
+  // val_equals -> it will check that all cards covered by mr equal val
+  // !val_equals -> it will check that all cards covered by mr do not equal val
+  void verify_region(MemRegion mr, jbyte val, bool val_equals) PRODUCT_RETURN;
+  void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
   void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
 
   static size_t par_chunk_heapword_alignment() {
--- a/hotspot/src/share/vm/memory/collectorPolicy.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp	Fri May 06 11:25:16 2011 -0400
@@ -265,8 +265,6 @@
   MaxHeapSize = align_size_up(MaxHeapSize, max_alignment());
 
   always_do_update_barrier = UseConcMarkSweepGC;
-  BlockOffsetArrayUseUnallocatedBlock =
-      BlockOffsetArrayUseUnallocatedBlock || ParallelGCThreads > 0;
 
   // Check validity of heap flags
   assert(OldSize     % min_alignment() == 0, "old space alignment");
--- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp	Fri May 06 11:25:16 2011 -0400
@@ -100,12 +100,6 @@
   // Pass along the argument to the superclass.
   ModRefBarrierSet(int max_covered_regions) :
     BarrierSet(max_covered_regions) {}
-
-#ifndef PRODUCT
-  // Verifies that the given region contains no modified references.
-  virtual void verify_clean_region(MemRegion mr) = 0;
-#endif
-
 };
 
 #endif // SHARE_VM_MEMORY_MODREFBARRIERSET_HPP
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Fri May 06 11:25:16 2011 -0400
@@ -1423,6 +1423,11 @@
       }
     }
   }
+  if (UseNUMA) {
+    if (FLAG_IS_DEFAULT(MinHeapDeltaBytes)) {
+      FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M);
+    }
+  }
 }
 
 void Arguments::set_g1_gc_flags() {
@@ -2376,7 +2381,6 @@
       _gc_log_filename = strdup(tail);
       FLAG_SET_CMDLINE(bool, PrintGC, true);
       FLAG_SET_CMDLINE(bool, PrintGCTimeStamps, true);
-      FLAG_SET_CMDLINE(bool, TraceClassUnloading, true);
 
     // JNI hooks
     } else if (match_option(option, "-Xcheck", &tail)) {
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri May 06 11:25:16 2011 -0400
@@ -1827,7 +1827,7 @@
   develop(bool, VerifyBlockOffsetArray, false,                              \
           "Do (expensive!) block offset array verification")                \
                                                                             \
-  product(bool, BlockOffsetArrayUseUnallocatedBlock, false,                 \
+  diagnostic(bool, BlockOffsetArrayUseUnallocatedBlock, false,              \
           "Maintain _unallocated_block in BlockOffsetArray"                 \
           " (currently applicable only to CMS collector)")                  \
                                                                             \
--- a/hotspot/src/share/vm/runtime/os.hpp	Thu May 05 21:20:21 2011 -0400
+++ b/hotspot/src/share/vm/runtime/os.hpp	Fri May 06 11:25:16 2011 -0400
@@ -274,7 +274,7 @@
   static char*  reserve_memory_special(size_t size, char* addr = NULL,
                 bool executable = false);
   static bool   release_memory_special(char* addr, size_t bytes);
-  static bool   large_page_init();
+  static void   large_page_init();
   static size_t large_page_size();
   static bool   can_commit_large_page_memory();
   static bool   can_execute_large_page_memory();