  _completed_buffers_tail = NULL;
  _num_cards = 0;
  return result;
}

|
230 class G1RefineBufferedCards : public StackObj { |
|
231 BufferNode* const _node; |
|
232 CardTable::CardValue** const _node_buffer; |
|
233 const size_t _node_buffer_size; |
|
234 const uint _worker_id; |
|
235 size_t* _total_refined_cards; |
|
236 G1RemSet* const _g1rs; |
|
237 |
|
238 static inline int compare_card(const CardTable::CardValue* p1, |
|
239 const CardTable::CardValue* p2) { |
|
240 return p2 - p1; |
|
241 } |
|
242 |
|
243 // Sorts the cards from start_index to _node_buffer_size in *decreasing* |
|
244 // address order. Tests showed that this order is preferable to not sorting |
|
245 // or increasing address order. |
|
246 void sort_cards(size_t start_index) { |
|
247 QuickSort::sort(&_node_buffer[start_index], |
|
248 _node_buffer_size - start_index, |
|
249 compare_card, |
|
250 false); |
|
251 } |
|
252 |
|
253 // Returns the index to the first clean card in the buffer. |
|
254 size_t clean_cards() { |
|
255 const size_t start = _node->index(); |
|
256 assert(start <= _node_buffer_size, "invariant"); |
|
257 |
|
258 // Two-fingered compaction algorithm similar to the filtering mechanism in |
|
259 // SATBMarkQueue. The main difference is that clean_card_before_refine() |
|
260 // could change the buffer element in-place. |
|
261 // We don't check for SuspendibleThreadSet::should_yield(), because |
|
262 // cleaning and redirtying the cards is fast. |
|
263 CardTable::CardValue** src = &_node_buffer[start]; |
|
264 CardTable::CardValue** dst = &_node_buffer[_node_buffer_size]; |
|
265 assert(src <= dst, "invariant"); |
|
266 for ( ; src < dst; ++src) { |
|
267 // Search low to high for a card to keep. |
|
268 if (_g1rs->clean_card_before_refine(src)) { |
|
269 // Found keeper. Search high to low for a card to discard. |
|
270 while (src < --dst) { |
|
271 if (!_g1rs->clean_card_before_refine(dst)) { |
|
272 *dst = *src; // Replace discard with keeper. |
|
273 break; |
|
274 } |
|
275 } |
|
276 // If discard search failed (src == dst), the outer loop will also end. |
|
277 } |
|
278 } |
|
279 |
|
280 // dst points to the first retained clean card, or the end of the buffer |
|
281 // if all the cards were discarded. |
|
282 const size_t first_clean = dst - _node_buffer; |
|
283 assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant"); |
|
284 // Discarded cards are considered as refined. |
|
285 *_total_refined_cards += first_clean - start; |
|
286 return first_clean; |
|
287 } |
|
288 |
|
289 bool refine_cleaned_cards(size_t start_index) { |
|
290 bool result = true; |
|
291 size_t i = start_index; |
|
292 for ( ; i < _node_buffer_size; ++i) { |
|
293 if (SuspendibleThreadSet::should_yield()) { |
|
294 redirty_unrefined_cards(i); |
|
295 result = false; |
|
296 break; |
|
297 } |
|
298 _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id); |
|
299 } |
|
300 _node->set_index(i); |
|
301 *_total_refined_cards += i - start_index; |
|
302 return result; |
|
303 } |
|
304 |
|
305 void redirty_unrefined_cards(size_t start) { |
|
306 for ( ; start < _node_buffer_size; ++start) { |
|
307 *_node_buffer[start] = G1CardTable::dirty_card_val(); |
|
308 } |
|
309 } |
|
310 |
|
311 public: |
|
312 G1RefineBufferedCards(BufferNode* node, |
|
313 size_t node_buffer_size, |
|
314 uint worker_id, |
|
315 size_t* total_refined_cards) : |
|
316 _node(node), |
|
317 _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))), |
|
318 _node_buffer_size(node_buffer_size), |
|
319 _worker_id(worker_id), |
|
320 _total_refined_cards(total_refined_cards), |
|
321 _g1rs(G1CollectedHeap::heap()->rem_set()) {} |
|
322 |
|
323 bool refine() { |
|
324 size_t first_clean_index = clean_cards(); |
|
325 if (first_clean_index == _node_buffer_size) { |
|
326 _node->set_index(first_clean_index); |
|
327 return true; |
|
328 } |
|
329 // This fence serves two purposes. First, the cards must be cleaned |
|
330 // before processing the contents. Second, we can't proceed with |
|
331 // processing a region until after the read of the region's top in |
|
332 // collect_and_clean_cards(), for synchronization with possibly concurrent |
|
333 // humongous object allocation (see comment at the StoreStore fence before |
|
334 // setting the regions' tops in humongous allocation path). |
|
335 // It's okay that reading region's top and reading region's type were racy |
|
336 // wrto each other. We need both set, in any order, to proceed. |
|
337 OrderAccess::fence(); |
|
338 sort_cards(first_clean_index); |
|
339 return refine_cleaned_cards(first_clean_index); |
|
340 } |
|
341 }; |
|
342 |
229 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, |
343 bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, |
230 uint worker_id, |
344 uint worker_id, |
231 size_t* total_refined_cards) { |
345 size_t* total_refined_cards) { |
232 G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set(); |
346 G1RefineBufferedCards buffered_cards(node, |
233 size_t size = buffer_size(); |
347 buffer_size(), |
234 void** buffer = BufferNode::make_buffer_from_node(node); |
348 worker_id, |
235 size_t i = node->index(); |
349 total_refined_cards); |
236 assert(i <= size, "invariant"); |
350 return buffered_cards.refine(); |
237 for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) { |
|
238 CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]); |
|
239 rem_set->refine_card_concurrently(cp, worker_id); |
|
240 } |
|
241 *total_refined_cards += (i - node->index()); |
|
242 node->set_index(i); |
|
243 return i == size; |
|
244 } |
351 } |
245 |
352 |
#ifndef ASSERT
#define assert_fully_consumed(node, buffer_size)
#else