7121547: G1: High number mispredicted branches while iterating over the marking bitmap
Summary: There is a high number of mispredicted branches associated with calling BitMap::iteratate() from within CMBitMapRO::iterate(). Implement a version of CMBitMapRO::iterate() directly using inline-able routines.
Reviewed-by: tonyp, iveresov
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 13 01:55:22 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 13 13:27:48 2012 -0800
@@ -104,17 +104,6 @@
return (int) (diff >> _shifter);
}
-bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
- HeapWord* left = MAX2(_bmStartWord, mr.start());
- HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
- if (right > left) {
- // Right-open interval [leftOffset, rightOffset).
- return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
- } else {
- return true;
- }
-}
-
void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
size_t from_start_index,
HeapWord* to_start_word,
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Jan 13 01:55:22 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Jan 13 13:27:48 2012 -0800
@@ -84,8 +84,8 @@
}
// iteration
- bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
- bool iterate(BitMapClosure* cl, MemRegion mr);
+ inline bool iterate(BitMapClosure* cl, MemRegion mr);
+ inline bool iterate(BitMapClosure* cl);
// Return the address corresponding to the next marked bit at or after
// "addr", and before "limit", if "limit" is non-NULL. If there is no
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp Fri Jan 13 01:55:22 2012 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp Fri Jan 13 13:27:48 2012 -0800
@@ -28,6 +28,35 @@
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+ HeapWord* start_addr = MAX2(startWord(), mr.start());
+ HeapWord* end_addr = MIN2(endWord(), mr.end());
+
+ if (end_addr > start_addr) {
+ // Right-open interval [start-offset, end-offset).
+ BitMap::idx_t start_offset = heapWordToOffset(start_addr);
+ BitMap::idx_t end_offset = heapWordToOffset(end_addr);
+
+ start_offset = _bm.get_next_one_offset(start_offset, end_offset);
+ while (start_offset < end_offset) {
+ HeapWord* obj_addr = offsetToHeapWord(start_offset);
+ oop obj = (oop) obj_addr;
+ if (!cl->do_bit(start_offset)) {
+ return false;
+ }
+ HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
+ BitMap::idx_t next_offset = heapWordToOffset(next_addr);
+ start_offset = _bm.get_next_one_offset(next_offset, end_offset);
+ }
+ }
+ return true;
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
+ MemRegion mr(startWord(), sizeInWords());
+ return iterate(cl, mr);
+}
+
inline void CMTask::push(oop obj) {
HeapWord* objAddr = (HeapWord*) obj;
assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
--- a/hotspot/src/share/vm/utilities/bitMap.inline.hpp Fri Jan 13 01:55:22 2012 -0800
+++ b/hotspot/src/share/vm/utilities/bitMap.inline.hpp Fri Jan 13 13:27:48 2012 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -178,8 +178,30 @@
for (; !(res & 1); res_offset++) {
res = res >> 1;
}
- assert(res_offset >= l_offset &&
- res_offset < r_offset, "just checking");
+
+#ifdef ASSERT
+ // In the following assert, if r_offset is not bitamp word aligned,
+ // checking that res_offset is strictly less than r_offset is too
+ // strong and will trip the assert.
+ //
+ // Consider the case where l_offset is bit 15 and r_offset is bit 17
+ // of the same map word, and where bits [15:16:17:18] == [00:00:00:01].
+ // All the bits in the range [l_offset:r_offset) are 0.
+ // The loop that calculates res_offset, above, would yield the offset
+ // of bit 18 because it's in the same map word as l_offset and there
+ // is a set bit in that map word above l_offset (i.e. res != NoBits).
+ //
+ // In this case, however, we can assert is that res_offset is strictly
+ // less than size() since we know that there is at least one set bit
+ // at an offset above, but in the same map word as, r_offset.
+ // Otherwise, if r_offset is word aligned then it will not be in the
+ // same map word as l_offset (unless it equals l_offset). So either
+ // there won't be a set bit between l_offset and the end of it's map
+ // word (i.e. res == NoBits), or res_offset will be less than r_offset.
+
+ idx_t limit = is_word_aligned(r_offset) ? r_offset : size();
+ assert(res_offset >= l_offset && res_offset < limit, "just checking");
+#endif // ASSERT
return MIN2(res_offset, r_offset);
}
// skip over all word length 0-bit runs