--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "gc/shared/memset_with_concurrent_readers.hpp"
+#include "runtime/prefetch.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+#if INCLUDE_ALL_GCS
+
+// An implementation of memset, for use when there may be concurrent
+// readers of the region being stored into.
+//
+// We can't use the standard library memset if it is implemented using
+// block initializing stores. Doing so can result in concurrent readers
+// seeing spurious zeros.
+//
+// We can't use the obvious C/C++ for-loop, because the compiler may
+// recognize the idiomatic loop and optimize it into a call to the
+// standard library memset; we've seen exactly this happen with, for
+// example, Solaris Studio 12.3. Hence the use of inline assembly
+// code, hiding loops from the compiler's optimizer.
+//
+// We don't attempt to use the standard library memset when it is safe
+// to do so. We could conservatively do so by detecting the presence
+// of block initializing stores (VM_Version::has_blk_init()), but the
+// implementation provided here should be sufficient.
+
+inline void fill_subword(void* start, void* end, int value) {
+ STATIC_ASSERT(BytesPerWord == 8);
+ assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
+ // Dispatch on (end - start).
+ void* pc;
+ __asm__ volatile(
+ // offset := (7 - (end - start)) + 3
+ // 3 instructions from rdpc to DISPATCH
+ " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
+ " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
+ " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
+ " rd %pc, %[pc]\n\t" // dispatch on scaled offset
+ " jmpl %[pc]+%[offset], %g0\n\t"
+ " nop\n\t"
+ // DISPATCH: no direct reference, but without it the store block may be elided.
+ "1:\n\t"
+ " stb %[value], [%[end]-7]\n\t" // end[-7] = value
+ " stb %[value], [%[end]-6]\n\t"
+ " stb %[value], [%[end]-5]\n\t"
+ " stb %[value], [%[end]-4]\n\t"
+ " stb %[value], [%[end]-3]\n\t"
+ " stb %[value], [%[end]-2]\n\t"
+ " stb %[value], [%[end]-1]\n\t" // end[-1] = value
+ : /* no outputs */
+ [pc] "&=r" (pc) // temp
+ : [offset] "&+r" (start),
+ [end] "r" (end),
+ [value] "r" (value)
+ : "memory");
+}
+
+void memset_with_concurrent_readers(void* to, int value, size_t size) {
+ Prefetch::write(to, 0);
+ void* end = static_cast<char*>(to) + size;
+ if (size >= BytesPerWord) {
+ // Fill any partial word prefix.
+ uintx* aligned_to = static_cast<uintx*>(align_ptr_up(to, BytesPerWord));
+ fill_subword(to, aligned_to, value);
+
+ // Compute fill word.
+ STATIC_ASSERT(BitsPerByte == 8);
+ STATIC_ASSERT(BitsPerWord == 64);
+ uintx xvalue = value & 0xff;
+ xvalue |= (xvalue << 8);
+ xvalue |= (xvalue << 16);
+ xvalue |= (xvalue << 32);
+
+ uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
+ assert(aligned_to <= aligned_end, "invariant");
+
+ // for ( ; aligned_to < aligned_end; ++aligned_to) {
+ // *aligned_to = xvalue;
+ // }
+ uintptr_t temp;
+ __asm__ volatile(
+ // Unroll loop x8.
+ " sub %[aend], %[ato], %[temp]\n\t"
+ " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
+ " ba %xcc, 2f\n\t" // goto TEST always
+ " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
+ // LOOP:
+ "1:\n\t" // unrolled x8 store loop top
+ " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
+ " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
+ " stx %[xvalue], [%[ato]-56]\n\t"
+ " stx %[xvalue], [%[ato]-48]\n\t"
+ " stx %[xvalue], [%[ato]-40]\n\t"
+ " stx %[xvalue], [%[ato]-32]\n\t"
+ " stx %[xvalue], [%[ato]-24]\n\t"
+ " stx %[xvalue], [%[ato]-16]\n\t"
+ " stx %[xvalue], [%[ato]-8]\n\t"
+ // TEST:
+ "2:\n\t"
+ " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
+ " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
+ // Fill remaining < 8 full words.
+ // Dispatch on (aligned_end - aligned_to).
+ // offset := (7 - (aligned_end - aligned_to)) + 3
+ // 3 instructions from rdpc to DISPATCH
+ " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
+ " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
+ " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
+ " rd %pc, %[temp]\n\t" // dispatch on scaled offset
+ " jmpl %[temp]+%[ato], %g0\n\t"
+ " nop\n\t"
+ // DISPATCH: no direct reference, but without it the store block may be elided.
+ "3:\n\t"
+ " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
+ " stx %[xvalue], [%[aend]-48]\n\t"
+ " stx %[xvalue], [%[aend]-40]\n\t"
+ " stx %[xvalue], [%[aend]-32]\n\t"
+ " stx %[xvalue], [%[aend]-24]\n\t"
+ " stx %[xvalue], [%[aend]-16]\n\t"
+ " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
+ : /* no outputs */
+ [temp] "&=r" (temp)
+ : [ato] "&+r" (aligned_to),
+ [aend] "r" (aligned_end),
+ [xvalue] "r" (xvalue)
+ : "cc", "memory");
+ to = aligned_end; // setup for suffix
+ }
+ // Fill any partial word suffix. Also the prefix if size < BytesPerWord.
+ fill_subword(to, end, value);
+}
+
+#endif // INCLUDE_ALL_GCS
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Aug 31 23:29:02 2015 +0200
@@ -85,27 +85,6 @@
_supports_cx8 = has_v9();
_supports_atomic_getset4 = true; // swap instruction
- // There are Fujitsu Sparc64 CPUs which support blk_init as well so
- // we have to take this check out of the 'is_niagara()' block below.
- if (has_blk_init()) {
- // When using CMS or G1, we cannot use memset() in BOT updates
- // because the sun4v/CMT version in libc_psr uses BIS which
- // exposes "phantom zeros" to concurrent readers. See 6948537.
- if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
- FLAG_SET_DEFAULT(UseMemSetInBOT, false);
- }
- // Issue a stern warning if the user has explicitly set
- // UseMemSetInBOT (it is known to cause issues), but allow
- // use for experimentation and debugging.
- if (UseConcMarkSweepGC || UseG1GC) {
- if (UseMemSetInBOT) {
- assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
- warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
- " on sun4v; please understand that you are using at your own risk!");
- }
- }
- }
-
if (is_niagara()) {
// Indirect branch is the same cost as direct
if (FLAG_IS_DEFAULT(UseInlineCaches)) {
--- a/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp Mon Aug 31 23:29:02 2015 +0200
@@ -27,6 +27,7 @@
#include "gc/g1/g1BlockOffsetTable.hpp"
#include "gc/g1/heapRegion.hpp"
+#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "gc/shared/space.hpp"
inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -68,15 +69,7 @@
check_index(right, "right index out of range");
assert(left <= right, "indexes out of order");
size_t num_cards = right - left + 1;
- if (UseMemSetInBOT) {
- memset(&_offset_array[left], offset, num_cards);
- } else {
- size_t i = left;
- const size_t end = i + num_cards;
- for (; i < end; i++) {
- _offset_array[i] = offset;
- }
- }
+ memset_with_concurrent_readers(&_offset_array[left], offset, num_cards);
}
// Variant of index_for that does not check the index for validity.
--- a/hotspot/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp Mon Aug 31 23:29:02 2015 +0200
@@ -27,6 +27,7 @@
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#include "gc/g1/satbQueue.hpp"
+#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/mutexLocker.hpp"
@@ -108,15 +109,7 @@
jbyte *const first = byte_for(mr.start());
jbyte *const last = byte_after(mr.last());
- // Below we may use an explicit loop instead of memset() because on
- // certain platforms memset() can give concurrent readers phantom zeros.
- if (UseMemSetInBOT) {
- memset(first, g1_young_gen, last - first);
- } else {
- for (jbyte* i = first; i < last; i++) {
- *i = g1_young_gen;
- }
- }
+ memset_with_concurrent_readers(first, g1_young_gen, last - first);
}
#ifndef PRODUCT
--- a/hotspot/src/share/vm/gc/shared/blockOffsetTable.hpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/gc/shared/blockOffsetTable.hpp Mon Aug 31 23:29:02 2015 +0200
@@ -25,9 +25,12 @@
#ifndef SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP
#define SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP
+#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "memory/memRegion.hpp"
#include "memory/virtualspace.hpp"
+#include "runtime/globals.hpp"
#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
// The CollectedHeap type requires subtypes to implement a method
// "block_start". For some subtypes, notably generational
@@ -126,6 +129,19 @@
VirtualSpace _vs;
u_char* _offset_array; // byte array keeping backwards offsets
+ void fill_range(size_t start, size_t num_cards, u_char offset) {
+ void* start_ptr = &_offset_array[start];
+#if INCLUDE_ALL_GCS
+ // If collector is concurrent, special handling may be needed.
+ assert(!UseG1GC, "Shouldn't be here when using G1");
+ if (UseConcMarkSweepGC) {
+ memset_with_concurrent_readers(start_ptr, offset, num_cards);
+ return;
+ }
+#endif // INCLUDE_ALL_GCS
+ memset(start_ptr, offset, num_cards);
+ }
+
protected:
// Bounds checking accessors:
// For performance these have to devolve to array accesses in product builds.
@@ -160,20 +176,7 @@
assert(left < right, "Heap addresses out of order");
size_t num_cards = pointer_delta(right, left) >> LogN_words;
- // Below, we may use an explicit loop instead of memset()
- // because on certain platforms memset() can give concurrent
- // readers "out-of-thin-air," phantom zeros; see 6948537.
- if (UseMemSetInBOT) {
- memset(&_offset_array[index_for(left)], offset, num_cards);
- } else {
- size_t i = index_for(left);
- const size_t end = i + num_cards;
- for (; i < end; i++) {
- // Elided until CR 6977974 is fixed properly.
- // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
- _offset_array[i] = offset;
- }
- }
+ fill_range(index_for(left), num_cards, offset);
}
void set_offset_array(size_t left, size_t right, u_char offset, bool reducing = false) {
@@ -182,20 +185,7 @@
assert(left <= right, "indexes out of order");
size_t num_cards = right - left + 1;
- // Below, we may use an explicit loop instead of memset
- // because on certain platforms memset() can give concurrent
- // readers "out-of-thin-air," phantom zeros; see 6948537.
- if (UseMemSetInBOT) {
- memset(&_offset_array[left], offset, num_cards);
- } else {
- size_t i = left;
- const size_t end = i + num_cards;
- for (; i < end; i++) {
- // Elided until CR 6977974 is fixed properly.
- // assert(!reducing || _offset_array[i] >= offset, "Not reducing");
- _offset_array[i] = offset;
- }
- }
+ fill_range(left, num_cards, offset);
}
void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/shared/memset_with_concurrent_readers.cpp Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include <string.h>
+#include "gc/shared/memset_with_concurrent_readers.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+#include "utilities/ostream.hpp"
+
+#if INCLUDE_ALL_GCS
+
+// Unit test
+#ifdef ASSERT
+
+static unsigned line_byte(const char* line, size_t i) {
+ return unsigned(line[i]) & 0xFF;
+}
+
+// Verify memset_with_concurrent_readers mimics memset.
+// We don't attempt to verify the concurrent reader case.
+void test_memset_with_concurrent_readers() {
+ const size_t chunk_size = 8 * BytesPerWord;
+ const unsigned chunk_count = 4;
+ const size_t block_size = (chunk_count + 4) * chunk_size;
+ char block[block_size];
+ char clear_block[block_size];
+ char set_block[block_size];
+
+ // block format:
+ // 0: unused leading chunk
+ // 1: chunk written from start index to end of chunk
+ // ... nchunks fully written chunks
+ // N: chunk written from start of chunk to end index
+ // N+1: unused trailing chunk
+
+ const int clear_value = 0;
+ const int set_value = 0xAC;
+
+ memset(clear_block, clear_value, block_size);
+ memset(set_block, set_value, block_size);
+
+ for (unsigned nchunks = 0; nchunks <= chunk_count; ++nchunks) {
+ for (size_t start = 1; start <= chunk_size; ++start) {
+ for (size_t end = 0; end <= chunk_size; ++end) {
+ size_t set_start = chunk_size + start;
+ size_t set_end = (2 + nchunks) * chunk_size + end;
+ size_t set_size = set_end - set_start;
+
+ memset(block, clear_value, block_size);
+ memset_with_concurrent_readers(&block[set_start], set_value, set_size);
+ bool head_clear = !memcmp(clear_block, block, set_start);
+ bool middle_set = !memcmp(set_block, block + set_start, set_size);
+ bool tail_clear = !memcmp(clear_block, block + set_end, block_size - set_end);
+ if (!(head_clear && middle_set && tail_clear)) {
+ tty->print_cr("*** memset_with_concurrent_readers failed: "
+ "set start " SIZE_FORMAT ", set end " SIZE_FORMAT,
+ set_start, set_end);
+ for (unsigned chunk = 0; chunk < (block_size / chunk_size); ++chunk) {
+ for (unsigned line = 0; line < (chunk_size / BytesPerWord); ++line) {
+ const char* lp = &block[chunk * chunk_size + line * BytesPerWord];
+ tty->print_cr("%d,%d: %2x %2x %2x %2x %2x %2x %2x %2x",
+ chunk, line,
+ line_byte(lp, 0), line_byte(lp, 1),
+ line_byte(lp, 2), line_byte(lp, 3),
+ line_byte(lp, 4), line_byte(lp, 5),
+ line_byte(lp, 6), line_byte(lp, 7));
+ }
+ }
+ assert(head_clear, "leading byte not clear");
+ assert(middle_set, "memset byte not set");
+ assert(tail_clear, "trailing bye not clear");
+ }
+ }
+ }
+ }
+}
+
+#endif // end unit test
+
+#endif // INCLUDE_ALL_GCS
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc/shared/memset_with_concurrent_readers.hpp Mon Aug 31 23:29:02 2015 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP
+#define SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP
+
+#include <stddef.h>
+#include <string.h>
+#include "utilities/macros.hpp"
+
+// Only used by concurrent collectors.
+#if INCLUDE_ALL_GCS
+
+// Fill a block of memory with value, like memset, but with the
+// understanding that there may be concurrent readers of that memory.
+void memset_with_concurrent_readers(void* to, int value, size_t size);
+
+#ifdef TARGET_ARCH_sparc
+
+// SPARC requires special handling. See SPARC-specific definition.
+
+#else
+// All others just use memset.
+
+inline void memset_with_concurrent_readers(void* to, int value, size_t size) {
+ ::memset(to, value, size);
+}
+
+#endif // End of target dispatch.
+
+#endif // INCLUDE_ALL_GCS
+
+#endif // include guard
--- a/hotspot/src/share/vm/prims/jni.cpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/prims/jni.cpp Mon Aug 31 23:29:02 2015 +0200
@@ -3869,6 +3869,7 @@
void TestBufferingOopClosure_test();
void TestCodeCacheRemSet_test();
void FreeRegionList_test();
+void test_memset_with_concurrent_readers();
#endif
void execute_internal_vm_tests() {
@@ -3910,6 +3911,7 @@
if (UseG1GC) {
run_unit_test(FreeRegionList_test());
}
+ run_unit_test(test_memset_with_concurrent_readers());
#endif
tty->print_cr("All internal VM tests passed");
}
--- a/hotspot/src/share/vm/runtime/globals.hpp Mon Aug 31 21:46:33 2015 +0300
+++ b/hotspot/src/share/vm/runtime/globals.hpp Mon Aug 31 23:29:02 2015 +0200
@@ -638,10 +638,6 @@
experimental(bool, AlwaysSafeConstructors, false, \
"Force safe construction, as if all fields are final.") \
\
- /* Temporary: See 6948537 */ \
- experimental(bool, UseMemSetInBOT, true, \
- "(Unstable) uses memset in BOT updates in GC code") \
- \
diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug, \
"Enable normal processing of flags relating to field diagnostics")\
\