# HG changeset patch # User herrick # Date 1561677282 14400 # Node ID dd706e28e6cc9a381f279a8ca001d040a24c798c # Parent 4a31db8d42bdd31e4e43f48d724db791a4d5534e# Parent 73f1c84ca264eb6c461f6ee010f42975ec480488 Merge diff -r 4a31db8d42bd -r dd706e28e6cc .hgtags --- a/.hgtags Thu Jun 27 17:44:18 2019 -0400 +++ b/.hgtags Thu Jun 27 19:14:42 2019 -0400 @@ -567,3 +567,5 @@ 2f4e214781a1d597ed36bf5a36f20928c6c82996 jdk-14+1 0692b67f54621991ba7afbf23e55b788f3555e69 jdk-13+26 43627549a488b7d0b4df8fad436e36233df89877 jdk-14+2 +b7f68ddec66f996ae3aad03291d129ca9f02482d jdk-13+27 +e64383344f144217c36196c3c8a2df8f588a2af3 jdk-14+3 diff -r 4a31db8d42bd -r dd706e28e6cc make/Docs.gmk diff -r 4a31db8d42bd -r dd706e28e6cc make/common/FindTests.gmk --- a/make/common/FindTests.gmk Thu Jun 27 17:44:18 2019 -0400 +++ b/make/common/FindTests.gmk Thu Jun 27 19:14:42 2019 -0400 @@ -62,10 +62,8 @@ # If this file is deemed outdated, it will automatically get regenerated # by this rule before being included below. -# -# When calling TestMake.gmk, override the log level to avoid any kind of debug -# output being captured into the generated makefile. -$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES) +$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES) \ + $(TOPDIR)/test/make/TestMake.gmk $(call MakeTargetDir) ( $(foreach root, $(JTREG_TESTROOTS), \ $(PRINTF) "\n$(root)_JTREG_TEST_GROUPS := " ; \ @@ -73,10 +71,11 @@ $($(root)_JTREG_GROUP_FILES) \ | $(SORT) -u | $(TR) '\n' ' ' ; \ ) \ - $(PRINTF) "\nMAKE_TEST_TARGETS := " ; \ - $(MAKE) -s --no-print-directory $(MAKE_ARGS) LOG_LEVEL=warn \ - SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \ ) > $@ + $(PRINTF) "\nMAKE_TEST_TARGETS := " >> $@ + $(MAKE) -s --no-print-directory $(MAKE_ARGS) \ + SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \ + TARGETS_FILE=$@ -include $(FIND_TESTS_CACHE_FILE) diff -r 4a31db8d42bd -r dd706e28e6cc make/common/ProcessMarkdown.gmk --- a/make/common/ProcessMarkdown.gmk Thu Jun 27 17:44:18 2019 -0400 +++ b/make/common/ProcessMarkdown.gmk Thu Jun 27 19:14:42 2019 -0400 @@ -103,7 +103,7 @@ $$(call LogInfo, Post-processing markdown file $2) $$(call MakeDir, $$(SUPPORT_OUTPUTDIR)/markdown $$($1_$2_TARGET_DIR)) $$(call ExecuteWithLog, $$(SUPPORT_OUTPUTDIR)/markdown/$$($1_$2_MARKER)_post, \ - $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE)) + ( $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE) ) ) endif $1 += $$($1_$2_OUTPUT_FILE) diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -63,27 +63,25 @@ return; } - // rscratch1 can be passed as src or dst, so don't use it. - RegSet savedRegs = RegSet::of(rscratch2, rheapbase); + assert_different_registers(rscratch1, rscratch2, src.base()); + assert_different_registers(rscratch1, rscratch2, dst); + + RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2); Label done; - assert_different_registers(rheapbase, rscratch2, dst); - assert_different_registers(rheapbase, rscratch2, src.base()); - - __ push(savedRegs, sp); // Load bad mask into scratch register. 
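In the zBarrierSetAssembler_aarch64 hunk above, the load-barrier slow path stops borrowing rheapbase: the bad mask is loaded into rscratch1, the pre-resolved address stays in rscratch2, and the set of registers preserved around the runtime call is computed once as r0..r28 minus the destination and the two scratch registers, replacing the earlier separate push of {rscratch2, rheapbase}. A minimal sketch of that set arithmetic, using a toy bitmask in place of HotSpot's RegSet (the RegSet expression being mirrored is taken from the hunk; the bitmask type and the register numbers are only for the demo):

#include <bitset>
#include <cstdint>
#include <cstdio>

// Toy stand-in for HotSpot's RegSet: one bit per integer register r0..r30.
struct ToyRegSet {
    uint32_t bits;
    static ToyRegSet range(int lo, int hi) {
        ToyRegSet s{0};
        for (int r = lo; r <= hi; ++r) s.bits |= 1u << r;
        return s;
    }
    static ToyRegSet of(int a, int b, int c) {
        return ToyRegSet{(1u << a) | (1u << b) | (1u << c)};
    }
    ToyRegSet operator-(ToyRegSet o) const { return ToyRegSet{bits & ~o.bits}; }
    size_t count() const { return std::bitset<32>(bits).count(); }
};

int main() {
    const int dst = 3, rscratch1 = 8, rscratch2 = 9;   // demo register numbers
    // Mirrors: RegSet savedRegs = RegSet::range(r0, r28) - RegSet::of(dst, rscratch1, rscratch2);
    ToyRegSet saved = ToyRegSet::range(0, 28) - ToyRegSet::of(dst, rscratch1, rscratch2);
    printf("registers pushed around the slow-path call: %zu\n", saved.count());   // 29 - 3 = 26
}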
- __ ldr(rheapbase, address_bad_mask_from_thread(rthread)); + __ ldr(rscratch1, address_bad_mask_from_thread(rthread)); __ lea(rscratch2, src); __ ldr(dst, src); // Test reference against bad mask. If mask bad, then we need to fix it up. - __ tst(dst, rheapbase); + __ tst(dst, rscratch1); __ br(Assembler::EQ, done); __ enter(); - __ push(RegSet::range(r0,r28) - RegSet::of(dst), sp); + __ push(savedRegs, sp); if (c_rarg0 != dst) { __ mov(c_rarg0, dst); @@ -91,13 +89,15 @@ __ mov(c_rarg1, rscratch2); int step = 4 * wordSize; - __ mov(rscratch1, -step); + __ mov(rscratch2, -step); __ sub(sp, sp, step); for (int i = 28; i >= 4; i -= 4) { __ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), - as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch1))); + as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch2))); } + __ st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2), + as_FloatRegister(3), __ T1D, Address(sp)); __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); @@ -111,13 +111,10 @@ __ mov(dst, r0); } - __ pop(RegSet::range(r0,r28) - RegSet::of(dst), sp); + __ pop(savedRegs, sp); __ leave(); __ bind(done); - - // Restore tmps - __ pop(savedRegs, sp); } #ifdef ASSERT diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1383,7 +1383,12 @@ // save regs before copy_memory __ push(RegSet::of(d, count), sp); } - copy_memory(aligned, s, d, count, rscratch1, size); + { + // UnsafeCopyMemory page error: continue after ucm + bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + copy_memory(aligned, s, d, count, rscratch1, size); + } if (is_oop) { __ pop(RegSet::of(d, count), sp); @@ -1455,7 +1460,12 @@ // save regs before copy_memory __ push(RegSet::of(d, count), sp); } - copy_memory(aligned, s, d, count, rscratch1, -size); + { + // UnsafeCopyMemory page error: continue after ucm + bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + copy_memory(aligned, s, d, count, rscratch1, -size); + } if (is_oop) { __ pop(RegSet::of(d, count), sp); if (VerifyOops) @@ -5816,6 +5826,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -886,8 +886,8 @@ } // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(rscratch1, rmethod); - __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); + __ load_mirror(r10, rmethod); + __ stp(r10, zr, Address(sp, 4 * wordSize)); __ ldr(rcpool, Address(rmethod, Method::const_offset())); __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/arm/stubGenerator_arm.cpp --- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp Thu Jun 27 17:44:18 2019 -0400 +++ 
b/src/hotspot/cpu/arm/stubGenerator_arm.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -928,7 +928,7 @@ // Scratches 'count', R3. // R4-R10 are preserved (saved/restored). // - int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) { + int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool unsafe_copy = false) { assert (from == R0 && to == R1 && count == R2, "adjust the implementation below"); const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration @@ -954,107 +954,111 @@ Label L_skip_pld; - // predecrease to exit when there is less than count_per_loop - __ sub_32(count, count, count_per_loop); - - if (pld_offset != 0) { - pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; - - prefetch(from, to, 0); - - if (prefetch_before) { - // If prefetch is done ahead, final PLDs that overflow the - // copied area can be easily avoided. 'count' is predecreased - // by the prefetch distance to optimize the inner loop and the - // outer loop skips the PLD. - __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count); - - // skip prefetch for small copies - __ b(L_skip_pld, lt); - } - - int offset = ArmCopyCacheLineSize; - while (offset <= pld_offset) { - prefetch(from, to, offset); - offset += ArmCopyCacheLineSize; - }; - } - { - // 32-bit ARM note: we have tried implementing loop unrolling to skip one - // PLD with 64 bytes cache line but the gain was not significant. - - Label L_copy_loop; - __ align(OptoLoopAlignment); - __ BIND(L_copy_loop); - - if (prefetch_before) { - prefetch(from, to, bytes_per_loop + pld_offset); - __ BIND(L_skip_pld); - } - - if (split_read) { - // Split the register set in two sets so that there is less - // latency between LDM and STM (R3-R6 available while R7-R10 - // still loading) and less register locking issue when iterating - // on the first LDM. - __ ldmia(from, RegisterSet(R3, R6), writeback); - __ ldmia(from, RegisterSet(R7, R10), writeback); - } else { - __ ldmia(from, RegisterSet(R3, R10), writeback); + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + // predecrease to exit when there is less than count_per_loop + __ sub_32(count, count, count_per_loop); + + if (pld_offset != 0) { + pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; + + prefetch(from, to, 0); + + if (prefetch_before) { + // If prefetch is done ahead, final PLDs that overflow the + // copied area can be easily avoided. 'count' is predecreased + // by the prefetch distance to optimize the inner loop and the + // outer loop skips the PLD. 
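The recurring change in this file, and in the aarch64, ppc and sparc stub generators elsewhere in the patch, is wrapping each raw copy loop in a scoped UnsafeCopyMemoryMark. As used here it takes the stub generator (this), a flag saying whether an entry should be recorded at all (the new unsafe_copy / !aligned arguments), and a third flag that the patch's own comments describe as either "continue after ucm" or "continue at UnsafeCopyMemory common_error_exit". The sketch below models only the RAII idea, with invented names, and takes the resume-point interpretation from those comments rather than from the class itself:

#include <cstdio>
#include <vector>

// Illustrative model only: a fake assembler whose pc advances as code is
// emitted, and an RAII mark that, on destruction, records the pc range
// covered by its scope together with a resume point.
struct DemoAssembler { int pc = 0; void emit(int n) { pc += n; } };

struct DemoEntry { int start, end, resume; };
static std::vector<DemoEntry> demo_table;
static const int demo_common_exit_pc = 0x9000;   // stands in for the shared error-exit stub

class DemoCopyMark {
    DemoAssembler* _as;
    int  _start;
    bool _record;
    bool _continue_after;
public:
    DemoCopyMark(DemoAssembler* as, bool record, bool continue_after)
        : _as(as), _start(as->pc), _record(record), _continue_after(continue_after) {}
    ~DemoCopyMark() {
        if (!_record) return;
        int end = _as->pc;
        // "continue after ucm" -> resume past the marked code; otherwise use the common exit.
        demo_table.push_back({_start, end, _continue_after ? end : demo_common_exit_pc});
    }
};

int main() {
    DemoAssembler as;
    as.emit(16);                                              // prologue, outside the marked range
    {
        DemoCopyMark mark(&as, /*record=*/true, /*continue_after=*/true);  // cf. ucmm(this, unsafe_copy, true)
        as.emit(48);                                          // the raw copy loop lives inside the scope
    }                                                         // destructor records {16, 64, resume = 64}
    as.emit(8);                                               // epilogue, outside the marked range
    printf("recorded [%d,%d) resume=%d\n",
           demo_table[0].start, demo_table[0].end, demo_table[0].resume);
}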
+ __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count); + + // skip prefetch for small copies + __ b(L_skip_pld, lt); + } + + int offset = ArmCopyCacheLineSize; + while (offset <= pld_offset) { + prefetch(from, to, offset); + offset += ArmCopyCacheLineSize; + }; } - __ subs_32(count, count, count_per_loop); - - if (prefetch_after) { - prefetch(from, to, pld_offset, bytes_per_loop); - } - - if (split_write) { - __ stmia(to, RegisterSet(R3, R6), writeback); - __ stmia(to, RegisterSet(R7, R10), writeback); - } else { - __ stmia(to, RegisterSet(R3, R10), writeback); - } - - __ b(L_copy_loop, ge); - - if (prefetch_before) { - // the inner loop may end earlier, allowing to skip PLD for the last iterations - __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); - __ b(L_skip_pld, ge); + { + // 32-bit ARM note: we have tried implementing loop unrolling to skip one + // PLD with 64 bytes cache line but the gain was not significant. + + Label L_copy_loop; + __ align(OptoLoopAlignment); + __ BIND(L_copy_loop); + + if (prefetch_before) { + prefetch(from, to, bytes_per_loop + pld_offset); + __ BIND(L_skip_pld); + } + + if (split_read) { + // Split the register set in two sets so that there is less + // latency between LDM and STM (R3-R6 available while R7-R10 + // still loading) and less register locking issue when iterating + // on the first LDM. + __ ldmia(from, RegisterSet(R3, R6), writeback); + __ ldmia(from, RegisterSet(R7, R10), writeback); + } else { + __ ldmia(from, RegisterSet(R3, R10), writeback); + } + + __ subs_32(count, count, count_per_loop); + + if (prefetch_after) { + prefetch(from, to, pld_offset, bytes_per_loop); + } + + if (split_write) { + __ stmia(to, RegisterSet(R3, R6), writeback); + __ stmia(to, RegisterSet(R7, R10), writeback); + } else { + __ stmia(to, RegisterSet(R3, R10), writeback); + } + + __ b(L_copy_loop, ge); + + if (prefetch_before) { + // the inner loop may end earlier, allowing to skip PLD for the last iterations + __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); + __ b(L_skip_pld, ge); + } } - } - BLOCK_COMMENT("Remaining bytes:"); - // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes - - // __ add(count, count, ...); // addition useless for the bit tests - assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); - - __ tst(count, 16 / bytes_per_count); - __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes - __ stmia(to, RegisterSet(R3, R6), writeback, ne); - - __ tst(count, 8 / bytes_per_count); - __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes - __ stmia(to, RegisterSet(R3, R4), writeback, ne); - - if (bytes_per_count <= 4) { - __ tst(count, 4 / bytes_per_count); - __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes - __ str(R3, Address(to, 4, post_indexed), ne); - } - - if (bytes_per_count <= 2) { - __ tst(count, 2 / bytes_per_count); - __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes - __ strh(R3, Address(to, 2, post_indexed), ne); - } - - if (bytes_per_count == 1) { - __ tst(count, 1); - __ ldrb(R3, Address(from, 1, post_indexed), ne); - __ strb(R3, Address(to, 1, post_indexed), ne); + BLOCK_COMMENT("Remaining bytes:"); + // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes + + // __ add(count, count, ...); // addition useless for the bit tests + assert (pld_offset % 
bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); + + __ tst(count, 16 / bytes_per_count); + __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes + __ stmia(to, RegisterSet(R3, R6), writeback, ne); + + __ tst(count, 8 / bytes_per_count); + __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes + __ stmia(to, RegisterSet(R3, R4), writeback, ne); + + if (bytes_per_count <= 4) { + __ tst(count, 4 / bytes_per_count); + __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes + __ str(R3, Address(to, 4, post_indexed), ne); + } + + if (bytes_per_count <= 2) { + __ tst(count, 2 / bytes_per_count); + __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes + __ strh(R3, Address(to, 2, post_indexed), ne); + } + + if (bytes_per_count == 1) { + __ tst(count, 1); + __ ldrb(R3, Address(from, 1, post_indexed), ne); + __ strb(R3, Address(to, 1, post_indexed), ne); + } } __ pop(RegisterSet(R4,R10)); @@ -1083,7 +1087,7 @@ // Scratches 'count', R3. // ARM R4-R10 are preserved (saved/restored). // - int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) { + int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count, bool unsafe_copy = false) { assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below"); const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration @@ -1099,102 +1103,105 @@ __ push(RegisterSet(R4,R10)); - __ sub_32(count, count, count_per_loop); - - const bool prefetch_before = pld_offset < 0; - const bool prefetch_after = pld_offset > 0; - - Label L_skip_pld; - - if (pld_offset != 0) { - pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; - - prefetch(end_from, end_to, -wordSize); - - if (prefetch_before) { - __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count); - __ b(L_skip_pld, lt); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + __ sub_32(count, count, count_per_loop); + + const bool prefetch_before = pld_offset < 0; + const bool prefetch_after = pld_offset > 0; + + Label L_skip_pld; + + if (pld_offset != 0) { + pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; + + prefetch(end_from, end_to, -wordSize); + + if (prefetch_before) { + __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count); + __ b(L_skip_pld, lt); + } + + int offset = ArmCopyCacheLineSize; + while (offset <= pld_offset) { + prefetch(end_from, end_to, -(wordSize + offset)); + offset += ArmCopyCacheLineSize; + }; } - int offset = ArmCopyCacheLineSize; - while (offset <= pld_offset) { - prefetch(end_from, end_to, -(wordSize + offset)); - offset += ArmCopyCacheLineSize; - }; - } - - { - // 32-bit ARM note: we have tried implementing loop unrolling to skip one - // PLD with 64 bytes cache line but the gain was not significant. - - Label L_copy_loop; - __ align(OptoLoopAlignment); - __ BIND(L_copy_loop); - - if (prefetch_before) { - prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset)); - __ BIND(L_skip_pld); + { + // 32-bit ARM note: we have tried implementing loop unrolling to skip one + // PLD with 64 bytes cache line but the gain was not significant. 
+ + Label L_copy_loop; + __ align(OptoLoopAlignment); + __ BIND(L_copy_loop); + + if (prefetch_before) { + prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset)); + __ BIND(L_skip_pld); + } + + if (split_read) { + __ ldmdb(end_from, RegisterSet(R7, R10), writeback); + __ ldmdb(end_from, RegisterSet(R3, R6), writeback); + } else { + __ ldmdb(end_from, RegisterSet(R3, R10), writeback); + } + + __ subs_32(count, count, count_per_loop); + + if (prefetch_after) { + prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop); + } + + if (split_write) { + __ stmdb(end_to, RegisterSet(R7, R10), writeback); + __ stmdb(end_to, RegisterSet(R3, R6), writeback); + } else { + __ stmdb(end_to, RegisterSet(R3, R10), writeback); + } + + __ b(L_copy_loop, ge); + + if (prefetch_before) { + __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); + __ b(L_skip_pld, ge); + } } - - if (split_read) { - __ ldmdb(end_from, RegisterSet(R7, R10), writeback); - __ ldmdb(end_from, RegisterSet(R3, R6), writeback); - } else { - __ ldmdb(end_from, RegisterSet(R3, R10), writeback); - } - - __ subs_32(count, count, count_per_loop); - - if (prefetch_after) { - prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop); + BLOCK_COMMENT("Remaining bytes:"); + // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes + + // __ add(count, count, ...); // addition useless for the bit tests + assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); + + __ tst(count, 16 / bytes_per_count); + __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes + __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne); + + __ tst(count, 8 / bytes_per_count); + __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes + __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne); + + if (bytes_per_count <= 4) { + __ tst(count, 4 / bytes_per_count); + __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes + __ str(R3, Address(end_to, -4, pre_indexed), ne); } - if (split_write) { - __ stmdb(end_to, RegisterSet(R7, R10), writeback); - __ stmdb(end_to, RegisterSet(R3, R6), writeback); - } else { - __ stmdb(end_to, RegisterSet(R3, R10), writeback); + if (bytes_per_count <= 2) { + __ tst(count, 2 / bytes_per_count); + __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes + __ strh(R3, Address(end_to, -2, pre_indexed), ne); } - __ b(L_copy_loop, ge); - - if (prefetch_before) { - __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); - __ b(L_skip_pld, ge); + if (bytes_per_count == 1) { + __ tst(count, 1); + __ ldrb(R3, Address(end_from, -1, pre_indexed), ne); + __ strb(R3, Address(end_to, -1, pre_indexed), ne); } } - BLOCK_COMMENT("Remaining bytes:"); - // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes - - // __ add(count, count, ...); // addition useless for the bit tests - assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); - - __ tst(count, 16 / bytes_per_count); - __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes - __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne); - - __ tst(count, 8 / bytes_per_count); - __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes - __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne); - - if (bytes_per_count <= 4) { - __ tst(count, 4 / 
bytes_per_count); - __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes - __ str(R3, Address(end_to, -4, pre_indexed), ne); - } - - if (bytes_per_count <= 2) { - __ tst(count, 2 / bytes_per_count); - __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes - __ strh(R3, Address(end_to, -2, pre_indexed), ne); - } - - if (bytes_per_count == 1) { - __ tst(count, 1); - __ ldrb(R3, Address(end_from, -1, pre_indexed), ne); - __ strb(R3, Address(end_to, -1, pre_indexed), ne); - } - __ pop(RegisterSet(R4,R10)); return count_per_loop; @@ -1749,17 +1756,21 @@ // // Notes: // shifts 'from' and 'to' - void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) { + void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry, bool unsafe_copy = false) { assert_different_registers(from, to, count, tmp); - __ align(OptoLoopAlignment); - Label L_small_loop; - __ BIND(L_small_loop); - store_one(tmp, to, bytes_per_count, forward, al, tmp2); - __ BIND(entry); // entry point - __ subs(count, count, 1); - load_one(tmp, from, bytes_per_count, forward, ge, tmp2); - __ b(L_small_loop, ge); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + __ align(OptoLoopAlignment); + Label L_small_loop; + __ BIND(L_small_loop); + store_one(tmp, to, bytes_per_count, forward, al, tmp2); + __ BIND(entry); // entry point + __ subs(count, count, 1); + load_one(tmp, from, bytes_per_count, forward, ge, tmp2); + __ b(L_small_loop, ge); + } } // Aligns 'to' by reading one word from 'from' and writting its part to 'to'. @@ -1876,7 +1887,7 @@ // // Scratches 'from', 'count', R3 and R12. // R4-R10 saved for use. - int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) { + int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward, bool unsafe_copy = false) { const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect @@ -1886,60 +1897,64 @@ // then the remainder of 'to' divided by wordSize is one of elements of {seq}. 
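Two details of these ARM helpers are easy to miss: the new unsafe_copy parameter defaults to false, with generate_primitive_copy being the caller shown here passing !aligned, and each mark is opened after the push(RegisterSet(R4,R10)) and closed before the matching pop, so with the "continue after ucm" flavour the register-restoring epilogue still runs after a fault. A compilable structural sketch of that nesting (illustration only, not the generator code):

#include <cstdio>

// Structural sketch: registers are saved and restored outside the marked
// range, the copy loop sits inside it, and the new parameter defaults to
// false so call sites that do not care about page faults need no change.
struct DemoGenerator {
    void push_saved_regs() { puts("push R4-R10"); }
    void pop_saved_regs()  { puts("pop  R4-R10"); }
    void emit_copy_loop()  { puts("copy loop (may fault)"); }

    int generate_forward_aligned_copy_loop(bool unsafe_copy = false) {
        (void)unsafe_copy;          // would be forwarded to the mark in the real helper
        push_saved_regs();
        {
            // cf. UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
            // a fault in here resumes after this scope, so the pop below still runs
            emit_copy_loop();
        }
        pop_saved_regs();
        return 0;                   // the real helper returns its minimal copy count
    }
};

int main() {
    DemoGenerator g;
    g.generate_forward_aligned_copy_loop();       // pre-existing call shape, no new argument
    g.generate_forward_aligned_copy_loop(true);   // cf. passing !aligned from generate_primitive_copy
}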
__ push(RegisterSet(R4,R10)); - load_one(Rval, from, wordSize, forward); - - switch (bytes_per_count) { - case 2: - min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - break; - case 1: - { - Label L1, L2, L3; - int min_copy1, min_copy2, min_copy3; - - Label L_loop_finished; - - if (forward) { - __ tbz(to, 0, L2); - __ tbz(to, 1, L1); - - __ BIND(L3); - min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L1); - min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - } else { - __ tbz(to, 0, L2); - __ tbnz(to, 1, L3); - - __ BIND(L1); - min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L3); - min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + load_one(Rval, from, wordSize, forward); + + switch (bytes_per_count) { + case 2: + min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + break; + case 1: + { + Label L1, L2, L3; + int min_copy1, min_copy2, min_copy3; + + Label L_loop_finished; + + if (forward) { + __ tbz(to, 0, L2); + __ tbz(to, 1, L1); + + __ BIND(L3); + min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L1); + min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L2); + min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + } else { + __ tbz(to, 0, L2); + __ tbnz(to, 1, L3); + + __ BIND(L1); + min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L3); + min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L2); + min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + } + + min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3); + + __ BIND(L_loop_finished); + + break; } - - min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3); - - __ BIND(L_loop_finished); - - break; + default: + ShouldNotReachHere(); + break; } - default: - ShouldNotReachHere(); - break; } - __ pop(RegisterSet(R4,R10)); return min_copy; @@ -1963,6 +1978,13 @@ } #endif // !PRODUCT + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + __ mov(R0, 0); + __ ret(); + return start_pc; + } + // // Generate stub for primitive array copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. @@ -2033,8 +2055,13 @@ from_is_aligned = true; } - int count_required_to_align = from_is_aligned ? 
0 : align_src(from, to, count, tmp1, bytes_per_count, forward); - assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count"); + int count_required_to_align = 0; + { + // UnsafeCopyMemoryMark page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward); + assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count"); + } // now 'from' is aligned @@ -2064,9 +2091,9 @@ int min_copy; if (forward) { - min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count); + min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/); } else { - min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count); + min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/); } assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count"); @@ -2077,7 +2104,7 @@ __ ret(); { - copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */); + copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */, !aligned /*add UnsafeCopyMemory entry*/); if (status) { __ mov(R0, 0); // OK @@ -2088,7 +2115,7 @@ if (! to_is_aligned) { __ BIND(L_unaligned_dst); - int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward); + int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward, !aligned /*add UnsafeCopyMemory entry*/); assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count"); if (status) { @@ -2873,6 +2900,9 @@ status = true; // generate a status compatible with C1 calls #endif + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + // these need always status in case they are called from generic_arraycopy StubRoutines::_jbyte_disjoint_arraycopy = generate_primitive_copy(false, "jbyte_disjoint_arraycopy", true, 1, true); StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(false, "jshort_disjoint_arraycopy", true, 2, true); @@ -3055,6 +3085,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 32 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/ppc/stubGenerator_ppc.cpp --- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -952,6 +952,20 @@ // need to copy backwards } + // This is common errorexit stub for UnsafeCopyMemory. + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + Register tmp1 = R6_ARG4; + // probably copy stub would have changed value reset it. 
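Before the ppc error-exit stub continues below, note the wiring order that the arm hunk above establishes and that the ppc hunk repeats further down: StubGenerator_generate lazily creates the pc-range table (UCM_TABLE_MAX_ENTRIES is 8 on aarch64 and ppc, 32 on arm), the common error-exit stub is generated and registered through UnsafeCopyMemory::set_common_exit_stub_pc, and only then are the arraycopy stubs emitted, so the marks recorded while emitting them have a registered exit to refer to. A toy model of that ordering, with invented types:

#include <cstdio>
#include <utility>
#include <vector>

// Toy model of the initialization order visible in the patch. All types here
// are invented for the illustration; only the ordering is the point.
struct DemoUCM {
    static std::vector<std::pair<int,int>>* table;
    static int common_exit_pc;
    static void create_table(int /*max_entries*/) {
        if (table == nullptr) table = new std::vector<std::pair<int,int>>();
    }
    static void set_common_exit_stub_pc(int pc) { common_exit_pc = pc; }
};
std::vector<std::pair<int,int>>* DemoUCM::table = nullptr;
int DemoUCM::common_exit_pc = 0;

int main() {
    const int kMaxEntries = 32;                 // cf. UCM_TABLE_MAX_ENTRIES (8 on aarch64/ppc, 32 on arm)
    if (DemoUCM::table == nullptr) {            // cf. the NULL check in StubGenerator_generate
        DemoUCM::create_table(kMaxEntries);
    }
    DemoUCM::set_common_exit_stub_pc(0x1000);   // the error-exit stub is generated and registered first ...
    printf("exit stub at %#x, table ready = %d\n",
           DemoUCM::common_exit_pc, DemoUCM::table != nullptr);
    // ... and only then are the arraycopy stubs generated, recording entries into the table.
}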
+ if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp1, VM_Version::_dscr_val); + __ mtdscr(tmp1); + } + __ li(R3_RET, 0); // return 0 + __ blr(); + return start_pc; + } + // The guideline in the implementations of generate_disjoint_xxx_copy // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with // single instructions, but to avoid alignment interrupts (see subsequent @@ -989,150 +1003,154 @@ VectorSRegister tmp_vsr2 = VSR2; Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10; - - // Don't try anything fancy if arrays don't have many elements. - __ li(tmp3, 0); - __ cmpwi(CCR0, R5_ARG3, 17); - __ ble(CCR0, l_6); // copy 4 at a time - - if (!aligned) { - __ xorr(tmp1, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp1, 3); - __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy. - - // Copy elements if necessary to align to 4 bytes. - __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary. - __ andi_(tmp1, tmp1, 3); - __ beq(CCR0, l_2); - - __ subf(R5_ARG3, tmp1, R5_ARG3); - __ bind(l_9); - __ lbz(tmp2, 0, R3_ARG1); - __ addic_(tmp1, tmp1, -1); - __ stb(tmp2, 0, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 1); - __ addi(R4_ARG2, R4_ARG2, 1); - __ bne(CCR0, l_9); - - __ bind(l_2); - } - - // copy 8 elements at a time - __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8 - __ andi_(tmp1, tmp2, 7); - __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8 - - // copy a 2-element word if necessary to align to 8 bytes - __ andi_(R0, R3_ARG1, 7); - __ beq(CCR0, l_7); - - __ lwzx(tmp2, R3_ARG1, tmp3); - __ addi(R5_ARG3, R5_ARG3, -4); - __ stwx(tmp2, R4_ARG2, tmp3); - { // FasterArrayCopy - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - __ bind(l_7); - - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 31); - __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain - - __ srdi(tmp1, R5_ARG3, 5); - __ andi_(R5_ARG3, R5_ARG3, 31); - __ mtctr(tmp1); - - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 32 elements a time) - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. - - // Prefetch the data into the L2 cache. - __ dcbt(R3_ARG1, 0); - - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + // Don't try anything fancy if arrays don't have many elements. + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 17); + __ ble(CCR0, l_6); // copy 4 at a time + + if (!aligned) { + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp1, 3); + __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy. + + // Copy elements if necessary to align to 4 bytes. + __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary. 
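The "Compute distance to alignment boundary" step here (the neg just above together with the andi_(tmp1, tmp1, 3) that follows) is the usual trick for finding how many leading bytes must be copied before the source pointer reaches the next 4-byte boundary. The same arithmetic in C++, shown only to make it explicit:

#include <cstdint>
#include <cstdio>

// Two's complement of the address, masked to the low bits: the number of
// bytes until the next 'alignment'-byte boundary (alignment must be a power of two).
static inline size_t bytes_until_aligned(uintptr_t p, size_t alignment) {
    return static_cast<size_t>(-p) & (alignment - 1);
}

int main() {
    printf("%zu %zu %zu\n",
           bytes_until_aligned(0x1000, 4),   // already aligned -> 0
           bytes_until_aligned(0x1001, 4),   // -> 3
           bytes_until_aligned(0x1003, 4));  // -> 1
}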
+ __ andi_(tmp1, tmp1, 3); + __ beq(CCR0, l_2); + + __ subf(R5_ARG3, tmp1, R5_ARG3); + __ bind(l_9); + __ lbz(tmp2, 0, R3_ARG1); + __ addic_(tmp1, tmp1, -1); + __ stb(tmp2, 0, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 1); + __ addi(R4_ARG2, R4_ARG2, 1); + __ bne(CCR0, l_9); + + __ bind(l_2); + } + + // copy 8 elements at a time + __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8 + __ andi_(tmp1, tmp2, 7); + __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8 + + // copy a 2-element word if necessary to align to 8 bytes + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -4); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); } - - __ li(tmp1, 16); - - // Backbranch target aligned to 32-byte. Not 16-byte align as - // loop contains < 8 instructions that fit inside a single - // i-cache sector. - __ align(32); - - __ bind(l_10); - // Use loop with VSX load/store instructions to - // copy 32 elements a time. - __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst - __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 - __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 - __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 - __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 - __ bdnz(l_10); // Dec CTR and loop if not zero. - - // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); + __ bind(l_7); + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 31); + __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain + + __ srdi(tmp1, R5_ARG3, 5); + __ andi_(R5_ARG3, R5_ARG3, 31); + __ mtctr(tmp1); + + if (!VM_Version::has_vsx()) { + + __ bind(l_8); + // Use unrolled version for mass copying (copy 32 elements a time) + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + + } else { // Processor supports VSX, so use it to mass copy. + + // Prefetch the data into the L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. Not 16-byte align as + // loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_10); + // Use loop with VSX load/store instructions to + // copy 32 elements a time. + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 + __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 + __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 + __ bdnz(l_10); // Dec CTR and loop if not zero. + + // Restore DSCR pre-fetch value. 
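The DSCR handling around the VSX loops is worth spelling out: the copy raises the prefetch depth for the duration of the bulk loop (_dscr_val | 7), and the reload that follows this comment puts the default value back. Because a page fault can now leave such a loop early through the UnsafeCopyMemory machinery, the ppc generate_unsafecopy_common_error_exit added earlier in this patch performs the same reload, which is what its "probably copy stub would have changed value reset it" comment refers to. A toy model of restoring the setting on both the normal and the error path (invented names):

#include <cstdio>

// Toy model: a prefetch-depth setting raised for the duration of a bulk copy
// and restored on the normal path and on the error path alike, the role the
// DSCR reload plays after the VSX loop and in the common error exit.
static int demo_dscr = 0;                 // 0 stands for the default depth in this model

static void demo_bulk_copy(bool fault) {
    demo_dscr = 7;                        // cf. load_const_optimized(tmp2, _dscr_val | 7); mtdscr(tmp2)
    if (fault) {
        demo_dscr = 0;                    // cf. the reload in generate_unsafecopy_common_error_exit
        return;                           // cf. the error-exit stub returning 0
    }
    /* ... the VSX-style bulk copy loop would run here ... */
    demo_dscr = 0;                        // cf. the reload after the loop
}

int main() {
    demo_bulk_copy(false);
    printf("after normal copy, depth = %d\n", demo_dscr);
    demo_bulk_copy(true);
    printf("after faulted copy, depth = %d\n", demo_dscr);
}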
+ if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + + } // VSX + } // FasterArrayCopy + + __ bind(l_6); + + // copy 4 elements at a time + __ cmpwi(CCR0, R5_ARG3, 4); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 2); + __ mtctr(tmp1); // is > 0 + __ andi_(R5_ARG3, R5_ARG3, 3); + + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); } - } // VSX - } // FasterArrayCopy - - __ bind(l_6); - - // copy 4 elements at a time - __ cmpwi(CCR0, R5_ARG3, 4); - __ blt(CCR0, l_1); - __ srdi(tmp1, R5_ARG3, 2); - __ mtctr(tmp1); // is > 0 - __ andi_(R5_ARG3, R5_ARG3, 3); - - { // FasterArrayCopy - __ addi(R3_ARG1, R3_ARG1, -4); - __ addi(R4_ARG2, R4_ARG2, -4); - __ bind(l_3); - __ lwzu(tmp2, 4, R3_ARG1); - __ stwu(tmp2, 4, R4_ARG2); - __ bdnz(l_3); - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - - // do single element copy - __ bind(l_1); - __ cmpwi(CCR0, R5_ARG3, 0); - __ beq(CCR0, l_4); - - { // FasterArrayCopy - __ mtctr(R5_ARG3); - __ addi(R3_ARG1, R3_ARG1, -1); - __ addi(R4_ARG2, R4_ARG2, -1); - - __ bind(l_5); - __ lbzu(tmp2, 1, R3_ARG1); - __ stbu(tmp2, 1, R4_ARG2); - __ bdnz(l_5); + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -1); + __ addi(R4_ARG2, R4_ARG2, -1); + + __ bind(l_5); + __ lbzu(tmp2, 1, R3_ARG1); + __ stbu(tmp2, 1, R4_ARG2); + __ bdnz(l_5); + } } __ bind(l_4); @@ -1167,15 +1185,17 @@ // Do reverse copy. We assume the case of actual overlap is rare enough // that we don't have to optimize it. Label l_1, l_2; - - __ b(l_2); - __ bind(l_1); - __ stbx(tmp1, R4_ARG2, R5_ARG3); - __ bind(l_2); - __ addic_(R5_ARG3, R5_ARG3, -1); - __ lbzx(tmp1, R3_ARG1, R5_ARG3); - __ bge(CCR0, l_1); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + __ b(l_2); + __ bind(l_1); + __ stbx(tmp1, R4_ARG2, R5_ARG3); + __ bind(l_2); + __ addic_(R5_ARG3, R5_ARG3, -1); + __ lbzx(tmp1, R3_ARG1, R5_ARG3); + __ bge(CCR0, l_1); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1252,155 +1272,159 @@ assert_positive_int(R5_ARG3); Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9; - - // don't try anything fancy if arrays don't have many elements - __ li(tmp3, 0); - __ cmpwi(CCR0, R5_ARG3, 9); - __ ble(CCR0, l_6); // copy 2 at a time - - if (!aligned) { - __ xorr(tmp1, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp1, 3); - __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy - - // At this point it is guaranteed that both, from and to have the same alignment mod 4. - - // Copy 1 element if necessary to align to 4 bytes. - __ andi_(tmp1, R3_ARG1, 3); - __ beq(CCR0, l_2); - - __ lhz(tmp2, 0, R3_ARG1); - __ addi(R3_ARG1, R3_ARG1, 2); - __ sth(tmp2, 0, R4_ARG2); - __ addi(R4_ARG2, R4_ARG2, 2); - __ addi(R5_ARG3, R5_ARG3, -1); - __ bind(l_2); - - // At this point the positions of both, from and to, are at least 4 byte aligned. - - // Copy 4 elements at a time. - // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. 
- __ xorr(tmp2, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp2, 7); - __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned - - // Copy a 2-element word if necessary to align to 8 bytes. - __ andi_(R0, R3_ARG1, 7); - __ beq(CCR0, l_7); - - __ lwzx(tmp2, R3_ARG1, tmp3); - __ addi(R5_ARG3, R5_ARG3, -2); - __ stwx(tmp2, R4_ARG2, tmp3); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + // don't try anything fancy if arrays don't have many elements + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 9); + __ ble(CCR0, l_6); // copy 2 at a time + + if (!aligned) { + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp1, 3); + __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi_(tmp1, R3_ARG1, 3); + __ beq(CCR0, l_2); + + __ lhz(tmp2, 0, R3_ARG1); + __ addi(R3_ARG1, R3_ARG1, 2); + __ sth(tmp2, 0, R4_ARG2); + __ addi(R4_ARG2, R4_ARG2, 2); + __ addi(R5_ARG3, R5_ARG3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(tmp2, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp2, 7); + __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -2); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 15); + __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain + + __ srdi(tmp1, R5_ARG3, 4); + __ andi_(R5_ARG3, R5_ARG3, 15); + __ mtctr(tmp1); + + if (!VM_Version::has_vsx()) { + + __ bind(l_8); + // Use unrolled version for mass copying (copy 16 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + + } else { // Processor supports VSX, so use it to mass copy. + + // Prefetch src data into L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. It's not aligned 16-byte + // as loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_9); + // Use loop with VSX load/store instructions to + // copy 16 elements a time. + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load from src. + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst. + __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. 
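For reference, the VSX loop around this point moves 32 bytes per iteration as two 16-byte lxvd2x/stxvd2x pairs, counting iterations down in CTR and leaving the sub-32-byte tail to the element-sized loops that follow. A portable C++ analogue of just the data movement (prefetch, DSCR tuning and CTR counting have no equivalent here):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Move 32 bytes per iteration as two 16-byte chunks, then handle the tail.
static void demo_copy_32_per_iter(const uint8_t* src, uint8_t* dst, size_t bytes) {
    size_t iters = bytes / 32;
    for (size_t i = 0; i < iters; ++i) {
        std::memcpy(dst,      src,      16);   // cf. lxvd2x/stxvd2x of the first 16 bytes
        std::memcpy(dst + 16, src + 16, 16);   // cf. lxvd2x/stxvd2x of src + 16 / dst + 16
        src += 32;
        dst += 32;
    }
    std::memcpy(dst, src, bytes % 32);          // tail, handled by smaller loops in the stub
}

int main() {
    uint8_t a[100], b[100] = {0};
    for (int i = 0; i < 100; ++i) a[i] = static_cast<uint8_t>(i);
    demo_copy_32_per_iter(a, b, sizeof a);
    printf("b[99] = %d\n", b[99]);   // 99
}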
+ __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. + __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32. + __ bdnz(l_9); // Dec CTR and loop if not zero. + + // Restore DSCR pre-fetch value. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + + } + } // FasterArrayCopy + __ bind(l_6); + + // copy 2 elements at a time + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 2); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 1); + __ andi_(R5_ARG3, R5_ARG3, 1); + + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ mtctr(tmp1); + + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + __ addi(R3_ARG1, R3_ARG1, 4); __ addi(R4_ARG2, R4_ARG2, 4); } + + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -2); + __ addi(R4_ARG2, R4_ARG2, -2); + + __ bind(l_5); + __ lhzu(tmp2, 2, R3_ARG1); + __ sthu(tmp2, 2, R4_ARG2); + __ bdnz(l_5); + } } - __ bind(l_7); - - // Copy 4 elements at a time; either the loads or the stores can - // be unaligned if aligned == false. - - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 15); - __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain - - __ srdi(tmp1, R5_ARG3, 4); - __ andi_(R5_ARG3, R5_ARG3, 15); - __ mtctr(tmp1); - - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 16 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. - - // Prefetch src data into L2 cache. - __ dcbt(R3_ARG1, 0); - - // If supported set DSCR pre-fetch to deepest. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } - __ li(tmp1, 16); - - // Backbranch target aligned to 32-byte. It's not aligned 16-byte - // as loop contains < 8 instructions that fit inside a single - // i-cache sector. - __ align(32); - - __ bind(l_9); - // Use loop with VSX load/store instructions to - // copy 16 elements a time. - __ lxvd2x(tmp_vsr1, R3_ARG1); // Load from src. - __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst. - __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. - __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. - __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. - __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32. - __ bdnz(l_9); // Dec CTR and loop if not zero. - - // Restore DSCR pre-fetch value. 
- if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - - } - } // FasterArrayCopy - __ bind(l_6); - - // copy 2 elements at a time - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 2); - __ blt(CCR0, l_1); - __ srdi(tmp1, R5_ARG3, 1); - __ andi_(R5_ARG3, R5_ARG3, 1); - - __ addi(R3_ARG1, R3_ARG1, -4); - __ addi(R4_ARG2, R4_ARG2, -4); - __ mtctr(tmp1); - - __ bind(l_3); - __ lwzu(tmp2, 4, R3_ARG1); - __ stwu(tmp2, 4, R4_ARG2); - __ bdnz(l_3); - - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - - // do single element copy - __ bind(l_1); - __ cmpwi(CCR0, R5_ARG3, 0); - __ beq(CCR0, l_4); - - { // FasterArrayCopy - __ mtctr(R5_ARG3); - __ addi(R3_ARG1, R3_ARG1, -2); - __ addi(R4_ARG2, R4_ARG2, -2); - - __ bind(l_5); - __ lhzu(tmp2, 2, R3_ARG1); - __ sthu(tmp2, 2, R4_ARG2); - __ bdnz(l_5); - } __ bind(l_4); __ li(R3_RET, 0); // return 0 __ blr(); @@ -1432,15 +1456,18 @@ array_overlap_test(nooverlap_target, 1); Label l_1, l_2; - __ sldi(tmp1, R5_ARG3, 1); - __ b(l_2); - __ bind(l_1); - __ sthx(tmp2, R4_ARG2, tmp1); - __ bind(l_2); - __ addic_(tmp1, tmp1, -2); - __ lhzx(tmp2, R3_ARG1, tmp1); - __ bge(CCR0, l_1); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + __ sldi(tmp1, R5_ARG3, 1); + __ b(l_2); + __ bind(l_1); + __ sthx(tmp2, R4_ARG2, tmp1); + __ bind(l_2); + __ addic_(tmp1, tmp1, -2); + __ lhzx(tmp2, R3_ARG1, tmp1); + __ bge(CCR0, l_1); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1588,7 +1615,11 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ function_entry(); assert_positive_int(R5_ARG3); - generate_disjoint_int_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_int_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); return start; @@ -1736,8 +1767,11 @@ STUB_ENTRY(jint_disjoint_arraycopy); array_overlap_test(nooverlap_target, 2); - - generate_conjoint_int_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_int_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1859,11 +1893,15 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ function_entry(); assert_positive_int(R5_ARG3); - generate_disjoint_long_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_long_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); - return start; + return start; } // Generate core code for conjoint long copy (and oop copy on @@ -1986,8 +2024,11 @@ STUB_ENTRY(jlong_disjoint_arraycopy); array_overlap_test(nooverlap_target, 3); - generate_conjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_long_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -3008,6 +3049,9 @@ // Note: the disjoint stubs must be generated first, some of // the conjoint stubs use them. 
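The conjoint stubs just mentioned all start with array_overlap_test, which branches to the corresponding disjoint stub when the ranges cannot interfere; the conjoint bodies themselves copy from the end, as the "Do reverse copy" comments above note, because a forward copy into an overlapping destination would read bytes it has already overwritten. A small self-contained illustration of that choice:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Copy backwards when the destination starts inside the source range,
// the same decision the conjoint stubs make with array_overlap_test.
static void demo_conjoint_copy(const uint8_t* src, uint8_t* dst, size_t n) {
    if (dst > src && dst < src + n) {
        for (size_t i = n; i > 0; --i) dst[i - 1] = src[i - 1];   // backward, like the lbzx/stbx loop
    } else {
        for (size_t i = 0; i < n; ++i) dst[i] = src[i];           // forward is safe
    }
}

int main() {
    uint8_t buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    demo_conjoint_copy(buf, buf + 2, 6);            // overlapping shift by two elements
    printf("%d %d %d\n", buf[2], buf[3], buf[7]);   // 0 1 5
}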
+ address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + // non-aligned disjoint versions StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); @@ -3579,6 +3623,10 @@ } }; +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/sparc/stubGenerator_sparc.cpp --- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1076,6 +1076,17 @@ __ delayed()->add(end_from, left_shift, end_from); // restore address } + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + if (UseBlockCopy) { + __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT); + __ membar(Assembler::StoreLoad); + } + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start_pc; + } + // // Generate stub for disjoint byte copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. @@ -1107,61 +1118,66 @@ BLOCK_COMMENT("Entry:"); } - // for short arrays, just do single element copy - __ cmp(count, 23); // 16 + 7 - __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); - __ delayed()->mov(G0, offset); - - if (aligned) { - // 'aligned' == true when it is known statically during compilation - // of this arraycopy call site that both 'from' and 'to' addresses - // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). - // - // Aligned arrays have 4 bytes alignment in 32-bits VM - // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM - // - } else { - // copy bytes to align 'to' on 8 byte boundary - __ andcc(to, 7, G1); // misaligned bytes - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->neg(G1); - __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment - __ sub(count, G1, count); - __ BIND(L_align); - __ ldub(from, 0, O3); - __ deccc(G1); - __ inc(from); - __ stb(O3, to, 0); - __ br(Assembler::notZero, false, Assembler::pt, L_align); - __ delayed()->inc(to); - __ BIND(L_skip_alignment); - } - if (!aligned) { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise fall through to the next - // code for aligned copy. - // The compare above (count >= 23) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. - - copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); - } - - // Both array are 8 bytes aligned, copy 16 bytes at a time + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 
+ // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM + // + } else { + // copy bytes to align 'to' on 8 byte boundary + __ andcc(to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->neg(G1); + __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment + __ sub(count, G1, count); + __ BIND(L_align); + __ ldub(from, 0, O3); + __ deccc(G1); + __ inc(from); + __ stb(O3, to, 0); + __ br(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->inc(to); + __ BIND(L_skip_alignment); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 23) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); + } + + // Both array are 8 bytes aligned, copy 16 bytes at a time __ and3(count, 7, G4); // Save count __ srl(count, 3, count); - generate_disjoint_long_copy_core(aligned); + generate_disjoint_long_copy_core(aligned); __ mov(G4, count); // Restore count - // copy tailing bytes - __ BIND(L_copy_byte); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_byte_loop); - __ ldub(from, offset, O3); - __ deccc(count); - __ stb(O3, to, offset); - __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); - __ delayed()->inc(offset); + // copy tailing bytes + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ ldub(from, offset, O3); + __ deccc(count); + __ stb(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->inc(offset); + } __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1207,70 +1223,75 @@ array_overlap_test(nooverlap_target, 0); - __ add(to, count, end_to); // offset after last copied element - - // for short arrays, just do single element copy - __ cmp(count, 23); // 16 + 7 - __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); - __ delayed()->add(from, count, end_from); - { - // Align end of arrays since they could be not aligned even - // when arrays itself are aligned. - - // copy bytes to align 'end_to' on 8 byte boundary - __ andcc(end_to, 7, G1); // misaligned bytes - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->nop(); - __ sub(count, G1, count); - __ BIND(L_align); - __ dec(end_from); - __ dec(end_to); - __ ldub(end_from, 0, O3); - __ deccc(G1); - __ brx(Assembler::notZero, false, Assembler::pt, L_align); - __ delayed()->stb(O3, end_to, 0); - __ BIND(L_skip_alignment); + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ add(to, count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->add(from, count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. 
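The sparc byte-copy stubs above and below share one plan: counts below 23 elements are copied one at a time, otherwise a short head copy aligns the destination to 8 bytes, the bulk moves in 16-byte chunks (with a shifting variant when source and destination disagree modulo 8), and a byte loop handles the tail. Restated in portable C++ to make the shape visible (the shifting variant has no simple portable analogue and is omitted):

#include <cstdint>
#include <cstdio>
#include <cstring>

static void demo_byte_copy(const uint8_t* from, uint8_t* to, size_t count) {
    if (count < 23) {                                    // cf. cmp(count, 23)  // 16 + 7
        for (size_t i = 0; i < count; ++i) to[i] = from[i];
        return;
    }
    // head: bytes until 'to' is 8-byte aligned (cf. the andcc(to, 7, G1) prologue)
    size_t head = static_cast<size_t>(-reinterpret_cast<uintptr_t>(to)) & 7;
    for (size_t i = 0; i < head; ++i) *to++ = *from++;
    count -= head;
    size_t chunks = count / 16;
    for (size_t i = 0; i < chunks; ++i) {                // cf. the 16-bytes-at-a-time main loop
        std::memcpy(to, from, 16);
        to += 16; from += 16;
    }
    for (size_t i = 0; i < count % 16; ++i) to[i] = from[i];   // tail bytes
}

int main() {
    uint8_t a[64], b[64] = {0};
    for (int i = 0; i < 64; ++i) a[i] = static_cast<uint8_t>(i);
    demo_byte_copy(a, b, sizeof a);
    printf("%d %d\n", b[0], b[63]);   // 0 63
}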
+ + // copy bytes to align 'end_to' on 8 byte boundary + __ andcc(end_to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->nop(); + __ sub(count, G1, count); + __ BIND(L_align); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O3); + __ deccc(G1); + __ brx(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->stb(O3, end_to, 0); + __ BIND(L_skip_alignment); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 16); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and substracting 16 from 'count' before jump). + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, + L_aligned_copy, L_copy_byte); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 16); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 16); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->stb(O4, end_to, 0); } - if (aligned) { - // Both arrays are aligned to 8-bytes in 64-bits VM. - // The 'count' is decremented in copy_16_bytes_backward_with_shift() - // in unaligned case. - __ dec(count, 16); - } else { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise jump to the next - // code for aligned copy (and substracting 16 from 'count' before jump). - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. 
- - copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, - L_aligned_copy, L_copy_byte); - } - // copy 4 elements (16 bytes) at a time - __ align(OptoLoopAlignment); - __ BIND(L_aligned_copy); - __ dec(end_from, 16); - __ ldx(end_from, 8, O3); - __ ldx(end_from, 0, O4); - __ dec(end_to, 16); - __ deccc(count, 16); - __ stx(O3, end_to, 8); - __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); - __ delayed()->stx(O4, end_to, 0); - __ inc(count, 16); - - // copy 1 element (2 bytes) at a time - __ BIND(L_copy_byte); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_byte_loop); - __ dec(end_from); - __ dec(end_to); - __ ldub(end_from, 0, O4); - __ deccc(count); - __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); - __ delayed()->stb(O4, end_to, 0); __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1311,68 +1332,72 @@ BLOCK_COMMENT("Entry:"); } - // for short arrays, just do single element copy - __ cmp(count, 11); // 8 + 3 (22 bytes) - __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); - __ delayed()->mov(G0, offset); - - if (aligned) { - // 'aligned' == true when it is known statically during compilation - // of this arraycopy call site that both 'from' and 'to' addresses - // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). - // - // Aligned arrays have 4 bytes alignment in 32-bits VM - // and 8 bytes - in 64-bits VM. - // - } else { - // copy 1 element if necessary to align 'to' on an 4 bytes - __ andcc(to, 3, G0); - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->lduh(from, 0, O3); - __ inc(from, 2); - __ inc(to, 2); - __ dec(count); - __ sth(O3, to, -2); - __ BIND(L_skip_alignment); - - // copy 2 elements to align 'to' on an 8 byte boundary - __ andcc(to, 7, G0); - __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); - __ delayed()->lduh(from, 0, O3); - __ dec(count, 2); - __ lduh(from, 2, O4); - __ inc(from, 4); - __ inc(to, 4); - __ sth(O3, to, -4); - __ sth(O4, to, -2); - __ BIND(L_skip_alignment2); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. 
+ // + } else { + // copy 1 element if necessary to align 'to' on an 4 bytes + __ andcc(to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(from, 0, O3); + __ inc(from, 2); + __ inc(to, 2); + __ dec(count); + __ sth(O3, to, -2); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'to' on an 8 byte boundary + __ andcc(to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(from, 0, O3); + __ dec(count, 2); + __ lduh(from, 2, O4); + __ inc(from, 4); + __ inc(to, 4); + __ sth(O3, to, -4); + __ sth(O4, to, -2); + __ BIND(L_skip_alignment2); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); + } + + // Both array are 8 bytes aligned, copy 16 bytes at a time + __ and3(count, 3, G4); // Save + __ srl(count, 2, count); + generate_disjoint_long_copy_core(aligned); + __ mov(G4, count); // restore + + // copy 1 element at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_2_bytes_loop); + __ lduh(from, offset, O3); + __ deccc(count); + __ sth(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->inc(offset, 2); } - if (!aligned) { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise fall through to the next - // code for aligned copy. - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. - - copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); - } - - // Both array are 8 bytes aligned, copy 16 bytes at a time - __ and3(count, 3, G4); // Save - __ srl(count, 2, count); - generate_disjoint_long_copy_core(aligned); - __ mov(G4, count); // restore - - // copy 1 element at a time - __ BIND(L_copy_2_bytes); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_2_bytes_loop); - __ lduh(from, offset, O3); - __ deccc(count); - __ sth(O3, to, offset); - __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); - __ delayed()->inc(offset, 2); __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1639,79 +1664,83 @@ array_overlap_test(nooverlap_target, 1); - __ sllx(count, LogBytesPerShort, byte_count); - __ add(to, byte_count, end_to); // offset after last copied element - - // for short arrays, just do single element copy - __ cmp(count, 11); // 8 + 3 (22 bytes) - __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); - __ delayed()->add(from, byte_count, end_from); - { - // Align end of arrays since they could be not aligned even - // when arrays itself are aligned. 
- - // copy 1 element if necessary to align 'end_to' on an 4 bytes - __ andcc(end_to, 3, G0); - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->lduh(end_from, -2, O3); - __ dec(end_from, 2); - __ dec(end_to, 2); - __ dec(count); - __ sth(O3, end_to, 0); - __ BIND(L_skip_alignment); - - // copy 2 elements to align 'end_to' on an 8 byte boundary - __ andcc(end_to, 7, G0); - __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); - __ delayed()->lduh(end_from, -2, O3); - __ dec(count, 2); - __ lduh(end_from, -4, O4); - __ dec(end_from, 4); - __ dec(end_to, 4); - __ sth(O3, end_to, 2); - __ sth(O4, end_to, 0); - __ BIND(L_skip_alignment2); + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ sllx(count, LogBytesPerShort, byte_count); + __ add(to, byte_count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->add(from, byte_count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. + + // copy 1 element if necessary to align 'end_to' on an 4 bytes + __ andcc(end_to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(end_from, -2, O3); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ dec(count); + __ sth(O3, end_to, 0); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'end_to' on an 8 byte boundary + __ andcc(end_to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(end_from, -2, O3); + __ dec(count, 2); + __ lduh(end_from, -4, O4); + __ dec(end_from, 4); + __ dec(end_to, 4); + __ sth(O3, end_to, 2); + __ sth(O4, end_to, 0); + __ BIND(L_skip_alignment2); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 8); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and substracting 8 from 'count' before jump). + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, + L_aligned_copy, L_copy_2_bytes); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 8); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 8); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ BIND(L_copy_2_bytes_loop); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ lduh(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->sth(O4, end_to, 0); } - if (aligned) { - // Both arrays are aligned to 8-bytes in 64-bits VM. - // The 'count' is decremented in copy_16_bytes_backward_with_shift() - // in unaligned case. 
- __ dec(count, 8); - } else { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise jump to the next - // code for aligned copy (and substracting 8 from 'count' before jump). - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. - - copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, - L_aligned_copy, L_copy_2_bytes); - } - // copy 4 elements (16 bytes) at a time - __ align(OptoLoopAlignment); - __ BIND(L_aligned_copy); - __ dec(end_from, 16); - __ ldx(end_from, 8, O3); - __ ldx(end_from, 0, O4); - __ dec(end_to, 16); - __ deccc(count, 8); - __ stx(O3, end_to, 8); - __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); - __ delayed()->stx(O4, end_to, 0); - __ inc(count, 8); - - // copy 1 element (2 bytes) at a time - __ BIND(L_copy_2_bytes); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ BIND(L_copy_2_bytes_loop); - __ dec(end_from, 2); - __ dec(end_to, 2); - __ lduh(end_from, 0, O4); - __ deccc(count); - __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); - __ delayed()->sth(O4, end_to, 0); - __ BIND(L_exit); // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); @@ -1870,9 +1899,11 @@ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) BLOCK_COMMENT("Entry:"); } - - generate_disjoint_int_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_int_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); __ retl(); @@ -2005,9 +2036,11 @@ } array_overlap_test(nooverlap_target, 2); - - generate_conjoint_int_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_int_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); __ retl(); @@ -2156,8 +2189,11 @@ BLOCK_COMMENT("Entry:"); } - generate_disjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_disjoint_long_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); __ retl(); @@ -2232,9 +2268,11 @@ } array_overlap_test(nooverlap_target, 3); - - generate_conjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_conjoint_long_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); __ retl(); @@ -2929,6 +2967,9 @@ address entry_jlong_arraycopy; address entry_checkcast_arraycopy; + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + //*** jbyte // Always need aligned and unaligned versions StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, @@ -5821,6 +5862,10 @@ }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if 
(UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/x86/assembler_x86.cpp --- a/src/hotspot/cpu/x86/assembler_x86.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/x86/assembler_x86.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -789,6 +789,8 @@ case 0x59: // mulpd case 0x6E: // movd case 0x7E: // movd + case 0x6F: // movdq + case 0x7F: // movdq case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush case 0xFE: // paddd debug_only(has_disp32 = true); @@ -4274,6 +4276,7 @@ emit_operand(dst, src); emit_int8(mode & 0xFF); } + void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, ""); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/x86/stubGenerator_x86_32.cpp --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -889,91 +889,98 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - - __ subptr(to, from); // to --> to_from - __ cmpl(count, 2< to_from + __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); @@ -1079,104 +1086,112 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - // copy from high to low - __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); @@ -1212,23 +1227,30 @@ *entry = __ pc(); // Entry point from conjoint arraycopy stub. 
BLOCK_COMMENT("Entry:"); - __ subptr(to, from); // to --> to_from - if (VM_Version::supports_mmx()) { - if (UseXMMForArrayCopy) { - xmm_copy_forward(from, to_from, count); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, true, true); + __ subptr(to, from); // to --> to_from + if (VM_Version::supports_mmx()) { + if (UseXMMForArrayCopy) { + xmm_copy_forward(from, to_from, count); + } else { + mmx_copy_forward(from, to_from, count); + } } else { - mmx_copy_forward(from, to_from, count); + __ jmpb(L_copy_8_bytes); + __ align(OptoLoopAlignment); + __ BIND(L_copy_8_bytes_loop); + __ fild_d(Address(from, 0)); + __ fistp_d(Address(from, to_from, Address::times_1)); + __ addptr(from, 8); + __ BIND(L_copy_8_bytes); + __ decrement(count); + __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); } - } else { - __ jmpb(L_copy_8_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - __ fild_d(Address(from, 0)); - __ fistp_d(Address(from, to_from, Address::times_1)); - __ addptr(from, 8); - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); + } + if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { + __ emms(); } inc_copy_counter_np(T_LONG); __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -1267,26 +1289,31 @@ __ movptr(from, Address(rsp, 8)); // from __ jump_cc(Assembler::aboveEqual, nooverlap); - __ jmpb(L_copy_8_bytes); - - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - if (VM_Version::supports_mmx()) { - if (UseXMMForArrayCopy) { - __ movq(xmm0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), xmm0); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, true, true); + + __ jmpb(L_copy_8_bytes); + + __ align(OptoLoopAlignment); + __ BIND(L_copy_8_bytes_loop); + if (VM_Version::supports_mmx()) { + if (UseXMMForArrayCopy) { + __ movq(xmm0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), xmm0); + } else { + __ movq(mmx0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), mmx0); + } } else { - __ movq(mmx0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), mmx0); + __ fild_d(Address(from, count, Address::times_8)); + __ fistp_d(Address(to, count, Address::times_8)); } - } else { - __ fild_d(Address(from, count, Address::times_8)); - __ fistp_d(Address(to, count, Address::times_8)); + __ BIND(L_copy_8_bytes); + __ decrement(count); + __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); + } - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { __ emms(); } @@ -3945,7 +3972,10 @@ } }; // end class declaration - +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/cpu/x86/stubGenerator_x86_64.cpp --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1433,7 +1433,6 @@ __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords } - // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 
// ignored @@ -1482,51 +1481,55 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(byte_count, count); - __ shrptr(count, 3); // count => qword_count - - // Copy from low to high addresses. Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); // make the count negative - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(byte_count, 4); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - - __ addptr(end_from, 4); - __ addptr(end_to, 4); - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(byte_count, 2); - __ jccb(Assembler::zero, L_copy_byte); - __ movw(rax, Address(end_from, 8)); - __ movw(Address(end_to, 8), rax); - - __ addptr(end_from, 2); - __ addptr(end_to, 2); - - // Check for and copy trailing byte - __ BIND(L_copy_byte); - __ testl(byte_count, 1); - __ jccb(Assembler::zero, L_exit); - __ movb(rax, Address(end_from, 8)); - __ movb(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(byte_count, count); + __ shrptr(count, 3); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. 
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); // make the count negative + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(byte_count, 4); + __ jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + + __ addptr(end_from, 4); + __ addptr(end_to, 4); + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(byte_count, 2); + __ jccb(Assembler::zero, L_copy_byte); + __ movw(rax, Address(end_from, 8)); + __ movw(Address(end_to, 8), rax); + + __ addptr(end_from, 2); + __ addptr(end_to, 2); + + // Check for and copy trailing byte + __ BIND(L_copy_byte); + __ testl(byte_count, 1); + __ jccb(Assembler::zero, L_exit); + __ movb(rax, Address(end_from, 8)); + __ movb(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1534,10 +1537,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); - + { + UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -1582,41 +1587,44 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(byte_count, count); - __ shrptr(count, 3); // count => qword_count - - // Copy from high to low addresses. 
- - // Check for and copy trailing byte - __ testl(byte_count, 1); - __ jcc(Assembler::zero, L_copy_2_bytes); - __ movb(rax, Address(from, byte_count, Address::times_1, -1)); - __ movb(Address(to, byte_count, Address::times_1, -1), rax); - __ decrement(byte_count); // Adjust for possible trailing word - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(byte_count, 2); - __ jcc(Assembler::zero, L_copy_4_bytes); - __ movw(rax, Address(from, byte_count, Address::times_1, -2)); - __ movw(Address(to, byte_count, Address::times_1, -2), rax); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(byte_count, 4); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, qword_count, Address::times_8)); - __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(byte_count, count); + __ shrptr(count, 3); // count => qword_count + + // Copy from high to low addresses. + + // Check for and copy trailing byte + __ testl(byte_count, 1); + __ jcc(Assembler::zero, L_copy_2_bytes); + __ movb(rax, Address(from, byte_count, Address::times_1, -1)); + __ movb(Address(to, byte_count, Address::times_1, -1), rax); + __ decrement(byte_count); // Adjust for possible trailing word + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(byte_count, 2); + __ jcc(Assembler::zero, L_copy_4_bytes); + __ movw(rax, Address(from, byte_count, Address::times_1, -2)); + __ movw(Address(to, byte_count, Address::times_1, -2), rax); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(byte_count, 4); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, qword_count, Address::times_8)); + __ movl(Address(to, qword_count, Address::times_8), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1624,9 +1632,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1684,44 +1695,48 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(word_count, count); - __ shrptr(count, 2); // count => qword_count - - // Copy from low to high addresses. 
Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Original 'dest' is trashed, so we can't use it as a - // base register for a possible trailing word copy - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(word_count, 2); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - - __ addptr(end_from, 4); - __ addptr(end_to, 4); - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(word_count, 1); - __ jccb(Assembler::zero, L_exit); - __ movw(rax, Address(end_from, 8)); - __ movw(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(word_count, count); + __ shrptr(count, 2); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. + __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Original 'dest' is trashed, so we can't use it as a + // base register for a possible trailing word copy + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(word_count, 2); + __ jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + + __ addptr(end_from, 4); + __ addptr(end_to, 4); + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(word_count, 1); + __ jccb(Assembler::zero, L_exit); + __ movw(rax, Address(end_from, 8)); + __ movw(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1729,9 +1744,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); + { + UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -1798,33 +1816,36 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(word_count, count); - __ shrptr(count, 2); // count => qword_count - - // Copy from high to low addresses. Use 'to' as scratch. 
- - // Check for and copy trailing word - __ testl(word_count, 1); - __ jccb(Assembler::zero, L_copy_4_bytes); - __ movw(rax, Address(from, word_count, Address::times_2, -2)); - __ movw(Address(to, word_count, Address::times_2, -2), rax); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(word_count, 2); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, qword_count, Address::times_8)); - __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(word_count, count); + __ shrptr(count, 2); // count => qword_count + + // Copy from high to low addresses. Use 'to' as scratch. + + // Check for and copy trailing word + __ testl(word_count, 1); + __ jccb(Assembler::zero, L_copy_4_bytes); + __ movw(rax, Address(from, word_count, Address::times_2, -2)); + __ movw(Address(to, word_count, Address::times_2, -2), rax); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(word_count, 2); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, qword_count, Address::times_8)); + __ movl(Address(to, qword_count, Address::times_8), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1832,9 +1853,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1905,31 +1929,35 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, count); - // 'from', 'to' and 'count' are now valid - __ movptr(dword_count, count); - __ shrptr(count, 1); // count => qword_count - - // Copy from low to high addresses. Use 'to' as scratch. 
- __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 - __ jccb(Assembler::zero, L_exit); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(dword_count, count); + __ shrptr(count, 1); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. + __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 + __ jccb(Assembler::zero, L_exit); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); restore_arg_regs_using_thread(); inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free @@ -1938,9 +1966,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); + { + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -2001,26 +2032,29 @@ bs->arraycopy_prologue(_masm, decorators, type, from, to, count); assert_clean_int(count, rax); // Make sure 'count' is clean int. - // 'from', 'to' and 'count' are now valid - __ movptr(dword_count, count); - __ shrptr(count, 1); // count => qword_count - - // Copy from high to low addresses. Use 'to' as scratch. 
- - // Check for and copy trailing dword - __ testl(dword_count, 1); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, dword_count, Address::times_4, -4)); - __ movl(Address(to, dword_count, Address::times_4, -4), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(dword_count, count); + __ shrptr(count, 1); // count => qword_count + + // Copy from high to low addresses. Use 'to' as scratch. + + // Check for and copy trailing dword + __ testl(dword_count, 1); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, dword_count, Address::times_4, -4)); + __ movl(Address(to, dword_count, Address::times_4, -4), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } @@ -2031,8 +2065,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); @@ -2102,20 +2140,23 @@ BasicType type = is_oop ? T_OBJECT : T_LONG; BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count); - - // Copy from low to high addresses. Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + // Copy from low to high addresses. Use 'to' as scratch. 
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } else { @@ -2127,8 +2168,12 @@ __ ret(0); } - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count); @@ -2195,16 +2240,19 @@ BasicType type = is_oop ? T_OBJECT : T_LONG; BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count); - - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } else { @@ -2215,10 +2263,13 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } - - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count); restore_arg_regs_using_thread(); @@ -6036,6 +6087,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 16 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os/windows/os_windows.cpp --- a/src/hotspot/os/windows/os_windows.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os/windows/os_windows.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -2581,10 +2581,18 @@ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; } - if ((thread->thread_state() == _thread_in_vm && + + bool is_unsafe_arraycopy = (thread->thread_state() == _thread_in_native || in_java) && UnsafeCopyMemory::contains_pc(pc); + if (((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native || + is_unsafe_arraycopy) && thread->doing_unsafe_access()) || (nm != NULL && nm->has_unsafe_access())) { - return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, (address)Assembler::locate_next_instruction(pc))); + address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, next_pc)); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp --- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -441,8 +441,12 @@ // underlying file has been truncated. Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + 4; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Aix::ucontext_set_pc(uc, next_pc); return 1; @@ -461,9 +465,13 @@ stub = pc + 4; // continue with next instruction. goto run_stub; } - else if (thread->thread_state() == _thread_in_vm && + else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { address next_pc = pc + 4; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Aix::ucontext_set_pc(uc, next_pc); return 1; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp --- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -589,8 +589,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -659,10 +663,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ thread->doing_unsafe_access()) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp --- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -193,7 +193,8 @@ /*if (thread->thread_state() == _thread_in_Java) { ShouldNotCallThis(); } - else*/ if (thread->thread_state() == _thread_in_vm && + else*/ if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { ShouldNotCallThis(); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp --- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -419,8 +419,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + NativeCall::instruction_size; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -439,10 +443,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ thread->doing_unsafe_access()) { address next_pc = pc + NativeCall::instruction_size; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp --- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -384,7 +384,7 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + if ((nm != NULL && nm->has_unsafe_access()) || (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc))) { unsafe_access = true; } } else if (sig == SIGSEGV && @@ -398,7 +398,8 @@ // Zombie stub = SharedRuntime::get_handle_wrong_method_stub(); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { unsafe_access = true; } @@ -418,6 +419,9 @@ // any other suitable exception reason, // so assume it is an unsafe access. address next_pc = pc + Assembler::InstructionSize; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } #ifdef __thumb__ if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) { next_pc = (address)((intptr_t)next_pc | 0x1); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp --- a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -469,8 +469,12 @@ // underlying file has been truncated. Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + 4; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Linux::ucontext_set_pc(uc, next_pc); return true; @@ -485,11 +489,15 @@ // flushing of icache is not necessary. stub = pc + 4; // continue with next instruction. } - else if (thread->thread_state() == _thread_in_vm && + else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { address next_pc = pc + 4; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); - os::Linux::ucontext_set_pc(uc, pc + 4); + os::Linux::ucontext_set_pc(uc, next_pc); return true; } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp --- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -467,7 +467,8 @@ // when the vector facility is installed, but operating system support is missing. VM_Version::reset_has_VectorFacility(); stub = pc; // Continue with next instruction. - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { // We don't really need a stub here! Just set the pending exeption and // continue at the next instruction after the faulting read. Returning diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp --- a/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -385,7 +385,11 @@ // Do not crash the VM in such a case. 
CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + if (is_unsafe_arraycopy) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } *stub = SharedRuntime::handle_unsafe_access(thread, npc); return true; } @@ -550,8 +554,12 @@ } if (sig == SIGBUS && - thread->thread_state() == _thread_in_vm && + (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && thread->doing_unsafe_access()) { + if (UnsafeCopyMemory::contains_pc(pc)) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp --- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -435,8 +435,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -483,10 +487,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && - sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ - thread->doing_unsafe_access()) { + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && + (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access())) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp --- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -207,7 +207,8 @@ /*if (thread->thread_state() == _thread_in_Java) { ShouldNotCallThis(); } - else*/ if (thread->thread_state() == _thread_in_vm && + else*/ if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { ShouldNotCallThis(); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp --- a/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -436,8 +436,12 @@ } - if (thread->thread_state() == _thread_in_vm) { + if (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) { if (sig == SIGBUS && thread->doing_unsafe_access()) { + if (UnsafeCopyMemory::contains_pc(pc)) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); } } @@ -476,7 +480,11 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + if (is_unsafe_arraycopy) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp --- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -517,9 +517,13 @@ stub = VM_Version::cpuinfo_cont_addr(); } - if (thread->thread_state() == _thread_in_vm) { + if (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) { if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -536,8 +540,12 @@ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); if (cb != NULL) { CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/aot/aotCodeHeap.cpp --- a/src/hotspot/share/aot/aotCodeHeap.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/aot/aotCodeHeap.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,7 +38,6 @@ #include "memory/universe.hpp" #include "oops/compressedOops.hpp" #include "oops/method.inline.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" #include "runtime/os.hpp" #include "runtime/safepointVerifiers.hpp" @@ -734,7 +733,8 @@ } } if (marked > 0) { - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/aot/aotCompiledMethod.cpp --- a/src/hotspot/share/aot/aotCompiledMethod.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/aot/aotCompiledMethod.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -165,7 +165,7 @@ { // Enter critical section. Does not block for safepoint. - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (*_state_adr == new_state) { // another thread already performed this transition so nothing @@ -188,10 +188,12 @@ #endif // Remove AOTCompiledMethod from method. - if (method() != NULL) { - method()->unlink_code(this); + if (method() != NULL && (method()->code() == this || + method()->from_compiled_entry() == verified_entry_point())) { + HandleMark hm; + method()->clear_code(false /* already owns Patching_lock */); } - } // leave critical region under CompiledMethod_lock + } // leave critical region under Patching_lock if (TraceCreateZombies) { @@ -214,7 +216,7 @@ { // Enter critical section. Does not block for safepoint.
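The two AOTCompiledMethod state-transition hunks above and below share one locking idiom after the revert to Patching_lock: take the lock without a safepoint check, re-read the published state, and do nothing if another thread already performed the transition. A condensed sketch of that idiom, using only names from these hunks (logging and the actual state write are elided):

  {
    // Enter critical section. Does not block for safepoint.
    MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag);
    if (*_state_adr == new_state) {
      // Another thread already performed this transition, so there is nothing
      // to do (the real code returns here).
    } else {
      // Publish the new state and, while still holding Patching_lock, unlink
      // the method, e.g. method()->clear_code(false /* already owns Patching_lock */).
    }
  } // leave critical region under Patching_lock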
- MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (*_state_adr == in_use) { // another thread already performed this transition so nothing @@ -228,7 +230,7 @@ // Log the transition once log_state_change(); - } // leave critical region under CompiledMethod_lock + } // leave critical region under Patching_lock if (TraceCreateZombies) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/aot/aotCompiledMethod.hpp --- a/src/hotspot/share/aot/aotCompiledMethod.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/aot/aotCompiledMethod.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -176,7 +176,6 @@ state() == not_used; } virtual bool is_alive() const { return _is_alive(); } virtual bool is_in_use() const { return state() == in_use; } - virtual bool is_not_installed() const { return state() == not_installed; } virtual bool is_unloading() { return false; } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/ci/ciMethodData.cpp --- a/src/hotspot/share/ci/ciMethodData.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/ci/ciMethodData.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -81,13 +81,13 @@ // Check for entries that reference an unloaded method class PrepareExtraDataClosure : public CleanExtraDataClosure { MethodData* _mdo; - uint64_t _safepoint_counter; + SafepointStateTracker _safepoint_tracker; GrowableArray<Method*> _uncached_methods; public: PrepareExtraDataClosure(MethodData* mdo) : _mdo(mdo), - _safepoint_counter(SafepointSynchronize::safepoint_counter()), + _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()), _uncached_methods() { } @@ -103,7 +103,7 @@ } bool has_safepointed() { - return SafepointSynchronize::safepoint_counter() != _safepoint_counter; + return _safepoint_tracker.safepoint_state_changed(); } bool finish() { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/classfile/stringTable.cpp --- a/src/hotspot/share/classfile/stringTable.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/classfile/stringTable.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -79,8 +79,7 @@ // -------------------------------------------------------------------------- -typedef ConcurrentHashTable<WeakHandle<vm_string_table_data>, - StringTableConfig, mtSymbol> StringTableHash; +typedef ConcurrentHashTable<StringTableConfig, mtSymbol> StringTableHash; static StringTableHash* _local_table = NULL; volatile bool StringTable::_has_work = false; @@ -101,11 +100,12 @@ java_lang_String::hash_code(s, len); } -class StringTableConfig : public StringTableHash::BaseConfig { +class StringTableConfig : public StackObj { private: public: - static uintx get_hash(WeakHandle<vm_string_table_data> const& value, - bool* is_dead) { + typedef WeakHandle<vm_string_table_data> Value; + + static uintx get_hash(Value const& value, bool* is_dead) { EXCEPTION_MARK; oop val_oop = value.peek(); if (val_oop == NULL) { @@ -124,15 +124,13 @@ return 0; } // We use default allocation/deallocation but counted - static void* allocate_node(size_t size, - WeakHandle<vm_string_table_data> const& value) { + static void* allocate_node(size_t size, Value const& value) { StringTable::item_added(); - return StringTableHash::BaseConfig::allocate_node(size, value); + return AllocateHeap(size, mtSymbol); } - static void free_node(void* memory, - WeakHandle<vm_string_table_data> const& value) { + static void free_node(void* memory, Value const& value) { value.release(); - StringTableHash::BaseConfig::free_node(memory, value); + FreeHeap(memory); StringTable::item_removed(); } }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/classfile/symbolTable.cpp ---
a/src/hotspot/share/classfile/symbolTable.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/classfile/symbolTable.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -77,8 +77,7 @@ // -------------------------------------------------------------------------- -typedef ConcurrentHashTable<Symbol*, - SymbolTableConfig, mtSymbol> SymbolTableHash; +typedef ConcurrentHashTable<SymbolTableConfig, mtSymbol> SymbolTableHash; static SymbolTableHash* _local_table = NULL; volatile bool SymbolTable::_has_work = 0; @@ -121,10 +120,12 @@ } #endif -class SymbolTableConfig : public SymbolTableHash::BaseConfig { +class SymbolTableConfig : public AllStatic { private: public: - static uintx get_hash(Symbol* const& value, bool* is_dead) { + typedef Symbol* Value; // value of the Node in the hashtable + + static uintx get_hash(Value const& value, bool* is_dead) { *is_dead = (value->refcount() == 0); if (*is_dead) { return 0; @@ -133,11 +134,11 @@ } } // We use default allocation/deallocation but counted - static void* allocate_node(size_t size, Symbol* const& value) { + static void* allocate_node(size_t size, Value const& value) { SymbolTable::item_added(); - return SymbolTableHash::BaseConfig::allocate_node(size, value); + return AllocateHeap(size, mtSymbol); } - static void free_node(void* memory, Symbol* const& value) { + static void free_node(void* memory, Value const& value) { // We get here because #1 some threads lost a race to insert a newly created Symbol // or #2 we're cleaning up unused symbol. // If #1, then the symbol can be either permanent (refcount==PERM_REFCOUNT), @@ -150,7 +151,7 @@ assert(value->refcount() == 0, "expected dead symbol"); } SymbolTable::delete_symbol(value); - SymbolTableHash::BaseConfig::free_node(memory, value); + FreeHeap(memory); SymbolTable::item_removed(); } }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/code/codeCache.cpp --- a/src/hotspot/share/code/codeCache.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/code/codeCache.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1142,25 +1142,28 @@ // At least one nmethod has been marked for deoptimization - Deoptimization::deoptimize_all_marked(); + // All this already happens inside a VM_Operation, so we'll do all the work here. + // Stuff copied from VM_Deoptimize and modified slightly. + + // We do not want any GCs to happen while we are in the middle of this VM operation + ResourceMark rm; + DeoptimizationMarker dm; + + // Deoptimize all activations depending on marked nmethods + Deoptimization::deoptimize_dependents(); + + // Make the dependent methods not entrant + make_marked_nmethods_not_entrant(); } #endif // INCLUDE_JVMTI -// Mark methods for deopt (if safe or possible). +// Deoptimize all methods void CodeCache::mark_all_nmethods_for_deoptimization() { MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading); while(iter.next()) { CompiledMethod* nm = iter.method(); - if (!nm->method()->is_method_handle_intrinsic() && - !nm->is_not_installed() && - nm->is_in_use() && - !nm->is_native_method()) { - // Intrinsics and native methods are never deopted. A method that is - // not installed yet or is not in use is not safe to deopt; the - // is_in_use() check covers the not_entrant and not zombie cases. - // Note: A not_entrant method can become a zombie at anytime if it was - // made not_entrant before the previous safepoint/handshake.
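The stringTable.cpp and symbolTable.cpp hunks above drop the BaseConfig indirection: each table's CONFIG class now supplies the Value typedef, get_hash(), and the node allocation hooks itself, going straight to AllocateHeap/FreeHeap. A minimal sketch of the shape such a config takes after this change; ExampleConfig and its hash choice are hypothetical, only the member names and the AllocateHeap/FreeHeap/mtSymbol calls are taken from the hunks above:

  class ExampleConfig : public AllStatic {
  public:
    typedef Symbol* Value;                      // value stored in each hashtable node

    static uintx get_hash(Value const& value, bool* is_dead) {
      *is_dead = (value->refcount() == 0);      // dead entries hash to 0 and get purged
      return *is_dead ? 0 : (uintx)value->identity_hash();
    }
    static void* allocate_node(size_t size, Value const& value) {
      return AllocateHeap(size, mtSymbol);      // default heap allocation, NMT-tagged
    }
    static void free_node(void* memory, Value const& value) {
      FreeHeap(memory);
    }
  };
  typedef ConcurrentHashTable<ExampleConfig, mtSymbol> ExampleHash;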
+ if (!nm->method()->is_method_handle_intrinsic()) { nm->mark_for_deoptimization(); } } @@ -1188,12 +1191,7 @@ CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading); while(iter.next()) { CompiledMethod* nm = iter.method(); - if (nm->is_marked_for_deoptimization() && nm->is_in_use()) { - // only_alive_and_not_unloading() can return not_entrant nmethods. - // A not_entrant method can become a zombie at anytime if it was - // made not_entrant before the previous safepoint/handshake. The - // is_in_use() check covers the not_entrant and not zombie cases - // that have become true after the method was marked for deopt. + if (nm->is_marked_for_deoptimization() && !nm->is_not_entrant()) { nm->make_not_entrant(); } } @@ -1205,12 +1203,17 @@ if (number_of_nmethods_with_dependencies() == 0) return; + // CodeCache can only be updated by a thread_in_VM and they will all be + // stopped during the safepoint so CodeCache will be safe to update without + // holding the CodeCache_lock. + KlassDepChange changes(dependee); // Compute the dependent nmethods if (mark_for_deoptimization(changes) > 0) { // At least one nmethod has been marked for deoptimization - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } @@ -1219,9 +1222,26 @@ // --- Compile_lock is not held. However we are at a safepoint. assert_locked_or_safepoint(Compile_lock); + // CodeCache can only be updated by a thread_in_VM and they will all be + // stopped dring the safepoint so CodeCache will be safe to update without + // holding the CodeCache_lock. + // Compute the dependent nmethods if (mark_for_deoptimization(m_h()) > 0) { - Deoptimization::deoptimize_all_marked(); + // At least one nmethod has been marked for deoptimization + + // All this already happens inside a VM_Operation, so we'll do all the work here. + // Stuff copied from VM_Deoptimize and modified slightly. + + // We do not want any GCs to happen while we are in the middle of this VM operation + ResourceMark rm; + DeoptimizationMarker dm; + + // Deoptimize all activations depending on marked nmethods + Deoptimization::deoptimize_dependents(); + + // Make the dependent methods not entrant + make_marked_nmethods_not_entrant(); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/code/compiledMethod.hpp --- a/src/hotspot/share/code/compiledMethod.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/code/compiledMethod.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -214,7 +214,6 @@ }; virtual bool is_in_use() const = 0; - virtual bool is_not_installed() const = 0; virtual int comp_level() const = 0; virtual int compile_id() const = 0; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/code/dependencyContext.hpp --- a/src/hotspot/share/code/dependencyContext.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/code/dependencyContext.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -99,15 +99,15 @@ // Safepoints are forbidden during DC lifetime. GC can invalidate // _dependency_context_addr if it relocates the holder // (e.g. CallSiteContext Java object). 
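The aotCodeHeap.cpp and codeCache.cpp hunks above revert from Deoptimization::deoptimize_all_marked() to the older two-part scheme: either request a VM_Deoptimize safepoint operation, or, when the caller is already inside a VM operation, run the equivalent work inline (ResourceMark, DeoptimizationMarker, deoptimize_dependents, make_marked_nmethods_not_entrant). The request side is the standard VM-operation idiom, as restored above:

  if (marked > 0) {
    // At least one nmethod has been marked for deoptimization.
    VM_Deoptimize op;         // safepoint op: deoptimize dependents, then make
                              // the marked nmethods not entrant
    VMThread::execute(&op);   // blocks until the VM thread has executed it
  }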
- uint64_t _safepoint_counter; + SafepointStateTracker _safepoint_tracker; DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr) : _dependency_context_addr(bucket_addr), _last_cleanup_addr(last_cleanup_addr), - _safepoint_counter(SafepointSynchronize::safepoint_counter()) {} + _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()) {} ~DependencyContext() { - assert(SafepointSynchronize::is_same_safepoint(_safepoint_counter), "must be the same safepoint"); + assert(!_safepoint_tracker.safepoint_state_changed(), "must be the same safepoint"); } #else DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr) diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/code/nmethod.cpp --- a/src/hotspot/share/code/nmethod.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/code/nmethod.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -50,7 +50,6 @@ #include "oops/oop.inline.hpp" #include "prims/jvmtiImpl.hpp" #include "runtime/atomic.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" @@ -1178,7 +1177,11 @@ // have the Method* live here, in case we unload the nmethod because // it is pointing to some oop (other than the Method*) being unloaded. if (_method != NULL) { - _method->unlink_code(this); + // OSR methods point to the Method*, but the Method* does not + // point back! + if (_method->code() == this) { + _method->clear_code(); // Break a cycle + } } // Make the class unloaded - i.e., change state and notify sweeper @@ -1260,9 +1263,16 @@ } } -void nmethod::unlink_from_method() { - if (method() != NULL) { - method()->unlink_code(this); +void nmethod::unlink_from_method(bool acquire_lock) { + // We need to check if both the _code and _from_compiled_code_entry_point + // refer to this nmethod because there is a race in setting these two fields + // in Method* as seen in bugid 4947125. + // If the vep() points to the zombie nmethod, the memory for the nmethod + // could be flushed and the compiler and vtable stubs could still call + // through it. + if (method() != NULL && (method()->code() == this || + method()->from_compiled_entry() == verified_entry_point())) { + method()->clear_code(acquire_lock); } } @@ -1289,24 +1299,24 @@ // during patching, depending on the nmethod state we must notify the GC that // code has been unloaded, unregistering it. We cannot do this right while - // holding the CompiledMethod_lock because we need to use the CodeCache_lock. This + // holding the Patching_lock because we need to use the CodeCache_lock. This // would be prone to deadlocks. // This flag is used to remember whether we need to later lock and unregister. bool nmethod_needs_unregister = false; - // invalidate osr nmethod before acquiring the patching lock since - // they both acquire leaf locks and we don't want a deadlock. - // This logic is equivalent to the logic below for patching the - // verified entry point of regular methods. We check that the - // nmethod is in use to ensure that it is invalidated only once. - if (is_osr_method() && is_in_use()) { - // this effectively makes the osr nmethod not entrant - invalidate_osr_method(); - } - { + // invalidate osr nmethod before acquiring the patching lock since + // they both acquire leaf locks and we don't want a deadlock. + // This logic is equivalent to the logic below for patching the + // verified entry point of regular methods. 
We check that the + // nmethod is in use to ensure that it is invalidated only once. + if (is_osr_method() && is_in_use()) { + // this effectively makes the osr nmethod not entrant + invalidate_osr_method(); + } + // Enter critical section. Does not block for safepoint. - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (_state == state) { // another thread already performed this transition so nothing @@ -1350,9 +1360,8 @@ log_state_change(); // Remove nmethod from method. - unlink_from_method(); - - } // leave critical region under CompiledMethod_lock + unlink_from_method(false /* already owns Patching_lock */); + } // leave critical region under Patching_lock #if INCLUDE_JVMCI // Invalidate can't occur while holding the Patching lock diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/code/nmethod.hpp --- a/src/hotspot/share/code/nmethod.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/code/nmethod.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -119,7 +119,7 @@ // used by jvmti to track if an unload event has been posted for this nmethod. bool _unload_reported; - // Protected by CompiledMethod_lock + // Protected by Patching_lock volatile signed char _state; // {not_installed, in_use, not_entrant, zombie, unloaded} #ifdef ASSERT @@ -387,7 +387,7 @@ int comp_level() const { return _comp_level; } - void unlink_from_method(); + void unlink_from_method(bool acquire_lock); // Support for oops in scopes and relocs: // Note: index 0 is reserved for null. diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp --- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -4250,7 +4250,6 @@ if (should_unload_classes()) { heap->prune_scavengable_nmethods(); } - JvmtiExport::gc_epilogue(); // If we encountered any (marking stack / work queue) overflow // events during the current CMS cycle, take appropriate diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1Analytics.cpp --- a/src/hotspot/share/gc/g1/g1Analytics.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,7 +38,7 @@ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; -static double cost_per_card_ms_defaults[] = { +static double cost_per_log_buffer_entry_ms_defaults[] = { 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015 }; @@ -47,7 +47,7 @@ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; -static double cost_per_entry_ms_defaults[] = { +static double young_only_cost_per_remset_card_ms_defaults[] = { 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 }; @@ -77,12 +77,12 @@ _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _prev_collection_pause_end_ms(0.0), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_log_buffer_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_remset_card_ms_seq(new 
TruncatedSeq(TruncatedSeqLength)), _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -101,10 +101,10 @@ int index = MIN2(ParallelGCThreads - 1, 7u); _rs_length_diff_seq->add(rs_length_diff_defaults[index]); - _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms_defaults[index]); _cost_scan_hcc_seq->add(0.0); _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]); - _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); + _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]); _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]); @@ -158,19 +158,19 @@ (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms; } -void G1Analytics::report_cost_per_card_ms(double cost_per_card_ms) { - _cost_per_card_ms_seq->add(cost_per_card_ms); +void G1Analytics::report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms) { + _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms); } void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) { _cost_scan_hcc_seq->add(cost_scan_hcc); } -void G1Analytics::report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc) { +void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) { if (for_young_gc) { - _cost_per_entry_ms_seq->add(cost_per_entry_ms); + _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } else { - _mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms); + _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } } @@ -222,8 +222,8 @@ return get_new_prediction(_alloc_rate_ms_seq); } -double G1Analytics::predict_cost_per_card_ms() const { - return get_new_prediction(_cost_per_card_ms_seq); +double G1Analytics::predict_cost_per_log_buffer_entry_ms() const { + return get_new_prediction(_cost_per_log_buffer_entry_ms_seq); } double G1Analytics::predict_scan_hcc_ms() const { @@ -231,7 +231,7 @@ } double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const { - return pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms(); + return pending_cards * predict_cost_per_log_buffer_entry_ms() + predict_scan_hcc_ms(); } double G1Analytics::predict_young_cards_per_entry_ratio() const { @@ -256,17 +256,17 @@ double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const { if (for_young_gc) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { return predict_mixed_rs_scan_time_ms(card_num); } } double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const { - if (_mixed_cost_per_entry_ms_seq->num() < 3) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + if (_mixed_cost_per_remset_card_ms_seq->num() < 3) { + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { - return card_num * get_new_prediction(_mixed_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq); } } diff -r 4a31db8d42bd -r dd706e28e6cc 
src/hotspot/share/gc/g1/g1Analytics.hpp --- a/src/hotspot/share/gc/g1/g1Analytics.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -46,12 +46,12 @@ double _prev_collection_pause_end_ms; TruncatedSeq* _rs_length_diff_seq; - TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_log_buffer_entry_ms_seq; TruncatedSeq* _cost_scan_hcc_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; TruncatedSeq* _mixed_cards_per_entry_ratio_seq; - TruncatedSeq* _cost_per_entry_ms_seq; - TruncatedSeq* _mixed_cost_per_entry_ms_seq; + TruncatedSeq* _young_only_cost_per_remset_card_ms_seq; + TruncatedSeq* _mixed_cost_per_remset_card_ms_seq; TruncatedSeq* _cost_per_byte_ms_seq; TruncatedSeq* _constant_other_time_ms_seq; TruncatedSeq* _young_other_cost_per_region_ms_seq; @@ -99,9 +99,9 @@ void report_concurrent_mark_remark_times_ms(double ms); void report_concurrent_mark_cleanup_times_ms(double ms); void report_alloc_rate_ms(double alloc_rate); - void report_cost_per_card_ms(double cost_per_card_ms); + void report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms); void report_cost_scan_hcc(double cost_scan_hcc); - void report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc); + void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc); void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc); void report_rs_length_diff(double rs_length_diff); void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress); @@ -116,7 +116,7 @@ double predict_alloc_rate_ms() const; int num_alloc_rate_ms() const; - double predict_cost_per_card_ms() const; + double predict_cost_per_log_buffer_entry_ms() const; double predict_scan_hcc_ms() const; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CardTable.cpp --- a/src/hotspot/share/gc/g1/g1CardTable.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CardTable.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -30,28 +30,6 @@ #include "runtime/atomic.hpp" #include "runtime/orderAccess.hpp" -bool G1CardTable::mark_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - // It's already processed - if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { - return false; - } - - // Cached bit can be installed either on a clean card or on a claimed card. 
- CardValue new_val = val; - if (val == clean_card_val()) { - new_val = deferred_card_val(); - } else { - if (val & claimed_card_val()) { - new_val = val | deferred_card_val(); - } - } - if (new_val != val) { - Atomic::cmpxchg(new_val, &_byte_map[card_index], val); - } - return true; -} - void G1CardTable::g1_mark_as_young(const MemRegion& mr) { CardValue *const first = byte_for(mr.start()); CardValue *const last = byte_after(mr.last()); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CardTable.hpp --- a/src/hotspot/share/gc/g1/g1CardTable.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CardTable.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -44,55 +44,65 @@ virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled); }; -class G1CardTable: public CardTable { +class G1CardTable : public CardTable { friend class VMStructs; friend class G1CardTableChangedListener; G1CardTableChangedListener _listener; +public: enum G1CardValues { - g1_young_gen = CT_MR_BS_last_reserved << 1 + g1_young_gen = CT_MR_BS_last_reserved << 1, + + // During evacuation we use the card table to consolidate the cards we need to + // scan for roots onto the card table from the various sources. Further it is + // used to record already completely scanned cards to avoid re-scanning them + // when incrementally evacuating the old gen regions of a collection set. + // This means that already scanned cards should be preserved. + // + // The merge at the start of each evacuation round simply sets cards to dirty + // that are clean; scanned cards are set to 0x1. + // + // This means that the LSB determines what to do with the card during evacuation + // given the following possible values: + // + // 11111111 - clean, do not scan + // 00000001 - already scanned, do not scan + // 00000000 - dirty, needs to be scanned. + // + g1_card_already_scanned = 0x1 }; -public: + static const size_t WordAllClean = SIZE_MAX; + static const size_t WordAllDirty = 0; + + STATIC_ASSERT(BitsPerByte == 8); + static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned; + G1CardTable(MemRegion whole_heap): CardTable(whole_heap, /* scanned concurrently */ true), _listener() { _listener.set_card_table(this); } - bool is_card_dirty(size_t card_index) { - return _byte_map[card_index] == dirty_card_val(); - } static CardValue g1_young_card_val() { return g1_young_gen; } -/* - Claimed and deferred bits are used together in G1 during the evacuation - pause. These bits can have the following state transitions: - 1. The claimed bit can be put over any other card state. Except that - the "dirty -> dirty and claimed" transition is checked for in - G1 code and is not used. - 2. Deferred bit can be set only if the previous state of the card - was either clean or claimed. mark_card_deferred() is wait-free. - We do not care if the operation is be successful because if - it does not it will only result in duplicate entry in the update - buffer because of the "cache-miss". So it's not worth spinning. 
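The new G1CardValues comment above packs three card states into one byte, 0xff (clean), 0x00 (dirty) and 0x01 (already scanned), so the low bit alone tells the scan whether a card can be skipped. The Word* constants replicate a card value into every byte of a size_t so that mark_region_dirty() (defined just below) can test and rewrite whole words at a time. A small self-contained illustration of that replication trick; standard C++, the names mirror the hunk but the program itself is only an illustration:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const size_t WordAllClean       = SIZE_MAX;   // 0xff in every byte: all clean
    const size_t WordAllDirty       = 0;          // 0x00 in every byte: all dirty
    // SIZE_MAX / 255 is 0x01 repeated in every byte of a size_t, so multiplying by
    // a one-byte card value replicates that value across the whole word.
    const size_t WordAlreadyScanned = (SIZE_MAX / 255) * 0x1;

    std::printf("%zx\n", WordAllClean);           // ffffffffffffffff on 64-bit
    std::printf("%zx\n", WordAllDirty);           // 0
    std::printf("%zx\n", WordAlreadyScanned);     // 101010101010101
    return 0;
  }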
- */ - - bool is_card_claimed(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); - } - - inline void set_card_claimed(size_t card_index); - void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN; void g1_mark_as_young(const MemRegion& mr); - bool mark_card_deferred(size_t card_index); + size_t index_for_cardvalue(CardValue const* p) const { + return pointer_delta(p, _byte_map, sizeof(CardValue)); + } + + // Mark the given card as Dirty if it is Clean. + inline void mark_clean_as_dirty(size_t card_index); - bool is_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); - } + // Change Clean cards in a (large) area on the card table as Dirty, preserving + // already scanned cards. Assumes that most cards in that area are Clean. + inline void mark_region_dirty(size_t start_card_index, size_t num_cards); + + // Mark the given range of cards as Scanned. All of these cards must be Dirty. + inline void mark_as_scanned(size_t start_card_index, size_t num_cards); + + inline uint region_idx_for(CardValue* p); static size_t compute_size(size_t mem_region_size_in_words) { size_t number_of_slots = (mem_region_size_in_words / card_size_in_words); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CardTable.inline.hpp --- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -26,15 +26,58 @@ #define SHARE_GC_G1_G1CARDTABLE_INLINE_HPP #include "gc/g1/g1CardTable.hpp" +#include "gc/g1/heapRegion.hpp" -void G1CardTable::set_card_claimed(size_t card_index) { - jbyte val = _byte_map[card_index]; - if (val == clean_card_val()) { - val = (jbyte)claimed_card_val(); - } else { - val |= (jbyte)claimed_card_val(); +inline uint G1CardTable::region_idx_for(CardValue* p) { + size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue)); + return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift)); +} + +inline void G1CardTable::mark_clean_as_dirty(size_t card_index) { + CardValue value = _byte_map[card_index]; + if (value == clean_card_val()) { + _byte_map[card_index] = dirty_card_val(); } - _byte_map[card_index] = val; } -#endif // SHARE_GC_G1_G1CARDTABLE_INLINE_HPP +inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { + assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned."); + assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible."); + + size_t const num_chunks = num_cards / sizeof(size_t); + + size_t* cur_word = (size_t*)&_byte_map[start_card_index]; + size_t* const end_word_map = cur_word + num_chunks; + while (cur_word < end_word_map) { + size_t value = *cur_word; + if (value == WordAllClean) { + *cur_word = WordAllDirty; + } else if (value == WordAllDirty) { + // do nothing. + } else { + // There is a mix of cards in there. Tread slowly. 
+ CardValue* cur = (CardValue*)cur_word; + for (size_t i = 0; i < sizeof(size_t); i++) { + CardValue value = *cur; + if (value == clean_card_val()) { + *cur = dirty_card_val(); + } + cur++; + } + } + cur_word++; + } +} + +inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) { + CardValue* start = &_byte_map[start_card_index]; + CardValue* const end = start + num_cards; + while (start < end) { + CardValue value = *start; + assert(value == dirty_card_val(), + "Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start), p2i(end)); + *start++ = g1_card_already_scanned; + } +} + +#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */ diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CollectedHeap.cpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1677,7 +1677,6 @@ _card_table = ct; G1BarrierSet::satb_mark_queue_set().initialize(this, - SATB_Q_CBL_mon, &bs->satb_mark_queue_buffer_allocator(), G1SATBProcessCompletedThreshold, G1SATBBufferEnqueueingThresholdPercent); @@ -1955,7 +1954,7 @@ n_completed_buffers++; } assert(dcqs.completed_buffers_num() == 0, "Completed buffers exist!"); - phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers); + phase_times()->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_i, n_completed_buffers, G1GCPhaseTimes::MergeLBProcessedBuffers); } // Computes the sum of the storage used by the various regions. @@ -2239,8 +2238,8 @@ _collection_set.iterate(cl); } -void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, uint worker_id) { - _collection_set.iterate_incremental_part_from(cl, worker_id, workers()->active_workers()); +void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, HeapRegionClaimer* hr_claimer, uint worker_id) { + _collection_set.iterate_incremental_part_from(cl, hr_claimer, worker_id, workers()->active_workers()); } HeapWord* G1CollectedHeap::block_start(const void* addr) const { @@ -2631,8 +2630,6 @@ size_t _total_humongous; size_t _candidate_humongous; - G1DirtyCardQueue _dcq; - bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const { assert(region->is_starts_humongous(), "Must start a humongous object"); @@ -2692,8 +2689,7 @@ public: RegisterRegionsWithRegionAttrTableClosure() : _total_humongous(0), - _candidate_humongous(0), - _dcq(&G1BarrierSet::dirty_card_queue_set()) { + _candidate_humongous(0) { } virtual bool do_heap_region(HeapRegion* r) { @@ -2708,49 +2704,9 @@ uint rindex = r->hrm_index(); g1h->set_humongous_reclaim_candidate(rindex, is_candidate); if (is_candidate) { + g1h->register_humongous_region_with_region_attr(rindex); _candidate_humongous++; - g1h->register_humongous_region_with_region_attr(rindex); - // Is_candidate already filters out humongous object with large remembered sets. - // If we have a humongous object with a few remembered sets, we simply flush these - // remembered set entries into the DCQS. That will result in automatic - // re-evaluation of their remembered set entries during the following evacuation - // phase. - if (!r->rem_set()->is_empty()) { - guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), - "Found a not-small remembered set here. 
This is inconsistent with previous assumptions."); - G1CardTable* ct = g1h->card_table(); - HeapRegionRemSetIterator hrrs(r->rem_set()); - size_t card_index; - while (hrrs.has_next(card_index)) { - CardTable::CardValue* card_ptr = ct->byte_for_index(card_index); - // The remembered set might contain references to already freed - // regions. Filter out such entries to avoid failing card table - // verification. - if (g1h->is_in(ct->addr_for(card_ptr))) { - if (*card_ptr != G1CardTable::dirty_card_val()) { - *card_ptr = G1CardTable::dirty_card_val(); - _dcq.enqueue(card_ptr); - } - } - } - assert(hrrs.n_yielded() == r->rem_set()->occupied(), - "Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries", - hrrs.n_yielded(), r->rem_set()->occupied()); - // We should only clear the card based remembered set here as we will not - // implicitly rebuild anything else during eager reclaim. Note that at the moment - // (and probably never) we do not enter this path if there are other kind of - // remembered sets for this region. - r->rem_set()->clear_locked(true /* only_cardset */); - // Clear_locked() above sets the state to Empty. However we want to continue - // collecting remembered set entries for humongous regions that were not - // reclaimed. - r->rem_set()->set_state_complete(); -#ifdef ASSERT - G1HeapRegionAttr region_attr = g1h->region_attr(oop(r->bottom())); - assert(region_attr.needs_remset_update(), "must be"); -#endif - } - assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); + // We will later handle the remembered sets of these regions. } else { g1h->register_region_with_region_attr(r); } @@ -2761,8 +2717,6 @@ size_t total_humongous() const { return _total_humongous; } size_t candidate_humongous() const { return _candidate_humongous; } - - void flush_rem_set_entries() { _dcq.flush(); } }; void G1CollectedHeap::register_regions_with_region_attr() { @@ -2775,9 +2729,6 @@ cl.total_humongous(), cl.candidate_humongous()); _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0; - - // Finally flush all remembered set entries to re-check into the global DCQS. - cl.flush_rem_set_entries(); } #ifndef PRODUCT @@ -3072,7 +3023,7 @@ workers()->active_workers(), collection_set()->young_region_length(), collection_set()->optional_region_length()); - pre_evacuate_collection_set(evacuation_info); + pre_evacuate_collection_set(evacuation_info, &per_thread_states); // Actually do the work... 
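The evacuation hunks that follow split heap-root scanning into new phases (prepare_for_scan_heap_roots, merge_heap_roots, the optional merge and evacuation rounds), and each new phase is timed with the same Ticks idiom before being reported to G1GCPhaseTimes, for example:

  {
    Ticks start = Ticks::now();
    rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS);
    phase_times()->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
  }

Scoping the Ticks value in its own block keeps each measurement local to the phase it brackets.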
evacuate_initial_collection_set(&per_thread_states); @@ -3105,9 +3056,7 @@ double sample_end_time_sec = os::elapsedTime(); double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; - size_t total_cards_scanned = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScanRSScannedCards) + - phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::ScanRSScannedCards); - policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc); + policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); } verify_after_young_collection(verify_type); @@ -3581,7 +3530,7 @@ phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0); } -void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info) { +void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { _expand_heap_after_alloc_failure = true; _evacuation_failed = false; @@ -3592,10 +3541,15 @@ // Initialize the GC alloc regions. _allocator->init_gc_alloc_regions(evacuation_info); + { + Ticks start = Ticks::now(); + rem_set()->prepare_for_scan_heap_roots(); + phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0); + } + register_regions_with_region_attr(); assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table."); - rem_set()->prepare_for_scan_rem_set(); _preserved_marks_set.assert_empty(); #if COMPILER2_OR_JVMCI @@ -3697,8 +3651,8 @@ void scan_roots(G1ParScanThreadState* pss, uint worker_id) { _root_processor->evacuate_roots(pss, worker_id); - _g1h->rem_set()->update_rem_set(pss, worker_id); - _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::CodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::ObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) { @@ -3725,6 +3679,14 @@ }; void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* per_thread_states) { + G1GCPhaseTimes* p = phase_times(); + + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS); + p->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + Tickspan task_time; const uint num_workers = workers()->active_workers(); @@ -3739,7 +3701,6 @@ } Tickspan total_processing = Ticks::now() - start_processing; - G1GCPhaseTimes* p = phase_times(); p->record_initial_evac_time(task_time.seconds() * 1000.0); p->record_or_add_code_root_fixup_time((total_processing - task_time).seconds() * 1000.0); } @@ -3747,7 +3708,8 @@ class G1EvacuateOptionalRegionsTask : public G1EvacuateRegionsBaseTask { void scan_roots(G1ParScanThreadState* pss, uint worker_id) { - _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::OptCodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::OptObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) 
{ @@ -3783,8 +3745,6 @@ void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) { const double gc_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0; - Ticks start = Ticks::now(); - while (!evacuation_failed() && _collection_set.optional_region_length() > 0) { double time_used_ms = os::elapsedTime() * 1000.0 - gc_start_time_ms; @@ -3797,18 +3757,24 @@ break; } - evacuate_next_optional_regions(per_thread_states); + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(true /* remset_only */, G1GCPhaseTimes::OptMergeRS); + phase_times()->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + + { + Ticks start = Ticks::now(); + evacuate_next_optional_regions(per_thread_states); + phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); + } } _collection_set.abandon_optional_collection_set(per_thread_states); - - phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); } void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { - // Also cleans the card table from temporary duplicate detection information used - // during UpdateRS/ScanRS. - rem_set()->cleanup_after_scan_rem_set(); + rem_set()->cleanup_after_scan_heap_roots(); // Process any discovered reference objects - we have // to do this _before_ we retire the GC alloc regions diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CollectedHeap.hpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -78,7 +78,6 @@ class G1HotCardCache; class G1RemSet; class G1YoungRemSetSamplingThread; -class HeapRegionRemSetIterator; class G1ConcurrentMark; class G1ConcurrentMarkThread; class G1ConcurrentRefine; @@ -757,7 +756,7 @@ void evacuate_next_optional_regions(G1ParScanThreadStateSet* per_thread_states); public: - void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info); + void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void expand_heap_after_young_collection(); @@ -1115,7 +1114,8 @@ public: - inline G1HeapRegionAttr region_attr(const void* obj); + inline G1HeapRegionAttr region_attr(const void* obj) const; + inline G1HeapRegionAttr region_attr(uint idx) const; // Return "TRUE" iff the given object address is in the reserved // region of g1. @@ -1182,7 +1182,12 @@ // Starts the iteration so that the start regions of a given worker id over the // set active_workers are evenly spread across the set of collection set regions // to be iterated. - void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id); + // The variant with the HeapRegionClaimer guarantees that the closure will be + // applied to a particular region exactly once. + void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id) { + collection_set_iterate_increment_from(blk, NULL, worker_id); + } + void collection_set_iterate_increment_from(HeapRegionClosure *blk, HeapRegionClaimer* hr_claimer, uint worker_id); // Returns the HeapRegion that contains addr. addr must not be NULL. 
template <class T> diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -163,10 +163,14 @@ return _region_attr.is_in_cset_or_humongous((HeapWord*)obj); } -G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) { +G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) const { return _region_attr.at((HeapWord*)addr); } +G1HeapRegionAttr G1CollectedHeap::region_attr(uint idx) const { + return _region_attr.get_by_index(idx); +} + void G1CollectedHeap::register_humongous_region_with_region_attr(uint index) { _region_attr.set_humongous(index, region_at(index)->rem_set()->is_tracked()); } @@ -177,7 +181,7 @@ void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) { _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); - _rem_set->prepare_for_scan_rem_set(r->hrm_index()); + _rem_set->prepare_for_scan_heap_roots(r->hrm_index()); } void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CollectionSet.cpp --- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -217,10 +217,13 @@ } } -void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const { +void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, + HeapRegionClaimer* hr_claimer, + uint worker_id, + uint total_workers) const { assert_at_safepoint(); - size_t len = _collection_set_cur_length - _inc_part_start; + size_t len = increment_length(); if (len == 0) { return; } @@ -229,9 +232,12 @@ size_t cur_pos = start_pos; do { - HeapRegion* r = _g1h->region_at(_collection_set_regions[cur_pos + _inc_part_start]); - bool result = cl->do_heap_region(r); - guarantee(!result, "Must not cancel iteration"); + uint region_idx = _collection_set_regions[cur_pos + _inc_part_start]; + if (hr_claimer == NULL || hr_claimer->claim_region(region_idx)) { + HeapRegion* r = _g1h->region_at(region_idx); + bool result = cl->do_heap_region(r); + guarantee(!result, "Must not cancel iteration"); + } cur_pos++; if (cur_pos == len) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1CollectionSet.hpp --- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -36,6 +36,7 @@ class G1Policy; class G1SurvivorRegions; class HeapRegion; +class HeapRegionClaimer; class HeapRegionClosure; // The collection set. @@ -279,7 +280,12 @@ // Iterate over the current collection set increment applying the given HeapRegionClosure // from a starting position determined by the given worker id. - void iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const; + void iterate_incremental_part_from(HeapRegionClosure* cl, HeapRegionClaimer* hr_claimer, uint worker_id, uint total_workers) const; + + // Returns the length of the current increment in number of regions.
+ size_t increment_length() const { return _collection_set_cur_length - _inc_part_start; } + // Returns the length of the whole current collection set in number of regions + size_t cur_length() const { return _collection_set_cur_length; } // Iterate over the entire collection set (all increments calculated so far), applying // the given HeapRegionClosure on all of them. diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1ConcurrentMark.cpp --- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -2419,12 +2419,13 @@ abort_marking_if_regular_check_fail(); } + // Can't assert qset is empty here, even if not aborted. If concurrent, + // some other thread might be adding to the queue. If not concurrent, + // some other thread might have won the race for the last buffer, but + // has not yet decremented the count. + _draining_satb_buffers = false; - assert(has_aborted() || - _cm->concurrent() || - satb_mq_set.completed_buffers_num() == 0, "invariant"); - // again, this was a potentially expensive operation, decrease the // limits to get the regular clock call early decrease_limits(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -206,7 +206,7 @@ // available buffers near green_zone value. When yellow_size is // large we don't want to allow a full step to accumulate before // doing any processing, as that might lead to significantly more - // than green_zone buffers to be processed by update_rs. + // than green_zone buffers to be processed during scanning. step = MIN2(step, ParallelGCThreads / 2.0); } size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1))); @@ -322,18 +322,18 @@ } static size_t calc_new_green_zone(size_t green, - double update_rs_time, - size_t update_rs_processed_buffers, + double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { // Adjust green zone based on whether we're meeting the time goal. // Limit to max_green_zone.
const double inc_k = 1.1, dec_k = 0.9; - if (update_rs_time > goal_ms) { + if (log_buffer_scan_time > goal_ms) { if (green > 0) { green = static_cast<size_t>(green * dec_k); } - } else if (update_rs_time < goal_ms && - update_rs_processed_buffers > green) { + } else if (log_buffer_scan_time < goal_ms && + processed_log_buffers > green) { green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0)); green = MIN2(green, max_green_zone); } return green; } @@ -350,20 +350,20 @@ return MIN2(yellow + (yellow - green), max_red_zone); } -void G1ConcurrentRefine::update_zones(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { log_trace( CTRL_TAGS )("Updating Refinement Zones: " - "update_rs time: %.3fms, " - "update_rs buffers: " SIZE_FORMAT ", " - "update_rs goal time: %.3fms", - update_rs_time, - update_rs_processed_buffers, + "log buffer scan time: %.3fms, " + "processed buffers: " SIZE_FORMAT ", " + "goal time: %.3fms", + log_buffer_scan_time, + processed_log_buffers, goal_ms); _green_zone = calc_new_green_zone(_green_zone, - update_rs_time, - update_rs_processed_buffers, + log_buffer_scan_time, + processed_log_buffers, goal_ms); _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size); _red_zone = calc_new_red_zone(_green_zone, _yellow_zone); @@ -376,13 +376,13 @@ _green_zone, _yellow_zone, _red_zone); } -void G1ConcurrentRefine::adjust(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::adjust(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); if (G1UseAdaptiveConcRefinement) { - update_zones(update_rs_time, update_rs_processed_buffers, goal_ms); + update_zones(log_buffer_scan_time, processed_log_buffers, goal_ms); // Change the barrier params if (max_num_threads() == 0) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp --- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -97,8 +97,8 @@ size_t min_yellow_zone_size); // Update green/yellow/red zone values based on how well goals are being met. - void update_zones(double update_rs_time, - size_t update_rs_processed_buffers, + void update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms); static uint worker_id_offset(); @@ -115,7 +115,7 @@ void stop(); // Adjust refinement thresholds based on work done during the pause and the goal time.
- void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms); + void adjust(double log_buffer_scan_time, size_t processed_log_buffers, double goal_ms); size_t activation_threshold(uint worker_id) const; size_t deactivation_threshold(uint worker_id) const; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp --- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -78,7 +78,14 @@ } G1DirtyCardQueueSet::G1DirtyCardQueueSet(bool notify_when_complete) : - PtrQueueSet(notify_when_complete), + PtrQueueSet(), + _cbl_mon(NULL), + _completed_buffers_head(NULL), + _completed_buffers_tail(NULL), + _n_completed_buffers(0), + _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever), + _process_completed_buffers(false), + _notify_when_complete(notify_when_complete), _max_completed_buffers(MaxCompletedBuffersUnlimited), _completed_buffers_padding(0), _free_ids(NULL), @@ -90,6 +97,7 @@ } G1DirtyCardQueueSet::~G1DirtyCardQueueSet() { + abandon_completed_buffers(); delete _free_ids; } @@ -101,7 +109,9 @@ void G1DirtyCardQueueSet::initialize(Monitor* cbl_mon, BufferNode::Allocator* allocator, bool init_free_ids) { - PtrQueueSet::initialize(cbl_mon, allocator); + PtrQueueSet::initialize(allocator); + assert(_cbl_mon == NULL, "Init order issue?"); + _cbl_mon = cbl_mon; if (init_free_ids) { _free_ids = new G1FreeIdSet(0, num_par_ids()); } @@ -111,6 +121,123 @@ G1ThreadLocalData::dirty_card_queue(t).handle_zero_index(); } +void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + cbn->set_next(NULL); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = cbn; + _completed_buffers_tail = cbn; + } else { + _completed_buffers_tail->set_next(cbn); + _completed_buffers_tail = cbn; + } + _n_completed_buffers++; + + if (!process_completed_buffers() && + (_n_completed_buffers > process_completed_buffers_threshold())) { + set_process_completed_buffers(true); + if (_notify_when_complete) { + _cbl_mon->notify_all(); + } + } + assert_completed_buffers_list_len_correct_locked(); +} + +BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + + if (_n_completed_buffers <= stop_at) { + return NULL; + } + + assert(_n_completed_buffers > 0, "invariant"); + assert(_completed_buffers_head != NULL, "invariant"); + assert(_completed_buffers_tail != NULL, "invariant"); + + BufferNode* bn = _completed_buffers_head; + _n_completed_buffers--; + _completed_buffers_head = bn->next(); + if (_completed_buffers_head == NULL) { + assert(_n_completed_buffers == 0, "invariant"); + _completed_buffers_tail = NULL; + set_process_completed_buffers(false); + } + assert_completed_buffers_list_len_correct_locked(); + bn->set_next(NULL); + return bn; +} + +void G1DirtyCardQueueSet::abandon_completed_buffers() { + BufferNode* buffers_to_delete = NULL; + { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + buffers_to_delete = _completed_buffers_head; + _completed_buffers_head = NULL; + _completed_buffers_tail = NULL; + _n_completed_buffers = 0; + set_process_completed_buffers(false); + } + while (buffers_to_delete != NULL) { + BufferNode* bn = buffers_to_delete; + buffers_to_delete = bn->next(); + bn->set_next(NULL); + 
deallocate_buffer(bn); + } +} + +void G1DirtyCardQueueSet::notify_if_necessary() { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_n_completed_buffers > process_completed_buffers_threshold()) { + set_process_completed_buffers(true); + if (_notify_when_complete) + _cbl_mon->notify(); + } +} + +#ifdef ASSERT +void G1DirtyCardQueueSet::assert_completed_buffers_list_len_correct_locked() { + assert_lock_strong(_cbl_mon); + size_t n = 0; + for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) { + ++n; + } + assert(n == _n_completed_buffers, + "Completed buffer length is wrong: counted: " SIZE_FORMAT + ", expected: " SIZE_FORMAT, n, _n_completed_buffers); +} +#endif // ASSERT + +// Merge lists of buffers. Notify the processing threads. +// The source queue is emptied as a result. The queues +// must share the monitor. +void G1DirtyCardQueueSet::merge_bufferlists(G1DirtyCardQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well formedness"); + if (src->_completed_buffers_head != NULL) { + _completed_buffers_tail->set_next(src->_completed_buffers_head); + _completed_buffers_tail = src->_completed_buffers_tail; + } + } + _n_completed_buffers += src->_n_completed_buffers; + + src->_n_completed_buffers = 0; + src->_completed_buffers_head = NULL; + src->_completed_buffers_tail = NULL; + src->set_process_completed_buffers(false); + + assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || + _completed_buffers_head != NULL && _completed_buffers_tail != NULL, + "Sanity"); + assert_completed_buffers_list_len_correct_locked(); +} + bool G1DirtyCardQueueSet::apply_closure_to_buffer(G1CardTableEntryClosure* cl, BufferNode* node, bool consume, diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp --- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -76,6 +76,21 @@ }; class G1DirtyCardQueueSet: public PtrQueueSet { + Monitor* _cbl_mon; // Protects the fields below. + BufferNode* _completed_buffers_head; + BufferNode* _completed_buffers_tail; + volatile size_t _n_completed_buffers; + + size_t _process_completed_buffers_threshold; + volatile bool _process_completed_buffers; + + // If true, notify_all on _cbl_mon when the threshold is reached. + bool _notify_when_complete; + + void assert_completed_buffers_list_len_correct_locked() NOT_DEBUG_RETURN; + + void abandon_completed_buffers(); + // Apply the closure to the elements of "node" from it's index to // buffer_size. If all closure applications return true, then // returns true. Stops processing after the first closure @@ -111,7 +126,7 @@ // mutator must start doing some of the concurrent refinement work, size_t _max_completed_buffers; size_t _completed_buffers_padding; - static const size_t MaxCompletedBuffersUnlimited = ~size_t(0); + static const size_t MaxCompletedBuffersUnlimited = SIZE_MAX; G1FreeIdSet* _free_ids; @@ -142,6 +157,34 @@ // it can be reused in place. 
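The g1DirtyCardQueue hunks above move the completed-buffer list wholesale into G1DirtyCardQueueSet: a singly linked head/tail list guarded by _cbl_mon, with a racy count and a notify threshold. A self-contained model of the two core list operations in standard C++; the names here are hypothetical stand-ins (the real code uses HotSpot's Monitor, BufferNode and the threshold/notify logic shown above):

  #include <cstddef>
  #include <mutex>

  struct Node { Node* next = nullptr; };

  class CompletedList {
    std::mutex _mon;                 // stands in for _cbl_mon
    Node*  _head  = nullptr;
    Node*  _tail  = nullptr;
    size_t _count = 0;
  public:
    void enqueue(Node* n) {          // cf. enqueue_completed_buffer()
      std::lock_guard<std::mutex> x(_mon);
      n->next = nullptr;
      if (_tail == nullptr) { _head = n; } else { _tail->next = n; }
      _tail = n;
      ++_count;                      // threshold/notify handling omitted
    }
    Node* get(size_t stop_at = 0) {  // cf. get_completed_buffer(stop_at)
      std::lock_guard<std::mutex> x(_mon);
      if (_count <= stop_at) return nullptr;   // leave at least stop_at buffers queued
      Node* n = _head;
      _head = n->next;
      if (_head == nullptr) _tail = nullptr;
      --_count;
      n->next = nullptr;
      return n;
    }
  };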
bool process_or_enqueue_completed_buffer(BufferNode* node); + virtual void enqueue_completed_buffer(BufferNode* node); + + // If the number of completed buffers is > stop_at, then remove and + // return a completed buffer from the list. Otherwise, return NULL. + BufferNode* get_completed_buffer(size_t stop_at = 0); + + // The number of buffers in the list. Racy... + size_t completed_buffers_num() const { return _n_completed_buffers; } + + bool process_completed_buffers() { return _process_completed_buffers; } + void set_process_completed_buffers(bool x) { _process_completed_buffers = x; } + + // Get/Set the number of completed buffers that triggers log processing. + // Log processing should be done when the number of buffers exceeds the + // threshold. + void set_process_completed_buffers_threshold(size_t sz) { + _process_completed_buffers_threshold = sz; + } + size_t process_completed_buffers_threshold() const { + return _process_completed_buffers_threshold; + } + static const size_t ProcessCompletedBuffersThresholdNever = SIZE_MAX; + + // Notify the consumer if the number of buffers crossed the threshold + void notify_if_necessary(); + + void merge_bufferlists(G1DirtyCardQueueSet* src); + // Apply G1RefineCardConcurrentlyClosure to completed buffers until there are stop_at // completed buffers remaining. bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at); @@ -150,13 +193,13 @@ // must never return false. Must only be called during GC. bool apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i); - void reset_for_par_iteration() { _cur_par_buffer_node = completed_buffers_head(); } + void reset_for_par_iteration() { _cur_par_buffer_node = _completed_buffers_head; } // Applies the current closure to all completed buffers, non-consumptively. // Can be used in parallel, all callers using the iteration state initialized // by reset_for_par_iteration. void par_apply_closure_to_all_completed_buffers(G1CardTableEntryClosure* cl); - // If a full collection is happening, reset partial logs, and ignore + // If a full collection is happening, reset partial logs, and release // completed ones: the full collection will make them all irrelevant. void abandon_logs(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1EvacFailure.cpp --- a/src/hotspot/share/gc/g1/g1EvacFailure.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1EvacFailure.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -37,15 +37,19 @@ #include "oops/compressedOops.inline.hpp" #include "oops/oop.inline.hpp" -class UpdateRSetDeferred : public BasicOopIterateClosure { +class UpdateLogBuffersDeferred : public BasicOopIterateClosure { private: G1CollectedHeap* _g1h; G1DirtyCardQueue* _dcq; G1CardTable* _ct; + // Remember the last enqueued card to avoid enqueuing the same card over and over; + // since we only ever handle a card once, this is sufficient. 
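(A note on the sentinel used by this filter: _last_enqueued_card, declared next, is initialized to SIZE_MAX in the constructor below; since a card index returned by G1CardTable::index_for() cannot plausibly reach that value, the very first card handled always passes the "different from last" check.)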
+ size_t _last_enqueued_card; + public: - UpdateRSetDeferred(G1DirtyCardQueue* dcq) : - _g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()) {} + UpdateLogBuffersDeferred(G1DirtyCardQueue* dcq) : + _g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()), _last_enqueued_card(SIZE_MAX) {} virtual void do_oop(narrowOop* p) { do_oop_work(p); } virtual void do_oop( oop* p) { do_oop_work(p); } @@ -62,8 +66,9 @@ return; } size_t card_index = _ct->index_for(p); - if (_ct->mark_card_deferred(card_index)) { + if (card_index != _last_enqueued_card) { _dcq->enqueue(_ct->byte_for_index(card_index)); + _last_enqueued_card = card_index; } } }; @@ -73,21 +78,21 @@ G1ConcurrentMark* _cm; HeapRegion* _hr; size_t _marked_bytes; - UpdateRSetDeferred* _update_rset_cl; + UpdateLogBuffersDeferred* _log_buffer_cl; bool _during_initial_mark; uint _worker_id; HeapWord* _last_forwarded_object_end; public: RemoveSelfForwardPtrObjClosure(HeapRegion* hr, - UpdateRSetDeferred* update_rset_cl, + UpdateLogBuffersDeferred* log_buffer_cl, bool during_initial_mark, uint worker_id) : _g1h(G1CollectedHeap::heap()), _cm(_g1h->concurrent_mark()), _hr(hr), _marked_bytes(0), - _update_rset_cl(update_rset_cl), + _log_buffer_cl(log_buffer_cl), _during_initial_mark(during_initial_mark), _worker_id(worker_id), _last_forwarded_object_end(hr->bottom()) { } @@ -144,7 +149,7 @@ // The problem is that, if evacuation fails, we might have // remembered set entries missing given that we skipped cards on // the collection set. So, we'll recreate such entries now. - obj->oop_iterate(_update_rset_cl); + obj->oop_iterate(_log_buffer_cl); HeapWord* obj_end = obj_addr + obj_size; _last_forwarded_object_end = obj_end; @@ -193,25 +198,22 @@ class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure { G1CollectedHeap* _g1h; uint _worker_id; - HeapRegionClaimer* _hrclaimer; G1DirtyCardQueue _dcq; - UpdateRSetDeferred _update_rset_cl; + UpdateLogBuffersDeferred _log_buffer_cl; public: - RemoveSelfForwardPtrHRClosure(uint worker_id, - HeapRegionClaimer* hrclaimer) : + RemoveSelfForwardPtrHRClosure(uint worker_id) : _g1h(G1CollectedHeap::heap()), _worker_id(worker_id), - _hrclaimer(hrclaimer), _dcq(&_g1h->dirty_card_queue_set()), - _update_rset_cl(&_dcq){ + _log_buffer_cl(&_dcq) { } size_t remove_self_forward_ptr_by_walking_hr(HeapRegion* hr, bool during_initial_mark) { RemoveSelfForwardPtrObjClosure rspc(hr, - &_update_rset_cl, + &_log_buffer_cl, during_initial_mark, _worker_id); hr->object_iterate(&rspc); @@ -225,26 +227,24 @@ assert(!hr->is_pinned(), "Unexpected pinned region at index %u", hr->hrm_index()); assert(hr->in_collection_set(), "bad CS"); - if (_hrclaimer->claim_region(hr->hrm_index())) { - if (hr->evacuation_failed()) { - hr->clear_index_in_opt_cset(); + if (hr->evacuation_failed()) { + hr->clear_index_in_opt_cset(); - bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc(); - bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress(); + bool during_initial_mark = _g1h->collector_state()->in_initial_mark_gc(); + bool during_conc_mark = _g1h->collector_state()->mark_or_rebuild_in_progress(); - hr->note_self_forwarding_removal_start(during_initial_mark, + hr->note_self_forwarding_removal_start(during_initial_mark, during_conc_mark); - _g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr); + _g1h->verifier()->check_bitmaps("Self-Forwarding Ptr Removal", hr); - hr->reset_bot(); - - size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark); + 
hr->reset_bot(); - hr->rem_set()->clean_strong_code_roots(hr); - hr->rem_set()->clear_locked(true); + size_t live_bytes = remove_self_forward_ptr_by_walking_hr(hr, during_initial_mark); - hr->note_self_forwarding_removal_end(live_bytes); - } + hr->rem_set()->clean_strong_code_roots(hr); + hr->rem_set()->clear_locked(true); + + hr->note_self_forwarding_removal_end(live_bytes); } return false; } @@ -256,7 +256,7 @@ _hrclaimer(_g1h->workers()->active_workers()) { } void G1ParRemoveSelfForwardPtrsTask::work(uint worker_id) { - RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id, &_hrclaimer); + RemoveSelfForwardPtrHRClosure rsfp_cl(worker_id); - _g1h->collection_set_iterate_increment_from(&rsfp_cl, worker_id); + _g1h->collection_set_iterate_increment_from(&rsfp_cl, &_hrclaimer, worker_id); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1FullCollector.cpp --- a/src/hotspot/share/gc/g1/g1FullCollector.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -183,7 +183,6 @@ update_derived_pointers(); BiasedLocking::restore_marks(); - JvmtiExport::gc_epilogue(); _heap->prepare_heap_for_mutators(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -66,14 +66,30 @@ _gc_par_phases[WaitForStrongCLD] = new WorkerDataArray(max_gc_threads, "Wait For Strong CLD (ms):"); _gc_par_phases[WeakCLDRoots] = new WorkerDataArray(max_gc_threads, "Weak CLD Roots (ms):"); - _gc_par_phases[UpdateRS] = new WorkerDataArray(max_gc_threads, "Update RS (ms):"); + _gc_par_phases[MergeRS] = new WorkerDataArray(max_gc_threads, "Remembered Sets (ms):"); + _merge_rs_merged_sparse = new WorkerDataArray(max_gc_threads, "Merged Sparse:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_sparse, MergeRSMergedSparse); + _merge_rs_merged_fine = new WorkerDataArray(max_gc_threads, "Merged Fine:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine); + _merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse); + + _gc_par_phases[OptMergeRS] = new WorkerDataArray(max_gc_threads, "Optional Remembered Sets (ms):"); + _opt_merge_rs_merged_sparse = new WorkerDataArray(max_gc_threads, "Merged Sparse:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_sparse, MergeRSMergedSparse); + _opt_merge_rs_merged_fine = new WorkerDataArray(max_gc_threads, "Merged Fine:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine); + _opt_merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse); + + _gc_par_phases[MergeLB] = new WorkerDataArray(max_gc_threads, "Log Buffers (ms):"); if (G1HotCardCache::default_use_cache()) { - _gc_par_phases[ScanHCC] = new WorkerDataArray(max_gc_threads, "Scan HCC (ms):"); + _gc_par_phases[MergeHCC] = new WorkerDataArray(max_gc_threads, "Hot Card Cache (ms):"); } else { - _gc_par_phases[ScanHCC] = NULL; + _gc_par_phases[MergeHCC] = NULL; } - _gc_par_phases[ScanRS] = new WorkerDataArray(max_gc_threads, "Scan RS (ms):"); - _gc_par_phases[OptScanRS] = new WorkerDataArray(max_gc_threads, "Optional 
Scan RS (ms):"); + _gc_par_phases[ScanHR] = new WorkerDataArray(max_gc_threads, "Scan Heap Roots (ms):"); + _gc_par_phases[OptScanHR] = new WorkerDataArray(max_gc_threads, "Optional Scan Heap Roots (ms):"); _gc_par_phases[CodeRoots] = new WorkerDataArray(max_gc_threads, "Code Root Scan (ms):"); _gc_par_phases[OptCodeRoots] = new WorkerDataArray(max_gc_threads, "Optional Code Root Scan (ms):"); _gc_par_phases[ObjCopy] = new WorkerDataArray(max_gc_threads, "Object Copy (ms):"); @@ -84,30 +100,30 @@ _gc_par_phases[GCWorkerEnd] = new WorkerDataArray(max_gc_threads, "GC Worker End (ms):"); _gc_par_phases[Other] = new WorkerDataArray(max_gc_threads, "GC Worker Other (ms):"); - _scan_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScanRSScannedCards); - _scan_rs_claimed_cards = new WorkerDataArray(max_gc_threads, "Claimed Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ScanRSClaimedCards); - _scan_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, ScanRSSkippedCards); + _scan_hr_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_cards, ScanHRScannedCards); + _scan_hr_scanned_blocks = new WorkerDataArray(max_gc_threads, "Scanned Blocks:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_scanned_blocks, ScanHRScannedBlocks); + _scan_hr_claimed_chunks = new WorkerDataArray(max_gc_threads, "Claimed Chunks:"); + _gc_par_phases[ScanHR]->link_thread_work_items(_scan_hr_claimed_chunks, ScanHRClaimedChunks); - _opt_scan_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_cards, ScanRSScannedCards); - _opt_scan_rs_claimed_cards = new WorkerDataArray(max_gc_threads, "Claimed Cards:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_claimed_cards, ScanRSClaimedCards); - _opt_scan_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_skipped_cards, ScanRSSkippedCards); - _opt_scan_rs_scanned_opt_refs = new WorkerDataArray(max_gc_threads, "Scanned Refs:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_scanned_opt_refs, ScanRSScannedOptRefs); - _opt_scan_rs_used_memory = new WorkerDataArray(max_gc_threads, "Used Memory:"); - _gc_par_phases[OptScanRS]->link_thread_work_items(_opt_scan_rs_used_memory, ScanRSUsedMemory); + _opt_scan_hr_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_cards, ScanHRScannedCards); + _opt_scan_hr_scanned_blocks = new WorkerDataArray(max_gc_threads, "Scanned Blocks:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_blocks, ScanHRScannedBlocks); + _opt_scan_hr_claimed_chunks = new WorkerDataArray(max_gc_threads, "Claimed Chunks:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_claimed_chunks, ScanHRClaimedChunks); + _opt_scan_hr_scanned_opt_refs = new WorkerDataArray(max_gc_threads, "Scanned Refs:"); + _gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_scanned_opt_refs, ScanHRScannedOptRefs); + _opt_scan_hr_used_memory = new WorkerDataArray(max_gc_threads, "Used Memory:"); + 
_gc_par_phases[OptScanHR]->link_thread_work_items(_opt_scan_hr_used_memory, ScanHRUsedMemory); - _update_rs_processed_buffers = new WorkerDataArray(max_gc_threads, "Processed Buffers:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers, UpdateRSProcessedBuffers); - _update_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_scanned_cards, UpdateRSScannedCards); - _update_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); - _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_skipped_cards, UpdateRSSkippedCards); + _merge_lb_processed_buffers = new WorkerDataArray(max_gc_threads, "Processed Buffers:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_processed_buffers, MergeLBProcessedBuffers); + _merge_lb_dirty_cards = new WorkerDataArray(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_dirty_cards, MergeLBDirtyCards); + _merge_lb_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); + _gc_par_phases[MergeLB]->link_thread_work_items(_merge_lb_skipped_cards, MergeLBSkippedCards); _obj_copy_lab_waste = new WorkerDataArray(max_gc_threads, "LAB Waste"); _gc_par_phases[ObjCopy]->link_thread_work_items(_obj_copy_lab_waste, ObjCopyLABWaste); @@ -148,6 +164,8 @@ _cur_optional_evac_ms = 0.0; _cur_collection_code_root_fixup_time_ms = 0.0; _cur_strong_code_root_purge_time_ms = 0.0; + _cur_merge_heap_roots_time_ms = 0.0; + _cur_optional_merge_heap_roots_time_ms = 0.0; _cur_evac_fail_recalc_used = 0.0; _cur_evac_fail_remove_self_forwards = 0.0; _cur_string_deduplication_time_ms = 0.0; @@ -160,6 +178,7 @@ _cur_collection_start_sec = 0.0; _root_region_scan_wait_time_ms = 0.0; _external_accounted_time_ms = 0.0; + _recorded_prepare_heap_roots_time_ms = 0.0; _recorded_clear_claimed_marks_time_ms = 0.0; _recorded_young_cset_choice_time_ms = 0.0; _recorded_non_young_cset_choice_time_ms = 0.0; @@ -219,9 +238,7 @@ record_time_secs(GCWorkerTotal, i , total_worker_time); double worker_known_time = worker_time(ExtRootScan, i) + - worker_time(ScanHCC, i) + - worker_time(UpdateRS, i) + - worker_time(ScanRS, i) + + worker_time(ScanHR, i) + worker_time(CodeRoots, i) + worker_time(ObjCopy, i) + worker_time(Termination, i); @@ -231,11 +248,15 @@ // Make sure all slots are uninitialized since this thread did not seem to have been started ASSERT_PHASE_UNINITIALIZED(GCWorkerEnd); ASSERT_PHASE_UNINITIALIZED(ExtRootScan); - ASSERT_PHASE_UNINITIALIZED(ScanHCC); - ASSERT_PHASE_UNINITIALIZED(UpdateRS); - ASSERT_PHASE_UNINITIALIZED(ScanRS); + ASSERT_PHASE_UNINITIALIZED(MergeHCC); + ASSERT_PHASE_UNINITIALIZED(MergeRS); + ASSERT_PHASE_UNINITIALIZED(OptMergeRS); + ASSERT_PHASE_UNINITIALIZED(MergeLB); + ASSERT_PHASE_UNINITIALIZED(ScanHR); ASSERT_PHASE_UNINITIALIZED(CodeRoots); + ASSERT_PHASE_UNINITIALIZED(OptCodeRoots); ASSERT_PHASE_UNINITIALIZED(ObjCopy); + ASSERT_PHASE_UNINITIALIZED(OptObjCopy); ASSERT_PHASE_UNINITIALIZED(Termination); } } @@ -365,6 +386,7 @@ _recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms + _cur_region_register_time + + _recorded_prepare_heap_roots_time_ms + _recorded_clear_claimed_marks_time_ms; info_time("Pre Evacuate Collection Set", sum_ms); @@ -380,6 +402,7 @@ trace_count("Humongous Candidate", _cur_fast_reclaim_humongous_candidates); } + debug_time("Prepare Heap Roots", _recorded_prepare_heap_roots_time_ms); if (_recorded_clear_claimed_marks_time_ms > 0.0) { 
debug_time("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms); } @@ -387,10 +410,13 @@ } double G1GCPhaseTimes::print_evacuate_optional_collection_set() const { - const double sum_ms = _cur_optional_evac_ms; + const double sum_ms = _cur_optional_evac_ms + _cur_optional_merge_heap_roots_time_ms; if (sum_ms > 0) { - info_time("Evacuate Optional Collection Set", sum_ms); - debug_phase(_gc_par_phases[OptScanRS]); + info_time("Merge Optional Heap Roots", _cur_optional_merge_heap_roots_time_ms); + debug_phase(_gc_par_phases[OptMergeRS]); + + info_time("Evacuate Optional Collection Set", _cur_optional_evac_ms); + debug_phase(_gc_par_phases[OptScanHR]); debug_phase(_gc_par_phases[OptObjCopy]); debug_phase(_gc_par_phases[OptCodeRoots]); debug_phase(_gc_par_phases[OptTermination]); @@ -398,21 +424,23 @@ return sum_ms; } -double G1GCPhaseTimes::print_evacuate_collection_set() const { - const double sum_ms = _cur_collection_initial_evac_time_ms; +double G1GCPhaseTimes::print_evacuate_initial_collection_set() const { + info_time("Merge Heap Roots", _cur_merge_heap_roots_time_ms); - info_time("Evacuate Collection Set", sum_ms); + debug_phase(_gc_par_phases[MergeRS]); + if (G1HotCardCache::default_use_cache()) { + debug_phase(_gc_par_phases[MergeHCC]); + } + debug_phase(_gc_par_phases[MergeLB]); + + info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms); trace_phase(_gc_par_phases[GCWorkerStart], false); debug_phase(_gc_par_phases[ExtRootScan]); for (int i = ExtRootScanSubPhasesFirst; i <= ExtRootScanSubPhasesLast; i++) { trace_phase(_gc_par_phases[i]); } - if (G1HotCardCache::default_use_cache()) { - debug_phase(_gc_par_phases[ScanHCC]); - } - debug_phase(_gc_par_phases[UpdateRS]); - debug_phase(_gc_par_phases[ScanRS]); + debug_phase(_gc_par_phases[ScanHR]); debug_phase(_gc_par_phases[CodeRoots]); debug_phase(_gc_par_phases[ObjCopy]); debug_phase(_gc_par_phases[Termination]); @@ -420,7 +448,7 @@ debug_phase(_gc_par_phases[GCWorkerTotal]); trace_phase(_gc_par_phases[GCWorkerEnd], false); - return sum_ms; + return _cur_collection_initial_evac_time_ms + _cur_merge_heap_roots_time_ms; } double G1GCPhaseTimes::print_post_evacuate_collection_set() const { @@ -503,7 +531,7 @@ double accounted_ms = 0.0; accounted_ms += print_pre_evacuate_collection_set(); - accounted_ms += print_evacuate_collection_set(); + accounted_ms += print_evacuate_initial_collection_set(); accounted_ms += print_evacuate_optional_collection_set(); accounted_ms += print_post_evacuate_collection_set(); print_other(accounted_ms); @@ -530,10 +558,12 @@ "CMRefRoots", "WaitForStrongCLD", "WeakCLDRoots", - "UpdateRS", - "ScanHCC", - "ScanRS", - "OptScanRS", + "MergeRS", + "OptMergeRS", + "MergeLB", + "MergeHCC", + "ScanHR", + "OptScanHR", "CodeRoots", "OptCodeRoots", "ObjCopy", @@ -580,8 +610,8 @@ _stopped = true; } -G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) : - _start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event() { +G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record) : + _start_time(), _phase(phase), _phase_times(phase_times), _worker_id(worker_id), _event(), _must_record(must_record) { if (_phase_times != NULL) { _start_time = Ticks::now(); } @@ -589,7 +619,11 @@ G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() { if (_phase_times != NULL) { - _phase_times->record_time_secs(_phase, 
_worker_id, (Ticks::now() - _start_time).seconds()); + if (_must_record) { + _phase_times->record_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds()); + } else { + _phase_times->record_or_add_time_secs(_phase, _worker_id, (Ticks::now() - _start_time).seconds()); + } _event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_phase)); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -60,10 +60,12 @@ CMRefRoots, WaitForStrongCLD, WeakCLDRoots, - UpdateRS, - ScanHCC, - ScanRS, - OptScanRS, + MergeRS, + OptMergeRS, + MergeLB, + MergeHCC, + ScanHR, + OptScanHR, CodeRoots, OptCodeRoots, ObjCopy, @@ -84,18 +86,24 @@ static const GCParPhases ExtRootScanSubPhasesFirst = ThreadRoots; static const GCParPhases ExtRootScanSubPhasesLast = WeakCLDRoots; - enum GCScanRSWorkItems { - ScanRSScannedCards, - ScanRSClaimedCards, - ScanRSSkippedCards, - ScanRSScannedOptRefs, - ScanRSUsedMemory + enum GCMergeRSWorkTimes { + MergeRSMergedSparse, + MergeRSMergedFine, + MergeRSMergedCoarse }; - enum GCUpdateRSWorkItems { - UpdateRSProcessedBuffers, - UpdateRSScannedCards, - UpdateRSSkippedCards + enum GCScanHRWorkItems { + ScanHRScannedCards, + ScanHRScannedBlocks, + ScanHRClaimedChunks, + ScanHRScannedOptRefs, + ScanHRUsedMemory + }; + + enum GCMergeLBWorkItems { + MergeLBProcessedBuffers, + MergeLBDirtyCards, + MergeLBSkippedCards }; enum GCObjCopyWorkItems { @@ -109,19 +117,27 @@ WorkerDataArray* _gc_par_phases[GCParPhasesSentinel]; - WorkerDataArray* _update_rs_processed_buffers; - WorkerDataArray* _update_rs_scanned_cards; - WorkerDataArray* _update_rs_skipped_cards; + WorkerDataArray* _merge_rs_merged_sparse; + WorkerDataArray* _merge_rs_merged_fine; + WorkerDataArray* _merge_rs_merged_coarse; + + WorkerDataArray* _merge_lb_processed_buffers; + WorkerDataArray* _merge_lb_dirty_cards; + WorkerDataArray* _merge_lb_skipped_cards; - WorkerDataArray* _scan_rs_scanned_cards; - WorkerDataArray* _scan_rs_claimed_cards; - WorkerDataArray* _scan_rs_skipped_cards; + WorkerDataArray* _scan_hr_scanned_cards; + WorkerDataArray* _scan_hr_scanned_blocks; + WorkerDataArray* _scan_hr_claimed_chunks; - WorkerDataArray* _opt_scan_rs_scanned_cards; - WorkerDataArray* _opt_scan_rs_claimed_cards; - WorkerDataArray* _opt_scan_rs_skipped_cards; - WorkerDataArray* _opt_scan_rs_scanned_opt_refs; - WorkerDataArray* _opt_scan_rs_used_memory; + WorkerDataArray* _opt_merge_rs_merged_sparse; + WorkerDataArray* _opt_merge_rs_merged_fine; + WorkerDataArray* _opt_merge_rs_merged_coarse; + + WorkerDataArray* _opt_scan_hr_scanned_cards; + WorkerDataArray* _opt_scan_hr_scanned_blocks; + WorkerDataArray* _opt_scan_hr_claimed_chunks; + WorkerDataArray* _opt_scan_hr_scanned_opt_refs; + WorkerDataArray* _opt_scan_hr_used_memory; WorkerDataArray* _obj_copy_lab_waste; WorkerDataArray* _obj_copy_lab_undo_waste; @@ -145,6 +161,9 @@ double _cur_string_deduplication_time_ms; + double _cur_merge_heap_roots_time_ms; + double _cur_optional_merge_heap_roots_time_ms; + double _cur_prepare_tlab_time_ms; double _cur_resize_tlab_time_ms; @@ -159,6 +178,8 @@ double _external_accounted_time_ms; + double _recorded_prepare_heap_roots_time_ms; + double _recorded_clear_claimed_marks_time_ms; double _recorded_young_cset_choice_time_ms; @@ -208,7 +229,8 @@ void trace_count(const char* name, size_t value) const; double 
print_pre_evacuate_collection_set() const; - double print_evacuate_collection_set() const; + double print_merge_heap_roots_time() const; + double print_evacuate_initial_collection_set() const; double print_evacuate_optional_collection_set() const; double print_post_evacuate_collection_set() const; void print_other(double accounted_ms) const; @@ -278,6 +300,14 @@ _cur_strong_code_root_purge_time_ms = ms; } + void record_merge_heap_roots_time(double ms) { + _cur_merge_heap_roots_time_ms += ms; + } + + void record_or_add_optional_merge_heap_roots_time(double ms) { + _cur_optional_merge_heap_roots_time_ms += ms; + } + void record_evac_fail_recalc_used_time(double ms) { _cur_evac_fail_recalc_used = ms; } @@ -357,6 +387,10 @@ _external_accounted_time_ms += time_ms; } + void record_prepare_heap_roots_time_ms(double recorded_prepare_heap_roots_time_ms) { + _recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms; + } + void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) { _recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms; } @@ -397,6 +431,10 @@ return _cur_fast_reclaim_humongous_time_ms; } + size_t fast_reclaim_humongous_candidates() const { + return _cur_fast_reclaim_humongous_candidates; + } + ReferenceProcessorPhaseTimes* ref_phase_times() { return &_ref_phase_times; } WeakProcessorPhaseTimes* weak_phase_times() { return &_weak_phase_times; } @@ -424,8 +462,10 @@ G1GCPhaseTimes* _phase_times; uint _worker_id; EventGCPhaseParallel _event; + bool _must_record; + public: - G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id); + G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id, bool must_record = true); virtual ~G1GCParPhaseTimesTracker(); }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp --- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -39,8 +39,8 @@ } // After a collection pause, young list target length is updated. So we need to make sure we have enough regions in dram for young gen. -void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) { - G1Policy::record_collection_pause_end(pause_time_ms, cards_scanned, heap_used_bytes_before_gc); +void G1HeterogeneousHeapPolicy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { + G1Policy::record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); _manager->adjust_dram_regions((uint)young_list_target_length(), G1CollectedHeap::heap()->workers()); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp --- a/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1HeterogeneousHeapPolicy.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,7 +38,7 @@ // initialize policy virtual void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set); // Record end of an evacuation pause. - virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); // Record the end of full collection. 
virtual void record_full_collection_end(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1ParScanThreadState.cpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -51,6 +51,7 @@ _tenuring_threshold(g1h->policy()->tenuring_threshold()), _scanner(g1h, this), _worker_id(worker_id), + _last_enqueued_card(SIZE_MAX), _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1), _stack_trim_lower_threshold(GCDrainStackTargetSize), _trim_ticks(), @@ -371,7 +372,7 @@ } size_t used_memory = pss->oops_into_optional_region(hr)->used_memory(); - _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanRS, worker_index, used_memory, G1GCPhaseTimes::ScanRSUsedMemory); + _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1ParScanThreadState.hpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -60,6 +60,10 @@ uint _worker_id; + // Remember the last enqueued card to avoid enqueuing the same card over and over; + // since we only ever scan a card once, this is sufficient. + size_t _last_enqueued_card; + // Upper and lower threshold to start and end work queue draining. uint const _stack_trim_upper_threshold; uint const _stack_trim_lower_threshold; @@ -128,8 +132,9 @@ } size_t card_index = ct()->index_for(p); // If the card hasn't been added to the buffer, do it. - if (ct()->mark_card_deferred(card_index)) { + if (_last_enqueued_card != card_index) { dirty_card_queue().enqueue(ct()->byte_for_index(card_index)); + _last_enqueued_card = card_index; } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1Policy.cpp --- a/src/hotspot/share/gc/g1/g1Policy.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1Policy.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -572,10 +572,24 @@ return result; } +double G1Policy::log_buffer_processing_time() const { + double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR); + size_t log_buffer_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + + phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); + // This may happen if there are duplicate cards in different log buffers. 
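A worked example with made-up numbers may help here: if ScanHR plus OptScanHR averaged 10 ms over 1000 scanned cards, of which 250 were merged from log buffers, and MergeLB averaged 1.5 ms, the estimate computed below is 10 * 250 / 1000 + 1.5 = 4.0 ms. Only in the duplicate case just mentioned, where the log-buffer card count exceeds the scanned-card count, is the whole scan time attributed to the log buffers (the branch that follows).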
+ if (log_buffer_dirty_cards > scan_heap_roots_cards) { + return all_cards_processing_time + average_time_ms(G1GCPhaseTimes::MergeLB); + } + return (all_cards_processing_time * log_buffer_dirty_cards / scan_heap_roots_cards) + average_time_ms(G1GCPhaseTimes::MergeLB); +} + // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) { +void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) { + G1GCPhaseTimes* p = phase_times(); + double end_time_sec = os::elapsedTime(); assert_used_and_recalculate_used_equal(_g1h); @@ -645,29 +659,40 @@ _short_lived_surv_rate_group->start_adding_regions(); // Do that for any other surv rate groups - double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::ScanHCC) : 0.0; + double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0; if (update_stats) { - double cost_per_card_ms = 0.0; - if (_pending_cards > 0) { - cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS)) / (double) _pending_cards; - _analytics->report_cost_per_card_ms(cost_per_card_ms); + double cost_per_log_buffer_entry = 0.0; + size_t const pending_log_buffer_entries = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + if (pending_log_buffer_entries > 0) { + cost_per_log_buffer_entry = log_buffer_processing_time() / pending_log_buffer_entries; + _analytics->report_cost_per_log_buffer_entry_ms(cost_per_log_buffer_entry); } _analytics->report_cost_scan_hcc(scan_hcc_time_ms); - double cost_per_entry_ms = 0.0; - if (cards_scanned > 10) { - double avg_time_scan_rs = average_time_ms(G1GCPhaseTimes::ScanRS); - if (this_pause_was_young_only) { - avg_time_scan_rs += average_time_ms(G1GCPhaseTimes::OptScanRS); - } - cost_per_entry_ms = avg_time_scan_rs / cards_scanned; - _analytics->report_cost_per_entry_ms(cost_per_entry_ms, this_pause_was_young_only); + size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + + p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); + size_t remset_cards_scanned = 0; + // There might have been duplicate log buffer entries in the queues which could + // increase this value beyond the cards scanned. In this case attribute all cards + // to the log buffers. + if (pending_log_buffer_entries <= total_cards_scanned) { + remset_cards_scanned = total_cards_scanned - pending_log_buffer_entries; + } + + double cost_per_remset_card_ms = 0.0; + if (remset_cards_scanned > 10) { + double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) * + remset_cards_scanned / total_cards_scanned) + + average_time_ms(G1GCPhaseTimes::MergeRS); + + cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned; + _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only); } if (_max_rs_lengths > 0) { double cards_per_entry_ratio = - (double) cards_scanned / (double) _max_rs_lengths; + (double) remset_cards_scanned / (double) _max_rs_lengths; _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only); } @@ -759,20 +784,26 @@ } // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 
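For concreteness (hypothetical numbers): with _mmu_tracker->max_gc_time() at 0.2 s and G1RSetUpdatingPauseTimePercent at, say, 10, the budget computed below is 0.2 * 1000 * 10 / 100 = 20 ms; the HCC scan time is then subtracted from that budget, or the budget is zeroed if the HCC scan alone already exceeds it.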
- double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; + double scan_log_buffer_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - if (update_rs_time_goal_ms < scan_hcc_time_ms) { + if (scan_log_buffer_time_goal_ms < scan_hcc_time_ms) { log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)." - "Update RS time goal: %1.2fms Scan HCC time: %1.2fms", - update_rs_time_goal_ms, scan_hcc_time_ms); + "Log Buffer Scan time goal: %1.2fms Scan HCC time: %1.2fms", + scan_log_buffer_time_goal_ms, scan_hcc_time_ms); - update_rs_time_goal_ms = 0; + scan_log_buffer_time_goal_ms = 0; } else { - update_rs_time_goal_ms -= scan_hcc_time_ms; + scan_log_buffer_time_goal_ms -= scan_hcc_time_ms; } - _g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS), - phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), - update_rs_time_goal_ms); + + double const log_buffer_time = log_buffer_processing_time(); + + log_debug(gc, ergo, refine)("Concurrent refinement times: Log Buffer Scan time goal: %1.2fms Log Buffer Scan time: %1.2fms HCC time: %1.2fms", + scan_log_buffer_time_goal_ms, log_buffer_time, scan_hcc_time_ms); + + _g1h->concurrent_refine()->adjust(log_buffer_time, + phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBProcessedBuffers), + scan_log_buffer_time_goal_ms); } G1IHOPControl* G1Policy::create_ihop_control(const G1Predictions* predictor){ diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1Policy.hpp --- a/src/hotspot/share/gc/g1/g1Policy.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1Policy.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -111,6 +111,8 @@ bool should_update_surv_rate_group_predictors() { return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress(); } + + double log_buffer_processing_time() const; public: const G1Predictions& predictor() const { return _predictor; } const G1Analytics* analytics() const { return const_cast(_analytics); } @@ -311,7 +313,7 @@ // Record the start and end of an evacuation pause. void record_collection_pause_start(double start_time_sec); - virtual void record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc); + virtual void record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc); // Record the start and end of a full collection. void record_full_collection_start(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1RemSet.cpp --- a/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,7 +38,8 @@ #include "gc/g1/g1SharedDirtyCardQueue.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionManager.inline.hpp" -#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/heapRegionRemSet.inline.hpp" +#include "gc/g1/sparsePRT.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "jfr/jfrEvents.hpp" @@ -52,40 +53,453 @@ #include "utilities/stack.inline.hpp" #include "utilities/ticks.hpp" -// Collects information about the overall remembered set scan progress during an evacuation. +// Collects information about the overall heap root scan progress during an evacuation. 
+// +// Scanning the remembered sets works by first merging all sources of cards to be +// scanned (log buffers, hcc, remembered sets) into a single data structure to remove +// duplicates and simplify work distribution. +// +// During the following card scanning we not only scan this combined set of cards, but +// also remember that these were completely scanned. The following evacuation passes +// do not scan these cards again, and so need to be preserved across increments. +// +// The representation for all the cards to scan is the card table: cards can have +// one of three states during GC: +// - clean: these cards will not be scanned in this pass +// - dirty: these cards will be scanned in this pass +// - scanned: these cards have already been scanned in a previous pass +// +// After all evacuation is done, we reset the card table to clean. +// +// Work distribution occurs on "chunk" basis, i.e. contiguous ranges of cards. As an +// additional optimization, during card merging we remember which regions and which +// chunks actually contain cards to be scanned. Threads iterate only across these +// regions, and only compete for chunks containing any cards. +// +// Within these chunks, a worker scans the card table on "blocks" of cards, i.e. +// contiguous ranges of dirty cards to be scanned. These blocks are converted to actual +// memory ranges and then passed on to actual scanning. class G1RemSetScanState : public CHeapObj { + class G1DirtyRegions; + + size_t _max_regions; + + // Has this region that is part of the regions in the collection set been processed yet. + typedef bool G1RemsetIterState; + + G1RemsetIterState volatile* _collection_set_iter_state; + + // Card table iteration claim for each heap region, from 0 (completely unscanned) + // to (>=) HeapRegion::CardsPerRegion (completely scanned). + uint volatile* _card_table_scan_state; + + // Random power of two number of cards we want to claim per thread. This corresponds + // to a 64k of memory work chunk area for every thread. + // We use the same claim size as Parallel GC. No particular measurements have been + // performed to determine an optimal number. + static const uint CardsPerChunk = 128; + + uint _scan_chunks_per_region; + bool* _region_scan_chunks; + uint8_t _scan_chunks_shift; +public: + uint scan_chunk_size() const { return (uint)1 << _scan_chunks_shift; } + + // Returns whether the chunk corresponding to the given region/card in region contain a + // dirty card, i.e. actually needs scanning. + bool chunk_needs_scan(uint const region_idx, uint const card_in_region) const { + size_t const idx = (size_t)region_idx * _scan_chunks_per_region + (card_in_region >> _scan_chunks_shift); + assert(idx < (_max_regions * _scan_chunks_per_region), "Index " SIZE_FORMAT " out of bounds " SIZE_FORMAT, + idx, _max_regions * _scan_chunks_per_region); + return _region_scan_chunks[idx]; + } + private: + // The complete set of regions which card table needs to be cleared at the end of GC because + // we scribbled all over them. + G1DirtyRegions* _all_dirty_regions; + // The set of regions which card table needs to be scanned for new dirty cards + // in the current evacuation pass. + G1DirtyRegions* _next_dirty_regions; + + // Set of (unique) regions that can be added to concurrently. 
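On the 64k figure quoted above for CardsPerChunk: assuming G1's usual 512-byte card size, a claim of 128 cards covers 128 * 512 bytes = 64 KiB of heap per chunk, which is where that number comes from.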
+ class G1DirtyRegions : public CHeapObj { + uint* _buffer; + uint _cur_idx; + size_t _max_regions; + + bool* _contains; + + public: + G1DirtyRegions(size_t max_regions) : + _buffer(NEW_C_HEAP_ARRAY(uint, max_regions, mtGC)), + _cur_idx(0), + _max_regions(max_regions), + _contains(NEW_C_HEAP_ARRAY(bool, max_regions, mtGC)) { + + reset(); + } + + static size_t chunk_size() { return M; } + + ~G1DirtyRegions() { + FREE_C_HEAP_ARRAY(uint, _buffer); + FREE_C_HEAP_ARRAY(bool, _contains); + } + + void reset() { + _cur_idx = 0; + ::memset(_contains, false, _max_regions * sizeof(bool)); + } + + uint size() const { return _cur_idx; } + + uint at(uint idx) const { + assert(idx < _cur_idx, "Index %u beyond valid regions", idx); + return _buffer[idx]; + } + + void add_dirty_region(uint region) { + if (_contains[region]) { + return; + } + + bool marked_as_dirty = Atomic::cmpxchg(true, &_contains[region], false) == false; + if (marked_as_dirty) { + uint allocated = Atomic::add(1u, &_cur_idx) - 1; + _buffer[allocated] = region; + } + } + + // Creates the union of this and the other G1DirtyRegions. + void merge(const G1DirtyRegions* other) { + for (uint i = 0; i < other->size(); i++) { + uint region = other->at(i); + if (!_contains[region]) { + _buffer[_cur_idx++] = region; + _contains[region] = true; + } + } + } + }; + + // Returns whether the given region contains cards we need to scan. The remembered + // set and other sources may contain cards that + // - are in uncommitted regions + // - are located in the collection set + // - are located in free regions + // as we do not clean up remembered sets before merging heap roots. + bool contains_cards_to_process(uint const region_idx) const { + HeapRegion* hr = G1CollectedHeap::heap()->region_at_or_null(region_idx); + return (hr != NULL && !hr->in_collection_set() && hr->is_old_or_humongous_or_archive()); + } + + class G1MergeCardSetClosure : public HeapRegionClosure { + G1RemSetScanState* _scan_state; + G1CardTable* _ct; + + uint _merged_sparse; + uint _merged_fine; + uint _merged_coarse; + + // Returns if the region contains cards we need to scan. If so, remember that + // region in the current set of dirty regions. 
+ bool remember_if_interesting(uint const region_idx) { + if (!_scan_state->contains_cards_to_process(region_idx)) { + return false; + } + _scan_state->add_dirty_region(region_idx); + return true; + } + public: + G1MergeCardSetClosure(G1RemSetScanState* scan_state) : + _scan_state(scan_state), + _ct(G1CollectedHeap::heap()->card_table()), + _merged_sparse(0), + _merged_fine(0), + _merged_coarse(0) { } + + void next_coarse_prt(uint const region_idx) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_coarse++; + + size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion); + _scan_state->set_chunk_region_dirty(region_base_idx); + } + + void next_fine_prt(uint const region_idx, BitMap* bm) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_fine++; + + size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + BitMap::idx_t cur = bm->get_next_one_offset(0); + while (cur != bm->size()) { + _ct->mark_clean_as_dirty(region_base_idx + cur); + _scan_state->set_chunk_dirty(region_base_idx + cur); + cur = bm->get_next_one_offset(cur + 1); + } + } + + void next_sparse_prt(uint const region_idx, SparsePRTEntry::card_elem_t* cards, uint const num_cards) { + if (!remember_if_interesting(region_idx)) { + return; + } + + _merged_sparse++; + + size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; + for (uint i = 0; i < num_cards; i++) { + size_t card_idx = region_base_idx + cards[i]; + _ct->mark_clean_as_dirty(card_idx); + _scan_state->set_chunk_dirty(card_idx); + } + } + + virtual bool do_heap_region(HeapRegion* r) { + assert(r->in_collection_set() || r->is_starts_humongous(), "must be"); + + HeapRegionRemSet* rem_set = r->rem_set(); + if (!rem_set->is_empty()) { + rem_set->iterate_prts(*this); + } + + return false; + } + + size_t merged_sparse() const { return _merged_sparse; } + size_t merged_fine() const { return _merged_fine; } + size_t merged_coarse() const { return _merged_coarse; } + }; + + // Visitor for the remembered sets of humongous candidate regions to merge their + // remembered set into the card table. + class G1FlushHumongousCandidateRemSets : public HeapRegionClosure { + G1MergeCardSetClosure _cl; + + public: + G1FlushHumongousCandidateRemSets(G1RemSetScanState* scan_state) : _cl(scan_state) { } + + virtual bool do_heap_region(HeapRegion* r) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + if (!r->is_starts_humongous() || + !g1h->region_attr(r->hrm_index()).is_humongous() || + r->rem_set()->is_empty()) { + return false; + } + + guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), + "Found a not-small remembered set here. This is inconsistent with previous assumptions."); + + _cl.do_heap_region(r); + + // We should only clear the card based remembered set here as we will not + // implicitly rebuild anything else during eager reclaim. Note that at the moment + // (and probably never) we do not enter this path if there are other kind of + // remembered sets for this region. + r->rem_set()->clear_locked(true /* only_cardset */); + // Clear_locked() above sets the state to Empty. However we want to continue + // collecting remembered set entries for humongous regions that were not + // reclaimed. 
+ r->rem_set()->set_state_complete(); +#ifdef ASSERT + G1HeapRegionAttr region_attr = g1h->region_attr(r->hrm_index()); + assert(region_attr.needs_remset_update(), "must be"); +#endif + assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); + + return false; + } + + size_t merged_sparse() const { return _cl.merged_sparse(); } + size_t merged_fine() const { return _cl.merged_fine(); } + size_t merged_coarse() const { return _cl.merged_coarse(); } + }; + + // Visitor for the log buffer entries to merge them into the card table. + class G1MergeLogBufferCardsClosure : public G1CardTableEntryClosure { + G1RemSetScanState* _scan_state; + G1CardTable* _ct; + + size_t _cards_dirty; + size_t _cards_skipped; + public: + G1MergeLogBufferCardsClosure(G1CollectedHeap* g1h, G1RemSetScanState* scan_state) : + _scan_state(scan_state), _ct(g1h->card_table()), _cards_dirty(0), _cards_skipped(0) + {} + + bool do_card_ptr(CardValue* card_ptr, uint worker_i) { + // The only time we care about recording cards that + // contain references that point into the collection set + // is during RSet updating within an evacuation pause. + // In this case worker_id should be the id of a GC worker thread. + assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); + + uint const region_idx = _ct->region_idx_for(card_ptr); + + // The second clause must come after - the log buffers might contain cards to uncommited + // regions. + // This code may count duplicate entries in the log buffers (even if rare) multiple + // times. + if (_scan_state->contains_cards_to_process(region_idx) && (*card_ptr == G1CardTable::dirty_card_val())) { + _scan_state->add_dirty_region(region_idx); + _scan_state->set_chunk_dirty(_ct->index_for_cardvalue(card_ptr)); + _cards_dirty++; + } else { + // We may have had dirty cards in the (initial) collection set (or the + // young regions which are always in the initial collection set). We do + // not fix their cards here: we already added these regions to the set of + // regions to clear the card table at the end during the prepare() phase. + _cards_skipped++; + } + return true; + } + + size_t cards_dirty() const { return _cards_dirty; } + size_t cards_skipped() const { return _cards_skipped; } + }; + + class G1MergeHeapRootsTask : public AbstractGangTask { + HeapRegionClaimer _hr_claimer; + G1RemSetScanState* _scan_state; + bool _remembered_set_only; + + G1GCPhaseTimes::GCParPhases _merge_phase; + + volatile bool _fast_reclaim_handled; + + public: + G1MergeHeapRootsTask(G1RemSetScanState* scan_state, uint num_workers, bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) : + AbstractGangTask("G1 Merge Heap Roots"), + _hr_claimer(num_workers), + _scan_state(scan_state), + _remembered_set_only(remembered_set_only), + _merge_phase(merge_phase), + _fast_reclaim_handled(false) { } + + virtual void work(uint worker_id) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1GCPhaseTimes* p = g1h->phase_times(); + + // We schedule flushing the remembered sets of humongous fast reclaim candidates + // onto the card table first to allow the remaining parallelized tasks hide it. 
+ if (!_remembered_set_only && + p->fast_reclaim_humongous_candidates() > 0 && + !_fast_reclaim_handled && + !Atomic::cmpxchg(true, &_fast_reclaim_handled, false)) { + + G1FlushHumongousCandidateRemSets cl(_scan_state); + g1h->heap_region_iterate(&cl); + + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + } + + // Merge remembered sets of current candidates. + { + G1GCParPhaseTimesTracker x(p, _merge_phase, worker_id, !_remembered_set_only /* must_record */); + G1MergeCardSetClosure cl(_scan_state); + g1h->collection_set_iterate_increment_from(&cl, &_hr_claimer, worker_id); + + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); + p->record_or_add_thread_work_item(_merge_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + } + + // Apply closure to log entries in the HCC. + if (!_remembered_set_only && G1HotCardCache::default_use_cache()) { + assert(_merge_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase"); + G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeHCC, worker_id); + G1MergeLogBufferCardsClosure cl(g1h, _scan_state); + g1h->iterate_hcc_closure(&cl, worker_id); + } + + // Now apply the closure to all remaining log entries. + if (!_remembered_set_only) { + assert(_merge_phase == G1GCPhaseTimes::MergeRS, "Wrong merge phase"); + G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id); + + G1MergeLogBufferCardsClosure cl(g1h, _scan_state); + g1h->iterate_dirty_card_closure(&cl, worker_id); + + p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards); + p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards); + } + } + }; + + // Creates a snapshot of the current _top values at the start of collection to + // filter out card marks that we do not want to scan. + class G1ResetScanTopClosure : public HeapRegionClosure { + G1RemSetScanState* _scan_state; + + public: + G1ResetScanTopClosure(G1RemSetScanState* scan_state) : _scan_state(scan_state) { } + + virtual bool do_heap_region(HeapRegion* r) { + uint hrm_index = r->hrm_index(); + if (r->in_collection_set()) { + // Young regions had their card table marked as young at their allocation; + // we need to make sure that these marks are cleared at the end of GC, *but* + // they should not be scanned for cards. + // So directly add them to the "all_dirty_regions". + // Same for regions in the (initial) collection set: they may contain cards from + // the log buffers, make sure they are cleaned. + _scan_state->add_all_dirty_region(hrm_index); + } else if (r->is_old_or_humongous_or_archive()) { + _scan_state->set_scan_top(hrm_index, r->top()); + } + return false; + } + }; + // For each region, contains the maximum top() value to be used during this garbage + // collection. Subsumes common checks like filtering out everything but old and + // humongous regions outside the collection set. + // This is valid because we are not interested in scanning stray remembered set + // entries from free or archive regions. 
+ HeapWord** _scan_top; + class G1ClearCardTableTask : public AbstractGangTask { G1CollectedHeap* _g1h; - uint* _dirty_region_list; - size_t _num_dirty_regions; - size_t _chunk_length; + G1DirtyRegions* _regions; + uint _chunk_length; - size_t volatile _cur_dirty_regions; + uint volatile _cur_dirty_regions; + + G1RemSetScanState* _scan_state; + public: G1ClearCardTableTask(G1CollectedHeap* g1h, - uint* dirty_region_list, - size_t num_dirty_regions, - size_t chunk_length) : + G1DirtyRegions* regions, + uint chunk_length, + G1RemSetScanState* scan_state) : AbstractGangTask("G1 Clear Card Table Task"), _g1h(g1h), - _dirty_region_list(dirty_region_list), - _num_dirty_regions(num_dirty_regions), + _regions(regions), _chunk_length(chunk_length), - _cur_dirty_regions(0) { + _cur_dirty_regions(0), + _scan_state(scan_state) { assert(chunk_length > 0, "must be"); } - static size_t chunk_size() { return M; } + static uint chunk_size() { return M; } void work(uint worker_id) { - while (_cur_dirty_regions < _num_dirty_regions) { - size_t next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length; - size_t max = MIN2(next + _chunk_length, _num_dirty_regions); + while (_cur_dirty_regions < _regions->size()) { + uint next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length; + uint max = MIN2(next + _chunk_length, _regions->size()); - for (size_t i = next; i < max; i++) { - HeapRegion* r = _g1h->region_at(_dirty_region_list[i]); + for (uint i = next; i < max; i++) { + HeapRegion* r = _g1h->region_at(_regions->at(i)); if (!r->is_survivor()) { r->clear_cardtable(); } @@ -94,159 +508,222 @@ } }; - size_t _max_regions; - - // Scan progress for the remembered set of a single region. Transitions from - // Unclaimed -> Claimed -> Complete. - // At each of the transitions the thread that does the transition needs to perform - // some special action once. This is the reason for the extra "Claimed" state. - typedef jint G1RemsetIterState; - - static const G1RemsetIterState Unclaimed = 0; // The remembered set has not been scanned yet. - static const G1RemsetIterState Claimed = 1; // The remembered set is currently being scanned. - static const G1RemsetIterState Complete = 2; // The remembered set has been completely scanned. + // Clear the card table of "dirty" regions. + void clear_card_table(WorkGang* workers) { + uint num_regions = _all_dirty_regions->size(); - G1RemsetIterState volatile* _iter_states; - // The current location where the next thread should continue scanning in a region's - // remembered set. - size_t volatile* _iter_claims; + if (num_regions == 0) { + return; + } - // Temporary buffer holding the regions we used to store remembered set scan duplicate - // information. These are also called "dirty". Valid entries are from [0.._cur_dirty_region) - uint* _dirty_region_buffer; - - // Flag for every region whether it is in the _dirty_region_buffer already - // to avoid duplicates. - bool volatile* _in_dirty_region_buffer; - size_t _cur_dirty_region; + uint const num_chunks = (uint)(align_up((size_t)num_regions << HeapRegion::LogCardsPerRegion, G1ClearCardTableTask::chunk_size()) / G1ClearCardTableTask::chunk_size()); + uint const num_workers = MIN2(num_chunks, workers->active_workers()); + uint const chunk_length = G1ClearCardTableTask::chunk_size() / (uint)HeapRegion::CardsPerRegion; - // Creates a snapshot of the current _top values at the start of collection to - // filter out card marks that we do not want to scan. 
- class G1ResetScanTopClosure : public HeapRegionClosure { - private: - HeapWord** _scan_top; - public: - G1ResetScanTopClosure(HeapWord** scan_top) : _scan_top(scan_top) { } + // Iterate over the dirty cards region list. + G1ClearCardTableTask cl(G1CollectedHeap::heap(), _all_dirty_regions, chunk_length, this); - virtual bool do_heap_region(HeapRegion* r) { - uint hrm_index = r->hrm_index(); - if (!r->in_collection_set() && r->is_old_or_humongous_or_archive() && !r->is_empty()) { - _scan_top[hrm_index] = r->top(); - } else { - _scan_top[hrm_index] = NULL; - } - return false; - } - }; + log_debug(gc, ergo)("Running %s using %u workers for %u " + "units of work for %u regions.", + cl.name(), num_workers, num_chunks, num_regions); + workers->run_task(&cl, num_workers); - // For each region, contains the maximum top() value to be used during this garbage - // collection. Subsumes common checks like filtering out everything but old and - // humongous regions outside the collection set. - // This is valid because we are not interested in scanning stray remembered set - // entries from free or archive regions. - HeapWord** _scan_top; +#ifndef PRODUCT + G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup(); +#endif + } + public: G1RemSetScanState() : _max_regions(0), - _iter_states(NULL), - _iter_claims(NULL), - _dirty_region_buffer(NULL), - _in_dirty_region_buffer(NULL), - _cur_dirty_region(0), + _collection_set_iter_state(NULL), + _card_table_scan_state(NULL), + _scan_chunks_per_region((uint)(HeapRegion::CardsPerRegion / CardsPerChunk)), + _region_scan_chunks(NULL), + _scan_chunks_shift(0), + _all_dirty_regions(NULL), + _next_dirty_regions(NULL), _scan_top(NULL) { } ~G1RemSetScanState() { - if (_iter_states != NULL) { - FREE_C_HEAP_ARRAY(G1RemsetIterState, _iter_states); - } - if (_iter_claims != NULL) { - FREE_C_HEAP_ARRAY(size_t, _iter_claims); - } - if (_dirty_region_buffer != NULL) { - FREE_C_HEAP_ARRAY(uint, _dirty_region_buffer); - } - if (_in_dirty_region_buffer != NULL) { - FREE_C_HEAP_ARRAY(bool, _in_dirty_region_buffer); - } - if (_scan_top != NULL) { - FREE_C_HEAP_ARRAY(HeapWord*, _scan_top); - } + FREE_C_HEAP_ARRAY(G1RemsetIterState, _collection_set_iter_state); + FREE_C_HEAP_ARRAY(uint, _card_table_scan_state); + FREE_C_HEAP_ARRAY(bool, _region_scan_chunks); + FREE_C_HEAP_ARRAY(HeapWord*, _scan_top); } - void initialize(uint max_regions) { - assert(_iter_states == NULL, "Must not be initialized twice"); - assert(_iter_claims == NULL, "Must not be initialized twice"); + void initialize(size_t max_regions) { + assert(_collection_set_iter_state == NULL, "Must not be initialized twice"); _max_regions = max_regions; - _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC); - _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); - _dirty_region_buffer = NEW_C_HEAP_ARRAY(uint, max_regions, mtGC); - _in_dirty_region_buffer = NEW_C_HEAP_ARRAY(bool, max_regions, mtGC); + _collection_set_iter_state = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC); + _card_table_scan_state = NEW_C_HEAP_ARRAY(uint, max_regions, mtGC); + _region_scan_chunks = NEW_C_HEAP_ARRAY(bool, max_regions * _scan_chunks_per_region, mtGC); + + _scan_chunks_shift = (uint8_t)log2_intptr(HeapRegion::CardsPerRegion / _scan_chunks_per_region); _scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_regions, mtGC); } - void reset() { - for (uint i = 0; i < _max_regions; i++) { - _iter_states[i] = Unclaimed; - clear_scan_top(i); + void prepare() { + for (size_t i = 0; i < _max_regions; i++) { + 
_collection_set_iter_state[i] = false; + clear_scan_top((uint)i); } - G1ResetScanTopClosure cl(_scan_top); + _all_dirty_regions = new G1DirtyRegions(_max_regions); + + G1ResetScanTopClosure cl(this); G1CollectedHeap::heap()->heap_region_iterate(&cl); - memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t)); - memset((void*)_in_dirty_region_buffer, false, _max_regions * sizeof(bool)); - _cur_dirty_region = 0; + _next_dirty_regions = new G1DirtyRegions(_max_regions); } - // Attempt to claim the remembered set of the region for iteration. Returns true - // if this call caused the transition from Unclaimed to Claimed. - inline bool claim_iter(uint region) { - assert(region < _max_regions, "Tried to access invalid region %u", region); - if (_iter_states[region] != Unclaimed) { - return false; + void print_merge_heap_roots_stats() { + size_t num_scan_chunks = 0; + for (uint i = 0; i < _max_regions * _scan_chunks_per_region; i++) { + if (_region_scan_chunks[i]) { + num_scan_chunks++; + } } - G1RemsetIterState res = Atomic::cmpxchg(Claimed, &_iter_states[region], Unclaimed); - return (res == Unclaimed); + size_t num_visited_cards = num_scan_chunks * CardsPerChunk; + size_t total_dirty_region_cards = _next_dirty_regions->size() * HeapRegion::CardsPerRegion; + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t total_old_region_cards = + (g1h->num_regions() - (g1h->num_free_regions() - g1h->collection_set()->cur_length())) * HeapRegion::CardsPerRegion; + + log_debug(gc,remset)("Visited cards " SIZE_FORMAT " Total dirty " SIZE_FORMAT " (%.2lf%%) Total old " SIZE_FORMAT " (%.2lf%%)", + num_visited_cards, + total_dirty_region_cards, + percent_of(num_visited_cards, total_dirty_region_cards), + total_old_region_cards, + percent_of(num_visited_cards, total_old_region_cards)); } - // Try to atomically sets the iteration state to "complete". Returns true for the - // thread that caused the transition. - inline bool set_iter_complete(uint region) { - if (iter_is_complete(region)) { - return false; + void merge_heap_roots(WorkGang* workers, bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) { + { + _all_dirty_regions->merge(_next_dirty_regions); + _next_dirty_regions->reset(); + for (size_t i = 0; i < _max_regions; i++) { + _card_table_scan_state[i] = 0; + } + + ::memset(_region_scan_chunks, false, _max_regions * _scan_chunks_per_region * sizeof(*_region_scan_chunks)); } - G1RemsetIterState res = Atomic::cmpxchg(Complete, &_iter_states[region], Claimed); - return (res == Claimed); + + size_t const increment_length = G1CollectedHeap::heap()->collection_set()->increment_length(); + + uint const num_workers = !remembered_set_only ? workers->active_workers() : + MIN2(workers->active_workers(), (uint)increment_length); + + { + G1MergeHeapRootsTask cl(this, num_workers, remembered_set_only, merge_phase); + log_debug(gc, ergo)("Running %s using %u workers for " SIZE_FORMAT " regions", + cl.name(), num_workers, increment_length); + workers->run_task(&cl, num_workers); + } + + if (log_is_enabled(Debug, gc, remset)) { + print_merge_heap_roots_stats(); + } } - // Returns true if the region's iteration is complete. 
- inline bool iter_is_complete(uint region) const { - assert(region < _max_regions, "Tried to access invalid region %u", region); - return _iter_states[region] == Complete; + void set_chunk_region_dirty(size_t const region_card_idx) { + size_t chunk_idx = region_card_idx >> _scan_chunks_shift; + for (uint i = 0; i < _scan_chunks_per_region; i++) { + _region_scan_chunks[chunk_idx++] = true; + } + } + + void set_chunk_dirty(size_t const card_idx) { + assert((card_idx >> _scan_chunks_shift) < (_max_regions * _scan_chunks_per_region), + "Trying to access index " SIZE_FORMAT " out of bounds " SIZE_FORMAT, + card_idx >> _scan_chunks_shift, _max_regions * _scan_chunks_per_region); + size_t const chunk_idx = card_idx >> _scan_chunks_shift; + if (!_region_scan_chunks[chunk_idx]) { + _region_scan_chunks[chunk_idx] = true; + } } - // The current position within the remembered set of the given region. - inline size_t iter_claimed(uint region) const { - assert(region < _max_regions, "Tried to access invalid region %u", region); - return _iter_claims[region]; + void cleanup(WorkGang* workers) { + _all_dirty_regions->merge(_next_dirty_regions); + + clear_card_table(workers); + + delete _all_dirty_regions; + _all_dirty_regions = NULL; + + delete _next_dirty_regions; + _next_dirty_regions = NULL; } - // Claim the next block of cards within the remembered set of the region with - // step size. - inline size_t iter_claimed_next(uint region, size_t step) { - return Atomic::add(step, &_iter_claims[region]) - step; - } + void iterate_dirty_regions_from(HeapRegionClosure* cl, uint worker_id) { + uint num_regions = _next_dirty_regions->size(); - void add_dirty_region(uint region) { - if (_in_dirty_region_buffer[region]) { + if (num_regions == 0) { return; } - if (!Atomic::cmpxchg(true, &_in_dirty_region_buffer[region], false)) { - size_t allocated = Atomic::add(1u, &_cur_dirty_region) - 1; - _dirty_region_buffer[allocated] = region; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + WorkGang* workers = g1h->workers(); + uint const max_workers = workers->active_workers(); + + uint const start_pos = num_regions * worker_id / max_workers; + uint cur = start_pos; + + do { + bool result = cl->do_heap_region(g1h->region_at(_next_dirty_regions->at(cur))); + guarantee(!result, "Not allowed to ask for early termination."); + cur++; + if (cur == _next_dirty_regions->size()) { + cur = 0; + } + } while (cur != start_pos); + } + + // Attempt to claim the given region in the collection set for iteration. Returns true + // if this call caused the transition from Unclaimed to Claimed. 
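iterate_dirty_regions_from() above lets every worker walk the whole shared dirty-region list, but starting at its own offset (num_regions * worker_id / max_workers) and wrapping around, so workers initially touch disjoint parts of the list; duplicated scanning is prevented by the per-region card claiming done inside the closure, not by this traversal. A standalone sketch of the traversal (illustrative names, assumes worker_id < num_workers; not HotSpot code):

#include <cstddef>
#include <functional>
#include <vector>

// Visit every entry exactly once per worker, starting at a worker-specific offset
// and wrapping around at the end of the list.
static void iterate_from_worker_offset(const std::vector<unsigned>& regions,
                                       unsigned worker_id,
                                       unsigned num_workers,
                                       const std::function<void(unsigned)>& visit) {
  const size_t num_regions = regions.size();
  if (num_regions == 0) {
    return;
  }
  const size_t start_pos = num_regions * worker_id / num_workers;
  size_t cur = start_pos;
  do {
    visit(regions[cur]);      // the real closure claims and scans cards of this region
    if (++cur == num_regions) {
      cur = 0;                // wrap around to cover the entries before start_pos
    }
  } while (cur != start_pos);
}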
+ inline bool claim_collection_set_region(uint region) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + if (_collection_set_iter_state[region]) { + return false; } + return !Atomic::cmpxchg(true, &_collection_set_iter_state[region], false); + } + + bool has_cards_to_scan(uint region) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + return _card_table_scan_state[region] < HeapRegion::CardsPerRegion; + } + + uint claim_cards_to_scan(uint region, uint increment) { + assert(region < _max_regions, "Tried to access invalid region %u", region); + return Atomic::add(increment, &_card_table_scan_state[region]) - increment; + } + + void add_dirty_region(uint const region) { +#ifdef ASSERT + HeapRegion* hr = G1CollectedHeap::heap()->region_at(region); + assert(!hr->in_collection_set() && hr->is_old_or_humongous_or_archive(), + "Region %u is not suitable for scanning, is %sin collection set or %s", + hr->hrm_index(), hr->in_collection_set() ? "" : "not ", hr->get_short_type_str()); +#endif + _next_dirty_regions->add_dirty_region(region); + } + + void add_all_dirty_region(uint region) { +#ifdef ASSERT + HeapRegion* hr = G1CollectedHeap::heap()->region_at(region); + assert(hr->in_collection_set(), + "Only add young regions to all dirty regions directly but %u is %s", + hr->hrm_index(), hr->get_short_type_str()); +#endif + _all_dirty_regions->add_dirty_region(region); + } + + void set_scan_top(uint region_idx, HeapWord* value) { + _scan_top[region_idx] = value; } HeapWord* scan_top(uint region_idx) const { @@ -254,30 +731,7 @@ } void clear_scan_top(uint region_idx) { - _scan_top[region_idx] = NULL; - } - - // Clear the card table of "dirty" regions. - void clear_card_table(WorkGang* workers) { - if (_cur_dirty_region == 0) { - return; - } - - size_t const num_chunks = align_up(_cur_dirty_region * HeapRegion::CardsPerRegion, G1ClearCardTableTask::chunk_size()) / G1ClearCardTableTask::chunk_size(); - uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers()); - size_t const chunk_length = G1ClearCardTableTask::chunk_size() / HeapRegion::CardsPerRegion; - - // Iterate over the dirty cards region list. - G1ClearCardTableTask cl(G1CollectedHeap::heap(), _dirty_region_buffer, _cur_dirty_region, chunk_length); - - log_debug(gc, ergo)("Running %s using %u workers for " SIZE_FORMAT " " - "units of work for " SIZE_FORMAT " regions.", - cl.name(), num_workers, num_chunks, _cur_dirty_region); - workers->run_task(&cl, num_workers); - -#ifndef PRODUCT - G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup(); -#endif + set_scan_top(region_idx, NULL); } }; @@ -294,9 +748,7 @@ } G1RemSet::~G1RemSet() { - if (_scan_state != NULL) { - delete _scan_state; - } + delete _scan_state; } uint G1RemSet::num_par_rem_sets() { @@ -308,181 +760,252 @@ _scan_state->initialize(max_regions); } -class G1ScanRSForRegionClosure : public HeapRegionClosure { +// Helper class to scan and detect ranges of cards that need to be scanned on the +// card table. 
+class G1CardTableScanner : public StackObj { +public: + typedef CardTable::CardValue CardValue; + +private: + CardValue* const _base_addr; + + CardValue* _cur_addr; + CardValue* const _end_addr; + + static const size_t ToScanMask = G1CardTable::g1_card_already_scanned; + static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned; + + bool cur_addr_aligned() const { + return ((uintptr_t)_cur_addr) % sizeof(size_t) == 0; + } + + bool cur_card_is_dirty() const { + CardValue value = *_cur_addr; + return (value & ToScanMask) == 0; + } + + bool cur_word_of_cards_contains_any_dirty_card() const { + assert(cur_addr_aligned(), "Current address should be aligned"); + size_t const value = *(size_t*)_cur_addr; + return (~value & ExpandedToScanMask) != 0; + } + + bool cur_word_of_cards_all_dirty_cards() const { + size_t const value = *(size_t*)_cur_addr; + return value == G1CardTable::WordAllDirty; + } + + size_t get_and_advance_pos() { + _cur_addr++; + return pointer_delta(_cur_addr, _base_addr, sizeof(CardValue)) - 1; + } + +public: + G1CardTableScanner(CardValue* start_card, size_t size) : + _base_addr(start_card), + _cur_addr(start_card), + _end_addr(start_card + size) { + + assert(is_aligned(start_card, sizeof(size_t)), "Unaligned start addr " PTR_FORMAT, p2i(start_card)); + assert(is_aligned(size, sizeof(size_t)), "Unaligned size " SIZE_FORMAT, size); + } + + size_t find_next_dirty() { + while (!cur_addr_aligned()) { + if (cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + + assert(cur_addr_aligned(), "Current address should be aligned now."); + while (_cur_addr != _end_addr) { + if (cur_word_of_cards_contains_any_dirty_card()) { + for (size_t i = 0; i < sizeof(size_t); i++) { + if (cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + assert(false, "Should not reach here given we detected a dirty card in the word."); + } + _cur_addr += sizeof(size_t); + } + return get_and_advance_pos(); + } + + size_t find_next_non_dirty() { + assert(_cur_addr <= _end_addr, "Not allowed to search for marks after area."); + + while (!cur_addr_aligned()) { + if (!cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + + assert(cur_addr_aligned(), "Current address should be aligned now."); + while (_cur_addr != _end_addr) { + if (!cur_word_of_cards_all_dirty_cards()) { + for (size_t i = 0; i < sizeof(size_t); i++) { + if (!cur_card_is_dirty()) { + return get_and_advance_pos(); + } + _cur_addr++; + } + assert(false, "Should not reach here given we detected a non-dirty card in the word."); + } + _cur_addr += sizeof(size_t); + } + return get_and_advance_pos(); + } +}; + +// Helper class to claim dirty chunks within the card table. +class G1CardTableChunkClaimer { + G1RemSetScanState* _scan_state; + uint _region_idx; + uint _cur_claim; + +public: + G1CardTableChunkClaimer(G1RemSetScanState* scan_state, uint region_idx) : + _scan_state(scan_state), + _region_idx(region_idx), + _cur_claim(0) { + guarantee(size() <= HeapRegion::CardsPerRegion, "Should not claim more space than possible."); + } + + bool has_next() { + while (true) { + _cur_claim = _scan_state->claim_cards_to_scan(_region_idx, size()); + if (_cur_claim >= HeapRegion::CardsPerRegion) { + return false; + } + if (_scan_state->chunk_needs_scan(_region_idx, _cur_claim)) { + return true; + } + } + } + + uint value() const { return _cur_claim; } + uint size() const { return _scan_state->scan_chunk_size(); } +}; + +// Scans a heap region for dirty cards. 
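G1CardTableScanner above looks for cards to scan a word at a time: on a 64-bit VM, eight one-byte card table entries are loaded as a single size_t and tested against a per-byte mask before falling back to a byte-wise search within that word. A standalone sketch of the idea with a simplified card encoding (low bit set means already scanned or clean, so a zero low bit marks a dirty card); the mask value and helper name are illustrative, not G1's exact constants:

#include <cstddef>
#include <cstdint>
#include <cstring>

// One bit per byte; a 64-bit word of cards contains a dirty card iff at least one
// byte has this bit clear, i.e. (~word & kScannedBitPerByte) != 0.
static const uint64_t kScannedBitPerByte = 0x0101010101010101ULL;

static size_t find_next_dirty(const uint8_t* cards, size_t pos, size_t size) {
  // Advance byte-wise until the offset is word aligned.
  while (pos < size && (pos % sizeof(uint64_t)) != 0) {
    if ((cards[pos] & 0x1) == 0) return pos;
    pos++;
  }
  // Word-wise fast path: skip whole words of already-scanned cards.
  while (pos + sizeof(uint64_t) <= size) {
    uint64_t word;
    std::memcpy(&word, cards + pos, sizeof(word));  // memcpy avoids unaligned/aliasing UB
    if ((~word & kScannedBitPerByte) != 0) {
      for (size_t i = 0; i < sizeof(uint64_t); i++, pos++) {
        if ((cards[pos] & 0x1) == 0) return pos;
      }
    } else {
      pos += sizeof(uint64_t);
    }
  }
  // Remaining tail, byte-wise.
  while (pos < size) {
    if ((cards[pos] & 0x1) == 0) return pos;
    pos++;
  }
  return size;  // no dirty card found
}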
+class G1ScanHRForRegionClosure : public HeapRegionClosure { G1CollectedHeap* _g1h; - G1CardTable *_ct; + G1CardTable* _ct; + G1BlockOffsetTable* _bot; G1ParScanThreadState* _pss; - G1ScanCardClosure* _scan_objs_on_card_cl; G1RemSetScanState* _scan_state; G1GCPhaseTimes::GCParPhases _phase; - uint _worker_i; - - size_t _opt_refs_scanned; - size_t _opt_refs_memory_used; + uint _worker_id; size_t _cards_scanned; - size_t _cards_claimed; - size_t _cards_skipped; + size_t _blocks_scanned; + size_t _chunks_claimed; Tickspan _rem_set_root_scan_time; Tickspan _rem_set_trim_partially_time; - Tickspan _strong_code_root_scan_time; - Tickspan _strong_code_trim_partially_time; - - void claim_card(size_t card_index, const uint region_idx_for_card) { - _ct->set_card_claimed(card_index); - _scan_state->add_dirty_region(region_idx_for_card); - } - - void scan_card(MemRegion mr, uint region_idx_for_card) { + void scan_memregion(uint region_idx_for_card, MemRegion mr) { HeapRegion* const card_region = _g1h->region_at(region_idx_for_card); - assert(!card_region->is_young(), "Should not scan card in young region %u", region_idx_for_card); - card_region->oops_on_card_seq_iterate_careful(mr, _scan_objs_on_card_cl); - _scan_objs_on_card_cl->trim_queue_partially(); - _cards_scanned++; + G1ScanCardClosure card_cl(_g1h, _pss); + card_region->oops_on_card_seq_iterate_careful(mr, &card_cl); + _pss->trim_queue_partially(); } - void scan_opt_rem_set_roots(HeapRegion* r) { - EventGCPhaseParallel event; - - G1OopStarChunkedList* opt_rem_set_list = _pss->oops_into_optional_region(r); - - G1ScanCardClosure scan_cl(_g1h, _pss); - G1ScanRSForOptionalClosure cl(_g1h, &scan_cl); - _opt_refs_scanned += opt_rem_set_list->oops_do(&cl, _pss->closures()->raw_strong_oops()); - _opt_refs_memory_used += opt_rem_set_list->used_memory(); - - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(_phase)); - } - - void scan_rem_set_roots(HeapRegion* r) { - EventGCPhaseParallel event; - uint const region_idx = r->hrm_index(); - - if (_scan_state->claim_iter(region_idx)) { - // If we ever free the collection set concurrently, we should also - // clear the card table concurrently therefore we won't need to - // add regions of the collection set to the dirty cards region. - _scan_state->add_dirty_region(region_idx); - } - - if (r->rem_set()->cardset_is_empty()) { + void do_claimed_block(uint const region_idx_for_card, size_t const first_card, size_t const num_cards) { + HeapWord* const card_start = _bot->address_for_index_raw(first_card); +#ifdef ASSERT + HeapRegion* hr = _g1h->region_at_or_null(region_idx_for_card); + assert(hr == NULL || hr->is_in_reserved(card_start), + "Card start " PTR_FORMAT " to scan outside of region %u", p2i(card_start), _g1h->region_at(region_idx_for_card)->hrm_index()); +#endif + HeapWord* const top = _scan_state->scan_top(region_idx_for_card); + if (card_start >= top) { return; } - // We claim cards in blocks so as to reduce the contention. 
- size_t const block_size = G1RSetScanBlockSize; - - HeapRegionRemSetIterator iter(r->rem_set()); - size_t card_index; - - size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size); - for (size_t current_card = 0; iter.has_next(card_index); current_card++) { - if (current_card >= claimed_card_block + block_size) { - claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size); - } - if (current_card < claimed_card_block) { - _cards_skipped++; - continue; - } - _cards_claimed++; - - HeapWord* const card_start = _g1h->bot()->address_for_index_raw(card_index); - uint const region_idx_for_card = _g1h->addr_to_region(card_start); + MemRegion mr(card_start, MIN2(card_start + ((size_t)num_cards << BOTConstants::LogN_words), top)); + scan_memregion(region_idx_for_card, mr); -#ifdef ASSERT - HeapRegion* hr = _g1h->region_at_or_null(region_idx_for_card); - assert(hr == NULL || hr->is_in_reserved(card_start), - "Card start " PTR_FORMAT " to scan outside of region %u", p2i(card_start), _g1h->region_at(region_idx_for_card)->hrm_index()); -#endif - HeapWord* const top = _scan_state->scan_top(region_idx_for_card); - if (card_start >= top) { - continue; - } + _cards_scanned += num_cards; + } - // If the card is dirty, then G1 will scan it during Update RS. - if (_ct->is_card_claimed(card_index) || _ct->is_card_dirty(card_index)) { - continue; - } - - // We claim lazily (so races are possible but they're benign), which reduces the - // number of duplicate scans (the rsets of the regions in the cset can intersect). - // Claim the card after checking bounds above: the remembered set may contain - // random cards into current survivor, and we would then have an incorrectly - // claimed card in survivor space. Card table clear does not reset the card table - // of survivor space regions. - claim_card(card_index, region_idx_for_card); - - MemRegion const mr(card_start, MIN2(card_start + BOTConstants::N_words, top)); - - scan_card(mr, region_idx_for_card); - } - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(_phase)); + ALWAYSINLINE void do_card_block(uint const region_idx, size_t const first_card, size_t const num_cards) { + _ct->mark_as_scanned(first_card, num_cards); + do_claimed_block(region_idx, first_card, num_cards); + _blocks_scanned++; } - void scan_strong_code_roots(HeapRegion* r) { + void scan_heap_roots(HeapRegion* r) { EventGCPhaseParallel event; - // We pass a weak code blobs closure to the remembered set scanning because we want to avoid - // treating the nmethods visited to act as roots for concurrent marking. - // We only want to make sure that the oops in the nmethods are adjusted with regard to the - // objects copied by the current evacuation. 
- r->strong_code_roots_do(_pss->closures()->weak_codeblobs()); - event.commit(GCId::current(), _worker_i, G1GCPhaseTimes::phase_name(G1GCPhaseTimes::CodeRoots)); + uint const region_idx = r->hrm_index(); + + ResourceMark rm; + + G1CardTableChunkClaimer claim(_scan_state, region_idx); + + while (claim.has_next()) { + size_t const region_card_base_idx = ((size_t)region_idx << HeapRegion::LogCardsPerRegion) + claim.value(); + CardTable::CardValue* const base_addr = _ct->byte_for_index(region_card_base_idx); + + G1CardTableScanner scan(base_addr, claim.size()); + + size_t first_scan_idx = scan.find_next_dirty(); + while (first_scan_idx != claim.size()) { + assert(*_ct->byte_for_index(region_card_base_idx + first_scan_idx) <= 0x1, "is %d at region %u idx " SIZE_FORMAT, *_ct->byte_for_index(region_card_base_idx + first_scan_idx), region_idx, first_scan_idx); + + size_t const last_scan_idx = scan.find_next_non_dirty(); + size_t const len = last_scan_idx - first_scan_idx; + + do_card_block(region_idx, region_card_base_idx + first_scan_idx, len); + + if (last_scan_idx == claim.size()) { + break; + } + + first_scan_idx = scan.find_next_dirty(); + } + _chunks_claimed++; + } + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(G1GCPhaseTimes::ScanHR)); } public: - G1ScanRSForRegionClosure(G1RemSetScanState* scan_state, - G1ScanCardClosure* scan_obj_on_card, + G1ScanHRForRegionClosure(G1RemSetScanState* scan_state, G1ParScanThreadState* pss, - G1GCPhaseTimes::GCParPhases phase, - uint worker_i) : + uint worker_id, + G1GCPhaseTimes::GCParPhases phase) : _g1h(G1CollectedHeap::heap()), _ct(_g1h->card_table()), + _bot(_g1h->bot()), _pss(pss), - _scan_objs_on_card_cl(scan_obj_on_card), _scan_state(scan_state), _phase(phase), - _worker_i(worker_i), - _opt_refs_scanned(0), - _opt_refs_memory_used(0), + _worker_id(worker_id), _cards_scanned(0), - _cards_claimed(0), - _cards_skipped(0), + _blocks_scanned(0), + _chunks_claimed(0), _rem_set_root_scan_time(), - _rem_set_trim_partially_time(), - _strong_code_root_scan_time(), - _strong_code_trim_partially_time() { } + _rem_set_trim_partially_time() { + } bool do_heap_region(HeapRegion* r) { - assert(r->in_collection_set(), "Region %u is not in the collection set.", r->hrm_index()); + assert(!r->in_collection_set() && r->is_old_or_humongous_or_archive(), + "Should only be called on old gen non-collection set regions but region %u is not.", + r->hrm_index()); uint const region_idx = r->hrm_index(); - // The individual references for the optional remembered set are per-worker, so we - // always need to scan them. - if (r->has_index_in_opt_cset()) { + if (_scan_state->has_cards_to_scan(region_idx)) { G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_root_scan_time, _rem_set_trim_partially_time); - scan_opt_rem_set_roots(r); - } - - // Do an early out if we know we are complete. 
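The loop in scan_heap_roots() above alternates find_next_dirty() and find_next_non_dirty() to turn a claimed chunk of the card table into maximal dirty runs, handing each run to do_card_block(), which marks the cards as scanned and scans the corresponding heap range. A standalone sketch of the run extraction over a plain byte array (dirty modelled as value 0; for_each_dirty_run and scan_block are illustrative names, not HotSpot code):

#include <cstddef>
#include <cstdint>
#include <functional>

// Report every maximal run of dirty cards in [0, size) as a (first, length) pair.
static void for_each_dirty_run(const uint8_t* cards, size_t size,
                               const std::function<void(size_t, size_t)>& scan_block) {
  size_t first = 0;
  while (true) {
    while (first < size && cards[first] != 0) first++;   // find_next_dirty
    if (first == size) return;
    size_t last = first;
    while (last < size && cards[last] == 0) last++;      // find_next_non_dirty
    scan_block(first, last - first);                     // mark as scanned, then scan the block
    if (last == size) return;
    first = last;
  }
}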
- if (_scan_state->iter_is_complete(region_idx)) { - return false; - } - - { - G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_root_scan_time, _rem_set_trim_partially_time); - scan_rem_set_roots(r); - } - - if (_scan_state->set_iter_complete(region_idx)) { - G1EvacPhaseWithTrimTimeTracker timer(_pss, _strong_code_root_scan_time, _strong_code_trim_partially_time); - // Scan the strong code root list attached to the current region - scan_strong_code_roots(r); + scan_heap_roots(r); } return false; } @@ -490,120 +1013,156 @@ Tickspan rem_set_root_scan_time() const { return _rem_set_root_scan_time; } Tickspan rem_set_trim_partially_time() const { return _rem_set_trim_partially_time; } + size_t cards_scanned() const { return _cards_scanned; } + size_t blocks_scanned() const { return _blocks_scanned; } + size_t chunks_claimed() const { return _chunks_claimed; } +}; + +void G1RemSet::scan_heap_roots(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase) { + G1ScanHRForRegionClosure cl(_scan_state, pss, worker_id, scan_phase); + _scan_state->iterate_dirty_regions_from(&cl, worker_id); + + G1GCPhaseTimes* p = _g1p->phase_times(); + + p->record_or_add_time_secs(objcopy_phase, worker_id, cl.rem_set_trim_partially_time().seconds()); + + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_root_scan_time().seconds()); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.cards_scanned(), G1GCPhaseTimes::ScanHRScannedCards); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.blocks_scanned(), G1GCPhaseTimes::ScanHRScannedBlocks); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.chunks_claimed(), G1GCPhaseTimes::ScanHRClaimedChunks); +} + +// Heap region closure to be applied to all regions in the current collection set +// increment to fix up non-card related roots. 
+class G1ScanCollectionSetRegionClosure : public HeapRegionClosure { + G1ParScanThreadState* _pss; + G1RemSetScanState* _scan_state; + + G1GCPhaseTimes::GCParPhases _scan_phase; + G1GCPhaseTimes::GCParPhases _code_roots_phase; + + uint _worker_id; + + size_t _opt_refs_scanned; + size_t _opt_refs_memory_used; + + Tickspan _strong_code_root_scan_time; + Tickspan _strong_code_trim_partially_time; + + Tickspan _rem_set_opt_root_scan_time; + Tickspan _rem_set_opt_trim_partially_time; + + void scan_opt_rem_set_roots(HeapRegion* r) { + EventGCPhaseParallel event; + + G1OopStarChunkedList* opt_rem_set_list = _pss->oops_into_optional_region(r); + + G1ScanCardClosure scan_cl(G1CollectedHeap::heap(), _pss); + G1ScanRSForOptionalClosure cl(G1CollectedHeap::heap(), &scan_cl); + _opt_refs_scanned += opt_rem_set_list->oops_do(&cl, _pss->closures()->raw_strong_oops()); + _opt_refs_memory_used += opt_rem_set_list->used_memory(); + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_scan_phase)); + } + +public: + G1ScanCollectionSetRegionClosure(G1RemSetScanState* scan_state, + G1ParScanThreadState* pss, + uint worker_i, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases code_roots_phase) : + _pss(pss), + _scan_state(scan_state), + _scan_phase(scan_phase), + _code_roots_phase(code_roots_phase), + _worker_id(worker_i), + _opt_refs_scanned(0), + _opt_refs_memory_used(0), + _strong_code_root_scan_time(), + _strong_code_trim_partially_time(), + _rem_set_opt_root_scan_time(), + _rem_set_opt_trim_partially_time() { } + + bool do_heap_region(HeapRegion* r) { + uint const region_idx = r->hrm_index(); + + // The individual references for the optional remembered set are per-worker, so we + // always need to scan them. + if (r->has_index_in_opt_cset()) { + G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_opt_root_scan_time, _rem_set_opt_trim_partially_time); + scan_opt_rem_set_roots(r); + } + + if (_scan_state->claim_collection_set_region(region_idx)) { + EventGCPhaseParallel event; + + G1EvacPhaseWithTrimTimeTracker timer(_pss, _strong_code_root_scan_time, _strong_code_trim_partially_time); + // Scan the strong code root list attached to the current region + r->strong_code_roots_do(_pss->closures()->weak_codeblobs()); + + event.commit(GCId::current(), _worker_id, G1GCPhaseTimes::phase_name(_code_roots_phase)); + } + + return false; + } + Tickspan strong_code_root_scan_time() const { return _strong_code_root_scan_time; } Tickspan strong_code_root_trim_partially_time() const { return _strong_code_trim_partially_time; } - size_t cards_scanned() const { return _cards_scanned; } - size_t cards_claimed() const { return _cards_claimed; } - size_t cards_skipped() const { return _cards_skipped; } + Tickspan rem_set_opt_root_scan_time() const { return _rem_set_opt_root_scan_time; } + Tickspan rem_set_opt_trim_partially_time() const { return _rem_set_opt_trim_partially_time; } size_t opt_refs_scanned() const { return _opt_refs_scanned; } size_t opt_refs_memory_used() const { return _opt_refs_memory_used; } }; -void G1RemSet::scan_rem_set(G1ParScanThreadState* pss, - uint worker_i, - G1GCPhaseTimes::GCParPhases scan_phase, - G1GCPhaseTimes::GCParPhases objcopy_phase, - G1GCPhaseTimes::GCParPhases coderoots_phase) { - assert(pss->trim_ticks().value() == 0, "Queues must have been trimmed before entering."); - - G1ScanCardClosure scan_cl(_g1h, pss); - G1ScanRSForRegionClosure cl(_scan_state, &scan_cl, pss, scan_phase, worker_i); - _g1h->collection_set_iterate_increment_from(&cl, 
worker_i); - - G1GCPhaseTimes* p = _g1p->phase_times(); - - p->record_or_add_time_secs(objcopy_phase, worker_i, cl.rem_set_trim_partially_time().seconds()); +void G1RemSet::scan_collection_set_regions(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases coderoots_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase) { + G1ScanCollectionSetRegionClosure cl(_scan_state, pss, worker_id, scan_phase, coderoots_phase); + _g1h->collection_set_iterate_increment_from(&cl, worker_id); - p->record_or_add_time_secs(scan_phase, worker_i, cl.rem_set_root_scan_time().seconds()); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScanRSScannedCards); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ScanRSClaimedCards); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.cards_skipped(), G1GCPhaseTimes::ScanRSSkippedCards); - // At this time we only record some metrics for the optional remembered set. - if (scan_phase == G1GCPhaseTimes::OptScanRS) { - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.opt_refs_scanned(), G1GCPhaseTimes::ScanRSScannedOptRefs); - p->record_or_add_thread_work_item(scan_phase, worker_i, cl.opt_refs_memory_used(), G1GCPhaseTimes::ScanRSUsedMemory); - } - - p->record_or_add_time_secs(coderoots_phase, worker_i, cl.strong_code_root_scan_time().seconds()); - p->add_time_secs(objcopy_phase, worker_i, cl.strong_code_root_trim_partially_time().seconds()); -} - -// Closure used for updating rem sets. Only called during an evacuation pause. -class G1RefineCardClosure: public G1CardTableEntryClosure { - G1RemSet* _g1rs; - G1ScanCardClosure* _update_rs_cl; - - size_t _cards_scanned; - size_t _cards_skipped; -public: - G1RefineCardClosure(G1CollectedHeap* g1h, G1ScanCardClosure* update_rs_cl) : - _g1rs(g1h->rem_set()), _update_rs_cl(update_rs_cl), _cards_scanned(0), _cards_skipped(0) - {} + G1GCPhaseTimes* p = _g1h->phase_times(); - bool do_card_ptr(CardValue* card_ptr, uint worker_i) { - // The only time we care about recording cards that - // contain references that point into the collection set - // is during RSet updating within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread. - assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); - - bool card_scanned = _g1rs->refine_card_during_gc(card_ptr, _update_rs_cl); - - if (card_scanned) { - _update_rs_cl->trim_queue_partially(); - _cards_scanned++; - } else { - _cards_skipped++; - } - return true; - } - - size_t cards_scanned() const { return _cards_scanned; } - size_t cards_skipped() const { return _cards_skipped; } -}; + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_opt_root_scan_time().seconds()); + p->record_or_add_time_secs(scan_phase, worker_id, cl.rem_set_opt_trim_partially_time().seconds()); -void G1RemSet::update_rem_set(G1ParScanThreadState* pss, uint worker_i) { - G1GCPhaseTimes* p = _g1p->phase_times(); - - // Apply closure to log entries in the HCC. 
- if (G1HotCardCache::default_use_cache()) { - G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::ScanHCC, worker_i); + p->record_or_add_time_secs(coderoots_phase, worker_id, cl.strong_code_root_scan_time().seconds()); + p->add_time_secs(objcopy_phase, worker_id, cl.strong_code_root_trim_partially_time().seconds()); - G1ScanCardClosure scan_hcc_cl(_g1h, pss); - G1RefineCardClosure refine_card_cl(_g1h, &scan_hcc_cl); - _g1h->iterate_hcc_closure(&refine_card_cl, worker_i); - } - - // Now apply the closure to all remaining log entries. - { - G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::UpdateRS, worker_i); - - G1ScanCardClosure update_rs_cl(_g1h, pss); - G1RefineCardClosure refine_card_cl(_g1h, &update_rs_cl); - _g1h->iterate_dirty_card_closure(&refine_card_cl, worker_i); - - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_scanned(), G1GCPhaseTimes::UpdateRSScannedCards); - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_skipped(), G1GCPhaseTimes::UpdateRSSkippedCards); + // At this time we record some metrics only for the evacuations after the initial one. + if (scan_phase == G1GCPhaseTimes::OptScanHR) { + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.opt_refs_scanned(), G1GCPhaseTimes::ScanHRScannedOptRefs); + p->record_or_add_thread_work_item(scan_phase, worker_id, cl.opt_refs_memory_used(), G1GCPhaseTimes::ScanHRUsedMemory); } } -void G1RemSet::prepare_for_scan_rem_set() { - G1BarrierSet::dirty_card_queue_set().concatenate_logs(); - _scan_state->reset(); +void G1RemSet::prepare_for_scan_heap_roots() { + G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); + dcqs.concatenate_logs(); + + _scan_state->prepare(); } -void G1RemSet::prepare_for_scan_rem_set(uint region_idx) { +void G1RemSet::merge_heap_roots(bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase) { + _scan_state->merge_heap_roots(_g1h->workers(), remembered_set_only, merge_phase); +} + +void G1RemSet::prepare_for_scan_heap_roots(uint region_idx) { _scan_state->clear_scan_top(region_idx); } -void G1RemSet::cleanup_after_scan_rem_set() { +void G1RemSet::cleanup_after_scan_heap_roots() { G1GCPhaseTimes* phase_times = _g1h->phase_times(); // Set all cards back to clean. double start = os::elapsedTime(); - _scan_state->clear_card_table(_g1h->workers()); + _scan_state->cleanup(_g1h->workers()); phase_times->record_clear_ct_time((os::elapsedTime() - start) * 1000.0); } @@ -759,53 +1318,6 @@ G1BarrierSet::shared_dirty_card_queue().enqueue(card_ptr); } -bool G1RemSet::refine_card_during_gc(CardValue* card_ptr, - G1ScanCardClosure* update_rs_cl) { - assert(_g1h->is_gc_active(), "Only call during GC"); - - // Construct the region representing the card. - HeapWord* card_start = _ct->addr_for(card_ptr); - // And find the region containing it. - uint const card_region_idx = _g1h->addr_to_region(card_start); - - HeapWord* scan_limit = _scan_state->scan_top(card_region_idx); - if (scan_limit == NULL) { - // This is a card into an uncommitted region. We need to bail out early as we - // should not access the corresponding card table entry. - return false; - } - - check_card_ptr(card_ptr, _ct); - - // If the card is no longer dirty, nothing to do. This covers cards that were already - // scanned as parts of the remembered sets. 
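cleanup_after_scan_heap_roots() above wraps the card table cleanup in a simple wall-clock measurement: os::elapsedTime() returns seconds, so the recorded value is the difference multiplied by 1000.0, i.e. milliseconds. The same pattern in standalone C++, with std::chrono standing in for the VM clock (time_phase_ms is an illustrative helper, not part of this patch):

#include <chrono>
#include <functional>

// Run a phase and return its duration in milliseconds.
static double time_phase_ms(const std::function<void()>& phase) {
  const auto start = std::chrono::steady_clock::now();
  phase();
  const std::chrono::duration<double, std::milli> elapsed =
      std::chrono::steady_clock::now() - start;
  return elapsed.count();
}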
- if (*card_ptr != G1CardTable::dirty_card_val()) { - return false; - } - - // We claim lazily (so races are possible but they're benign), which reduces the - // number of potential duplicate scans (multiple threads may enqueue the same card twice). - *card_ptr = G1CardTable::clean_card_val() | G1CardTable::claimed_card_val(); - - _scan_state->add_dirty_region(card_region_idx); - if (scan_limit <= card_start) { - // If the card starts above the area in the region containing objects to scan, skip it. - return false; - } - - // Don't use addr_for(card_ptr + 1) which can ask for - // a card beyond the heap. - HeapWord* card_end = card_start + G1CardTable::card_size_in_words; - MemRegion dirty_region(card_start, MIN2(scan_limit, card_end)); - assert(!dirty_region.is_empty(), "sanity"); - - HeapRegion* const card_region = _g1h->region_at(card_region_idx); - assert(!card_region->is_young(), "Should not scan card in young region %u", card_region_idx); - bool card_processed = card_region->oops_on_card_seq_iterate_careful(dirty_region, update_rs_cl); - assert(card_processed, "must be"); - return true; -} - void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) { if ((G1SummarizeRSetStatsPeriod > 0) && log_is_enabled(Trace, gc, remset) && (period_count % G1SummarizeRSetStatsPeriod == 0)) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1RemSet.hpp --- a/src/hotspot/share/gc/g1/g1RemSet.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1RemSet.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -46,6 +46,7 @@ class G1HotCardCache; class G1RemSetScanState; class G1ParScanThreadState; +class G1ParScanThreadStateSet; class G1Policy; class G1ScanCardClosure; class HeapRegionClaimer; @@ -84,39 +85,39 @@ G1HotCardCache* hot_card_cache); ~G1RemSet(); - // Scan all remembered sets of the collection set for references into the collection - // set. - // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code - // roots list for each region in the collection set. - void scan_rem_set(G1ParScanThreadState* pss, - uint worker_i, - G1GCPhaseTimes::GCParPhases scan_phase, - G1GCPhaseTimes::GCParPhases objcopy_phase, - G1GCPhaseTimes::GCParPhases coderoots_phase); + // Scan all cards in the non-collection set regions that potentially contain + // references into the current whole collection set. + void scan_heap_roots(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase); + + // Merge cards from various sources (remembered sets, hot card cache, log buffers) + // and calculate the cards that need to be scanned later (via scan_heap_roots()). + // If remembered_set_only is set, only merge remembered set cards. + void merge_heap_roots(bool remembered_set_only, G1GCPhaseTimes::GCParPhases merge_phase); - // Flush remaining refinement buffers for cross-region references to either evacuate references - // into the collection set or update the remembered set. - void update_rem_set(G1ParScanThreadState* pss, uint worker_i); - - // Prepare for and cleanup after scanning the remembered sets. Must be called + // Prepare for and cleanup after scanning the heap roots. Must be called // once before and after in sequential code. - void prepare_for_scan_rem_set(); - void cleanup_after_scan_rem_set(); - // Prepares the given region for remembered set scanning. 
- void prepare_for_scan_rem_set(uint region_idx); + void prepare_for_scan_heap_roots(); + // Cleans the card table from temporary duplicate detection information. + void cleanup_after_scan_heap_roots(); + // Prepares the given region for heap root scanning. + void prepare_for_scan_heap_roots(uint region_idx); - G1RemSetScanState* scan_state() const { return _scan_state; } + // Do work for regions in the current increment of the collection set, scanning + // non-card based (heap) roots. + void scan_collection_set_regions(G1ParScanThreadState* pss, + uint worker_id, + G1GCPhaseTimes::GCParPhases scan_phase, + G1GCPhaseTimes::GCParPhases coderoots_phase, + G1GCPhaseTimes::GCParPhases objcopy_phase); // Refine the card corresponding to "card_ptr". Safe to be called concurrently // to the mutator. void refine_card_concurrently(CardValue* card_ptr, uint worker_i); - // Refine the card corresponding to "card_ptr", applying the given closure to - // all references found. Must only be called during gc. - // Returns whether the card has been scanned. - bool refine_card_during_gc(CardValue* card_ptr, G1ScanCardClosure* update_rs_cl); - // Print accumulated summary info from the start of the VM. void print_summary_info(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1SATBMarkQueueSet.cpp --- a/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -35,12 +35,10 @@ G1SATBMarkQueueSet::G1SATBMarkQueueSet() : _g1h(NULL) {} void G1SATBMarkQueueSet::initialize(G1CollectedHeap* g1h, - Monitor* cbl_mon, BufferNode::Allocator* allocator, size_t process_completed_buffers_threshold, uint buffer_enqueue_threshold_percentage) { - SATBMarkQueueSet::initialize(cbl_mon, - allocator, + SATBMarkQueueSet::initialize(allocator, process_completed_buffers_threshold, buffer_enqueue_threshold_percentage); _g1h = g1h; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/g1SATBMarkQueueSet.hpp --- a/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,7 +38,6 @@ G1SATBMarkQueueSet(); void initialize(G1CollectedHeap* g1h, - Monitor* cbl_mon, BufferNode::Allocator* allocator, size_t process_completed_buffers_threshold, uint buffer_enqueue_threshold_percentage); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/heapRegion.cpp --- a/src/hotspot/share/gc/g1/heapRegion.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/heapRegion.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -49,6 +49,7 @@ int HeapRegion::LogOfHRGrainBytes = 0; int HeapRegion::LogOfHRGrainWords = 0; +int HeapRegion::LogCardsPerRegion = 0; size_t HeapRegion::GrainBytes = 0; size_t HeapRegion::GrainWords = 0; size_t HeapRegion::CardsPerRegion = 0; @@ -105,6 +106,8 @@ guarantee(CardsPerRegion == 0, "we should only set it once"); CardsPerRegion = GrainBytes >> G1CardTable::card_shift; + LogCardsPerRegion = log2_long((jlong) CardsPerRegion); + if (G1HeapRegionSize != GrainBytes) { FLAG_SET_ERGO(G1HeapRegionSize, GrainBytes); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/heapRegion.hpp --- a/src/hotspot/share/gc/g1/heapRegion.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/heapRegion.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -60,7 +60,6 @@ class G1CMBitMap; class G1IsAliveAndApplyClosure; class HeapRegionRemSet; -class HeapRegionRemSetIterator; class HeapRegion; class 
HeapRegionSetBase; class nmethod; @@ -315,6 +314,7 @@ static int LogOfHRGrainBytes; static int LogOfHRGrainWords; + static int LogCardsPerRegion; static size_t GrainBytes; static size_t GrainWords; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/heapRegionRemSet.cpp --- a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -27,7 +27,7 @@ #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" #include "gc/g1/heapRegionManager.inline.hpp" -#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/heapRegionRemSet.inline.hpp" #include "gc/shared/space.inline.hpp" #include "memory/allocation.hpp" #include "memory/padded.inline.hpp" @@ -42,195 +42,21 @@ const char* HeapRegionRemSet::_state_strings[] = {"Untracked", "Updating", "Complete"}; const char* HeapRegionRemSet::_short_state_strings[] = {"UNTRA", "UPDAT", "CMPLT"}; -class PerRegionTable: public CHeapObj { - friend class OtherRegionsTable; - friend class HeapRegionRemSetIterator; - - HeapRegion* _hr; - CHeapBitMap _bm; - jint _occupied; - - // next pointer for free/allocated 'all' list - PerRegionTable* _next; - - // prev pointer for the allocated 'all' list - PerRegionTable* _prev; - - // next pointer in collision list - PerRegionTable * _collision_list_next; - - // Global free list of PRTs - static PerRegionTable* volatile _free_list; - -protected: - // We need access in order to union things into the base table. - BitMap* bm() { return &_bm; } - - PerRegionTable(HeapRegion* hr) : - _hr(hr), - _bm(HeapRegion::CardsPerRegion, mtGC), - _occupied(0), - _next(NULL), _prev(NULL), - _collision_list_next(NULL) - {} - - void add_card_work(CardIdx_t from_card, bool par) { - if (!_bm.at(from_card)) { - if (par) { - if (_bm.par_at_put(from_card, 1)) { - Atomic::inc(&_occupied); - } - } else { - _bm.at_put(from_card, 1); - _occupied++; - } - } - } - - void add_reference_work(OopOrNarrowOopStar from, bool par) { - // Must make this robust in case "from" is not in "_hr", because of - // concurrency. - - HeapRegion* loc_hr = hr(); - // If the test below fails, then this table was reused concurrently - // with this operation. This is OK, since the old table was coarsened, - // and adding a bit to the new table is never incorrect. - if (loc_hr->is_in_reserved(from)) { - CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr); - add_card_work(from_card, par); +PerRegionTable* PerRegionTable::alloc(HeapRegion* hr) { + PerRegionTable* fl = _free_list; + while (fl != NULL) { + PerRegionTable* nxt = fl->next(); + PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr, true); + return fl; + } else { + fl = _free_list; } } - -public: - - HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); } - - jint occupied() const { - // Overkill, but if we ever need it... - // guarantee(_occupied == _bm.count_one_bits(), "Check"); - return _occupied; - } - - void init(HeapRegion* hr, bool clear_links_to_all_list) { - if (clear_links_to_all_list) { - set_next(NULL); - set_prev(NULL); - } - _collision_list_next = NULL; - _occupied = 0; - _bm.clear(); - // Make sure that the bitmap clearing above has been finished before publishing - // this PRT to concurrent threads. 
- OrderAccess::release_store(&_hr, hr); - } - - void add_reference(OopOrNarrowOopStar from) { - add_reference_work(from, /*parallel*/ true); - } - - void seq_add_reference(OopOrNarrowOopStar from) { - add_reference_work(from, /*parallel*/ false); - } - - void add_card(CardIdx_t from_card_index) { - add_card_work(from_card_index, /*parallel*/ true); - } - - void seq_add_card(CardIdx_t from_card_index) { - add_card_work(from_card_index, /*parallel*/ false); - } - - // (Destructively) union the bitmap of the current table into the given - // bitmap (which is assumed to be of the same size.) - void union_bitmap_into(BitMap* bm) { - bm->set_union(_bm); - } - - // Mem size in bytes. - size_t mem_size() const { - return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize; - } - - // Requires "from" to be in "hr()". - bool contains_reference(OopOrNarrowOopStar from) const { - assert(hr()->is_in_reserved(from), "Precondition."); - size_t card_ind = pointer_delta(from, hr()->bottom(), - G1CardTable::card_size); - return _bm.at(card_ind); - } - - // Bulk-free the PRTs from prt to last, assumes that they are - // linked together using their _next field. - static void bulk_free(PerRegionTable* prt, PerRegionTable* last) { - while (true) { - PerRegionTable* fl = _free_list; - last->set_next(fl); - PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl); - if (res == fl) { - return; - } - } - ShouldNotReachHere(); - } - - static void free(PerRegionTable* prt) { - bulk_free(prt, prt); - } - - // Returns an initialized PerRegionTable instance. - static PerRegionTable* alloc(HeapRegion* hr) { - PerRegionTable* fl = _free_list; - while (fl != NULL) { - PerRegionTable* nxt = fl->next(); - PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl); - if (res == fl) { - fl->init(hr, true); - return fl; - } else { - fl = _free_list; - } - } - assert(fl == NULL, "Loop condition."); - return new PerRegionTable(hr); - } - - PerRegionTable* next() const { return _next; } - void set_next(PerRegionTable* next) { _next = next; } - PerRegionTable* prev() const { return _prev; } - void set_prev(PerRegionTable* prev) { _prev = prev; } - - // Accessor and Modification routines for the pointer for the - // singly linked collision list that links the PRTs within the - // OtherRegionsTable::_fine_grain_regions hash table. - // - // It might be useful to also make the collision list doubly linked - // to avoid iteration over the collisions list during scrubbing/deletion. - // OTOH there might not be many collisions. 
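The PerRegionTable free list in this hunk is maintained lock-free: alloc() pops the head with a CAS loop and bulk_free() pushes an already linked batch the same way. A standalone Treiber-stack sketch of both operations using std::atomic (FreeNode and the function names are illustrative; the sketch deliberately ignores the ABA/reclamation question that any production use of the pattern has to consider):

#include <atomic>

struct FreeNode {
  FreeNode* next;
};

static std::atomic<FreeNode*> g_free_list{nullptr};

// Pop one node; returns nullptr if the list is empty and a fresh node must be allocated.
static FreeNode* pop_free() {
  FreeNode* head = g_free_list.load(std::memory_order_acquire);
  while (head != nullptr &&
         !g_free_list.compare_exchange_weak(head, head->next,
                                            std::memory_order_acquire)) {
    // compare_exchange_weak reloads head on failure; just retry.
  }
  return head;
}

// Push a batch that is already linked from first to last via the next fields.
static void push_free(FreeNode* first, FreeNode* last) {
  FreeNode* head = g_free_list.load(std::memory_order_relaxed);
  do {
    last->next = head;
  } while (!g_free_list.compare_exchange_weak(head, first,
                                              std::memory_order_release));
}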
- - PerRegionTable* collision_list_next() const { - return _collision_list_next; - } - - void set_collision_list_next(PerRegionTable* next) { - _collision_list_next = next; - } - - PerRegionTable** collision_list_next_addr() { - return &_collision_list_next; - } - - static size_t fl_mem_size() { - PerRegionTable* cur = _free_list; - size_t res = 0; - while (cur != NULL) { - res += cur->mem_size(); - cur = cur->next(); - } - return res; - } - - static void test_fl_mem_size(); -}; + assert(fl == NULL, "Loop condition."); + return new PerRegionTable(hr); +} PerRegionTable* volatile PerRegionTable::_free_list = NULL; @@ -696,175 +522,3 @@ size_t HeapRegionRemSet::strong_code_roots_mem_size() { return _code_roots.mem_size(); } - -HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) : - _hrrs(hrrs), - _coarse_map(&hrrs->_other_regions._coarse_map), - _bot(hrrs->_bot), - _g1h(G1CollectedHeap::heap()), - _n_yielded_fine(0), - _n_yielded_coarse(0), - _n_yielded_sparse(0), - _is(Sparse), - _cur_region_card_offset(0), - // Set these values so that we increment to the first region. - _coarse_cur_region_index(-1), - _coarse_cur_region_cur_card(HeapRegion::CardsPerRegion-1), - _fine_cur_prt(NULL), - _cur_card_in_prt(HeapRegion::CardsPerRegion), - _sparse_iter(&hrrs->_other_regions._sparse_table) {} - -bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) { - if (_hrrs->_other_regions._n_coarse_entries == 0) return false; - // Go to the next card. - _coarse_cur_region_cur_card++; - // Was the last the last card in the current region? - if (_coarse_cur_region_cur_card == HeapRegion::CardsPerRegion) { - // Yes: find the next region. This may leave _coarse_cur_region_index - // Set to the last index, in which case there are no more coarse - // regions. - _coarse_cur_region_index = - (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1); - if ((size_t)_coarse_cur_region_index < _coarse_map->size()) { - _coarse_cur_region_cur_card = 0; - HeapWord* r_bot = - _g1h->region_at((uint) _coarse_cur_region_index)->bottom(); - _cur_region_card_offset = _bot->index_for_raw(r_bot); - } else { - return false; - } - } - // If we didn't return false above, then we can yield a card. - card_index = _cur_region_card_offset + _coarse_cur_region_cur_card; - return true; -} - -bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) { - if (fine_has_next()) { - _cur_card_in_prt = - _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1); - } - if (_cur_card_in_prt == HeapRegion::CardsPerRegion) { - // _fine_cur_prt may still be NULL in case if there are not PRTs at all for - // the remembered set. 
- if (_fine_cur_prt == NULL || _fine_cur_prt->next() == NULL) { - return false; - } - PerRegionTable* next_prt = _fine_cur_prt->next(); - switch_to_prt(next_prt); - _cur_card_in_prt = _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1); - } - - card_index = _cur_region_card_offset + _cur_card_in_prt; - guarantee(_cur_card_in_prt < HeapRegion::CardsPerRegion, - "Card index " SIZE_FORMAT " must be within the region", _cur_card_in_prt); - return true; -} - -bool HeapRegionRemSetIterator::fine_has_next() { - return _cur_card_in_prt != HeapRegion::CardsPerRegion; -} - -void HeapRegionRemSetIterator::switch_to_prt(PerRegionTable* prt) { - assert(prt != NULL, "Cannot switch to NULL prt"); - _fine_cur_prt = prt; - - HeapWord* r_bot = _fine_cur_prt->hr()->bottom(); - _cur_region_card_offset = _bot->index_for_raw(r_bot); - - // The bitmap scan for the PRT always scans from _cur_region_cur_card + 1. - // To avoid special-casing this start case, and not miss the first bitmap - // entry, initialize _cur_region_cur_card with -1 instead of 0. - _cur_card_in_prt = (size_t)-1; -} - -bool HeapRegionRemSetIterator::has_next(size_t& card_index) { - switch (_is) { - case Sparse: { - if (_sparse_iter.has_next(card_index)) { - _n_yielded_sparse++; - return true; - } - // Otherwise, deliberate fall-through - _is = Fine; - PerRegionTable* initial_fine_prt = _hrrs->_other_regions._first_all_fine_prts; - if (initial_fine_prt != NULL) { - switch_to_prt(_hrrs->_other_regions._first_all_fine_prts); - } - } - case Fine: - if (fine_has_next(card_index)) { - _n_yielded_fine++; - return true; - } - // Otherwise, deliberate fall-through - _is = Coarse; - case Coarse: - if (coarse_has_next(card_index)) { - _n_yielded_coarse++; - return true; - } - // Otherwise... - break; - } - return false; -} - -#ifndef PRODUCT -void HeapRegionRemSet::test() { - os::sleep(Thread::current(), (jlong)5000, false); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - - // Run with "-XX:G1LogRSetRegionEntries=2", so that 1 and 5 end up in same - // hash bucket. - HeapRegion* hr0 = g1h->region_at(0); - HeapRegion* hr1 = g1h->region_at(1); - HeapRegion* hr2 = g1h->region_at(5); - HeapRegion* hr3 = g1h->region_at(6); - HeapRegion* hr4 = g1h->region_at(7); - HeapRegion* hr5 = g1h->region_at(8); - - HeapWord* hr1_start = hr1->bottom(); - HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2; - HeapWord* hr1_last = hr1->end() - 1; - - HeapWord* hr2_start = hr2->bottom(); - HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2; - HeapWord* hr2_last = hr2->end() - 1; - - HeapWord* hr3_start = hr3->bottom(); - HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2; - HeapWord* hr3_last = hr3->end() - 1; - - HeapRegionRemSet* hrrs = hr0->rem_set(); - - // Make three references from region 0x101... - hrrs->add_reference((OopOrNarrowOopStar)hr1_start); - hrrs->add_reference((OopOrNarrowOopStar)hr1_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr1_last); - - hrrs->add_reference((OopOrNarrowOopStar)hr2_start); - hrrs->add_reference((OopOrNarrowOopStar)hr2_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr2_last); - - hrrs->add_reference((OopOrNarrowOopStar)hr3_start); - hrrs->add_reference((OopOrNarrowOopStar)hr3_mid); - hrrs->add_reference((OopOrNarrowOopStar)hr3_last); - - // Now cause a coarsening. - hrrs->add_reference((OopOrNarrowOopStar)hr4->bottom()); - hrrs->add_reference((OopOrNarrowOopStar)hr5->bottom()); - - // Now, does iteration yield these three? 
- HeapRegionRemSetIterator iter(hrrs); - size_t sum = 0; - size_t card_index; - while (iter.has_next(card_index)) { - HeapWord* card_start = g1h->bot()->address_for_index(card_index); - tty->print_cr(" Card " PTR_FORMAT ".", p2i(card_start)); - sum++; - } - guarantee(sum == 11 - 3 + 2048, "Failure"); - guarantee(sum == hrrs->occupied(), "Failure"); -} -#endif diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/heapRegionRemSet.hpp --- a/src/hotspot/share/gc/g1/heapRegionRemSet.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -28,6 +28,7 @@ #include "gc/g1/g1CodeCacheRemSet.hpp" #include "gc/g1/g1FromCardCache.hpp" #include "gc/g1/sparsePRT.hpp" +#include "utilities/bitMap.hpp" // Remembered set for a heap region. Represent a set of "cards" that // contain pointers into the owner heap region. Cards are defined somewhat @@ -37,7 +38,6 @@ class G1BlockOffsetTable; class G1CardLiveData; class HeapRegion; -class HeapRegionRemSetIterator; class PerRegionTable; class SparsePRT; class nmethod; @@ -67,8 +67,6 @@ // thinking the PRT is for a different region, does no harm. class OtherRegionsTable { - friend class HeapRegionRemSetIterator; - G1CollectedHeap* _g1h; Mutex* _m; @@ -125,6 +123,9 @@ // Create a new remembered set. The given mutex is used to ensure consistency. OtherRegionsTable(Mutex* m); + template + void iterate(Closure& v); + // Returns the card index of the given within_region pointer relative to the bottom // of the given heap region. static CardIdx_t card_within_region(OopOrNarrowOopStar within_region, HeapRegion* hr); @@ -157,9 +158,140 @@ void clear(); }; +class PerRegionTable: public CHeapObj { + friend class OtherRegionsTable; + + HeapRegion* _hr; + CHeapBitMap _bm; + jint _occupied; + + // next pointer for free/allocated 'all' list + PerRegionTable* _next; + + // prev pointer for the allocated 'all' list + PerRegionTable* _prev; + + // next pointer in collision list + PerRegionTable * _collision_list_next; + + // Global free list of PRTs + static PerRegionTable* volatile _free_list; + +protected: + PerRegionTable(HeapRegion* hr) : + _hr(hr), + _bm(HeapRegion::CardsPerRegion, mtGC), + _occupied(0), + _next(NULL), _prev(NULL), + _collision_list_next(NULL) + {} + + inline void add_card_work(CardIdx_t from_card, bool par); + + inline void add_reference_work(OopOrNarrowOopStar from, bool par); + +public: + // We need access in order to union things into the base table. + BitMap* bm() { return &_bm; } + + HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); } + + jint occupied() const { + // Overkill, but if we ever need it... + // guarantee(_occupied == _bm.count_one_bits(), "Check"); + return _occupied; + } + + void init(HeapRegion* hr, bool clear_links_to_all_list); + + inline void add_reference(OopOrNarrowOopStar from); + + inline void seq_add_reference(OopOrNarrowOopStar from); + + inline void add_card(CardIdx_t from_card_index); + + void seq_add_card(CardIdx_t from_card_index); + + // (Destructively) union the bitmap of the current table into the given + // bitmap (which is assumed to be of the same size.) + void union_bitmap_into(BitMap* bm) { + bm->set_union(_bm); + } + + // Mem size in bytes. + size_t mem_size() const { + return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize; + } + + // Requires "from" to be in "hr()". 
+ bool contains_reference(OopOrNarrowOopStar from) const { + assert(hr()->is_in_reserved(from), "Precondition."); + size_t card_ind = pointer_delta(from, hr()->bottom(), + G1CardTable::card_size); + return _bm.at(card_ind); + } + + // Bulk-free the PRTs from prt to last, assumes that they are + // linked together using their _next field. + static void bulk_free(PerRegionTable* prt, PerRegionTable* last) { + while (true) { + PerRegionTable* fl = _free_list; + last->set_next(fl); + PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl); + if (res == fl) { + return; + } + } + ShouldNotReachHere(); + } + + static void free(PerRegionTable* prt) { + bulk_free(prt, prt); + } + + // Returns an initialized PerRegionTable instance. + static PerRegionTable* alloc(HeapRegion* hr); + + PerRegionTable* next() const { return _next; } + void set_next(PerRegionTable* next) { _next = next; } + PerRegionTable* prev() const { return _prev; } + void set_prev(PerRegionTable* prev) { _prev = prev; } + + // Accessor and Modification routines for the pointer for the + // singly linked collision list that links the PRTs within the + // OtherRegionsTable::_fine_grain_regions hash table. + // + // It might be useful to also make the collision list doubly linked + // to avoid iteration over the collisions list during scrubbing/deletion. + // OTOH there might not be many collisions. + + PerRegionTable* collision_list_next() const { + return _collision_list_next; + } + + void set_collision_list_next(PerRegionTable* next) { + _collision_list_next = next; + } + + PerRegionTable** collision_list_next_addr() { + return &_collision_list_next; + } + + static size_t fl_mem_size() { + PerRegionTable* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += cur->mem_size(); + cur = cur->next(); + } + return res; + } + + static void test_fl_mem_size(); +}; + class HeapRegionRemSet : public CHeapObj { friend class VMStructs; - friend class HeapRegionRemSetIterator; private: G1BlockOffsetTable* _bot; @@ -182,18 +314,23 @@ // Setup sparse and fine-grain tables sizes. static void setup_remset_size(); - bool cardset_is_empty() const { - return _other_regions.is_empty(); - } - bool is_empty() const { - return (strong_code_roots_list_length() == 0) && cardset_is_empty(); + return (strong_code_roots_list_length() == 0) && _other_regions.is_empty(); } bool occupancy_less_or_equal_than(size_t occ) const { return (strong_code_roots_list_length() == 0) && _other_regions.occupancy_less_or_equal_than(occ); } + // For each PRT in the card (remembered) set call one of the following methods + // of the given closure: + // + // set_full_region_dirty(uint region_idx) - pass the region index for coarse PRTs + // set_bitmap_dirty(uint region_idx, BitMap* bitmap) - pass the region index and bitmap for fine PRTs + // set_cards_dirty(uint region_idx, elem_t* cards, uint num_cards) - pass region index and cards for sparse PRTs + template + inline void iterate_prts(Closure& cl); + size_t occupied() { MutexLocker x(&_m, Mutex::_no_safepoint_check_flag); return occupied_locked(); @@ -339,70 +476,4 @@ #endif }; -class HeapRegionRemSetIterator : public StackObj { -private: - // The region RSet over which we are iterating. - HeapRegionRemSet* _hrrs; - - // Local caching of HRRS fields. - const BitMap* _coarse_map; - - G1BlockOffsetTable* _bot; - G1CollectedHeap* _g1h; - - // The number of cards yielded since initialization. 
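The iterate_prts() contract documented above expects a closure with one entry point per remembered set granularity: whole region (coarse), per-card bitmap (fine), and explicit card list (sparse). A standalone sketch of such a closure that merely counts the dirty cards it is handed (std::vector<bool> and uint16_t are simplified stand-ins for BitMap and elem_t; not HotSpot code):

#include <cstddef>
#include <cstdint>
#include <vector>

struct CountDirtyCardsClosure {
  size_t dirty_cards = 0;
  size_t cards_per_region;  // assumed fixed, in the spirit of HeapRegion::CardsPerRegion

  explicit CountDirtyCardsClosure(size_t cards_per_region_arg)
      : cards_per_region(cards_per_region_arg) {}

  // Coarse PRT: every card of the source region is considered dirty.
  void set_full_region_dirty(uint32_t /*region_idx*/) {
    dirty_cards += cards_per_region;
  }

  // Fine PRT: one bit per card of the source region.
  void set_bitmap_dirty(uint32_t /*region_idx*/, const std::vector<bool>& card_bitmap) {
    for (bool is_dirty : card_bitmap) {
      if (is_dirty) dirty_cards++;
    }
  }

  // Sparse PRT: a short explicit list of card indices within the source region.
  void set_cards_dirty(uint32_t /*region_idx*/, const uint16_t* /*cards*/, uint32_t num_cards) {
    dirty_cards += num_cards;
  }
};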
- size_t _n_yielded_fine; - size_t _n_yielded_coarse; - size_t _n_yielded_sparse; - - // Indicates what granularity of table that we are currently iterating over. - // We start iterating over the sparse table, progress to the fine grain - // table, and then finish with the coarse table. - enum IterState { - Sparse, - Fine, - Coarse - }; - IterState _is; - - // For both Coarse and Fine remembered set iteration this contains the - // first card number of the heap region we currently iterate over. - size_t _cur_region_card_offset; - - // Current region index for the Coarse remembered set iteration. - int _coarse_cur_region_index; - size_t _coarse_cur_region_cur_card; - - bool coarse_has_next(size_t& card_index); - - // The PRT we are currently iterating over. - PerRegionTable* _fine_cur_prt; - // Card offset within the current PRT. - size_t _cur_card_in_prt; - - // Update internal variables when switching to the given PRT. - void switch_to_prt(PerRegionTable* prt); - bool fine_has_next(); - bool fine_has_next(size_t& card_index); - - // The Sparse remembered set iterator. - SparsePRTIter _sparse_iter; - -public: - HeapRegionRemSetIterator(HeapRegionRemSet* hrrs); - - // If there remains one or more cards to be yielded, returns true and - // sets "card_index" to one of those cards (which is then considered - // yielded.) Otherwise, returns false (and leaves "card_index" - // undefined.) - bool has_next(size_t& card_index); - - size_t n_yielded_fine() { return _n_yielded_fine; } - size_t n_yielded_coarse() { return _n_yielded_coarse; } - size_t n_yielded_sparse() { return _n_yielded_sparse; } - size_t n_yielded() { - return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse(); - } -}; - #endif // SHARE_GC_G1_HEAPREGIONREMSET_HPP diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/hotspot/share/gc/g1/heapRegionRemSet.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP +#define SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP + +#include "gc/g1/heapRegion.inline.hpp" +#include "gc/g1/heapRegionRemSet.hpp" +#include "gc/g1/sparsePRT.hpp" +#include "utilities/bitMap.inline.hpp" + +template +inline void HeapRegionRemSet::iterate_prts(Closure& cl) { + _other_regions.iterate(cl); +} + +inline void PerRegionTable::add_card_work(CardIdx_t from_card, bool par) { + if (!_bm.at(from_card)) { + if (par) { + if (_bm.par_set_bit(from_card)) { + Atomic::inc(&_occupied); + } + } else { + _bm.set_bit(from_card); + _occupied++; + } + } +} + +inline void PerRegionTable::add_reference_work(OopOrNarrowOopStar from, bool par) { + // Must make this robust in case "from" is not in "_hr", because of + // concurrency. + + HeapRegion* loc_hr = hr(); + // If the test below fails, then this table was reused concurrently + // with this operation. This is OK, since the old table was coarsened, + // and adding a bit to the new table is never incorrect. + if (loc_hr->is_in_reserved(from)) { + CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr); + add_card_work(from_card, par); + } +} + +inline void PerRegionTable::add_card(CardIdx_t from_card_index) { + add_card_work(from_card_index, /*parallel*/ true); +} + +inline void PerRegionTable::seq_add_card(CardIdx_t from_card_index) { + add_card_work(from_card_index, /*parallel*/ false); +} + +inline void PerRegionTable::add_reference(OopOrNarrowOopStar from) { + add_reference_work(from, /*parallel*/ true); +} + +inline void PerRegionTable::seq_add_reference(OopOrNarrowOopStar from) { + add_reference_work(from, /*parallel*/ false); +} + +inline void PerRegionTable::init(HeapRegion* hr, bool clear_links_to_all_list) { + if (clear_links_to_all_list) { + set_next(NULL); + set_prev(NULL); + } + _collision_list_next = NULL; + _occupied = 0; + _bm.clear(); + // Make sure that the bitmap clearing above has been finished before publishing + // this PRT to concurrent threads. 
+ OrderAccess::release_store(&_hr, hr); +} + +template +void OtherRegionsTable::iterate(Closure& cl) { + if (_n_coarse_entries > 0) { + BitMap::idx_t cur = _coarse_map.get_next_one_offset(0); + while (cur != _coarse_map.size()) { + cl.next_coarse_prt((uint)cur); + cur = _coarse_map.get_next_one_offset(cur + 1); + } + } + { + PerRegionTable* cur = _first_all_fine_prts; + while (cur != NULL) { + cl.next_fine_prt(cur->hr()->hrm_index(), cur->bm()); + cur = cur->next(); + } + } + { + SparsePRTBucketIter iter(&_sparse_table); + SparsePRTEntry* cur; + while (iter.has_next(cur)) { + cl.next_sparse_prt(cur->r_ind(), cur->cards(), cur->num_valid_cards()); + } + } +} + +#endif // SHARE_VM_GC_G1_HEAPREGIONREMSET_INLINE_HPP diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/sparsePRT.cpp --- a/src/hotspot/share/gc/g1/sparsePRT.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/sparsePRT.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -275,6 +275,19 @@ return false; } +bool RSHashTableBucketIter::has_next(SparsePRTEntry*& entry) { + while (_bl_ind == RSHashTable::NullEntry) { + if (_tbl_ind == (int)_rsht->capacity() - 1) { + return false; + } + _tbl_ind++; + _bl_ind = _rsht->_buckets[_tbl_ind]; + } + entry = _rsht->entry(_bl_ind); + _bl_ind = entry->next_index(); + return true; +} + bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index) const { SparsePRTEntry* e = get_entry(region_index); return (e != NULL && e->contains_card(card_index)); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/g1/sparsePRT.hpp --- a/src/hotspot/share/gc/g1/sparsePRT.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/g1/sparsePRT.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,10 +38,11 @@ // that might contain pointers into the owner region. class SparsePRTEntry: public CHeapObj { -private: +public: // The type of a card entry. typedef uint16_t card_elem_t; +private: // We need to make sizeof(SparsePRTEntry) an even multiple of maximum member size, // in order to force correct alignment that could otherwise cause SIGBUS errors // when reading the member variables. This calculates the minimum number of card @@ -96,6 +97,8 @@ // Copy the current entry's cards into the "_card" array of "e." inline void copy_cards(SparsePRTEntry* e) const; + card_elem_t* cards() { return _cards; } + inline CardIdx_t card(int i) const { assert(i >= 0, "must be nonnegative"); assert(i < cards_num(), "range checking"); @@ -106,7 +109,7 @@ class RSHashTable : public CHeapObj { friend class RSHashTableIter; - + friend class RSHashTableBucketIter; // Inverse maximum hash table occupancy used. static float TableOccupancyFactor; @@ -209,12 +212,29 @@ bool has_next(size_t& card_index); }; +// This is embedded in HRRS iterator. +class RSHashTableBucketIter { + int _tbl_ind; // [-1, 0.._rsht->_capacity) + int _bl_ind; // [-1, 0.._rsht->_capacity) + + RSHashTable* _rsht; + +public: + RSHashTableBucketIter(RSHashTable* rsht) : + _tbl_ind(0), + _bl_ind(rsht->_buckets[_tbl_ind]), + _rsht(rsht) { } + + bool has_next(SparsePRTEntry*& entry); +}; + // Concurrent access to a SparsePRT must be serialized by some external mutex. 
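
RSHashTableBucketIter walks the sparse table chain by chain rather than card by card: scan the bucket-head array for a non-empty chain, then follow the per-entry next indices. A standalone sketch of that walk over plain vectors (illustrative only; -1 stands in for RSHashTable::NullEntry):

#include <vector>

// Illustrative sketch only -- not HotSpot code. Assumes at least one bucket.
struct EntryStub { int value; int next; };

class BucketIterSketch {
  const std::vector<int>& _buckets;          // first entry index per bucket, or -1
  const std::vector<EntryStub>& _entries;
  size_t _tbl_ind = 0;
  int _bl_ind;
public:
  BucketIterSketch(const std::vector<int>& buckets,
                   const std::vector<EntryStub>& entries)
    : _buckets(buckets), _entries(entries), _bl_ind(buckets[0]) {}

  bool has_next(const EntryStub*& entry) {
    while (_bl_ind == -1) {                            // current chain exhausted
      if (_tbl_ind == _buckets.size() - 1) {
        return false;                                  // no more buckets
      }
      _bl_ind = _buckets[++_tbl_ind];
    }
    entry = &_entries[_bl_ind];
    _bl_ind = entry->next;                             // advance within the chain
    return true;
  }
};
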
class SparsePRTIter; class SparsePRT { friend class SparsePRTIter; + friend class SparsePRTBucketIter; RSHashTable* _table; @@ -262,4 +282,14 @@ } }; +class SparsePRTBucketIter: public RSHashTableBucketIter { +public: + SparsePRTBucketIter(const SparsePRT* sprt) : + RSHashTableBucketIter(sprt->_table) {} + + bool has_next(SparsePRTEntry*& entry) { + return RSHashTableBucketIter::has_next(entry); + } +}; + #endif // SHARE_GC_G1_SPARSEPRT_HPP diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/parallel/psMarkSweep.cpp --- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -259,7 +259,6 @@ BiasedLocking::restore_marks(); heap->prune_scavengable_nmethods(); - JvmtiExport::gc_epilogue(); #if COMPILER2_OR_JVMCI DerivedPointerTable::update_pointers(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/parallel/psParallelCompact.cpp --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1064,7 +1064,6 @@ MetaspaceUtils::verify_metrics(); heap->prune_scavengable_nmethods(); - JvmtiExport::gc_epilogue(); #if COMPILER2_OR_JVMCI DerivedPointerTable::update_pointers(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/serial/genMarkSweep.cpp --- a/src/hotspot/share/gc/serial/genMarkSweep.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/serial/genMarkSweep.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -129,7 +129,6 @@ } gch->prune_scavengable_nmethods(); - JvmtiExport::gc_epilogue(); // refs processing: clean slate set_ref_processor(NULL); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/cardTable.hpp --- a/src/hotspot/share/gc/shared/cardTable.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/cardTable.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -103,15 +103,11 @@ enum CardValues { clean_card = (CardValue)-1, - // The mask contains zeros in places for all other values. 
- clean_card_mask = clean_card - 31, dirty_card = 0, precleaned_card = 1, - claimed_card = 2, - deferred_card = 4, - last_card = 8, - CT_MR_BS_last_reserved = 16 + last_card = 2, + CT_MR_BS_last_reserved = 4 }; // a word's worth (row) of clean card values @@ -242,11 +238,8 @@ }; static CardValue clean_card_val() { return clean_card; } - static CardValue clean_card_mask_val() { return clean_card_mask; } static CardValue dirty_card_val() { return dirty_card; } - static CardValue claimed_card_val() { return claimed_card; } static CardValue precleaned_card_val() { return precleaned_card; } - static CardValue deferred_card_val() { return deferred_card; } static intptr_t clean_card_row_val() { return clean_card_row; } // Card marking array base (adjusted for heap low boundary) diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/ptrQueue.cpp --- a/src/hotspot/share/gc/shared/ptrQueue.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/ptrQueue.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -250,28 +250,15 @@ return removed; } -PtrQueueSet::PtrQueueSet(bool notify_when_complete) : +PtrQueueSet::PtrQueueSet() : _allocator(NULL), - _cbl_mon(NULL), - _completed_buffers_head(NULL), - _completed_buffers_tail(NULL), - _n_completed_buffers(0), - _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever), - _process_completed_buffers(false), - _notify_when_complete(notify_when_complete), _all_active(false) {} -PtrQueueSet::~PtrQueueSet() { - // There are presently only a couple (derived) instances ever - // created, and they are permanent, so no harm currently done by - // doing nothing here. -} +PtrQueueSet::~PtrQueueSet() {} -void PtrQueueSet::initialize(Monitor* cbl_mon, - BufferNode::Allocator* allocator) { - assert(cbl_mon != NULL && allocator != NULL, "Init order issue?"); - _cbl_mon = cbl_mon; +void PtrQueueSet::initialize(BufferNode::Allocator* allocator) { + assert(allocator != NULL, "Init order issue?"); _allocator = allocator; } @@ -284,121 +271,3 @@ _allocator->release(node); } -void PtrQueueSet::enqueue_completed_buffer(BufferNode* cbn) { - MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); - cbn->set_next(NULL); - if (_completed_buffers_tail == NULL) { - assert(_completed_buffers_head == NULL, "Well-formedness"); - _completed_buffers_head = cbn; - _completed_buffers_tail = cbn; - } else { - _completed_buffers_tail->set_next(cbn); - _completed_buffers_tail = cbn; - } - _n_completed_buffers++; - - if (!_process_completed_buffers && - (_n_completed_buffers > _process_completed_buffers_threshold)) { - _process_completed_buffers = true; - if (_notify_when_complete) { - _cbl_mon->notify(); - } - } - assert_completed_buffers_list_len_correct_locked(); -} - -BufferNode* PtrQueueSet::get_completed_buffer(size_t stop_at) { - MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); - - if (_n_completed_buffers <= stop_at) { - return NULL; - } - - assert(_n_completed_buffers > 0, "invariant"); - assert(_completed_buffers_head != NULL, "invariant"); - assert(_completed_buffers_tail != NULL, "invariant"); - - BufferNode* bn = _completed_buffers_head; - _n_completed_buffers--; - _completed_buffers_head = bn->next(); - if (_completed_buffers_head == NULL) { - assert(_n_completed_buffers == 0, "invariant"); - _completed_buffers_tail = NULL; - _process_completed_buffers = false; - } - assert_completed_buffers_list_len_correct_locked(); - bn->set_next(NULL); - return bn; -} - -void PtrQueueSet::abandon_completed_buffers() { - BufferNode* buffers_to_delete = NULL; - 
{ - MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); - buffers_to_delete = _completed_buffers_head; - _completed_buffers_head = NULL; - _completed_buffers_tail = NULL; - _n_completed_buffers = 0; - _process_completed_buffers = false; - } - while (buffers_to_delete != NULL) { - BufferNode* bn = buffers_to_delete; - buffers_to_delete = bn->next(); - bn->set_next(NULL); - deallocate_buffer(bn); - } -} - -#ifdef ASSERT - -void PtrQueueSet::assert_completed_buffers_list_len_correct_locked() { - assert_lock_strong(_cbl_mon); - size_t n = 0; - for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) { - ++n; - } - assert(n == _n_completed_buffers, - "Completed buffer length is wrong: counted: " SIZE_FORMAT - ", expected: " SIZE_FORMAT, n, _n_completed_buffers); -} - -#endif // ASSERT - -// Merge lists of buffers. Notify the processing threads. -// The source queue is emptied as a result. The queues -// must share the monitor. -void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { - assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); - MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); - if (_completed_buffers_tail == NULL) { - assert(_completed_buffers_head == NULL, "Well-formedness"); - _completed_buffers_head = src->_completed_buffers_head; - _completed_buffers_tail = src->_completed_buffers_tail; - } else { - assert(_completed_buffers_head != NULL, "Well formedness"); - if (src->_completed_buffers_head != NULL) { - _completed_buffers_tail->set_next(src->_completed_buffers_head); - _completed_buffers_tail = src->_completed_buffers_tail; - } - } - _n_completed_buffers += src->_n_completed_buffers; - - src->_n_completed_buffers = 0; - src->_completed_buffers_head = NULL; - src->_completed_buffers_tail = NULL; - src->_process_completed_buffers = false; - - assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || - _completed_buffers_head != NULL && _completed_buffers_tail != NULL, - "Sanity"); - assert_completed_buffers_list_len_correct_locked(); -} - -void PtrQueueSet::notify_if_necessary() { - MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); - if (_n_completed_buffers > _process_completed_buffers_threshold) { - _process_completed_buffers = true; - if (_notify_when_complete) - _cbl_mon->notify(); - } -} diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/ptrQueue.hpp --- a/src/hotspot/share/gc/shared/ptrQueue.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/ptrQueue.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -296,35 +296,16 @@ class PtrQueueSet { BufferNode::Allocator* _allocator; - Monitor* _cbl_mon; // Protects the fields below. - BufferNode* _completed_buffers_head; - BufferNode* _completed_buffers_tail; - volatile size_t _n_completed_buffers; - - size_t _process_completed_buffers_threshold; - volatile bool _process_completed_buffers; - - // If true, notify_all on _cbl_mon when the threshold is reached. - bool _notify_when_complete; - - void assert_completed_buffers_list_len_correct_locked() NOT_DEBUG_RETURN; - protected: bool _all_active; // Create an empty ptr queue set. - PtrQueueSet(bool notify_when_complete = false); + PtrQueueSet(); ~PtrQueueSet(); // Because of init-order concerns, we can't pass these as constructor // arguments. - void initialize(Monitor* cbl_mon, BufferNode::Allocator* allocator); - - // For (unlocked!) iteration over the completed buffers. 
- BufferNode* completed_buffers_head() const { return _completed_buffers_head; } - - // Deallocate all of the completed buffers. - void abandon_completed_buffers(); + void initialize(BufferNode::Allocator* allocator); public: @@ -339,38 +320,13 @@ // is ready to be processed by the collector. It need not be full. // Adds node to the completed buffer list. - void enqueue_completed_buffer(BufferNode* node); - - // If the number of completed buffers is > stop_at, then remove and - // return a completed buffer from the list. Otherwise, return NULL. - BufferNode* get_completed_buffer(size_t stop_at = 0); - - bool process_completed_buffers() { return _process_completed_buffers; } - void set_process_completed_buffers(bool x) { _process_completed_buffers = x; } + virtual void enqueue_completed_buffer(BufferNode* node) = 0; bool is_active() { return _all_active; } size_t buffer_size() const { return _allocator->buffer_size(); } - - // Get/Set the number of completed buffers that triggers log processing. - // Log processing should be done when the number of buffers exceeds the - // threshold. - void set_process_completed_buffers_threshold(size_t sz) { - _process_completed_buffers_threshold = sz; - } - size_t process_completed_buffers_threshold() const { - return _process_completed_buffers_threshold; - } - static const size_t ProcessCompletedBuffersThresholdNever = ~size_t(0); - - size_t completed_buffers_num() const { return _n_completed_buffers; } - - void merge_bufferlists(PtrQueueSet* src); - - // Notify the consumer if the number of buffers crossed the threshold - void notify_if_necessary(); }; #endif // SHARE_GC_SHARED_PTRQUEUE_HPP diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/satbMarkQueue.cpp --- a/src/hotspot/share/gc/shared/satbMarkQueue.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/satbMarkQueue.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -28,12 +28,15 @@ #include "logging/log.hpp" #include "memory/allocation.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/atomic.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/orderAccess.hpp" #include "runtime/os.hpp" #include "runtime/safepoint.hpp" #include "runtime/thread.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vmThread.hpp" +#include "utilities/globalCounter.inline.hpp" SATBMarkQueue::SATBMarkQueue(SATBMarkQueueSet* qset) : // SATB queues are only active during marking cycles. We create @@ -107,15 +110,66 @@ SATBMarkQueueSet::SATBMarkQueueSet() : PtrQueueSet(), + _list(), + _count_and_process_flag(0), + _process_completed_buffers_threshold(SIZE_MAX), _buffer_enqueue_threshold(0) {} -void SATBMarkQueueSet::initialize(Monitor* cbl_mon, - BufferNode::Allocator* allocator, +SATBMarkQueueSet::~SATBMarkQueueSet() { + abandon_completed_buffers(); +} + +// _count_and_process_flag has flag in least significant bit, count in +// remaining bits. _process_completed_buffers_threshold is scaled +// accordingly, with the lsbit set, so a _count_and_process_flag value +// is directly comparable with the recorded threshold value. The +// process flag is set whenever the count exceeds the threshold, and +// remains set until the count is reduced to zero. + +// Increment count. If count > threshold, set flag, else maintain flag. 
+static void increment_count(volatile size_t* cfptr, size_t threshold) { + size_t old; + size_t value = Atomic::load(cfptr); + do { + old = value; + value += 2; + assert(value > old, "overflow"); + if (value > threshold) value |= 1; + value = Atomic::cmpxchg(value, cfptr, old); + } while (value != old); +} + +// Decrement count. If count == 0, clear flag, else maintain flag. +static void decrement_count(volatile size_t* cfptr) { + size_t old; + size_t value = Atomic::load(cfptr); + do { + assert((value >> 1) != 0, "underflow"); + old = value; + value -= 2; + if (value <= 1) value = 0; + value = Atomic::cmpxchg(value, cfptr, old); + } while (value != old); +} + +// Scale requested threshold to align with count field. If scaling +// overflows, just use max value. Set process flag field to make +// comparison in increment_count exact. +static size_t scale_threshold(size_t value) { + size_t scaled_value = value << 1; + if ((scaled_value >> 1) != value) { + scaled_value = SIZE_MAX; + } + return scaled_value | 1; +} + +void SATBMarkQueueSet::initialize(BufferNode::Allocator* allocator, size_t process_completed_buffers_threshold, uint buffer_enqueue_threshold_percentage) { - PtrQueueSet::initialize(cbl_mon, allocator); - set_process_completed_buffers_threshold(process_completed_buffers_threshold); + PtrQueueSet::initialize(allocator); + _process_completed_buffers_threshold = + scale_threshold(process_completed_buffers_threshold); assert(buffer_size() != 0, "buffer size not initialized"); // Minimum threshold of 1 ensures enqueuing of completely full buffers. size_t size = buffer_size(); @@ -207,6 +261,38 @@ } } +// SATB buffer life-cycle - Per-thread queues obtain buffers from the +// qset's buffer allocator, fill them, and push them onto the qset's +// list. The GC concurrently pops buffers from the qset, processes +// them, and returns them to the buffer allocator for re-use. Both +// the allocator and the qset use lock-free stacks. The ABA problem +// is solved by having both allocation pops and GC pops performed +// within GlobalCounter critical sections, while the return of buffers +// to the allocator performs a GlobalCounter synchronize before +// pushing onto the allocator's list. + +void SATBMarkQueueSet::enqueue_completed_buffer(BufferNode* node) { + assert(node != NULL, "precondition"); + // Increment count and update flag appropriately. Done before + // pushing buffer so count is always at least the actual number in + // the list, and decrement never underflows. + increment_count(&_count_and_process_flag, _process_completed_buffers_threshold); + _list.push(*node); +} + +BufferNode* SATBMarkQueueSet::get_completed_buffer() { + BufferNode* node; + { + GlobalCounter::CriticalSection cs(Thread::current()); + node = _list.pop(); + } + if (node != NULL) { + // Got a buffer so decrement count and update flag appropriately. 
+ decrement_count(&_count_and_process_flag); + } + return node; +} + #ifndef PRODUCT // Helpful for debugging @@ -219,7 +305,7 @@ tty->cr(); tty->print_cr("SATB BUFFERS [%s]", msg); - BufferNode* nd = completed_buffers_head(); + BufferNode* nd = _list.top(); int i = 0; while (nd != NULL) { void** buf = BufferNode::make_buffer_from_node(nd); @@ -248,6 +334,17 @@ } #endif // PRODUCT +void SATBMarkQueueSet::abandon_completed_buffers() { + Atomic::store(size_t(0), &_count_and_process_flag); + BufferNode* buffers_to_delete = _list.pop_all(); + while (buffers_to_delete != NULL) { + BufferNode* bn = buffers_to_delete; + buffers_to_delete = bn->next(); + bn->set_next(NULL); + deallocate_buffer(bn); + } +} + void SATBMarkQueueSet::abandon_partial_marking() { assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); abandon_completed_buffers(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/satbMarkQueue.hpp --- a/src/hotspot/share/gc/shared/satbMarkQueue.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/satbMarkQueue.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -27,6 +27,7 @@ #include "gc/shared/ptrQueue.hpp" #include "memory/allocation.hpp" +#include "memory/padded.hpp" class Thread; class Monitor; @@ -93,7 +94,17 @@ }; class SATBMarkQueueSet: public PtrQueueSet { + + DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, 0); + PaddedEnd _list; + volatile size_t _count_and_process_flag; + // These are rarely (if ever) changed, so same cache line as count. + size_t _process_completed_buffers_threshold; size_t _buffer_enqueue_threshold; + DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, 3 * sizeof(size_t)); + + BufferNode* get_completed_buffer(); + void abandon_completed_buffers(); #ifdef ASSERT void dump_active_states(bool expected_active); @@ -102,15 +113,14 @@ protected: SATBMarkQueueSet(); - ~SATBMarkQueueSet() {} + ~SATBMarkQueueSet(); template void apply_filter(Filter filter, SATBMarkQueue* queue) { queue->apply_filter(filter); } - void initialize(Monitor* cbl_mon, - BufferNode::Allocator* allocator, + void initialize(BufferNode::Allocator* allocator, size_t process_completed_buffers_threshold, uint buffer_enqueue_threshold_percentage); @@ -132,6 +142,19 @@ // buffer; the leading entries may be excluded due to filtering. bool apply_closure_to_completed_buffer(SATBBufferClosure* cl); + virtual void enqueue_completed_buffer(BufferNode* node); + + // The number of buffers in the list. Racy and not updated atomically + // with the set of completed buffers. + size_t completed_buffers_num() const { + return _count_and_process_flag >> 1; + } + + // Return true if completed buffers should be processed. 
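
The encoding described above packs the "should process" flag into bit 0 and the buffer count into the remaining bits, with the threshold pre-scaled so a single comparison works. A standalone arithmetic check of that layout (illustrative only; the real code updates the field with Atomic::cmpxchg loops rather than from a single thread):

#include <cassert>
#include <cstddef>

// Illustrative sketch only -- not HotSpot code.
static size_t scale_threshold_sketch(size_t t) { return (t << 1) | 1; }
static size_t count_of(size_t v)               { return v >> 1; }
static bool   flag_of(size_t v)                { return (v & 1) != 0; }

int main() {
  size_t threshold = scale_threshold_sketch(20);   // 20 buffers -> 41
  size_t cf = 0;
  for (int i = 0; i < 21; i++) {
    cf += 2;                       // increment the count
    if (cf > threshold) cf |= 1;   // past the threshold: set the process flag
  }
  assert(count_of(cf) == 21 && flag_of(cf));       // 21 buffers encode as 43
  for (int i = 0; i < 21; i++) {
    cf -= 2;                       // decrement the count
    if (cf <= 1) cf = 0;           // count hit zero: clear the flag
  }
  assert(count_of(cf) == 0 && !flag_of(cf));
  return 0;
}
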
+ bool process_completed_buffers() const { + return (_count_and_process_flag & 1) != 0; + } + #ifndef PRODUCT // Helpful for debugging void print_all(const char* msg); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/workerDataArray.hpp --- a/src/hotspot/share/gc/shared/workerDataArray.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/workerDataArray.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -34,7 +34,7 @@ class WorkerDataArray : public CHeapObj { friend class WDAPrinter; public: - static const uint MaxThreadWorkItems = 5; + static const uint MaxThreadWorkItems = 6; private: T* _data; uint _length; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shared/workerDataArray.inline.hpp --- a/src/hotspot/share/gc/shared/workerDataArray.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shared/workerDataArray.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -101,7 +101,7 @@ template void WorkerDataArray::add(uint worker_i, T value) { assert(worker_i < _length, "Worker %d is greater than max: %d", worker_i, _length); - assert(_data[worker_i] != uninitialized(), "No data to add to for worker %d", worker_i); + assert(_data[worker_i] != uninitialized(), "No data to add to %s for worker %d", _title, worker_i); _data[worker_i] += value; } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -31,6 +31,7 @@ #include "gc/shared/gcTimer.hpp" #include "gc/shared/referenceProcessor.hpp" #include "gc/shared/referenceProcessorPhaseTimes.hpp" +#include "gc/shared/strongRootsScope.hpp" #include "gc/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc/shenandoah/shenandoahClosures.inline.hpp" diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -348,7 +348,6 @@ // The call below uses stuff (the SATB* things) that are in G1, but probably // belong into a shared location. ShenandoahBarrierSet::satb_mark_queue_set().initialize(this, - SATB_Q_CBL_mon, 20 /* G1SATBProcessCompletedThreshold */, 60 /* G1SATBBufferEnqueueingThresholdPercent */); @@ -1513,7 +1512,9 @@ // From here on, we need to update references. 
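
The verifier change a few lines below combines several root types before calling verify_roots_no_forwarded_except. Because the RootTypes values are power-of-two flags, combining is just a bitwise OR cast back to the enum type; a standalone sketch with hypothetical names (not the Shenandoah enum itself):

// Illustrative sketch only -- hypothetical flag names.
enum RootTypesSketch : unsigned {
  JNIHandleRootsSketch = 1u << 0,
  WeakRootsSketch      = 1u << 1,
  CLDGRootsSketch      = 1u << 2
};

inline RootTypesSketch combine(RootTypesSketch a, RootTypesSketch b) {
  return static_cast<RootTypesSketch>(static_cast<unsigned>(a) |
                                      static_cast<unsigned>(b));
}

inline bool verifies(RootTypesSketch set, RootTypesSketch type) {
  return (set & type) != 0;   // same test as ShenandoahRootVerifier::verify
}
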
set_has_forwarded_objects(true); - evacuate_and_update_roots(); + if (!is_degenerated_gc_in_progress()) { + evacuate_and_update_roots(); + } if (ShenandoahPacing) { pacer()->setup_for_evac(); @@ -1521,7 +1522,9 @@ if (ShenandoahVerify) { if (ShenandoahConcurrentRoots::should_do_concurrent_roots()) { - verifier()->verify_roots_no_forwarded_except(ShenandoahRootVerifier::JNIHandleRoots); + ShenandoahRootVerifier::RootTypes types = ShenandoahRootVerifier::combine(ShenandoahRootVerifier::JNIHandleRoots, ShenandoahRootVerifier::WeakRoots); + types = ShenandoahRootVerifier::combine(types, ShenandoahRootVerifier::CLDGRoots); + verifier()->verify_roots_no_forwarded_except(types); } else { verifier()->verify_roots_no_forwarded(); } @@ -1588,6 +1591,8 @@ class ShenandoahConcurrentRootsEvacUpdateTask : public AbstractGangTask { private: ShenandoahJNIHandleRoots _jni_roots; + ShenandoahWeakRoots _weak_roots; + ShenandoahClassLoaderDataRoots _cld_roots; public: ShenandoahConcurrentRootsEvacUpdateTask() : @@ -1597,7 +1602,11 @@ void work(uint worker_id) { ShenandoahEvacOOMScope oom; ShenandoahEvacuateUpdateRootsClosure cl; + CLDToOopClosure clds(&cl, ClassLoaderData::_claim_strong); + _jni_roots.oops_do(&cl); + _cld_roots.cld_do(&clds); + _weak_roots.oops_do(&cl); } }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -175,8 +175,6 @@ _preserved_marks->restore(&exec); BiasedLocking::restore_marks(); _preserved_marks->reclaim(); - - JvmtiExport::gc_epilogue(); } // Resize metaspace diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -27,16 +27,13 @@ #include "classfile/stringTable.hpp" #include "classfile/systemDictionary.hpp" #include "code/codeCache.hpp" -#include "gc/shenandoah/shenandoahClosures.inline.hpp" #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" -#include "gc/shenandoah/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" #include "gc/shenandoah/shenandoahStringDedup.hpp" #include "gc/shenandoah/shenandoahTimingTracker.hpp" #include "gc/shenandoah/shenandoahVMOperations.hpp" -#include "gc/shared/weakProcessor.inline.hpp" -#include "memory/allocation.inline.hpp" +#include "jfr/jfr.hpp" #include "memory/iterator.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" @@ -71,6 +68,40 @@ _jvmti_root.oops_do(cl, worker_id); } +ShenandoahWeakSerialRoot::ShenandoahWeakSerialRoot(ShenandoahWeakSerialRoot::WeakOopsDo weak_oops_do, ShenandoahPhaseTimings::GCParPhases phase) : + _weak_oops_do(weak_oops_do), _phase(phase) { +} + +void ShenandoahWeakSerialRoot::weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id) { + if (_claimed.try_set()) { + ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); + ShenandoahWorkerTimingsTracker timer(worker_times, _phase, worker_id); + _weak_oops_do(is_alive, keep_alive); + } +} + +#if INCLUDE_JVMTI +ShenandoahJVMTIWeakRoot::ShenandoahJVMTIWeakRoot() : + ShenandoahWeakSerialRoot(&JvmtiExport::weak_oops_do, 
ShenandoahPhaseTimings::JVMTIWeakRoots) { +} +#endif // INCLUDE_JVMTI + +#if INCLUDE_JFR +ShenandoahJFRWeakRoot::ShenandoahJFRWeakRoot() : + ShenandoahWeakSerialRoot(&Jfr::weak_oops_do, ShenandoahPhaseTimings::JFRWeakRoots) { +} +#endif // INCLUDE_JFR + +void ShenandoahSerialWeakRoots::weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id) { + JVMTI_ONLY(_jvmti_weak_roots.weak_oops_do(is_alive, keep_alive, worker_id);) + JFR_ONLY(_jfr_weak_roots.weak_oops_do(is_alive, keep_alive, worker_id);) +} + +void ShenandoahSerialWeakRoots::weak_oops_do(OopClosure* cl, uint worker_id) { + AlwaysTrueClosure always_true; + weak_oops_do(&always_true, cl, worker_id); +} + ShenandoahThreadRoots::ShenandoahThreadRoots(bool is_par) : _is_par(is_par) { Threads::change_thread_claim_token(); } @@ -93,17 +124,6 @@ Threads::assert_all_threads_claimed(); } -ShenandoahWeakRoots::ShenandoahWeakRoots(uint n_workers) : - _process_timings(n_workers), - _task(&_process_timings, n_workers) { -} - -ShenandoahWeakRoots::~ShenandoahWeakRoots() { - ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); - ShenandoahTimingConverter::weak_processing_timing_to_shenandoah_timing(&_process_timings, - worker_times); -} - ShenandoahStringDedupRoots::ShenandoahStringDedupRoots() { if (ShenandoahStringDedup::is_enabled()) { StringDedup::gc_prologue(false); @@ -137,39 +157,37 @@ ShenandoahRootEvacuator::ShenandoahRootEvacuator(uint n_workers, ShenandoahPhaseTimings::Phase phase, bool include_concurrent_roots) : ShenandoahRootProcessor(phase), _thread_roots(n_workers > 1), - _weak_roots(n_workers), _include_concurrent_roots(include_concurrent_roots) { } void ShenandoahRootEvacuator::roots_do(uint worker_id, OopClosure* oops) { MarkingCodeBlobClosure blobsCl(oops, CodeBlobToOopClosure::FixRelocations); - CLDToOopClosure clds(oops, ClassLoaderData::_claim_strong); AlwaysTrueClosure always_true; _serial_roots.oops_do(oops, worker_id); + _serial_weak_roots.weak_oops_do(oops, worker_id); if (_include_concurrent_roots) { + CLDToOopClosure clds(oops, ClassLoaderData::_claim_strong); _jni_roots.oops_do(oops, worker_id); + _cld_roots.cld_do(&clds, worker_id); + _weak_roots.oops_do(oops, worker_id); } _thread_roots.oops_do(oops, NULL, worker_id); - _cld_roots.cld_do(&clds, worker_id); _code_roots.code_blobs_do(&blobsCl, worker_id); - _weak_roots.oops_do(&always_true, oops, worker_id); _dedup_roots.oops_do(&always_true, oops, worker_id); } ShenandoahRootUpdater::ShenandoahRootUpdater(uint n_workers, ShenandoahPhaseTimings::Phase phase, bool update_code_cache) : ShenandoahRootProcessor(phase), _thread_roots(n_workers > 1), - _weak_roots(n_workers), _update_code_cache(update_code_cache) { } ShenandoahRootAdjuster::ShenandoahRootAdjuster(uint n_workers, ShenandoahPhaseTimings::Phase phase) : ShenandoahRootProcessor(phase), - _thread_roots(n_workers > 1), - _weak_roots(n_workers) { + _thread_roots(n_workers > 1) { assert(ShenandoahHeap::heap()->is_full_gc_in_progress(), "Full GC only"); } @@ -185,7 +203,8 @@ _cld_roots.cld_do(&adjust_cld_closure, worker_id); _code_roots.code_blobs_do(&adjust_code_closure, worker_id); - _weak_roots.oops_do(&always_true, oops, worker_id); + _serial_weak_roots.weak_oops_do(oops, worker_id); + _weak_roots.oops_do(oops, worker_id); _dedup_roots.oops_do(&always_true, oops, worker_id); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Thu Jun 
27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -29,11 +29,7 @@ #include "gc/shenandoah/shenandoahCodeRoots.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" -#include "gc/shared/strongRootsScope.hpp" -#include "gc/shared/weakProcessor.hpp" -#include "gc/shared/weakProcessorPhaseTimes.hpp" -#include "gc/shared/workgroup.hpp" -#include "memory/allocation.hpp" +#include "gc/shenandoah/shenandoahSharedVariables.hpp" #include "memory/iterator.hpp" class ShenandoahSerialRoot { @@ -61,10 +57,102 @@ void oops_do(OopClosure* cl, uint worker_id); }; +class ShenandoahWeakSerialRoot { + typedef void (*WeakOopsDo)(BoolObjectClosure*, OopClosure*); +private: + ShenandoahSharedFlag _claimed; + const WeakOopsDo _weak_oops_do; + const ShenandoahPhaseTimings::GCParPhases _phase; + +public: + ShenandoahWeakSerialRoot(WeakOopsDo oops_do, ShenandoahPhaseTimings::GCParPhases); + void weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id); +}; + +#if INCLUDE_JVMTI +class ShenandoahJVMTIWeakRoot : public ShenandoahWeakSerialRoot { +public: + ShenandoahJVMTIWeakRoot(); +}; +#endif // INCLUDE_JVMTI + +#if INCLUDE_JFR +class ShenandoahJFRWeakRoot : public ShenandoahWeakSerialRoot { +public: + ShenandoahJFRWeakRoot(); +}; +#endif // INCLUDE_JFR + +class ShenandoahSerialWeakRoots { +private: + JVMTI_ONLY(ShenandoahJVMTIWeakRoot _jvmti_weak_roots;) + JFR_ONLY(ShenandoahJFRWeakRoot _jfr_weak_roots;) +public: + void weak_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, uint worker_id); + void weak_oops_do(OopClosure* cl, uint worker_id); +}; + +template +class ShenandoahWeakRoot { +private: + OopStorage::ParState _itr; + const ShenandoahPhaseTimings::GCParPhases _phase; +public: + ShenandoahWeakRoot(OopStorage* storage, ShenandoahPhaseTimings::GCParPhases phase); + + template + void oops_do(Closure* cl, uint worker_id); +}; + +template <> +class ShenandoahWeakRoot { +private: + OopStorage::ParState _itr; + const ShenandoahPhaseTimings::GCParPhases _phase; + +public: + ShenandoahWeakRoot(OopStorage* storage, ShenandoahPhaseTimings::GCParPhases phase); + + template + void weak_oops_do(IsAliveClosure* is_alive, KeepAliveClosure* keep_alive, uint worker_id); +}; + +template +class ShenandoahWeakRoots { +private: + ShenandoahWeakRoot _jni_roots; + ShenandoahWeakRoot _string_table_roots; + ShenandoahWeakRoot _resolved_method_table_roots; + ShenandoahWeakRoot _vm_roots; + +public: + ShenandoahWeakRoots(); + + template + void oops_do(Closure* cl, uint worker_id = 0); +}; + +template <> +class ShenandoahWeakRoots { +private: + ShenandoahWeakRoot _jni_roots; + ShenandoahWeakRoot _string_table_roots; + ShenandoahWeakRoot _resolved_method_table_roots; + ShenandoahWeakRoot _vm_roots; +public: + ShenandoahWeakRoots(); + + template + void oops_do(Closure* cl, uint worker_id = 0); + + template + void weak_oops_do(IsAliveClosure* is_alive, KeepAliveClosure* keep_alive, uint worker_id); +}; + template class ShenandoahJNIHandleRoots { private: - OopStorage::ParState _itr; + OopStorage::ParState _itr; public: ShenandoahJNIHandleRoots(); @@ -83,18 +171,6 @@ void threads_do(ThreadClosure* tc, uint worker_id); }; -class ShenandoahWeakRoots { -private: - WeakProcessorPhaseTimes _process_timings; - WeakProcessor::Task _task; -public: - ShenandoahWeakRoots(uint n_workers); - ~ShenandoahWeakRoots(); - - template - void oops_do(IsAlive* is_alive, KeepAlive* keep_alive, uint worker_id); 
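
The weak-root and CLD-root helpers introduced in this header are parameterized on a compile-time CONCURRENT flag: the concurrent instantiation holds the ClassLoaderDataGraph lock for the lifetime of the object and skips the per-worker timing scope, while the safepoint instantiation does the opposite. A standalone sketch of that shape (illustrative only; names are hypothetical, not the Shenandoah classes):

#include <mutex>

static std::mutex graph_lock_stub;   // stand-in for ClassLoaderDataGraph_lock

// Illustrative sketch only -- not HotSpot code.
template <bool CONCURRENT>
class RootScopeSketch {
public:
  RootScopeSketch()  { if (CONCURRENT) graph_lock_stub.lock(); }
  ~RootScopeSketch() { if (CONCURRENT) graph_lock_stub.unlock(); }

  template <typename OopClosureT>
  void oops_do(OopClosureT& cl, unsigned worker_id) {
    if (CONCURRENT) {
      cl(worker_id);   // concurrent phase: no per-worker timing scope
    } else {
      // At a safepoint the real code brackets the walk with a
      // ShenandoahWorkerTimingsTracker; the walk itself is the same.
      cl(worker_id);
    }
  }
};

// Usage: RootScopeSketch<true> during concurrent root processing,
// RootScopeSketch<false> inside a safepoint.
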
-}; - class ShenandoahStringDedupRoots { public: ShenandoahStringDedupRoots(); @@ -114,13 +190,14 @@ void code_blobs_do(CodeBlobClosure* blob_cl, uint worker_id); }; -template +template class ShenandoahClassLoaderDataRoots { public: ShenandoahClassLoaderDataRoots(); + ~ShenandoahClassLoaderDataRoots(); - void always_strong_cld_do(CLDClosure* clds, uint worker_id); - void cld_do(CLDClosure* clds, uint worker_id); + void always_strong_cld_do(CLDClosure* clds, uint worker_id = 0); + void cld_do(CLDClosure* clds, uint worker_id = 0); }; class ShenandoahRootProcessor : public StackObj { @@ -141,7 +218,8 @@ ShenandoahThreadRoots _thread_roots; ShenandoahCodeCacheRoots _code_roots; ShenandoahJNIHandleRoots _jni_roots; - ShenandoahClassLoaderDataRoots _cld_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; public: ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimings::Phase phase); @@ -166,7 +244,8 @@ ShenandoahSerialRoots _serial_roots; ShenandoahThreadRoots _thread_roots; ShenandoahJNIHandleRoots _jni_roots; - ShenandoahClassLoaderDataRoots _cld_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; ShenandoahCodeCacheRoots _code_roots; public: @@ -181,9 +260,11 @@ private: ShenandoahSerialRoots _serial_roots; ShenandoahJNIHandleRoots _jni_roots; - ShenandoahClassLoaderDataRoots _cld_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; ShenandoahThreadRoots _thread_roots; - ShenandoahWeakRoots _weak_roots; + ShenandoahSerialWeakRoots _serial_weak_roots; + ShenandoahWeakRoots _weak_roots; ShenandoahStringDedupRoots _dedup_roots; ShenandoahCodeCacheRoots _code_roots; bool _include_concurrent_roots; @@ -199,9 +280,11 @@ private: ShenandoahSerialRoots _serial_roots; ShenandoahJNIHandleRoots _jni_roots; - ShenandoahClassLoaderDataRoots _cld_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; ShenandoahThreadRoots _thread_roots; - ShenandoahWeakRoots _weak_roots; + ShenandoahSerialWeakRoots _serial_weak_roots; + ShenandoahWeakRoots _weak_roots; ShenandoahStringDedupRoots _dedup_roots; ShenandoahCodeCacheRoots _code_roots; const bool _update_code_cache; @@ -218,9 +301,11 @@ private: ShenandoahSerialRoots _serial_roots; ShenandoahJNIHandleRoots _jni_roots; - ShenandoahClassLoaderDataRoots _cld_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; ShenandoahThreadRoots _thread_roots; - ShenandoahWeakRoots _weak_roots; + ShenandoahSerialWeakRoots _serial_weak_roots; + ShenandoahWeakRoots _weak_roots; ShenandoahStringDedupRoots _dedup_roots; ShenandoahCodeCacheRoots _code_roots; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -25,15 +25,84 @@ #define SHARE_GC_SHENANDOAH_SHENANDOAHROOTPROCESSOR_INLINE_HPP #include "classfile/classLoaderDataGraph.hpp" +#include "classfile/stringTable.hpp" +#include "classfile/systemDictionary.hpp" #include "gc/shared/oopStorageParState.inline.hpp" #include "gc/shenandoah/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahRootProcessor.hpp" #include "gc/shenandoah/shenandoahTimingTracker.hpp" #include "gc/shenandoah/shenandoahUtils.hpp" #include "memory/resourceArea.hpp" +#include "prims/resolvedMethodTable.hpp" #include "runtime/safepoint.hpp" template +inline ShenandoahWeakRoot::ShenandoahWeakRoot(OopStorage* storage, ShenandoahPhaseTimings::GCParPhases phase) : + _itr(storage), 
_phase(phase) { +} + +template +template +inline void ShenandoahWeakRoot::oops_do(Closure* cl, uint worker_id) { + if (CONCURRENT) { + _itr.oops_do(cl); + } else { + ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); + ShenandoahWorkerTimingsTracker timer(worker_times, ShenandoahPhaseTimings::ThreadRoots, worker_id); + _itr.oops_do(cl); + } +} + +inline ShenandoahWeakRoot::ShenandoahWeakRoot(OopStorage* storage, ShenandoahPhaseTimings::GCParPhases phase) : + _itr(storage), _phase(phase) { +} + +template +void ShenandoahWeakRoot::weak_oops_do(IsAliveClosure* is_alive, KeepAliveClosure* keep_alive, uint worker_id) { + ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); + ShenandoahWorkerTimingsTracker timer(worker_times, _phase, worker_id); + _itr.weak_oops_do(is_alive, keep_alive); +} + +template +ShenandoahWeakRoots::ShenandoahWeakRoots() : + _jni_roots(JNIHandles::weak_global_handles(), ShenandoahPhaseTimings::JNIWeakRoots), + _string_table_roots(StringTable::weak_storage(), ShenandoahPhaseTimings::StringTableRoots), + _resolved_method_table_roots(ResolvedMethodTable::weak_storage(), ShenandoahPhaseTimings::ResolvedMethodTableRoots), + _vm_roots(SystemDictionary::vm_weak_oop_storage(), ShenandoahPhaseTimings::VMWeakRoots) { +} + +template +template +void ShenandoahWeakRoots::oops_do(Closure* cl, uint worker_id) { + _jni_roots.oops_do(cl, worker_id); + _string_table_roots.oops_do(cl, worker_id); + _resolved_method_table_roots.oops_do(cl, worker_id); + _vm_roots.oops_do(cl, worker_id); +} + +inline ShenandoahWeakRoots::ShenandoahWeakRoots() : + _jni_roots(JNIHandles::weak_global_handles(), ShenandoahPhaseTimings::JNIWeakRoots), + _string_table_roots(StringTable::weak_storage(), ShenandoahPhaseTimings::StringTableRoots), + _resolved_method_table_roots(ResolvedMethodTable::weak_storage(), ShenandoahPhaseTimings::ResolvedMethodTableRoots), + _vm_roots(SystemDictionary::vm_weak_oop_storage(), ShenandoahPhaseTimings::VMWeakRoots) { +} + +template +void ShenandoahWeakRoots::weak_oops_do(IsAliveClosure* is_alive, KeepAliveClosure* keep_alive, uint worker_id) { + _jni_roots.weak_oops_do(is_alive, keep_alive, worker_id); + _string_table_roots.weak_oops_do(is_alive, keep_alive, worker_id); + _resolved_method_table_roots.weak_oops_do(is_alive, keep_alive, worker_id); + _vm_roots.weak_oops_do(is_alive, keep_alive, worker_id); +} + +template +void ShenandoahWeakRoots::oops_do(Closure* cl, uint worker_id) { + AlwaysTrueClosure always_true; + weak_oops_do(&always_true, cl, worker_id); +} + +template ShenandoahJNIHandleRoots::ShenandoahJNIHandleRoots() : _itr(JNIHandles::global_handles()) { } @@ -50,24 +119,32 @@ } } -template -void ShenandoahWeakRoots::oops_do(IsAlive* is_alive, KeepAlive* keep_alive, uint worker_id) { - _task.work(worker_id, is_alive, keep_alive); -} - -template -ShenandoahClassLoaderDataRoots::ShenandoahClassLoaderDataRoots() { +template +ShenandoahClassLoaderDataRoots::ShenandoahClassLoaderDataRoots() { if (!SINGLE_THREADED) { ClassLoaderDataGraph::clear_claimed_marks(); } + if (CONCURRENT) { + ClassLoaderDataGraph_lock->lock(); + } } -template -void ShenandoahClassLoaderDataRoots::always_strong_cld_do(CLDClosure* clds, uint worker_id) { +template +ShenandoahClassLoaderDataRoots::~ShenandoahClassLoaderDataRoots() { + if (CONCURRENT) { + ClassLoaderDataGraph_lock->unlock(); + } +} + + +template +void ShenandoahClassLoaderDataRoots::always_strong_cld_do(CLDClosure* clds, uint worker_id) { if 
(SINGLE_THREADED) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Single threaded CLDG iteration can only be done by VM thread"); ClassLoaderDataGraph::always_strong_cld_do(clds); + } else if (CONCURRENT) { + ClassLoaderDataGraph::always_strong_cld_do(clds); } else { ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); ShenandoahWorkerTimingsTracker timer(worker_times, ShenandoahPhaseTimings::CLDGRoots, worker_id); @@ -75,13 +152,15 @@ } } -template -void ShenandoahClassLoaderDataRoots::cld_do(CLDClosure* clds, uint worker_id) { +template +void ShenandoahClassLoaderDataRoots::cld_do(CLDClosure* clds, uint worker_id) { if (SINGLE_THREADED) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Single threaded CLDG iteration can only be done by VM thread"); ClassLoaderDataGraph::cld_do(clds); - } else { + } else if (CONCURRENT) { + ClassLoaderDataGraph::cld_do(clds); + } else { ShenandoahWorkerTimings* worker_times = ShenandoahHeap::heap()->phase_timings()->worker_times(); ShenandoahWorkerTimingsTracker timer(worker_times, ShenandoahPhaseTimings::CLDGRoots, worker_id); ClassLoaderDataGraph::cld_do(clds); @@ -197,7 +276,8 @@ _code_roots.code_blobs_do(&update_blobs, worker_id); } - _weak_roots.oops_do(is_alive, keep_alive, worker_id); + _serial_weak_roots.weak_oops_do(is_alive, keep_alive, worker_id); + _weak_roots.weak_oops_do(is_alive, keep_alive, worker_id); _dedup_roots.oops_do(is_alive, keep_alive, worker_id); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -54,6 +54,10 @@ return (_types & type) != 0; } +ShenandoahRootVerifier::RootTypes ShenandoahRootVerifier::combine(RootTypes t1, RootTypes t2) { + return static_cast(static_cast(t1) | static_cast(t2)); +} + void ShenandoahRootVerifier::oops_do(OopClosure* oops) { CodeBlobToOopClosure blobs(oops, !CodeBlobToOopClosure::FixRelocations); if (verify(CodeRoots)) { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootVerifier.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -52,6 +52,8 @@ // Used to seed ShenandoahVerifier, do not honor root type filter void roots_do(OopClosure* cl); void strong_roots_do(OopClosure* cl); + + static RootTypes combine(RootTypes t1, RootTypes t2); private: bool verify(RootTypes type) const; }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -33,11 +33,9 @@ {} void ShenandoahSATBMarkQueueSet::initialize(ShenandoahHeap* const heap, - Monitor* cbl_mon, int process_completed_threshold, uint buffer_enqueue_threshold_percentage) { - SATBMarkQueueSet::initialize(cbl_mon, - &_satb_mark_queue_buffer_allocator, + SATBMarkQueueSet::initialize(&_satb_mark_queue_buffer_allocator, process_completed_threshold, buffer_enqueue_threshold_percentage); _heap = heap; diff -r 4a31db8d42bd 
-r dd706e28e6cc src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -44,7 +44,6 @@ ShenandoahSATBMarkQueueSet(); void initialize(ShenandoahHeap* const heap, - Monitor* cbl_mon, int process_completed_threshold, uint buffer_enqueue_threshold_percentage); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/z/zBarrierSetNMethod.cpp --- a/src/hotspot/share/gc/z/zBarrierSetNMethod.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/z/zBarrierSetNMethod.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,7 +45,7 @@ // We don't need to take the lock when unlinking nmethods from // the Method, because it is only concurrently unlinked by // the entry barrier, which acquires the per nmethod lock. - nm->unlink_from_method(); + nm->unlink_from_method(false /* acquire_lock */); // We can end up calling nmethods that are unloading // since we clear compiled ICs lazily. Returning false diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/z/zNMethod.cpp --- a/src/hotspot/share/gc/z/zNMethod.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/z/zNMethod.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -285,7 +285,7 @@ // We don't need to take the lock when unlinking nmethods from // the Method, because it is only concurrently unlinked by // the entry barrier, which acquires the per nmethod lock. 
- nm->unlink_from_method(); + nm->unlink_from_method(false /* acquire_lock */); return; } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/gc/z/zRootsIterator.cpp --- a/src/hotspot/share/gc/z/zRootsIterator.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/gc/z/zRootsIterator.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -198,7 +198,6 @@ } else { ZNMethod::oops_do_end(); } - JvmtiExport::gc_epilogue(); COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); Threads::assert_all_threads_claimed(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/jvmci/jvmciCompilerToVM.cpp --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -568,6 +568,33 @@ return JVMCIENV->get_jobject(result); C2V_END +C2V_VMENTRY_NULL(jobject, getArrayType, (JNIEnv* env, jobject, jobject jvmci_type)) + if (jvmci_type == NULL) { + JVMCI_THROW_0(NullPointerException); + } + + JVMCIObject jvmci_type_object = JVMCIENV->wrap(jvmci_type); + JVMCIKlassHandle array_klass(THREAD); + if (JVMCIENV->isa_HotSpotResolvedPrimitiveType(jvmci_type_object)) { + BasicType type = JVMCIENV->kindToBasicType(JVMCIENV->get_HotSpotResolvedPrimitiveType_kind(jvmci_type_object), JVMCI_CHECK_0); + if (type == T_VOID) { + return NULL; + } + array_klass = Universe::typeArrayKlassObj(type); + if (array_klass == NULL) { + JVMCI_THROW_MSG_NULL(InternalError, err_msg("No array klass for primitive type %s", type2name(type))); + } + } else { + Klass* klass = JVMCIENV->asKlass(jvmci_type); + if (klass == NULL) { + JVMCI_THROW_0(NullPointerException); + } + array_klass = klass->array_klass(CHECK_NULL); + } + JVMCIObject result = JVMCIENV->get_jvmci_type(array_klass, JVMCI_CHECK_NULL); + return JVMCIENV->get_jobject(result); +C2V_END + C2V_VMENTRY_NULL(jobject, lookupClass, (JNIEnv* env, jobject, jclass mirror)) requireInHotSpot("lookupClass", JVMCI_CHECK_NULL); if (mirror == NULL) { @@ -582,12 +609,6 @@ return JVMCIENV->get_jobject(result); } -C2V_VMENTRY_NULL(jobject, resolveConstantInPool, (JNIEnv* env, jobject, jobject jvmci_constant_pool, jint index)) - constantPoolHandle cp = JVMCIENV->asConstantPool(jvmci_constant_pool); - oop result = cp->resolve_constant_at(index, CHECK_NULL); - return JVMCIENV->get_jobject(JVMCIENV->get_object_constant(result)); -C2V_END - C2V_VMENTRY_NULL(jobject, resolvePossiblyCachedConstantInPool, (JNIEnv* env, jobject, jobject jvmci_constant_pool, jint index)) constantPoolHandle cp = JVMCIENV->asConstantPool(jvmci_constant_pool); oop result = cp->resolve_possibly_cached_constant_at(index, CHECK_NULL); @@ -2578,6 +2599,18 @@ return FailedSpeculation::add_failed_speculation(NULL, (FailedSpeculation**)(address) failed_speculations_address, (address) speculation, speculation_len); } +C2V_VMENTRY(void, callSystemExit, (JNIEnv* env, jobject, jint status)) + JavaValue result(T_VOID); + JavaCallArguments jargs(1); + jargs.push_int(status); + JavaCalls::call_static(&result, + SystemDictionary::System_klass(), + vmSymbols::exit_method_name(), + vmSymbols::int_void_signature(), + &jargs, + CHECK); +} + #define CC (char*) /*cast a literal from (const char*)*/ #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(c2v_ ## f)) @@ -2624,6 +2657,7 @@ {CC "hasNeverInlineDirective", CC "(" HS_RESOLVED_METHOD ")Z", FN_PTR(hasNeverInlineDirective)}, {CC "shouldInlineMethod", CC "(" HS_RESOLVED_METHOD ")Z", FN_PTR(shouldInlineMethod)}, {CC "lookupType", CC "(" STRING HS_RESOLVED_KLASS "Z)" HS_RESOLVED_TYPE, FN_PTR(lookupType)}, + 
{CC "getArrayType", CC "(" HS_RESOLVED_TYPE ")" HS_RESOLVED_KLASS, FN_PTR(getArrayType)}, {CC "lookupClass", CC "(" CLASS ")" HS_RESOLVED_TYPE, FN_PTR(lookupClass)}, {CC "lookupNameInPool", CC "(" HS_CONSTANT_POOL "I)" STRING, FN_PTR(lookupNameInPool)}, {CC "lookupNameAndTypeRefIndexInPool", CC "(" HS_CONSTANT_POOL "I)I", FN_PTR(lookupNameAndTypeRefIndexInPool)}, @@ -2633,7 +2667,6 @@ {CC "lookupAppendixInPool", CC "(" HS_CONSTANT_POOL "I)" OBJECTCONSTANT, FN_PTR(lookupAppendixInPool)}, {CC "lookupMethodInPool", CC "(" HS_CONSTANT_POOL "IB)" HS_RESOLVED_METHOD, FN_PTR(lookupMethodInPool)}, {CC "constantPoolRemapInstructionOperandFromCache", CC "(" HS_CONSTANT_POOL "I)I", FN_PTR(constantPoolRemapInstructionOperandFromCache)}, - {CC "resolveConstantInPool", CC "(" HS_CONSTANT_POOL "I)" OBJECTCONSTANT, FN_PTR(resolveConstantInPool)}, {CC "resolvePossiblyCachedConstantInPool", CC "(" HS_CONSTANT_POOL "I)" OBJECTCONSTANT, FN_PTR(resolvePossiblyCachedConstantInPool)}, {CC "resolveTypeInPool", CC "(" HS_CONSTANT_POOL "I)" HS_RESOLVED_KLASS, FN_PTR(resolveTypeInPool)}, {CC "resolveFieldInPool", CC "(" HS_CONSTANT_POOL "I" HS_RESOLVED_METHOD "B[I)" HS_RESOLVED_KLASS, FN_PTR(resolveFieldInPool)}, @@ -2723,6 +2756,7 @@ {CC "getFailedSpeculationsAddress", CC "(" HS_RESOLVED_METHOD ")J", FN_PTR(getFailedSpeculationsAddress)}, {CC "releaseFailedSpeculations", CC "(J)V", FN_PTR(releaseFailedSpeculations)}, {CC "addFailedSpeculation", CC "(J[B)Z", FN_PTR(addFailedSpeculation)}, + {CC "callSystemExit", CC "(I)V", FN_PTR(callSystemExit)}, }; int CompilerToVM::methods_count() { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/jvmci/jvmciEnv.cpp --- a/src/hotspot/share/jvmci/jvmciEnv.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/jvmci/jvmciEnv.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -31,7 +31,6 @@ #include "memory/universe.hpp" #include "oops/objArrayKlass.hpp" #include "oops/typeArrayOop.inline.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/javaCalls.hpp" #include "jvmci/jniAccessMark.inline.hpp" @@ -1361,6 +1360,9 @@ return Handle(THREAD, obj); } else if (isa_IndirectHotSpotObjectConstantImpl(constant)) { jlong object_handle = get_IndirectHotSpotObjectConstantImpl_objectHandle(constant); + if (object_handle == 0L) { + JVMCI_THROW_MSG_(NullPointerException, "Foreign object reference has been cleared", Handle()); + } oop result = resolve_handle(object_handle); if (result == NULL) { JVMCI_THROW_MSG_(InternalError, "Constant was unexpectedly NULL", Handle()); @@ -1490,7 +1492,8 @@ // Invalidating the HotSpotNmethod means we want the nmethod // to be deoptimized. 
nm->mark_for_deoptimization(); - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } // A HotSpotNmethod instance can only reference a single nmethod diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/oops/method.cpp --- a/src/hotspot/share/oops/method.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/oops/method.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -103,7 +103,7 @@ // Fix and bury in Method* set_interpreter_entry(NULL); // sets i2i entry and from_int set_adapter_entry(NULL); - Method::clear_code(); // from_c/from_i get set to c2i/i2i + clear_code(false /* don't need a lock */); // from_c/from_i get set to c2i/i2i if (access_flags.is_native()) { clear_native_function(); @@ -819,7 +819,7 @@ set_native_function( SharedRuntime::native_method_throw_unsatisfied_link_error_entry(), !native_bind_event_is_interesting); - this->unlink_code(); + clear_code(); } address Method::critical_native_function() { @@ -943,7 +943,8 @@ } // Revert to using the interpreter and clear out the nmethod -void Method::clear_code() { +void Method::clear_code(bool acquire_lock /* = true */) { + MutexLocker pl(acquire_lock ? Patching_lock : NULL, Mutex::_no_safepoint_check_flag); // this may be NULL if c2i adapters have not been made yet // Only should happen at allocate time. if (adapter() == NULL) { @@ -957,25 +958,6 @@ _code = NULL; } -void Method::unlink_code(CompiledMethod *compare) { - MutexLocker ml(CompiledMethod_lock->owned_by_self() ? NULL : CompiledMethod_lock, Mutex::_no_safepoint_check_flag); - // We need to check if either the _code or _from_compiled_code_entry_point - // refer to this nmethod because there is a race in setting these two fields - // in Method* as seen in bugid 4947125. - // If the vep() points to the zombie nmethod, the memory for the nmethod - // could be flushed and the compiler and vtable stubs could still call - // through it. - if (code() == compare || - from_compiled_entry() == compare->verified_entry_point()) { - clear_code(); - } -} - -void Method::unlink_code() { - MutexLocker ml(CompiledMethod_lock->owned_by_self() ? NULL : CompiledMethod_lock, Mutex::_no_safepoint_check_flag); - clear_code(); -} - #if INCLUDE_CDS // Called by class data sharing to remove any entry points (which are not shared) void Method::unlink_method() { @@ -1202,7 +1184,7 @@ // Install compiled code. Instantly it can execute. void Method::set_code(const methodHandle& mh, CompiledMethod *code) { - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); assert( code, "use clear_code to remove code" ); assert( mh->check_code(), "" ); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/oops/method.hpp --- a/src/hotspot/share/oops/method.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/oops/method.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -463,17 +463,7 @@ address verified_code_entry(); bool check_code() const; // Not inline to avoid circular ref CompiledMethod* volatile code() const; - - // Locks CompiledMethod_lock if not held. - void unlink_code(CompiledMethod *compare); - // Locks CompiledMethod_lock if not held. - void unlink_code(); - -private: - // Either called with CompiledMethod_lock held or from constructor. 
- void clear_code(); - -public: + void clear_code(bool acquire_lock = true); // Clear out any compiled code static void set_code(const methodHandle& mh, CompiledMethod* code); void set_adapter_entry(AdapterHandlerEntry* adapter) { constMethod()->set_adapter_entry(adapter); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/opto/library_call.cpp --- a/src/hotspot/share/opto/library_call.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/opto/library_call.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -4240,6 +4240,14 @@ // Do not let writes of the copy source or destination float below the copy. insert_mem_bar(Op_MemBarCPUOrder); + Node* thread = _gvn.transform(new ThreadLocalNode()); + Node* doing_unsafe_access_addr = basic_plus_adr(top(), thread, in_bytes(JavaThread::doing_unsafe_access_offset())); + BasicType doing_unsafe_access_bt = T_BYTE; + assert((sizeof(bool) * CHAR_BIT) == 8, "not implemented"); + + // update volatile field + store_to_memory(control(), doing_unsafe_access_addr, intcon(1), doing_unsafe_access_bt, Compile::AliasIdxRaw, MemNode::unordered); + // Call it. Note that the length argument is not scaled. make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::fast_arraycopy_Type(), @@ -4248,6 +4256,8 @@ TypeRawPtr::BOTTOM, src, dst, size XTOP); + store_to_memory(control(), doing_unsafe_access_addr, intcon(0), doing_unsafe_access_bt, Compile::AliasIdxRaw, MemNode::unordered); + // Do not let reads of the copy destination float above the copy. insert_mem_bar(Op_MemBarCPUOrder); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/jvmtiEventController.cpp --- a/src/hotspot/share/prims/jvmtiEventController.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/jvmtiEventController.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -32,7 +32,6 @@ #include "prims/jvmtiExport.hpp" #include "prims/jvmtiImpl.hpp" #include "prims/jvmtiThreadState.inline.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" @@ -240,7 +239,8 @@ } } if (num_marked > 0) { - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/jvmtiExport.cpp --- a/src/hotspot/share/prims/jvmtiExport.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/jvmtiExport.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -2615,10 +2615,6 @@ JvmtiTagMap::weak_oops_do(is_alive, f); } -void JvmtiExport::gc_epilogue() { - JvmtiCurrentBreakpoints::gc_epilogue(); -} - // Onload raw monitor transition. 
void JvmtiExport::transition_pending_onload_raw_monitors() { JvmtiPendingMonitors::transition_raw_monitors(); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/jvmtiExport.hpp --- a/src/hotspot/share/prims/jvmtiExport.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/jvmtiExport.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -388,7 +388,6 @@ static void oops_do(OopClosure* f) NOT_JVMTI_RETURN; static void weak_oops_do(BoolObjectClosure* b, OopClosure* f) NOT_JVMTI_RETURN; - static void gc_epilogue() NOT_JVMTI_RETURN; static void transition_pending_onload_raw_monitors() NOT_JVMTI_RETURN; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/jvmtiImpl.cpp --- a/src/hotspot/share/prims/jvmtiImpl.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/jvmtiImpl.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -225,13 +225,6 @@ } } -void GrowableCache::gc_epilogue() { - int len = _elements->length(); - for (int i=0; i<len; i++) { - _cache[i] = _elements->at(i)->getCacheValue(); - } -} - // // class JvmtiBreakpoint // @@ -389,10 +382,6 @@ _bps.metadata_do(f); } -void JvmtiBreakpoints::gc_epilogue() { - _bps.gc_epilogue(); -} - void JvmtiBreakpoints::print() { #ifndef PRODUCT LogTarget(Trace, jvmti) log; @@ -514,12 +503,6 @@ } } -void JvmtiCurrentBreakpoints::gc_epilogue() { - if (_jvmti_breakpoints != NULL) { - _jvmti_breakpoints->gc_epilogue(); - } -} - /////////////////////////////////////////////////////////////// // // class VM_GetOrSetLocal diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/jvmtiImpl.hpp --- a/src/hotspot/share/prims/jvmtiImpl.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/jvmtiImpl.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -120,8 +120,6 @@ void oops_do(OopClosure* f); // walk metadata to preserve for RedefineClasses void metadata_do(void f(Metadata*)); - // update the cache after a full gc - void gc_epilogue(); }; @@ -154,7 +152,6 @@ void clear() { _cache.clear(); } void oops_do(OopClosure* f) { _cache.oops_do(f); } void metadata_do(void f(Metadata*)) { _cache.metadata_do(f); } - void gc_epilogue() { _cache.gc_epilogue(); } }; @@ -257,7 +254,6 @@ int set(JvmtiBreakpoint& bp); int clear(JvmtiBreakpoint& bp); void clearall_in_class_at_safepoint(Klass* klass); - void gc_epilogue(); }; @@ -299,7 +295,6 @@ static void oops_do(OopClosure* f); static void metadata_do(void f(Metadata*)) NOT_JVMTI_RETURN; - static void gc_epilogue(); }; /////////////////////////////////////////////////////////////// diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/methodHandles.cpp --- a/src/hotspot/share/prims/methodHandles.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/methodHandles.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -42,7 +42,6 @@ #include "oops/typeArrayOop.inline.hpp" #include "prims/methodHandles.hpp" #include "runtime/compilationPolicy.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/fieldDescriptor.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" @@ -1110,7 +1109,8 @@ } if (marked > 0) { // At least one nmethod has been marked for deoptimization. 
- Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } @@ -1506,7 +1506,8 @@ } if (marked > 0) { // At least one nmethod has been marked for deoptimization - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/resolvedMethodTable.cpp --- a/src/hotspot/share/prims/resolvedMethodTable.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/resolvedMethodTable.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -56,15 +56,15 @@ return name_hash ^ signature_hash; } -typedef ConcurrentHashTable, - ResolvedMethodTableConfig, +typedef ConcurrentHashTable ResolvedMethodTableHash; -class ResolvedMethodTableConfig : public ResolvedMethodTableHash::BaseConfig { +class ResolvedMethodTableConfig : public AllStatic { private: public: - static uintx get_hash(WeakHandle const& value, - bool* is_dead) { + typedef WeakHandle Value; + + static uintx get_hash(Value const& value, bool* is_dead) { oop val_oop = value.peek(); if (val_oop == NULL) { *is_dead = true; @@ -76,13 +76,13 @@ } // We use default allocation/deallocation but counted - static void* allocate_node(size_t size, WeakHandle const& value) { + static void* allocate_node(size_t size, Value const& value) { ResolvedMethodTable::item_added(); - return ResolvedMethodTableHash::BaseConfig::allocate_node(size, value); + return AllocateHeap(size, mtClass); } - static void free_node(void* memory, WeakHandle const& value) { + static void free_node(void* memory, Value const& value) { value.release(); - ResolvedMethodTableHash::BaseConfig::free_node(memory, value); + FreeHeap(memory); ResolvedMethodTable::item_removed(); } }; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/unsafe.cpp --- a/src/hotspot/share/prims/unsafe.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/unsafe.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -149,6 +149,25 @@ ///// Data read/writes on the Java heap and in native (off-heap) memory /** + * Helper class to wrap memory accesses in JavaThread::doing_unsafe_access() + */ +class GuardUnsafeAccess { + JavaThread* _thread; + +public: + GuardUnsafeAccess(JavaThread* thread) : _thread(thread) { + // native/off-heap access which may raise SIGBUS if accessing + // memory mapped file data in a region of the file which has + // been truncated and is now invalid. + _thread->set_doing_unsafe_access(true); + } + + ~GuardUnsafeAccess() { + _thread->set_doing_unsafe_access(false); + } +}; + +/** * Helper class for accessing memory. 
* * Normalizes values and wraps accesses in @@ -189,25 +208,6 @@ return x != 0; } - /** - * Helper class to wrap memory accesses in JavaThread::doing_unsafe_access() - */ - class GuardUnsafeAccess { - JavaThread* _thread; - - public: - GuardUnsafeAccess(JavaThread* thread) : _thread(thread) { - // native/off-heap access which may raise SIGBUS if accessing - // memory mapped file data in a region of the file which has - // been truncated and is now invalid - _thread->set_doing_unsafe_access(true); - } - - ~GuardUnsafeAccess() { - _thread->set_doing_unsafe_access(false); - } - }; - public: MemoryAccess(JavaThread* thread, jobject obj, jlong offset) : _thread(thread), _obj(JNIHandles::resolve(obj)), _offset((ptrdiff_t)offset) { @@ -399,8 +399,14 @@ void* src = index_oop_from_field_offset_long(srcp, srcOffset); void* dst = index_oop_from_field_offset_long(dstp, dstOffset); - - Copy::conjoint_memory_atomic(src, dst, sz); + { + GuardUnsafeAccess guard(thread); + if (StubRoutines::unsafe_arraycopy() != NULL) { + StubRoutines::UnsafeArrayCopy_stub()(src, dst, sz); + } else { + Copy::conjoint_memory_atomic(src, dst, sz); + } + } } UNSAFE_END // This function is a leaf since if the source and destination are both in native memory @@ -416,7 +422,11 @@ address src = (address)srcOffset; address dst = (address)dstOffset; - Copy::conjoint_swap(src, dst, sz, esz); + { + JavaThread* thread = JavaThread::thread_from_jni_environment(env); + GuardUnsafeAccess guard(thread); + Copy::conjoint_swap(src, dst, sz, esz); + } } else { // At least one of src/dst are on heap, transition to VM to access raw pointers @@ -427,7 +437,10 @@ address src = (address)index_oop_from_field_offset_long(srcp, srcOffset); address dst = (address)index_oop_from_field_offset_long(dstp, dstOffset); - Copy::conjoint_swap(src, dst, sz, esz); + { + GuardUnsafeAccess guard(thread); + Copy::conjoint_swap(src, dst, sz, esz); + } } JVM_END } } UNSAFE_END diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/prims/whitebox.cpp --- a/src/hotspot/share/prims/whitebox.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/prims/whitebox.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -822,8 +822,10 @@ WB_END WB_ENTRY(void, WB_DeoptimizeAll(JNIEnv* env, jobject o)) + MutexLocker mu(Compile_lock); CodeCache::mark_all_nmethods_for_deoptimization(); - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); WB_END WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) @@ -840,7 +842,8 @@ } result += CodeCache::mark_for_deoptimization(mh()); if (result > 0) { - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } return result; WB_END diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/arguments.cpp --- a/src/hotspot/share/runtime/arguments.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/arguments.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1815,6 +1815,10 @@ // was not specified. if (reasonable_max > max_coop_heap) { if (FLAG_IS_ERGO(UseCompressedOops) && override_coop_limit) { + log_info(cds)("UseCompressedOops and UseCompressedClassPointers have been disabled due to" + " max heap " SIZE_FORMAT " > compressed oop heap " SIZE_FORMAT ". " + "Please check the setting of MaxRAMPercentage %5.2f." 
+ ,(size_t)reasonable_max, (size_t)max_coop_heap, MaxRAMPercentage); FLAG_SET_ERGO(UseCompressedOops, false); FLAG_SET_ERGO(UseCompressedClassPointers, false); } else { diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/biasedLocking.cpp --- a/src/hotspot/share/runtime/biasedLocking.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/biasedLocking.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -548,7 +548,7 @@ if (biased_locker != NULL) { _biased_locker_id = JFR_THREAD_ID(biased_locker); } - _safepoint_id = SafepointSynchronize::safepoint_counter(); + _safepoint_id = SafepointSynchronize::safepoint_id(); clean_up_cached_monitor_info(); return; } else { @@ -589,7 +589,7 @@ virtual void doit() { _status_code = bulk_revoke_or_rebias_at_safepoint((*_obj)(), _bulk_rebias, _attempt_rebias_of_object, _requesting_thread); - _safepoint_id = SafepointSynchronize::safepoint_counter(); + _safepoint_id = SafepointSynchronize::safepoint_id(); clean_up_cached_monitor_info(); } @@ -628,29 +628,6 @@ event->commit(); } -BiasedLocking::Condition BiasedLocking::revoke_own_locks_in_handshake(Handle obj, TRAPS) { - markOop mark = obj->mark(); - - if (!mark->has_bias_pattern()) { - return NOT_BIASED; - } - - Klass *k = obj->klass(); - markOop prototype_header = k->prototype_header(); - assert(mark->biased_locker() == THREAD && - prototype_header->bias_epoch() == mark->bias_epoch(), "Revoke failed, unhandled biased lock state"); - ResourceMark rm; - log_info(biasedlocking)("Revoking bias by walking my own stack:"); - EventBiasedLockSelfRevocation event; - BiasedLocking::Condition cond = revoke_bias(obj(), false, false, (JavaThread*) THREAD, NULL); - ((JavaThread*) THREAD)->set_cached_monitor_info(NULL); - assert(cond == BIAS_REVOKED, "why not?"); - if (event.should_commit()) { - post_self_revocation_event(&event, k); - } - return cond; -} - BiasedLocking::Condition BiasedLocking::revoke_and_rebias(Handle obj, bool attempt_rebias, TRAPS) { assert(!SafepointSynchronize::is_at_safepoint(), "must not be called while at safepoint"); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/biasedLocking.hpp --- a/src/hotspot/share/runtime/biasedLocking.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/biasedLocking.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -175,7 +175,6 @@ // This should be called by JavaThreads to revoke the bias of an object static Condition revoke_and_rebias(Handle obj, bool attempt_rebias, TRAPS); - static Condition revoke_own_locks_in_handshake(Handle obj, TRAPS); // These do not allow rebiasing; they are used by deoptimization to // ensure that monitors on the stack can be migrated diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/deoptimization.cpp --- a/src/hotspot/share/runtime/deoptimization.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/deoptimization.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -779,35 +779,10 @@ return bt; JRT_END -class DeoptimizeMarkedTC : public ThreadClosure { - bool _in_handshake; - public: - DeoptimizeMarkedTC(bool in_handshake) : _in_handshake(in_handshake) {} - virtual void do_thread(Thread* thread) { - assert(thread->is_Java_thread(), "must be"); - JavaThread* jt = (JavaThread*)thread; - jt->deoptimize_marked_methods(_in_handshake); - } -}; -void Deoptimization::deoptimize_all_marked() { - ResourceMark rm; - DeoptimizationMarker dm; - - if (SafepointSynchronize::is_at_safepoint()) { - DeoptimizeMarkedTC deopt(false); - // Make the dependent methods not entrant - 
CodeCache::make_marked_nmethods_not_entrant(); - Threads::java_threads_do(&deopt); - } else { - // Make the dependent methods not entrant - { - MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); - CodeCache::make_marked_nmethods_not_entrant(); - } - DeoptimizeMarkedTC deopt(true); - Handshake::execute(&deopt); - } +int Deoptimization::deoptimize_dependents() { + Threads::deoptimized_wrt_marked_nmethods(); + return 0; } Deoptimization::DeoptAction Deoptimization::_unloaded_action @@ -1412,7 +1387,14 @@ } } -static void get_monitors_from_stack(GrowableArray<Handle>* objects_to_revoke, JavaThread* thread, frame fr, RegisterMap* map) { + +void Deoptimization::revoke_biases_of_monitors(JavaThread* thread, frame fr, RegisterMap* map) { + if (!UseBiasedLocking) { + return; + } + + GrowableArray<Handle>* objects_to_revoke = new GrowableArray<Handle>(); + // Unfortunately we don't have a RegisterMap available in most of // the places we want to call this routine so we need to walk the // stack again to update the register map. @@ -1436,14 +1418,6 @@ cvf = compiledVFrame::cast(cvf->sender()); } collect_monitors(cvf, objects_to_revoke); -} - -void Deoptimization::revoke_using_safepoint(JavaThread* thread, frame fr, RegisterMap* map) { - if (!UseBiasedLocking) { - return; - } - GrowableArray<Handle>* objects_to_revoke = new GrowableArray<Handle>(); - get_monitors_from_stack(objects_to_revoke, thread, fr, map); if (SafepointSynchronize::is_at_safepoint()) { BiasedLocking::revoke_at_safepoint(objects_to_revoke); @@ -1452,21 +1426,6 @@ } } -void Deoptimization::revoke_using_handshake(JavaThread* thread, frame fr, RegisterMap* map) { - if (!UseBiasedLocking) { - return; - } - GrowableArray<Handle>* objects_to_revoke = new GrowableArray<Handle>(); - get_monitors_from_stack(objects_to_revoke, thread, fr, map); - - int len = objects_to_revoke->length(); - for (int i = 0; i < len; i++) { - oop obj = (objects_to_revoke->at(i))(); - BiasedLocking::revoke_own_locks_in_handshake(objects_to_revoke->at(i), thread); - assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now"); - } -} - void Deoptimization::deoptimize_single_frame(JavaThread* thread, frame fr, Deoptimization::DeoptReason reason) { assert(fr.can_be_deoptimized(), "checking frame type"); @@ -1495,16 +1454,11 @@ fr.deoptimize(thread); } -void Deoptimization::deoptimize(JavaThread* thread, frame fr, RegisterMap *map, bool in_handshake) { - deopt_thread(in_handshake, thread, fr, map, Reason_constraint); +void Deoptimization::deoptimize(JavaThread* thread, frame fr, RegisterMap *map) { + deoptimize(thread, fr, map, Reason_constraint); } void Deoptimization::deoptimize(JavaThread* thread, frame fr, RegisterMap *map, DeoptReason reason) { - deopt_thread(false, thread, fr, map, reason); -} - -void Deoptimization::deopt_thread(bool in_handshake, JavaThread* thread, - frame fr, RegisterMap *map, DeoptReason reason) { // Deoptimize only if the frame comes from compile code. // Do not deoptimize the frame which is already patched // during the execution of the loops below. 
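
The call-site hunks in this change (jvmtiEventController.cpp, methodHandles.cpp, whitebox.cpp, dtraceAttacher.cpp) all revert to the same two-step pattern: mark the nmethods of interest, then execute the VM_Deoptimize safepoint operation reintroduced in vmOperations.hpp/.cpp further down. The following is only an illustrative sketch of that calling convention, not part of the patch; the helper name is invented, and it assumes the HotSpot-internal headers it includes, so it is meant to be read in the context of the VM sources rather than as a standalone program.

    // Illustrative sketch (hypothetical helper, not part of the patch):
    // the mark-then-deoptimize pattern restored by this change.
    #include "code/codeCache.hpp"
    #include "runtime/mutexLocker.hpp"
    #include "runtime/vmOperations.hpp"
    #include "runtime/vmThread.hpp"

    static void deoptimize_all_marked_example() {
      {
        // Callers that do the marking themselves hold Compile_lock while
        // marking, as WB_DeoptimizeAll does elsewhere in this change.
        MutexLocker mu(Compile_lock);
        CodeCache::mark_all_nmethods_for_deoptimization();
      }
      // VM_Deoptimize::doit() runs at a safepoint: it deoptimizes the
      // dependent activations and then makes the marked nmethods not entrant.
      VM_Deoptimize op;
      VMThread::execute(&op);
    }
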
@@ -1514,11 +1468,7 @@ ResourceMark rm; DeoptimizationMarker dm; if (UseBiasedLocking) { - if (in_handshake) { - revoke_using_handshake(thread, fr, map); - } else { - revoke_using_safepoint(thread, fr, map); - } + revoke_biases_of_monitors(thread, fr, map); } deoptimize_single_frame(thread, fr, reason); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/deoptimization.hpp --- a/src/hotspot/share/runtime/deoptimization.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/deoptimization.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -137,19 +137,12 @@ Unpack_LIMIT = 4 }; - static void deoptimize_all_marked(); - - private: // Checks all compiled methods. Invalid methods are deleted and // corresponding activations are deoptimized. static int deoptimize_dependents(); - static void revoke_using_handshake(JavaThread* thread, frame fr, RegisterMap* map); - static void revoke_using_safepoint(JavaThread* thread, frame fr, RegisterMap* map); - static void deopt_thread(bool in_handshake, JavaThread* thread, frame fr, RegisterMap *map, DeoptReason reason); - public: // Deoptimizes a frame lazily. nmethod gets patched deopt happens on return to the frame - static void deoptimize(JavaThread* thread, frame fr, RegisterMap *map, bool in_handshake = false); + static void deoptimize(JavaThread* thread, frame fr, RegisterMap *reg_map); static void deoptimize(JavaThread* thread, frame fr, RegisterMap *reg_map, DeoptReason reason); #if INCLUDE_JVMCI @@ -163,9 +156,7 @@ // Helper function to revoke biases of all monitors in frame if UseBiasedLocking // is enabled - static void revoke_biases_of_monitors(JavaThread* thread, frame fr, RegisterMap* map) { - revoke_using_safepoint(thread, fr, map); - } + static void revoke_biases_of_monitors(JavaThread* thread, frame fr, RegisterMap* map); #if COMPILER2_OR_JVMCI JVMCI_ONLY(public:) diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/mutex.hpp --- a/src/hotspot/share/runtime/mutex.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/mutex.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -62,7 +62,7 @@ event, access = event + 1, tty = access + 2, - special = tty + 2, + special = tty + 1, suspend_resume = special + 1, vmweak = suspend_resume + 2, leaf = vmweak + 2, diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/mutexLocker.cpp --- a/src/hotspot/share/runtime/mutexLocker.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/mutexLocker.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -39,7 +39,6 @@ // Consider using GCC's __read_mostly. 
Mutex* Patching_lock = NULL; -Mutex* CompiledMethod_lock = NULL; Monitor* SystemDictionary_lock = NULL; Mutex* ProtectionDomainSet_lock = NULL; Mutex* SharedDictionary_lock = NULL; @@ -83,7 +82,6 @@ Monitor* CGC_lock = NULL; Monitor* STS_lock = NULL; Monitor* FullGCCount_lock = NULL; -Monitor* SATB_Q_CBL_mon = NULL; Monitor* DirtyCardQ_CBL_mon = NULL; Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* MarkStackFreeList_lock = NULL; @@ -229,8 +227,6 @@ def(FullGCCount_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); // in support of ExplicitGCInvokesConcurrent if (UseG1GC) { - def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); - def(DirtyCardQ_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); def(Shared_DirtyCardQ_lock , PaddedMutex , access + 1, true, Monitor::_safepoint_check_never); @@ -247,8 +243,6 @@ def(MonitoringSupport_lock , PaddedMutex , native , true, Monitor::_safepoint_check_never); // used for serviceability monitoring support } if (UseShenandoahGC) { - def(SATB_Q_CBL_mon , PaddedMonitor, access, true, Monitor::_safepoint_check_never); - def(StringDedupQueue_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); def(StringDedupTable_lock , PaddedMutex , leaf, true, Monitor::_safepoint_check_never); } @@ -262,8 +256,6 @@ def(ClassLoaderDataGraph_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_always); def(Patching_lock , PaddedMutex , special, true, Monitor::_safepoint_check_never); // used for safepointing and code patching. - def(OsrList_lock , PaddedMutex , special-1, true, Monitor::_safepoint_check_never); - def(CompiledMethod_lock , PaddedMutex , special-1, true, Monitor::_safepoint_check_never); def(Service_lock , PaddedMonitor, special, true, Monitor::_safepoint_check_never); // used for service thread operations def(JmethodIdCreation_lock , PaddedMutex , leaf, true, Monitor::_safepoint_check_always); // used for creating jmethodIDs. @@ -279,6 +271,7 @@ def(SymbolArena_lock , PaddedMutex , leaf+2, true, Monitor::_safepoint_check_never); def(ProfilePrint_lock , PaddedMutex , leaf, false, Monitor::_safepoint_check_always); // serial profile printing def(ExceptionCache_lock , PaddedMutex , leaf, false, Monitor::_safepoint_check_always); // serial profile printing + def(OsrList_lock , PaddedMutex , leaf, true, Monitor::_safepoint_check_never); def(Debug1_lock , PaddedMutex , leaf, true, Monitor::_safepoint_check_never); #ifndef PRODUCT def(FullGCALot_lock , PaddedMutex , leaf, false, Monitor::_safepoint_check_always); // a lock to make FullGCALot MT safe diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/mutexLocker.hpp --- a/src/hotspot/share/runtime/mutexLocker.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/mutexLocker.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -32,7 +32,6 @@ // Mutexes used in the VM. extern Mutex* Patching_lock; // a lock used to guard code patching of compiled code -extern Mutex* CompiledMethod_lock; // a lock used to guard a compiled method extern Monitor* SystemDictionary_lock; // a lock on the system dictionary extern Mutex* ProtectionDomainSet_lock; // a lock on the pd_set list in the system dictionary extern Mutex* SharedDictionary_lock; // a lock on the CDS shared dictionary @@ -78,8 +77,6 @@ // fore- & background GC threads. extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet. 
extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc -extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q - // completed buffer queue. extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q // completed buffer queue. extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/safepoint.cpp --- a/src/hotspot/share/runtime/safepoint.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/safepoint.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -118,12 +118,22 @@ } } +// SafepointCheck +SafepointStateTracker::SafepointStateTracker(uint64_t safepoint_id, bool at_safepoint) + : _safepoint_id(safepoint_id), _at_safepoint(at_safepoint) {} + +bool SafepointStateTracker::safepoint_state_changed() { + return _safepoint_id != SafepointSynchronize::safepoint_id() || + _at_safepoint != SafepointSynchronize::is_at_safepoint(); +} + // -------------------------------------------------------------------------------------------------- // Implementation of Safepoint begin/end SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized; int SafepointSynchronize::_waiting_to_block = 0; volatile uint64_t SafepointSynchronize::_safepoint_counter = 0; +uint64_t SafepointSynchronize::_safepoint_id = 0; const uint64_t SafepointSynchronize::InactiveSafepointCounter = 0; int SafepointSynchronize::_current_jni_active_count = 0; @@ -154,7 +164,7 @@ --_waiting_to_block; } -static bool thread_not_running(ThreadSafepointState *cur_state) { +bool SafepointSynchronize::thread_not_running(ThreadSafepointState *cur_state) { if (!cur_state->is_running()) { return true; } @@ -408,6 +418,9 @@ OrderAccess::fence(); + // Set the new id + ++_safepoint_id; + #ifdef ASSERT // Make sure all the threads were visited. for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) { @@ -419,7 +432,7 @@ GCLocker::set_jni_lock_count(_current_jni_active_count); post_safepoint_synchronize_event(sync_event, - _safepoint_counter, + _safepoint_id, initial_running, _waiting_to_block, iterations); @@ -429,14 +442,14 @@ // needs cleanup to be completed before running the GC op. EventSafepointCleanup cleanup_event; do_cleanup_tasks(); - post_safepoint_cleanup_event(cleanup_event, _safepoint_counter); + post_safepoint_cleanup_event(cleanup_event, _safepoint_id); - post_safepoint_begin_event(begin_event, _safepoint_counter, nof_threads, _current_jni_active_count); + post_safepoint_begin_event(begin_event, _safepoint_id, nof_threads, _current_jni_active_count); SafepointTracing::cleanup(); } void SafepointSynchronize::disarm_safepoint() { - uint64_t safepoint_id = _safepoint_counter; + uint64_t active_safepoint_counter = _safepoint_counter; { JavaThreadIteratorWithHandle jtiwh; #ifdef ASSERT @@ -475,7 +488,7 @@ jtiwh.rewind(); for (; JavaThread *current = jtiwh.next(); ) { // Clear the visited flag to ensure that the critical counts are collected properly. 
- DEBUG_ONLY(current->reset_visited_for_critical_count(safepoint_id);) + DEBUG_ONLY(current->reset_visited_for_critical_count(active_safepoint_counter);) ThreadSafepointState* cur_state = current->safepoint_state(); assert(!cur_state->is_running(), "Thread not suspended at safepoint"); cur_state->restart(); // TSS _running @@ -497,7 +510,6 @@ void SafepointSynchronize::end() { assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); EventSafepointEnd event; - uint64_t safepoint_id = _safepoint_counter; assert(Thread::current()->is_VM_thread(), "Only VM thread can execute a safepoint"); disarm_safepoint(); @@ -506,7 +518,7 @@ SafepointTracing::end(); - post_safepoint_end_event(event, safepoint_id); + post_safepoint_end_event(event, safepoint_id()); } bool SafepointSynchronize::is_cleanup_needed() { @@ -554,7 +566,7 @@ _counters(counters) {} void work(uint worker_id) { - uint64_t safepoint_id = SafepointSynchronize::safepoint_counter(); + uint64_t safepoint_id = SafepointSynchronize::safepoint_id(); // All threads deflate monitors and mark nmethods (if necessary). Threads::possibly_parallel_threads_do(true, &_cleanup_threads_cl); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/safepoint.hpp --- a/src/hotspot/share/runtime/safepoint.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/safepoint.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -48,6 +48,14 @@ class ThreadSafepointState; +class SafepointStateTracker { + uint64_t _safepoint_id; + bool _at_safepoint; +public: + SafepointStateTracker(uint64_t safepoint_id, bool at_safepoint); + bool safepoint_state_changed(); +}; + // // Implements roll-forward to safepoint (safepoint synchronization) // @@ -77,6 +85,7 @@ friend class SafepointMechanism; friend class ThreadSafepointState; friend class HandshakeState; + friend class SafepointStateTracker; // Threads might read this flag directly, without acquiring the Threads_lock: static volatile SynchronizeState _state; @@ -91,6 +100,11 @@ // safepoint. static volatile uint64_t _safepoint_counter; + // A change in this counter or a change in the result of + // is_at_safepoint() are used by SafepointStateTracker:: + // safepoint_state_changed() to determine its answer. + static uint64_t _safepoint_id; + // JavaThreads that need to block for the safepoint will stop on the // _wait_barrier, where they can quickly be started again. static WaitBarrier* _wait_barrier; @@ -114,6 +128,7 @@ static void disarm_safepoint(); static void increment_jni_active_count(); static void decrement_waiting_to_block(); + static bool thread_not_running(ThreadSafepointState *cur_state); // Used in safepoint_safe to do a stable load of the thread state. static bool try_stable_load_state(JavaThreadState *state, @@ -127,6 +142,8 @@ // If true the VMThread may safely process the handshake operation for the JavaThread. 
static bool handshake_safe(JavaThread *thread); + static uint64_t safepoint_counter() { return _safepoint_counter; } + public: static void init(Thread* vmthread); @@ -141,8 +158,15 @@ // Query static bool is_at_safepoint() { return _state == _synchronized; } static bool is_synchronizing() { return _state == _synchronizing; } - static uint64_t safepoint_counter() { return _safepoint_counter; } - static bool is_same_safepoint(uint64_t counter) { return (SafepointSynchronize::safepoint_counter() - counter) < 2; } + + static uint64_t safepoint_id() { + return _safepoint_id; + } + + static SafepointStateTracker safepoint_state_tracker() { + return SafepointStateTracker(safepoint_id(), is_at_safepoint()); + } + // Exception handling for page polling static void handle_polling_page_exception(JavaThread *thread); diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/sharedRuntime.cpp --- a/src/hotspot/share/runtime/sharedRuntime.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/sharedRuntime.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2020,7 +2020,7 @@ const char* caster_name = caster_klass->external_name(); assert(target_klass != NULL || target_klass_name != NULL, "one must be provided"); - const char* target_name = target_klass == NULL ? target_klass_name->as_C_string() : + const char* target_name = target_klass == NULL ? target_klass_name->as_klass_external_name() : target_klass->external_name(); size_t msglen = strlen(caster_name) + strlen("class ") + strlen(" cannot be cast to class ") + strlen(target_name) + 1; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/stubRoutines.cpp --- a/src/hotspot/share/runtime/stubRoutines.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/stubRoutines.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "asm/codeBuffer.hpp" +#include "asm/macroAssembler.inline.hpp" #include "memory/resourceArea.hpp" #include "oops/access.inline.hpp" #include "oops/oop.inline.hpp" @@ -38,6 +39,10 @@ #include "opto/runtime.hpp" #endif +UnsafeCopyMemory* UnsafeCopyMemory::_table = NULL; +int UnsafeCopyMemory::_table_length = 0; +int UnsafeCopyMemory::_table_max_length = 0; +address UnsafeCopyMemory::_common_exit_stub_pc = NULL; // Implementation of StubRoutines - for a description // of how to extend it, see the header file. 
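
The UnsafeCopyMemory statics defined just above, together with the contains_pc() and page_error_continue_pc() helpers added a little further down in stubRoutines.cpp, form a table of stub PC ranges that are allowed to fault when copying truncated memory-mapped file data. The platform signal handlers that consult this table are not part of this section, so the sketch below only illustrates the intended lookup; the handler-side function name and the fault-PC plumbing are hypothetical, and only contains_pc() and page_error_continue_pc() come from the patch.

    // Hedged sketch of the expected table lookup on a page error; the
    // function name and resume_pc plumbing are invented for illustration.
    #include "runtime/stubRoutines.hpp"
    #include "utilities/globalDefinitions.hpp"

    static bool continue_after_unsafe_copy_fault_example(address fault_pc,
                                                         address* resume_pc) {
      if (UnsafeCopyMemory::contains_pc(fault_pc)) {
        // The fault happened inside a registered copy stub: resume at the
        // recorded continuation PC instead of treating it as a VM crash.
        *resume_pc = UnsafeCopyMemory::page_error_continue_pc(fault_pc);
        return *resume_pc != NULL;
      }
      return false; // not an unsafe-copy fault; fall back to normal handling
    }
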
@@ -113,7 +118,6 @@ address StubRoutines::_unsafe_arraycopy = NULL; address StubRoutines::_generic_arraycopy = NULL; - address StubRoutines::_jbyte_fill; address StubRoutines::_jshort_fill; address StubRoutines::_jint_fill; @@ -177,6 +181,31 @@ extern void StubGenerator_generate(CodeBuffer* code, bool all); // only interface to generators +void UnsafeCopyMemory::create_table(int max_size) { + UnsafeCopyMemory::_table = new UnsafeCopyMemory[max_size]; + UnsafeCopyMemory::_table_max_length = max_size; +} + +bool UnsafeCopyMemory::contains_pc(address pc) { + for (int i = 0; i < UnsafeCopyMemory::_table_length; i++) { + UnsafeCopyMemory* entry = &UnsafeCopyMemory::_table[i]; + if (pc >= entry->start_pc() && pc < entry->end_pc()) { + return true; + } + } + return false; +} + +address UnsafeCopyMemory::page_error_continue_pc(address pc) { + for (int i = 0; i < UnsafeCopyMemory::_table_length; i++) { + UnsafeCopyMemory* entry = &UnsafeCopyMemory::_table[i]; + if (pc >= entry->start_pc() && pc < entry->end_pc()) { + return entry->error_exit_pc(); + } + } + return NULL; +} + void StubRoutines::initialize1() { if (_code1 == NULL) { ResourceMark rm; @@ -569,3 +598,25 @@ #undef RETURN_STUB #undef RETURN_STUB_PARM } + +UnsafeCopyMemoryMark::UnsafeCopyMemoryMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc) { + _cgen = cgen; + _ucm_entry = NULL; + if (add_entry) { + address err_exit_pc = NULL; + if (!continue_at_scope_end) { + err_exit_pc = error_exit_pc != NULL ? error_exit_pc : UnsafeCopyMemory::common_exit_stub_pc(); + } + assert(err_exit_pc != NULL || continue_at_scope_end, "error exit not set"); + _ucm_entry = UnsafeCopyMemory::add_to_table(_cgen->assembler()->pc(), NULL, err_exit_pc); + } +} + +UnsafeCopyMemoryMark::~UnsafeCopyMemoryMark() { + if (_ucm_entry != NULL) { + _ucm_entry->set_end_pc(_cgen->assembler()->pc()); + if (_ucm_entry->error_exit_pc() == NULL) { + _ucm_entry->set_error_exit_pc(_cgen->assembler()->pc()); + } + } +} diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/stubRoutines.hpp --- a/src/hotspot/share/runtime/stubRoutines.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/stubRoutines.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -74,6 +74,51 @@ // 4. 
implement the corresponding generator function in the platform-dependent // stubGenerator_.cpp file and call the function in generate_all() of that file +class UnsafeCopyMemory : public CHeapObj { + private: + address _start_pc; + address _end_pc; + address _error_exit_pc; + public: + static address _common_exit_stub_pc; + static UnsafeCopyMemory* _table; + static int _table_length; + static int _table_max_length; + UnsafeCopyMemory() : _start_pc(NULL), _end_pc(NULL), _error_exit_pc(NULL) {} + void set_start_pc(address pc) { _start_pc = pc; } + void set_end_pc(address pc) { _end_pc = pc; } + void set_error_exit_pc(address pc) { _error_exit_pc = pc; } + address start_pc() const { return _start_pc; } + address end_pc() const { return _end_pc; } + address error_exit_pc() const { return _error_exit_pc; } + + static void set_common_exit_stub_pc(address pc) { _common_exit_stub_pc = pc; } + static address common_exit_stub_pc() { return _common_exit_stub_pc; } + + static UnsafeCopyMemory* add_to_table(address start_pc, address end_pc, address error_exit_pc) { + guarantee(_table_length < _table_max_length, "Incorrect UnsafeCopyMemory::_table_max_length"); + UnsafeCopyMemory* entry = &_table[_table_length]; + entry->set_start_pc(start_pc); + entry->set_end_pc(end_pc); + entry->set_error_exit_pc(error_exit_pc); + + _table_length++; + return entry; + } + + static bool contains_pc(address pc); + static address page_error_continue_pc(address pc); + static void create_table(int max_size); +}; + +class UnsafeCopyMemoryMark : public StackObj { + private: + UnsafeCopyMemory* _ucm_entry; + StubCodeGenerator* _cgen; + public: + UnsafeCopyMemoryMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc = NULL); + ~UnsafeCopyMemoryMark(); +}; class StubRoutines: AllStatic { @@ -310,11 +355,14 @@ static address arrayof_oop_disjoint_arraycopy(bool dest_uninitialized = false) { return dest_uninitialized ? _arrayof_oop_disjoint_arraycopy_uninit : _arrayof_oop_disjoint_arraycopy; } - static address checkcast_arraycopy(bool dest_uninitialized = false) { return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy; } - static address unsafe_arraycopy() { return _unsafe_arraycopy; } + static address unsafe_arraycopy() { return _unsafe_arraycopy; } + + typedef void (*UnsafeArrayCopyStub)(const void* src, void* dst, size_t count); + static UnsafeArrayCopyStub UnsafeArrayCopy_stub() { return CAST_TO_FN_PTR(UnsafeArrayCopyStub, _unsafe_arraycopy); } + static address generic_arraycopy() { return _generic_arraycopy; } static address jbyte_fill() { return _jbyte_fill; } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/thread.cpp --- a/src/hotspot/share/runtime/thread.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/thread.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -2903,17 +2903,18 @@ #endif // PRODUCT -void JavaThread::deoptimize_marked_methods(bool in_handshake) { +void JavaThread::deoptimized_wrt_marked_nmethods() { if (!has_last_Java_frame()) return; // BiasedLocking needs an updated RegisterMap for the revoke monitors pass StackFrameStream fst(this, UseBiasedLocking); for (; !fst.is_done(); fst.next()) { if (fst.current()->should_be_deoptimized()) { - Deoptimization::deoptimize(this, *fst.current(), fst.register_map(), in_handshake); + Deoptimization::deoptimize(this, *fst.current(), fst.register_map()); } } } + // If the caller is a NamedThread, then remember, in the current scope, // the given JavaThread in its _processed_thread field. 
class RememberProcessedThread: public StackObj { @@ -4652,6 +4653,13 @@ threads_do(&handles_closure); } +void Threads::deoptimized_wrt_marked_nmethods() { + ALL_JAVA_THREADS(p) { + p->deoptimized_wrt_marked_nmethods(); + } +} + + // Get count Java threads that are waiting to enter the specified monitor. GrowableArray<JavaThread*>* Threads::get_pending_threads(ThreadsList * t_list, int count, diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/thread.hpp --- a/src/hotspot/share/runtime/thread.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/thread.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -1794,6 +1794,7 @@ static ByteSize should_post_on_exceptions_flag_offset() { return byte_offset_of(JavaThread, _should_post_on_exceptions_flag); } + static ByteSize doing_unsafe_access_offset() { return byte_offset_of(JavaThread, _doing_unsafe_access); } // Returns the jni environment for this thread JNIEnv* jni_environment() { return &_jni_environment; } @@ -1923,7 +1924,7 @@ void deoptimize(); void make_zombies(); - void deoptimize_marked_methods(bool in_handshake); + void deoptimized_wrt_marked_nmethods(); public: // Returns the running thread as a JavaThread diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/vmOperations.cpp --- a/src/hotspot/share/runtime/vmOperations.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/vmOperations.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -118,6 +118,18 @@ } } +void VM_Deoptimize::doit() { + // We do not want any GCs to happen while we are in the middle of this VM operation + ResourceMark rm; + DeoptimizationMarker dm; + + // Deoptimize all activations depending on marked nmethods + Deoptimization::deoptimize_dependents(); + + // Make the dependent methods not entrant + CodeCache::make_marked_nmethods_not_entrant(); +} + void VM_MarkActiveNMethods::doit() { NMethodSweeper::mark_active_nmethods(); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/vmOperations.hpp --- a/src/hotspot/share/runtime/vmOperations.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/vmOperations.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -49,6 +49,7 @@ template(ClearICs) \ template(ForceSafepoint) \ template(ForceAsyncSafepoint) \ + template(Deoptimize) \ template(DeoptimizeFrame) \ template(DeoptimizeAll) \ template(ZombieAll) \ @@ -318,6 +319,14 @@ VM_GTestExecuteAtSafepoint() {} }; +class VM_Deoptimize: public VM_Operation { + public: + VM_Deoptimize() {} + VMOp_Type type() const { return VMOp_Deoptimize; } + void doit(); + bool allow_nested_vm_operations() const { return true; } +}; + class VM_MarkActiveNMethods: public VM_Operation { public: VM_MarkActiveNMethods() {} diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/runtime/vmThread.cpp --- a/src/hotspot/share/runtime/vmThread.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/runtime/vmThread.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -388,7 +388,7 @@ // For concurrent vm operations, the thread id is set to 0 indicating thread is unknown. // This is because the caller thread could have exited already. event->set_caller(is_concurrent ? 0 : JFR_THREAD_ID(op->calling_thread())); - event->set_safepointId(evaluate_at_safepoint ? SafepointSynchronize::safepoint_counter() : 0); + event->set_safepointId(evaluate_at_safepoint ? 
SafepointSynchronize::safepoint_id() : 0); event->commit(); } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/services/diagnosticCommand.cpp --- a/src/hotspot/share/services/diagnosticCommand.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/services/diagnosticCommand.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -129,7 +129,7 @@ // Debug on cmd (only makes sense with JVMTI since the agentlib needs it). #if INCLUDE_JVMTI - DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); + DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, true)); #endif // INCLUDE_JVMTI } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/services/dtraceAttacher.cpp --- a/src/hotspot/share/services/dtraceAttacher.cpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/services/dtraceAttacher.cpp Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,23 @@ #ifdef SOLARIS +class VM_DeoptimizeTheWorld : public VM_Operation { + public: + VMOp_Type type() const { + return VMOp_DeoptimizeTheWorld; + } + void doit() { + CodeCache::mark_all_nmethods_for_deoptimization(); + ResourceMark rm; + DeoptimizationMarker dm; + // Deoptimize all activations depending on marked methods + Deoptimization::deoptimize_dependents(); + + // Mark the dependent methods non entrant + CodeCache::make_marked_nmethods_not_entrant(); + } +}; + static void set_bool_flag(const char* flag, bool value) { JVMFlag::boolAtPut((char*)flag, strlen(flag), &value, JVMFlag::ATTACH_ON_DEMAND); @@ -57,8 +74,8 @@ if (changed) { // one or more flags changed, need to deoptimize - CodeCache::mark_all_nmethods_for_deoptimization(); - Deoptimization::deoptimize_all_marked(); + VM_DeoptimizeTheWorld op; + VMThread::execute(&op); } } @@ -80,8 +97,8 @@ } if (changed) { // one or more flags changed, need to deoptimize - CodeCache::mark_all_nmethods_for_deoptimization(); - Deoptimization::deoptimize_all_marked(); + VM_DeoptimizeTheWorld op; + VMThread::execute(&op); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/utilities/concurrentHashTable.hpp --- a/src/hotspot/share/utilities/concurrentHashTable.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/utilities/concurrentHashTable.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -38,8 +38,9 @@ class Thread; class Mutex; -template +template class ConcurrentHashTable : public CHeapObj { + typedef typename CONFIG::Value VALUE; private: // This is the internal node structure. // Only constructed with placement new from memory allocated with MEMFLAGS of @@ -252,10 +253,10 @@ class ScopedCS: public StackObj { protected: Thread* _thread; - ConcurrentHashTable* _cht; + ConcurrentHashTable* _cht; GlobalCounter::CSContext _cs_context; public: - ScopedCS(Thread* thread, ConcurrentHashTable* cht); + ScopedCS(Thread* thread, ConcurrentHashTable* cht); ~ScopedCS(); }; @@ -473,26 +474,12 @@ const char* table_name); // Moves all nodes from this table to to_cht - bool try_move_nodes_to(Thread* thread, ConcurrentHashTable* to_cht); - - // This is a Curiously Recurring Template Pattern (CRPT) interface for the - // specialization. - struct BaseConfig { - public: - // Called when the hash table needs the hash for a VALUE. 
- static uintx get_hash(const VALUE& value, bool* dead) { - return CONFIG::get_hash(value, dead); - } - // Default node allocation. - static void* allocate_node(size_t size, const VALUE& value); - // Default node reclamation. - static void free_node(void* memory, const VALUE& value); - }; + bool try_move_nodes_to(Thread* thread, ConcurrentHashTable* to_cht); // Scoped multi getter. class MultiGetHandle : private ScopedCS { public: - MultiGetHandle(Thread* thread, ConcurrentHashTable* cht) + MultiGetHandle(Thread* thread, ConcurrentHashTable* cht) : ScopedCS(thread, cht) {} // In the MultiGetHandle scope you can lookup items matching LOOKUP_FUNC. // The VALUEs are safe as long as you never save the VALUEs outside the diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/utilities/concurrentHashTable.inline.hpp --- a/src/hotspot/share/utilities/concurrentHashTable.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/utilities/concurrentHashTable.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -53,28 +53,28 @@ #endif // Node -template -inline typename ConcurrentHashTable::Node* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::Node* +ConcurrentHashTable:: Node::next() const { return OrderAccess::load_acquire(&_next); } // Bucket -template -inline typename ConcurrentHashTable::Node* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::Node* +ConcurrentHashTable:: Bucket::first_raw() const { return OrderAccess::load_acquire(&_first); } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: Bucket::release_assign_node_ptr( - typename ConcurrentHashTable::Node* const volatile * dst, - typename ConcurrentHashTable::Node* node) const + typename ConcurrentHashTable::Node* const volatile * dst, + typename ConcurrentHashTable::Node* node) const { // Due to this assert this methods is not static. assert(is_locked(), "Must be locked."); @@ -82,31 +82,31 @@ OrderAccess::release_store(tmp, clear_set_state(node, *dst)); } -template -inline typename ConcurrentHashTable::Node* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::Node* +ConcurrentHashTable:: Bucket::first() const { // We strip the states bit before returning the ptr. 
return clear_state(OrderAccess::load_acquire(&_first)); } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: Bucket::have_redirect() const { return is_state(first_raw(), STATE_REDIRECT_BIT); } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: Bucket::is_locked() const { return is_state(first_raw(), STATE_LOCK_BIT); } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: Bucket::lock() { int i = 0; @@ -123,10 +123,10 @@ } } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: Bucket::release_assign_last_node_next( - typename ConcurrentHashTable::Node* node) + typename ConcurrentHashTable::Node* node) { assert(is_locked(), "Must be locked."); Node* const volatile * ret = first_ptr(); @@ -136,10 +136,10 @@ release_assign_node_ptr(ret, node); } -template -inline bool ConcurrentHashTable:: - Bucket::cas_first(typename ConcurrentHashTable::Node* node, - typename ConcurrentHashTable::Node* expect +template +inline bool ConcurrentHashTable:: + Bucket::cas_first(typename ConcurrentHashTable::Node* node, + typename ConcurrentHashTable::Node* expect ) { if (is_locked()) { @@ -151,8 +151,8 @@ return false; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: Bucket::trylock() { if (is_locked()) { @@ -166,8 +166,8 @@ return false; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: Bucket::unlock() { assert(is_locked(), "Must be locked."); @@ -176,8 +176,8 @@ OrderAccess::release_store(&_first, clear_state(first())); } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: Bucket::redirect() { assert(is_locked(), "Must be locked."); @@ -185,8 +185,8 @@ } // InternalTable -template -inline ConcurrentHashTable:: +template +inline ConcurrentHashTable:: InternalTable::InternalTable(size_t log2_size) : _log2_size(log2_size), _size(((size_t)1ul) << _log2_size), _hash_mask(~(~((size_t)0) << _log2_size)) @@ -201,17 +201,17 @@ } } -template -inline ConcurrentHashTable:: +template +inline ConcurrentHashTable:: InternalTable::~InternalTable() { FREE_C_HEAP_ARRAY(Bucket, _buckets); } // ScopedCS -template -inline ConcurrentHashTable:: - ScopedCS::ScopedCS(Thread* thread, ConcurrentHashTable* cht) +template +inline ConcurrentHashTable:: + ScopedCS::ScopedCS(Thread* thread, ConcurrentHashTable* cht) : _thread(thread), _cht(cht), _cs_context(GlobalCounter::critical_section_begin(_thread)) @@ -222,40 +222,25 @@ } } -template -inline ConcurrentHashTable:: +template +inline ConcurrentHashTable:: ScopedCS::~ScopedCS() { GlobalCounter::critical_section_end(_thread, _cs_context); } -// BaseConfig -template -inline void* ConcurrentHashTable:: - BaseConfig::allocate_node(size_t size, const VALUE& value) -{ - return AllocateHeap(size, F); -} - -template -inline void ConcurrentHashTable:: - BaseConfig::free_node(void* memory, const VALUE& value) -{ - FreeHeap(memory); -} - -template +template template -inline VALUE* ConcurrentHashTable:: +inline typename CONFIG::Value* ConcurrentHashTable:: MultiGetHandle::get(LOOKUP_FUNC& lookup_f, bool* grow_hint) { return ScopedCS::_cht->internal_get(ScopedCS::_thread, lookup_f, grow_hint); } // HaveDeletables -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: HaveDeletables::have_deletable(Bucket* bucket, EVALUATE_FUNC& eval_f, Bucket* prefetch_bucket) @@ -281,9 +266,9 @@ return 
false; } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: HaveDeletables::have_deletable(Bucket* bucket, EVALUATE_FUNC& eval_f, Bucket* preb) @@ -297,8 +282,8 @@ } // ConcurrentHashTable -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: write_synchonize_on_visible_epoch(Thread* thread) { assert(_resize_lock_owner == thread, "Re-size lock not held"); @@ -314,8 +299,8 @@ GlobalCounter::write_synchronize(); } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: try_resize_lock(Thread* locker) { if (_resize_lock->try_lock()) { @@ -333,8 +318,8 @@ return true; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: lock_resize_lock(Thread* locker) { size_t i = 0; @@ -358,8 +343,8 @@ _invisible_epoch = 0; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: unlock_resize_lock(Thread* locker) { _invisible_epoch = 0; @@ -368,8 +353,8 @@ _resize_lock->unlock(); } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: free_nodes() { // We assume we are not MT during freeing. @@ -384,25 +369,25 @@ } } -template -inline typename ConcurrentHashTable::InternalTable* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::InternalTable* +ConcurrentHashTable:: get_table() const { return OrderAccess::load_acquire(&_table); } -template -inline typename ConcurrentHashTable::InternalTable* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::InternalTable* +ConcurrentHashTable:: get_new_table() const { return OrderAccess::load_acquire(&_new_table); } -template -inline typename ConcurrentHashTable::InternalTable* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::InternalTable* +ConcurrentHashTable:: set_table_from_new() { InternalTable* old_table = _table; @@ -416,8 +401,8 @@ return old_table; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: internal_grow_range(Thread* thread, size_t start, size_t stop) { assert(stop <= _table->_size, "Outside backing array"); @@ -456,9 +441,9 @@ } } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: internal_remove(Thread* thread, LOOKUP_FUNC& lookup_f, DELETE_FUNC& delete_f) { Bucket* bucket = get_bucket_locked(thread, lookup_f.get_hash()); @@ -489,9 +474,9 @@ return true; } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: do_bulk_delete_locked_for(Thread* thread, size_t start_idx, size_t stop_idx, EVALUATE_FUNC& eval_f, DELETE_FUNC& del_f, bool is_mt) { @@ -542,9 +527,9 @@ GlobalCounter::critical_section_end(thread, cs_context); } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: delete_in_bucket(Thread* thread, Bucket* bucket, LOOKUP_FUNC& lookup_f) { assert(bucket->is_locked(), "Must be locked."); @@ -579,9 +564,9 @@ } } -template -inline typename ConcurrentHashTable::Bucket* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::Bucket* +ConcurrentHashTable:: get_bucket(uintx hash) const { InternalTable* table = get_table(); @@ -593,9 +578,9 @@ return bucket; } -template -inline typename ConcurrentHashTable::Bucket* -ConcurrentHashTable:: +template +inline typename ConcurrentHashTable::Bucket* +ConcurrentHashTable:: get_bucket_locked(Thread* thread, const uintx hash) { Bucket* 
bucket; @@ -624,10 +609,10 @@ } // Always called within critical section -template +template template -typename ConcurrentHashTable::Node* -ConcurrentHashTable:: +typename ConcurrentHashTable::Node* +ConcurrentHashTable:: get_node(const Bucket* const bucket, LOOKUP_FUNC& lookup_f, bool* have_dead, size_t* loops) const { @@ -650,8 +635,8 @@ return node; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: unzip_bucket(Thread* thread, InternalTable* old_table, InternalTable* new_table, size_t even_index, size_t odd_index) { @@ -708,8 +693,8 @@ return true; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: internal_shrink_prolog(Thread* thread, size_t log2_size) { if (!try_resize_lock(thread)) { @@ -725,8 +710,8 @@ return true; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: internal_shrink_epilog(Thread* thread) { assert(_resize_lock_owner == thread, "Re-size lock not held"); @@ -744,8 +729,8 @@ delete old_table; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: internal_shrink_range(Thread* thread, size_t start, size_t stop) { // The state is also copied here. @@ -781,8 +766,8 @@ } } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: internal_shrink(Thread* thread, size_t log2_size) { if (!internal_shrink_prolog(thread, log2_size)) { @@ -796,8 +781,8 @@ return true; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: internal_grow_prolog(Thread* thread, size_t log2_size) { // This double checking of _size_limit_reached/is_max_size_reached() @@ -825,8 +810,8 @@ return true; } -template -inline void ConcurrentHashTable:: +template +inline void ConcurrentHashTable:: internal_grow_epilog(Thread* thread) { assert(_resize_lock_owner == thread, "Should be locked"); @@ -843,8 +828,8 @@ delete old_table; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: internal_grow(Thread* thread, size_t log2_size) { if (!internal_grow_prolog(thread, log2_size)) { @@ -859,9 +844,9 @@ } // Always called within critical section -template +template template -inline VALUE* ConcurrentHashTable:: +inline typename CONFIG::Value* ConcurrentHashTable:: internal_get(Thread* thread, LOOKUP_FUNC& lookup_f, bool* grow_hint) { bool clean = false; @@ -880,9 +865,9 @@ return ret; } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: internal_insert(Thread* thread, LOOKUP_FUNC& lookup_f, const VALUE& value, bool* grow_hint, bool* clean_hint) { @@ -945,9 +930,9 @@ return ret; } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: visit_nodes(Bucket* bucket, FUNC& visitor_f) { Node* current_node = bucket->first(); @@ -960,9 +945,9 @@ return true; } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: do_scan_locked(Thread* thread, FUNC& scan_f) { assert(_resize_lock_owner == thread, "Re-size lock not held"); @@ -977,9 +962,9 @@ } /* ends critical section */ } -template +template template -inline size_t ConcurrentHashTable:: +inline size_t ConcurrentHashTable:: delete_check_nodes(Bucket* bucket, EVALUATE_FUNC& eval_f, size_t num_del, Node** ndel) { @@ -1004,8 +989,8 @@ } // Constructor -template -inline ConcurrentHashTable:: +template +inline ConcurrentHashTable:: ConcurrentHashTable(size_t log2size, size_t 
log2size_limit, size_t grow_hint) : _new_table(NULL), _log2_size_limit(log2size_limit), _log2_start_size(log2size), _grow_hint(grow_hint), @@ -1021,8 +1006,8 @@ _size_limit_reached = _table->_log2_size == _log2_size_limit; } -template -inline ConcurrentHashTable:: +template +inline ConcurrentHashTable:: ~ConcurrentHashTable() { delete _resize_lock; @@ -1030,16 +1015,16 @@ delete _table; } -template -inline size_t ConcurrentHashTable:: +template +inline size_t ConcurrentHashTable:: get_size_log2(Thread* thread) { ScopedCS cs(thread, this); return _table->_log2_size; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: shrink(Thread* thread, size_t size_limit_log2) { size_t tmp = size_limit_log2 == 0 ? _log2_start_size : size_limit_log2; @@ -1047,17 +1032,17 @@ return ret; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: grow(Thread* thread, size_t size_limit_log2) { size_t tmp = size_limit_log2 == 0 ? _log2_size_limit : size_limit_log2; return internal_grow(thread, tmp); } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: get(Thread* thread, LOOKUP_FUNC& lookup_f, FOUND_FUNC& found_f, bool* grow_hint) { bool ret = false; @@ -1070,8 +1055,8 @@ return ret; } -template -inline bool ConcurrentHashTable:: +template +inline bool ConcurrentHashTable:: unsafe_insert(const VALUE& value) { bool dead_hash = false; size_t hash = CONFIG::get_hash(value, &dead_hash); @@ -1090,9 +1075,9 @@ return true; } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: try_scan(Thread* thread, SCAN_FUNC& scan_f) { if (!try_resize_lock(thread)) { @@ -1103,9 +1088,9 @@ return true; } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: do_scan(Thread* thread, SCAN_FUNC& scan_f) { assert(!SafepointSynchronize::is_at_safepoint(), @@ -1117,9 +1102,9 @@ assert(_resize_lock_owner != thread, "Re-size lock held"); } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: do_safepoint_scan(SCAN_FUNC& scan_f) { // We only allow this method to be used during a safepoint. 
@@ -1160,9 +1145,9 @@ } } -template +template template -inline bool ConcurrentHashTable:: +inline bool ConcurrentHashTable:: try_bulk_delete(Thread* thread, EVALUATE_FUNC& eval_f, DELETE_FUNC& del_f) { if (!try_resize_lock(thread)) { @@ -1174,9 +1159,9 @@ return true; } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: bulk_delete(Thread* thread, EVALUATE_FUNC& eval_f, DELETE_FUNC& del_f) { assert(!SafepointSynchronize::is_at_safepoint(), @@ -1186,9 +1171,9 @@ unlock_resize_lock(thread); } -template +template template -inline TableStatistics ConcurrentHashTable:: +inline TableStatistics ConcurrentHashTable:: statistics_calculate(Thread* thread, VALUE_SIZE_FUNC& vs_f) { NumberSeq summary; @@ -1213,9 +1198,9 @@ return TableStatistics(_stats_rate, summary, literal_bytes, sizeof(Bucket), sizeof(Node)); } -template +template template -inline TableStatistics ConcurrentHashTable:: +inline TableStatistics ConcurrentHashTable:: statistics_get(Thread* thread, VALUE_SIZE_FUNC& vs_f, TableStatistics old) { if (!try_resize_lock(thread)) { @@ -1228,9 +1213,9 @@ return ts; } -template +template template -inline void ConcurrentHashTable:: +inline void ConcurrentHashTable:: statistics_to(Thread* thread, VALUE_SIZE_FUNC& vs_f, outputStream* st, const char* table_name) { @@ -1245,9 +1230,9 @@ ts.print(st, table_name); } -template -inline bool ConcurrentHashTable:: - try_move_nodes_to(Thread* thread, ConcurrentHashTable* to_cht) +template +inline bool ConcurrentHashTable:: + try_move_nodes_to(Thread* thread, ConcurrentHashTable* to_cht) { if (!try_resize_lock(thread)) { return false; diff -r 4a31db8d42bd -r dd706e28e6cc src/hotspot/share/utilities/concurrentHashTableTasks.inline.hpp --- a/src/hotspot/share/utilities/concurrentHashTableTasks.inline.hpp Thu Jun 27 17:44:18 2019 -0400 +++ b/src/hotspot/share/utilities/concurrentHashTableTasks.inline.hpp Thu Jun 27 19:14:42 2019 -0400 @@ -32,10 +32,10 @@ // operations, which they are serialized with each other. // Base class for pause and/or parallel bulk operations. -template -class ConcurrentHashTable::BucketsOperation { +template +class ConcurrentHashTable::BucketsOperation { protected: - ConcurrentHashTable* _cht; + ConcurrentHashTable* _cht; // Default size of _task_size_log2 static const size_t DEFAULT_TASK_SIZE_LOG2 = 12; @@ -47,7 +47,7 @@ size_t _size_log2; // Table size. bool _is_mt; - BucketsOperation(ConcurrentHashTable* cht, bool is_mt = false) + BucketsOperation(ConcurrentHashTable* cht, bool is_mt = false) : _cht(cht), _next_to_claim(0), _task_size_log2(DEFAULT_TASK_SIZE_LOG2), _stop_task(0), _size_log2(0), _is_mt(is_mt) {} @@ -116,12 +116,12 @@ }; // For doing pausable/parallel bulk delete. -template -class ConcurrentHashTable::BulkDeleteTask : +template +class ConcurrentHashTable::BulkDeleteTask : public BucketsOperation { public: - BulkDeleteTask(ConcurrentHashTable* cht, bool is_mt = false) + BulkDeleteTask(ConcurrentHashTable* cht, bool is_mt = false) : BucketsOperation(cht, is_mt) { } // Before start prepare must be called. @@ -160,12 +160,12 @@ } }; -template -class ConcurrentHashTable::GrowTask : +template +class ConcurrentHashTable::GrowTask : public BucketsOperation { public: - GrowTask(ConcurrentHashTable* cht) : BucketsOperation(cht) { + GrowTask(ConcurrentHashTable* cht) : BucketsOperation(cht) { } // Before start prepare must be called. 
bool prepare(Thread* thread) { diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/aix/native/libjli/java_md_aix.c --- a/src/java.base/aix/native/libjli/java_md_aix.c Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/aix/native/libjli/java_md_aix.c Thu Jun 27 19:14:42 2019 -0400 @@ -27,7 +27,7 @@ #include "java_md_aix.h" -static unsigned char dladdr_buffer[0x4000]; +static unsigned char dladdr_buffer[0x8000]; static int fill_dll_info(void) { return loadquery(L_GETINFO, dladdr_buffer, sizeof(dladdr_buffer)); diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/com/sun/crypto/provider/PBES1Core.java --- a/src/java.base/share/classes/com/sun/crypto/provider/PBES1Core.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/com/sun/crypto/provider/PBES1Core.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -268,17 +268,20 @@ if (algo.equals("DES")) { // P || S (password concatenated with salt) - byte[] concat = new byte[Math.addExact(passwdBytes.length, salt.length)]; - System.arraycopy(passwdBytes, 0, concat, 0, passwdBytes.length); - System.arraycopy(salt, 0, concat, passwdBytes.length, salt.length); - - // digest P || S with c iterations - byte[] toBeHashed = concat; - for (int i = 0; i < iCount; i++) { + md.update(passwdBytes); + md.update(salt); + // digest P || S with iCount iterations + // first iteration + byte[] toBeHashed = md.digest(); // this resets the digest + // remaining (iCount - 1) iterations + for (int i = 1; i < iCount; ++i) { md.update(toBeHashed); - toBeHashed = md.digest(); // this resets the digest + try { + md.digest(toBeHashed, 0, toBeHashed.length); + } catch (DigestException e) { + throw new ProviderException("Internal error", e); + } } - Arrays.fill(concat, (byte)0x00); result = toBeHashed; } else if (algo.equals("DESede")) { // if the 2 salt halves are the same, invert one of them @@ -305,13 +308,19 @@ result = new byte[DESedeKeySpec.DES_EDE_KEY_LEN + DESConstants.DES_BLOCK_SIZE]; for (i = 0; i < 2; i++) { - toBeHashed = new byte[salt.length/2]; - System.arraycopy(salt, i*(salt.length/2), toBeHashed, 0, - toBeHashed.length); - for (int j=0; j < iCount; j++) { + // first iteration + md.update(salt, i * (salt.length / 2), salt.length / 2); + md.update(passwdBytes); + toBeHashed = md.digest(); + // remaining (iCount - 1) iterations + for (int j = 1; j < iCount; ++j) { md.update(toBeHashed); md.update(passwdBytes); - toBeHashed = md.digest(); + try { + md.digest(toBeHashed, 0, toBeHashed.length); + } catch (DigestException e) { + throw new ProviderException("Internal error", e); + } } System.arraycopy(toBeHashed, 0, result, i*16, toBeHashed.length); diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/lang/constant/MethodTypeDescImpl.java --- a/src/java.base/share/classes/java/lang/constant/MethodTypeDescImpl.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/lang/constant/MethodTypeDescImpl.java Thu Jun 27 19:14:42 2019 -0400 @@ -131,8 +131,14 @@ } @Override - public MethodType resolveConstantDesc(MethodHandles.Lookup lookup) { - return MethodType.fromMethodDescriptorString(descriptorString(), lookup.lookupClass().getClassLoader()); + public MethodType resolveConstantDesc(MethodHandles.Lookup lookup) 
throws ReflectiveOperationException { + MethodType mtype = MethodType.fromMethodDescriptorString(descriptorString(), lookup.lookupClass().getClassLoader()); + // let's check that the lookup has access to all the types in the method type + lookup.accessClass(mtype.returnType()); + for (Class paramType: mtype.parameterArray()) { + lookup.accessClass(paramType); + } + return mtype; } /** diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/net/DatagramSocketImpl.java --- a/src/java.base/share/classes/java/net/DatagramSocketImpl.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/net/DatagramSocketImpl.java Thu Jun 27 19:14:42 2019 -0400 @@ -268,7 +268,7 @@ * * @implSpec * The default implementation of this method first checks that the given - * socket option {code name} is not null, then throws {@code + * socket option {@code name} is not null, then throws {@code * UnsupportedOperationException}. Subclasses should override this method * with an appropriate implementation. * @@ -296,7 +296,7 @@ * * @implSpec * The default implementation of this method first checks that the given - * socket option {code name} is not null, then throws {@code + * socket option {@code name} is not null, then throws {@code * UnsupportedOperationException}. Subclasses should override this method * with an appropriate implementation. * diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/net/URLStreamHandler.java --- a/src/java.base/share/classes/java/net/URLStreamHandler.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/net/URLStreamHandler.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1995, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -81,7 +81,7 @@ * * @implSpec * The default implementation of this method first checks that the given - * {code URL} and {code Proxy} are not null, then throws {@code + * {@code URL} and {@code Proxy} are not null, then throws {@code * UnsupportedOperationException}. Subclasses should override this method * with an appropriate implementation. * diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/nio/MappedByteBuffer.java --- a/src/java.base/share/classes/java/nio/MappedByteBuffer.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/nio/MappedByteBuffer.java Thu Jun 27 19:14:42 2019 -0400 @@ -235,8 +235,11 @@ * is made. * *

If this buffer was not mapped in read/write mode ({@link - * java.nio.channels.FileChannel.MapMode#READ_WRITE}) then invoking this - * method has no effect.

+ * java.nio.channels.FileChannel.MapMode#READ_WRITE}) then + * invoking this method may have no effect. In particular, the + * method has no effect for buffers mapped in read-only or private + * mapping modes. This method may or may not have an effect for + * implementation-specific mapping modes.

* * @return This buffer */ @@ -271,7 +274,10 @@ * *

If this buffer was not mapped in read/write mode ({@link * java.nio.channels.FileChannel.MapMode#READ_WRITE}) then - * invoking this method has no effect.

+ * invoking this method may have no effect. In particular, the + * method has no effect for buffers mapped in read-only or private + * mapping modes. This method may or may not have an effect for + * implementation-specific mapping modes.

* * @param index * The index of the first byte in the buffer region that is diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/util/DoubleSummaryStatistics.java --- a/src/java.base/share/classes/java/util/DoubleSummaryStatistics.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/util/DoubleSummaryStatistics.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -59,6 +59,8 @@ * implementation of {@link java.util.stream.Stream#collect Stream.collect()} * provides the necessary partitioning, isolation, and merging of results for * safe and efficient parallel execution. + * + *

This implementation does not check for overflow of the count. * @since 1.8 */ public class DoubleSummaryStatistics implements DoubleConsumer { diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/util/IntSummaryStatistics.java --- a/src/java.base/share/classes/java/util/IntSummaryStatistics.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/util/IntSummaryStatistics.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -59,7 +59,7 @@ * provides the necessary partitioning, isolation, and merging of results for * safe and efficient parallel execution. * - *

This implementation does not check for overflow of the sum. + *

This implementation does not check for overflow of the count or the sum. * @since 1.8 */ public class IntSummaryStatistics implements IntConsumer { diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/util/LongSummaryStatistics.java --- a/src/java.base/share/classes/java/util/LongSummaryStatistics.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/util/LongSummaryStatistics.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,7 +60,7 @@ * provides the necessary partitioning, isolation, and merging of results for * safe and efficient parallel execution. * - *

This implementation does not check for overflow of the sum. + *

This implementation does not check for overflow of the count or the sum. * @since 1.8 */ public class LongSummaryStatistics implements LongConsumer, IntConsumer { diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/java/util/regex/Pattern.java --- a/src/java.base/share/classes/java/util/regex/Pattern.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/java/util/regex/Pattern.java Thu Jun 27 19:14:42 2019 -0400 @@ -357,11 +357,11 @@ * d m s * u x U * on - off - * (?idmsux-idmsux:X{@code )}   + * (?idmsuxU-idmsuxU:X{@code )}   * X, as a non-capturing group with the * given flags i d * m s u - * x on - off + * x U on - off * {@code (?=}X{@code )} * X, via zero-width positive lookahead * {@code (?!}X{@code )} diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/javax/net/ssl/KeyManagerFactory.java --- a/src/java.base/share/classes/javax/net/ssl/KeyManagerFactory.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/javax/net/ssl/KeyManagerFactory.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -127,9 +127,10 @@ * * @param algorithm the standard name of the requested algorithm. * See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#keymanagerfactory-algorithms"> + * KeyManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @return the new {@code KeyManagerFactory} object * @@ -165,9 +166,10 @@ * @param algorithm the standard name of the requested algorithm. * See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#keymanagerfactory-algorithms"> + * KeyManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @param provider the name of the provider. * @@ -209,9 +211,10 @@ * * @param algorithm the standard name of the requested algorithm. * See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#keymanagerfactory-algorithms"> + * KeyManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @param provider an instance of the provider. * diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/javax/net/ssl/TrustManagerFactory.java --- a/src/java.base/share/classes/javax/net/ssl/TrustManagerFactory.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/javax/net/ssl/TrustManagerFactory.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
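The KeyManagerFactory hunks above, and the TrustManagerFactory hunks that follow, repoint the algorithm-name references at the corresponding sections of the Java Security Standard Algorithm Names specification. As a usage sketch only (the keystore path keystore.p12, the password, and the class name are hypothetical, not taken from the patch):

import java.io.FileInputStream;
import java.security.KeyStore;
import javax.net.ssl.KeyManagerFactory;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManagerFactory;

public class SslFactorySketch {
    public static void main(String[] args) throws Exception {
        char[] password = "changeit".toCharArray();                      // hypothetical password
        KeyStore ks = KeyStore.getInstance("PKCS12");
        try (FileInputStream in = new FileInputStream("keystore.p12")) { // hypothetical path
            ks.load(in, password);
        }

        // getDefaultAlgorithm() avoids hard-coding a standard algorithm name
        // such as "SunX509" or "PKIX" from the referenced specification.
        KeyManagerFactory kmf =
            KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm());
        kmf.init(ks, password);

        TrustManagerFactory tmf =
            TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
        tmf.init(ks);

        SSLContext ctx = SSLContext.getInstance("TLS");
        ctx.init(kmf.getKeyManagers(), tmf.getTrustManagers(), null);
    }
}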
* * This code is free software; you can redistribute it and/or modify it @@ -141,9 +141,10 @@ * * @param algorithm the standard name of the requested trust management * algorithm. See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#trustmanagerfactory-algorithms"> + * TrustManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @return the new {@code TrustManagerFactory} object * @@ -179,9 +180,10 @@ * * @param algorithm the standard name of the requested trust management * algorithm. See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#trustmanagerfactory-algorithms"> + * TrustManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @param provider the name of the provider. * @@ -223,9 +225,10 @@ * * @param algorithm the standard name of the requested trust management * algorithm. See the - * Java Security Standard Algorithm Names document - * for information about standard algorithm names. + * "{@docRoot}/../specs/security/standard-names.html#trustmanagerfactory-algorithms"> + * TrustManagerFactory section in the Java Security Standard + * Algorithm Names Specification for information about standard + * algorithm names. * * @param provider an instance of the provider. * diff -r 4a31db8d42bd -r dd706e28e6cc src/java.base/share/classes/sun/security/util/CurveDB.java --- a/src/java.base/share/classes/sun/security/util/CurveDB.java Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.base/share/classes/sun/security/util/CurveDB.java Thu Jun 27 19:14:42 2019 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -68,7 +68,7 @@ return spec; } - return nameMap.get(name); + return nameMap.get(name.toLowerCase(Locale.ENGLISH)); } // Return EC parameters for the specified field size. 
If there are known @@ -151,7 +151,8 @@ String[] commonNames = nameSplitPattern.split(name); for (String commonName : commonNames) { - if (nameMap.put(commonName.trim(), params) != null) { + if (nameMap.put(commonName.trim().toLowerCase(Locale.ENGLISH), + params) != null) { throw new RuntimeException("Duplication name: " + commonName); } } diff -r 4a31db8d42bd -r dd706e28e6cc src/java.desktop/aix/native/libawt/porting_aix.c --- a/src/java.desktop/aix/native/libawt/porting_aix.c Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.desktop/aix/native/libawt/porting_aix.c Thu Jun 27 19:14:42 2019 -0400 @@ -30,7 +30,7 @@ #include "porting_aix.h" -static unsigned char dladdr_buffer[0x4000]; +static unsigned char dladdr_buffer[0x8000]; static void fill_dll_info(void) { int rc = loadquery(L_GETINFO,dladdr_buffer, sizeof(dladdr_buffer)); diff -r 4a31db8d42bd -r dd706e28e6cc src/java.desktop/share/classes/javax/swing/plaf/nimbus/doc-files/properties.html --- a/src/java.desktop/share/classes/javax/swing/plaf/nimbus/doc-files/properties.html Thu Jun 27 17:44:18 2019 -0400 +++ b/src/java.desktop/share/classes/javax/swing/plaf/nimbus/doc-files/properties.html Thu Jun 27 19:14:42 2019 -0400 @@ -2,10 +2,10 @@ - Nimbus colors + Colors Used in Nimbus Look and Feel
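The CurveDB hunks above normalize curve names to lower case with Locale.ENGLISH both when the name map is populated and when it is queried, making lookups case-insensitive. Since CurveDB is an internal sun.security.util class, the sketch below illustrates the same normalization pattern with a hypothetical CurveRegistry class; the sample names and OID are illustrative only:

import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical stand-in for an internal name registry; it only demonstrates the
// locale-pinned lower-casing applied on both put and get.
public class CurveRegistry {
    private final Map<String, String> nameMap = new ConcurrentHashMap<>();

    public void add(String commonNames, String oid) {
        for (String name : commonNames.split(",")) {
            // Locale.ENGLISH keeps the normalization independent of the default
            // locale (avoiding, e.g., the Turkish dotless-i problem).
            if (nameMap.put(name.trim().toLowerCase(Locale.ENGLISH), oid) != null) {
                throw new IllegalStateException("Duplicate name: " + name);
            }
        }
    }

    public String lookup(String name) {
        return nameMap.get(name.toLowerCase(Locale.ENGLISH));
    }

    public static void main(String[] args) {
        CurveRegistry db = new CurveRegistry();
        db.add("secp256r1, NIST P-256, X9.62 prime256v1", "1.2.840.10045.3.1.7");
        System.out.println(db.lookup("SECP256R1"));   // both spellings resolve
        System.out.println(db.lookup("nist p-256"));  // to the same entry
    }
}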