# HG changeset patch
# User tschatzl
# Date 1474018427 -7200
# Node ID e567be0973152935bc98fe1f90dcf8a02b57dc86
# Parent 3869072fc2e1c23c855f70af9e23fc6796d5648e
8157952: Parallelize Memory Pretouch
Summary: Use multiple threads to pre-touch memory for -XX:+AlwaysPreTouch, exploiting more of the available memory bandwidth
Reviewed-by: jmasa, sangheki

diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Fri Sep 16 11:33:47 2016 +0200
@@ -1479,7 +1479,7 @@
                               "Capacity: " SIZE_FORMAT "B occupancy: " SIZE_FORMAT "B min_desired_capacity: " SIZE_FORMAT "B (" UINTX_FORMAT " %%)",
                               capacity_after_gc, used_after_gc, minimum_desired_capacity, MinHeapFreeRatio);
 
-    expand(expand_bytes);
+    expand(expand_bytes, _workers);
 
     // No expansion, now see if we want to shrink
   } else if (capacity_after_gc > maximum_desired_capacity) {
@@ -1599,7 +1599,7 @@
                             word_size * HeapWordSize);
 
 
-  if (expand(expand_bytes)) {
+  if (expand(expand_bytes, _workers)) {
     _hrm.verify_optional();
     _verifier->verify_region_sets_optional();
     return attempt_allocation_at_safepoint(word_size,
@@ -1609,7 +1609,7 @@
   return NULL;
 }
 
-bool G1CollectedHeap::expand(size_t expand_bytes, double* expand_time_ms) {
+bool G1CollectedHeap::expand(size_t expand_bytes, WorkGang* pretouch_workers, double* expand_time_ms) {
   size_t aligned_expand_bytes = ReservedSpace::page_align_size_up(expand_bytes);
   aligned_expand_bytes = align_size_up(aligned_expand_bytes,
                                        HeapRegion::GrainBytes);
@@ -1626,7 +1626,7 @@
   uint regions_to_expand = (uint)(aligned_expand_bytes / HeapRegion::GrainBytes);
   assert(regions_to_expand > 0, "Must expand by at least one region");
 
-  uint expanded_by = _hrm.expand_by(regions_to_expand);
+  uint expanded_by = _hrm.expand_by(regions_to_expand, pretouch_workers);
   if (expand_time_ms != NULL) {
     *expand_time_ms = (os::elapsedTime() - expand_heap_start_time_sec) * MILLIUNITS;
   }
@@ -1927,7 +1927,7 @@
   _cmThread = _cm->cmThread();
 
   // Now expand into the initial heap size.
-  if (!expand(init_byte_size)) {
+  if (!expand(init_byte_size, _workers)) {
     vm_shutdown_during_initialization("Failed to allocate initial heap.");
     return JNI_ENOMEM;
   }
@@ -3240,7 +3240,7 @@
       // No need for an ergo logging here,
       // expansion_amount() does this when it returns a value > 0.
       double expand_ms;
-      if (!expand(expand_bytes, &expand_ms)) {
+      if (!expand(expand_bytes, _workers, &expand_ms)) {
        // We failed to expand the heap. Cannot do anything about it.
       }
       g1_policy()->phase_times()->record_expand_heap_time(expand_ms);
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -557,7 +557,7 @@
   // Returns true if the heap was expanded by the requested amount;
   // false otherwise.
   // (Rounds up to a HeapRegion boundary.)
-  bool expand(size_t expand_bytes, double* expand_time_ms = NULL);
+  bool expand(size_t expand_bytes, WorkGang* pretouch_workers = NULL, double* expand_time_ms = NULL);
 
   // Returns the PLAB statistics for a given destination.
  inline G1EvacStats* alloc_buffer_stats(InCSetState dest);
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.cpp
--- a/hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.cpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.cpp	Fri Sep 16 11:33:47 2016 +0200
@@ -24,8 +24,10 @@
 
 #include "precompiled.hpp"
 #include "gc/g1/g1PageBasedVirtualSpace.hpp"
+#include "gc/shared/workgroup.hpp"
 #include "oops/markOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/os.inline.hpp"
 #include "services/memTracker.hpp"
 #include "utilities/bitMap.inline.hpp"
@@ -177,7 +179,7 @@
   guarantee(start_page < end_page,
             "Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page);
 
-  os::pretouch_memory(page_start(start_page), bounded_end_addr(end_page));
+  os::pretouch_memory(page_start(start_page), bounded_end_addr(end_page), _page_size);
 }
 
 bool G1PageBasedVirtualSpace::commit(size_t start_page, size_t size_in_pages) {
@@ -198,9 +200,6 @@
   }
   _committed.set_range(start_page, end_page);
 
-  if (AlwaysPreTouch) {
-    pretouch_internal(start_page, end_page);
-  }
   return zero_filled;
 }
 
@@ -227,6 +226,53 @@
   _committed.clear_range(start_page, end_page);
 }
 
+class G1PretouchTask : public AbstractGangTask {
+private:
+  char* volatile _cur_addr;
+  char* const _start_addr;
+  char* const _end_addr;
+  size_t const _page_size;
+public:
+  G1PretouchTask(char* start_address, char* end_address, size_t page_size) :
+    AbstractGangTask("G1 PreTouch",
+                     Universe::is_fully_initialized() ? GCId::current_raw() :
+                                                        // During VM initialization there is
+                                                        // no GC cycle that this task can be
+                                                        // associated with.
+                                                        GCId::undefined()),
+    _cur_addr(start_address),
+    _start_addr(start_address),
+    _end_addr(end_address),
+    _page_size(page_size) {
+  }
+
+  virtual void work(uint worker_id) {
+    size_t const actual_chunk_size = MAX2(chunk_size(), _page_size);
+    while (true) {
+      char* touch_addr = (char*)Atomic::add_ptr((intptr_t)actual_chunk_size, (volatile void*) &_cur_addr) - actual_chunk_size;
+      if (touch_addr < _start_addr || touch_addr >= _end_addr) {
+        break;
+      }
+      char* end_addr = touch_addr + MIN2(actual_chunk_size, pointer_delta(_end_addr, touch_addr, sizeof(char)));
+      os::pretouch_memory(touch_addr, end_addr, _page_size);
+    }
+  }
+
+  static size_t chunk_size() { return PreTouchParallelChunkSize; }
+};
+
+void G1PageBasedVirtualSpace::pretouch(size_t start_page, size_t size_in_pages, WorkGang* pretouch_gang) {
+  guarantee(pretouch_gang != NULL, "No pretouch gang specified.");
+
+  size_t num_chunks = MAX2((size_t)1, size_in_pages * _page_size / MAX2(G1PretouchTask::chunk_size(), _page_size));
+
+  uint num_workers = MIN2((uint)num_chunks, pretouch_gang->active_workers());
+  G1PretouchTask cl(page_start(start_page), bounded_end_addr(start_page + size_in_pages), _page_size);
+  log_debug(gc, heap)("Running %s with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT "B.",
+                      cl.name(), num_workers, num_chunks, size_in_pages * _page_size);
+  pretouch_gang->run_task(&cl, num_workers);
+}
+
 bool G1PageBasedVirtualSpace::contains(const void* p) const {
   return _low_boundary <= (const char*) p && (const char*) p < _high_boundary;
 }
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.hpp
--- a/hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1PageBasedVirtualSpace.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -30,6 +30,8 @@
 #include "memory/virtualspace.hpp"
 #include "utilities/bitMap.hpp"
 
+class WorkGang;
+
 // Virtual space management helper for a virtual space with an OS page allocation
 // granularity.
 // (De-)Allocation requests are always OS page aligned by passing a page index
@@ -117,6 +119,8 @@
   // Uncommit the given area of pages starting at start being size_in_pages large.
   void uncommit(size_t start_page, size_t size_in_pages);
 
+  void pretouch(size_t start_page, size_t size_in_pages, WorkGang* pretouch_gang = NULL);
+
   // Initialize the given reserved space with the given base address and the size
   // actually used.
   // Prefer to commit in page_size chunks.
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.cpp
--- a/hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.cpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.cpp	Fri Sep 16 11:33:47 2016 +0200
@@ -66,8 +66,12 @@
     guarantee(alloc_granularity >= page_size, "allocation granularity smaller than commit granularity");
   }
 
-  virtual void commit_regions(uint start_idx, size_t num_regions) {
-    bool zero_filled = _storage.commit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region);
+  virtual void commit_regions(uint start_idx, size_t num_regions, WorkGang* pretouch_gang) {
+    size_t const start_page = (size_t)start_idx * _pages_per_region;
+    bool zero_filled = _storage.commit(start_page, num_regions * _pages_per_region);
+    if (AlwaysPreTouch) {
+      _storage.pretouch(start_page, num_regions * _pages_per_region, pretouch_gang);
+    }
     _commit_map.set_range(start_idx, start_idx + num_regions);
     fire_on_commit(start_idx, num_regions, zero_filled);
   }
@@ -110,19 +114,38 @@
     _refcounts.initialize((HeapWord*)rs.base(), (HeapWord*)(rs.base() + align_size_up(rs.size(), page_size)), page_size);
   }
 
-  virtual void commit_regions(uint start_idx, size_t num_regions) {
+  virtual void commit_regions(uint start_idx, size_t num_regions, WorkGang* pretouch_gang) {
+    size_t const NoPage = ~(size_t)0;
+
+    size_t first_committed = NoPage;
+    size_t num_committed = 0;
+
+    bool all_zero_filled = true;
+
     for (uint i = start_idx; i < start_idx + num_regions; i++) {
       assert(!_commit_map.at(i), "Trying to commit storage at region %u that is already committed", i);
       size_t idx = region_idx_to_page_idx(i);
       uint old_refcount = _refcounts.get_by_index(idx);
+
       bool zero_filled = false;
       if (old_refcount == 0) {
+        if (first_committed == NoPage) {
+          first_committed = idx;
+          num_committed = 1;
+        } else {
+          num_committed++;
+        }
         zero_filled = _storage.commit(idx, 1);
       }
+      all_zero_filled &= zero_filled;
+
       _refcounts.set_by_index(idx, old_refcount + 1);
       _commit_map.set_bit(i);
-      fire_on_commit(i, 1, zero_filled);
     }
+    if (AlwaysPreTouch && num_committed > 0) {
+      _storage.pretouch(first_committed, num_committed, pretouch_gang);
+    }
+    fire_on_commit(start_idx, num_regions, all_zero_filled);
   }
 
   virtual void uncommit_regions(uint start_idx, size_t num_regions) {
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.hpp
--- a/hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RegionToSpaceMapper.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -29,6 +29,8 @@
 #include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
 
+class WorkGang;
+
 class G1MappingChangedListener VALUE_OBJ_CLASS_SPEC {
  public:
   // Fired after commit of the memory, i.e. the memory this listener is registered
@@ -68,7 +70,7 @@
     return _commit_map.at(idx);
   }
 
-  virtual void commit_regions(uint start_idx, size_t num_regions = 1) = 0;
+  virtual void commit_regions(uint start_idx, size_t num_regions = 1, WorkGang* pretouch_workers = NULL) = 0;
   virtual void uncommit_regions(uint start_idx, size_t num_regions = 1) = 0;
 
   // Creates an appropriate G1RegionToSpaceMapper for the given parameters.
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/heapRegionManager.cpp
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Fri Sep 16 11:33:47 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,22 +72,22 @@
   return g1h->new_heap_region(hrm_index, mr);
 }
 
-void HeapRegionManager::commit_regions(uint index, size_t num_regions) {
+void HeapRegionManager::commit_regions(uint index, size_t num_regions, WorkGang* pretouch_gang) {
   guarantee(num_regions > 0, "Must commit more than zero regions");
   guarantee(_num_committed + num_regions <= max_length(), "Cannot commit more than the maximum amount of regions");
 
   _num_committed += (uint)num_regions;
 
-  _heap_mapper->commit_regions(index, num_regions);
+  _heap_mapper->commit_regions(index, num_regions, pretouch_gang);
 
   // Also commit auxiliary data
-  _prev_bitmap_mapper->commit_regions(index, num_regions);
-  _next_bitmap_mapper->commit_regions(index, num_regions);
+  _prev_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang);
+  _next_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang);
 
-  _bot_mapper->commit_regions(index, num_regions);
-  _cardtable_mapper->commit_regions(index, num_regions);
+  _bot_mapper->commit_regions(index, num_regions, pretouch_gang);
+  _cardtable_mapper->commit_regions(index, num_regions, pretouch_gang);
 
-  _card_counts_mapper->commit_regions(index, num_regions);
+  _card_counts_mapper->commit_regions(index, num_regions, pretouch_gang);
 }
 
 void HeapRegionManager::uncommit_regions(uint start, size_t num_regions) {
@@ -117,9 +117,9 @@
   _card_counts_mapper->uncommit_regions(start, num_regions);
 }
 
-void HeapRegionManager::make_regions_available(uint start, uint num_regions) {
+void HeapRegionManager::make_regions_available(uint start, uint num_regions, WorkGang* pretouch_gang) {
   guarantee(num_regions > 0, "No point in calling this for zero regions");
-  commit_regions(start, num_regions);
+  commit_regions(start, num_regions, pretouch_gang);
   for (uint i = start; i < start + num_regions; i++) {
     if (_regions.get_by_index(i) == NULL) {
       HeapRegion* new_hr = new_heap_region(i);
@@ -163,11 +163,11 @@
   return MemoryUsage(0, used_sz, committed_sz, committed_sz);
 }
 
-uint HeapRegionManager::expand_by(uint num_regions) {
-  return expand_at(0, num_regions);
+uint HeapRegionManager::expand_by(uint num_regions, WorkGang* pretouch_workers) {
+  return expand_at(0, num_regions, pretouch_workers);
 }
 
-uint HeapRegionManager::expand_at(uint start, uint num_regions) {
+uint HeapRegionManager::expand_at(uint start, uint num_regions, WorkGang* pretouch_workers) {
   if (num_regions == 0) {
     return 0;
   }
@@ -181,7 +181,7 @@
   while (expanded < num_regions &&
          (num_last_found = find_unavailable_from_idx(cur, &idx_last_found)) > 0) {
     uint to_expand = MIN2(num_regions - expanded, num_last_found);
-    make_regions_available(idx_last_found, to_expand);
+    make_regions_available(idx_last_found, to_expand, pretouch_workers);
     expanded += to_expand;
     cur = idx_last_found + num_last_found + 1;
   }
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/g1/heapRegionManager.hpp
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@
 class HeapRegionClosure;
 class HeapRegionClaimer;
 class FreeRegionList;
+class WorkGang;
 
 class G1HeapRegionTable : public G1BiasedMappedArray<HeapRegion*> {
  protected:
@@ -94,10 +95,10 @@
   HeapWord* heap_bottom() const { return _regions.bottom_address_mapped(); }
   HeapWord* heap_end() const {return _regions.end_address_mapped(); }
 
-  void make_regions_available(uint index, uint num_regions = 1);
+  void make_regions_available(uint index, uint num_regions = 1, WorkGang* pretouch_gang = NULL);
 
   // Pass down commit calls to the VirtualSpace.
-  void commit_regions(uint index, size_t num_regions = 1);
+  void commit_regions(uint index, size_t num_regions = 1, WorkGang* pretouch_gang = NULL);
   void uncommit_regions(uint index, size_t num_regions = 1);
 
   // Notify other data structures about change in the heap layout.
@@ -209,12 +210,12 @@
   // HeapRegions, or re-use existing ones. Returns the number of regions the
   // sequence was expanded by. If a HeapRegion allocation fails, the resulting
   // number of regions might be smaller than what's desired.
-  uint expand_by(uint num_regions);
+  uint expand_by(uint num_regions, WorkGang* pretouch_workers = NULL);
 
   // Makes sure that the regions from start to start+num_regions-1 are available
   // for allocation. Returns the number of regions that were committed to achieve
   // this.
-  uint expand_at(uint start, uint num_regions);
+  uint expand_at(uint start, uint num_regions, WorkGang* pretouch_workers = NULL);
 
   // Find a contiguous set of empty regions of length num. Returns the start index of
   // that set, or G1_NO_HRM_INDEX.
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/gc/shared/workgroup.hpp
--- a/hotspot/src/share/vm/gc/shared/workgroup.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -62,7 +62,12 @@
   AbstractGangTask(const char* name) :
     _name(name),
     _gc_id(GCId::current_raw())
-  {}
+  {}
+
+  AbstractGangTask(const char* name, const uint gc_id) :
+    _name(name),
+    _gc_id(gc_id)
+  {}
 
   // The abstract work method.
   // The argument tells you which member of the gang you are.
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -1596,6 +1596,10 @@
   product(bool, AlwaysPreTouch, false,                                      \
           "Force all freshly committed pages to be pre-touched")            \
                                                                             \
+  product(size_t, PreTouchParallelChunkSize, 1 * G,                         \
+          "Per-thread chunk size for parallel memory pre-touch.")           \
+          range(1, SIZE_MAX / 2)                                            \
+                                                                            \
   product_pd(size_t, CMSYoungGenPerWorker,                                  \
           "The maximum size of young gen chosen by default per GC worker "  \
           "thread available")                                               \
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/runtime/os.cpp
--- a/hotspot/src/share/vm/runtime/os.cpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/runtime/os.cpp	Fri Sep 16 11:33:47 2016 +0200
@@ -1705,8 +1705,8 @@
   return res;
 }
 
-void os::pretouch_memory(void* start, void* end) {
-  for (volatile char *p = (char*)start; p < (char*)end; p += os::vm_page_size()) {
+void os::pretouch_memory(void* start, void* end, size_t page_size) {
+  for (volatile char *p = (char*)start; p < (char*)end; p += page_size) {
     *p = 0;
   }
 }
diff -r 3869072fc2e1 -r e567be097315 hotspot/src/share/vm/runtime/os.hpp
--- a/hotspot/src/share/vm/runtime/os.hpp	Thu Sep 15 12:10:43 2016 -0400
+++ b/hotspot/src/share/vm/runtime/os.hpp	Fri Sep 16 11:33:47 2016 +0200
@@ -324,7 +324,7 @@
   // to make the OS back the memory range with actual memory.
   // Current implementation may not touch the last page if unaligned addresses
   // are passed.
-  static void pretouch_memory(void* start, void* end);
+  static void pretouch_memory(void* start, void* end, size_t page_size = vm_page_size());
 
   enum ProtType { MEM_PROT_NONE, MEM_PROT_READ, MEM_PROT_RW, MEM_PROT_RWX };
   static bool protect_memory(char* addr, size_t bytes, ProtType prot,
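
Note on the work distribution in G1PretouchTask above: the range to be pre-touched is split into chunks of PreTouchParallelChunkSize (but at least one page), and workers claim chunks by atomically bumping the shared _cur_addr cursor. Atomic::add_ptr returns the advanced cursor, so subtracting actual_chunk_size yields the start of the claimed chunk; no further synchronization is needed and the workers load-balance automatically. The extra AbstractGangTask constructor exists because pre-touching now also runs during VM initialization, where no current GC id is available. The following standalone C++ sketch mirrors that claiming loop; it is not part of the changeset — it substitutes std::atomic for HotSpot's Atomic::add_ptr, and kPageSize, kChunkSize, pretouch_worker and parallel_pretouch are illustrative names.

  // Standalone sketch (not HotSpot code): parallel pre-touch via an
  // atomically advanced cursor, mirroring G1PretouchTask::work().
  #include <algorithm>
  #include <atomic>
  #include <cstddef>
  #include <thread>
  #include <vector>

  static const size_t kPageSize  = 4096;             // stand-in for _page_size
  static const size_t kChunkSize = size_t(1) << 30;  // 1G, the flag's default

  static void pretouch_worker(char* start, char* end, std::atomic<char*>* cur) {
    const size_t chunk = std::max(kChunkSize, kPageSize);
    while (true) {
      // fetch_add returns the previous cursor value, i.e. the start of the
      // chunk this thread just claimed.
      char* touch = cur->fetch_add((ptrdiff_t)chunk);
      if (touch < start || touch >= end) {
        break;  // the whole range has been claimed
      }
      char* chunk_end = touch + std::min(chunk, (size_t)(end - touch));
      for (volatile char* p = touch; p < chunk_end; p += kPageSize) {
        *p = 0;  // touch one byte per page, as os::pretouch_memory() does
      }
    }
  }

  // Fan the claiming loop out over several threads; this is the role
  // WorkGang::run_task() plays for G1PretouchTask in the patch.
  static void parallel_pretouch(char* start, char* end, unsigned num_threads) {
    std::atomic<char*> cur(start);
    std::vector<std::thread> threads;
    for (unsigned i = 0; i < num_threads; i++) {
      threads.emplace_back(pretouch_worker, start, end, &cur);
    }
    for (std::thread& t : threads) {
      t.join();
    }
  }

As in the patch, the thread count should be capped at the number of chunks (G1PageBasedVirtualSpace::pretouch caps num_workers at num_chunks). With the changeset applied, the parallel path is exercised by running with -XX:+AlwaysPreTouch; -XX:PreTouchParallelChunkSize (default 1G) tunes the per-claim work unit.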