/*
 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
#define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP

#include "classfile/javaClasses.hpp"
#include "gc/g1/g1ConcurrentMarkBitMap.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/taskqueue.hpp"

class G1CollectedHeap;
class G1CMTask;
class G1ConcurrentMark;
class ConcurrentGCTimer;
class G1OldTracer;
class G1SurvivorRegions;

#ifdef _MSC_VER
#pragma warning(push)
// warning C4522: multiple assignment operators specified
#pragma warning(disable:4522)
#endif

// This is a container class for either an oop or a continuation address for
// mark stack entries. Both are pushed onto the mark stack.
class G1TaskQueueEntry VALUE_OBJ_CLASS_SPEC {
private:
  void* _holder;

  static const uintptr_t ArraySliceBit = 1;

  G1TaskQueueEntry(oop obj) : _holder(obj) {
    assert(_holder != NULL, "Not allowed to set NULL task queue element");
  }
  G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { }
public:
  G1TaskQueueEntry(const G1TaskQueueEntry& other) { _holder = other._holder; }
  G1TaskQueueEntry() : _holder(NULL) { }

  static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); }
  static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); }

  G1TaskQueueEntry& operator=(const G1TaskQueueEntry& t) {
    _holder = t._holder;
    return *this;
  }

  volatile G1TaskQueueEntry& operator=(const volatile G1TaskQueueEntry& t) volatile {
    _holder = t._holder;
    return *this;
  }

  oop obj() const {
    assert(!is_array_slice(), "Trying to read array slice " PTR_FORMAT " as oop", p2i(_holder));
    return (oop)_holder;
  }

  HeapWord* slice() const {
    assert(is_array_slice(), "Trying to read oop " PTR_FORMAT " as array slice", p2i(_holder));
    return (HeapWord*)((uintptr_t)_holder & ~ArraySliceBit);
  }

  bool is_oop() const { return !is_array_slice(); }
  bool is_array_slice() const { return ((uintptr_t)_holder & ArraySliceBit) != 0; }
  bool is_null() const { return _holder == NULL; }
};

#ifdef _MSC_VER
#pragma warning(pop)
#endif
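
// Illustrative sketch (not part of this header): HeapWord* addresses are word
// aligned, so the low ArraySliceBit is always free to tag continuation
// addresses. Callers are expected to dispatch on the entry kind before reading
// the payload back:
//
//   G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj);     // untagged oop
//   G1TaskQueueEntry slice = G1TaskQueueEntry::from_slice(addr);  // low bit set
//
//   if (slice.is_array_slice()) {
//     HeapWord* a = slice.slice();  // masks off ArraySliceBit before returning
//   }
//   if (entry.is_oop()) {
//     oop o = entry.obj();          // obj() asserts the tag bit is clear
//   }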

typedef GenericTaskQueue<G1TaskQueueEntry, mtGC> G1CMTaskQueue;
typedef GenericTaskQueueSet<G1CMTaskQueue, mtGC> G1CMTaskQueueSet;

// Closure used by CM during concurrent reference discovery
// and reference processing (during remarking) to determine
// if a particular object is alive. It is primarily used
// to determine if referents of discovered reference objects
// are alive. An instance is also embedded into the
// reference processor as the _is_alive_non_header field.
class G1CMIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1;
public:
  G1CMIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }

  bool do_object_b(oop obj);
};

// Represents the overflow mark stack used by concurrent marking.
//
// Stores oops in a huge buffer in virtual memory that is always fully committed.
// Resizing may only happen during a STW pause when the stack is empty.
//
// Memory is allocated on a "chunk" basis, i.e. a set of oops. For this, the mark
// stack memory is split into evenly sized chunks of oops. Users can only
// add or remove entries on that basis.
// Chunks are filled in increasing address order. Not completely filled chunks
// have a NULL element as a terminating element.
//
// Every chunk has a header containing a single pointer element used for memory
// management. This wastes some space, but is negligible (< .1% with current sizing).
//
// Memory management is done using a mix of tracking a high water-mark indicating
// that all chunks at a lower address are valid chunks, and a singly linked free
// list connecting all empty chunks.
class G1CMMarkStack VALUE_OBJ_CLASS_SPEC {
public:
  // Number of TaskQueueEntries that can fit in a single chunk.
  static const size_t EntriesPerChunk = 1024 - 1 /* One reference for the next pointer */;
private:
  struct TaskQueueEntryChunk {
    TaskQueueEntryChunk* next;
    G1TaskQueueEntry data[EntriesPerChunk];
  };

  size_t _max_chunk_capacity;    // Maximum number of TaskQueueEntryChunk elements on the stack.

  TaskQueueEntryChunk* _base;    // Bottom address of allocated memory area.
  size_t _chunk_capacity;        // Current maximum number of TaskQueueEntryChunk elements.

  char _pad0[DEFAULT_CACHE_LINE_SIZE];
  TaskQueueEntryChunk* volatile _free_list;  // Linked list of free chunks that can be allocated by users.
  char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*)];
  TaskQueueEntryChunk* volatile _chunk_list; // List of chunks currently containing data.
  volatile size_t _chunks_in_chunk_list;
  char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)];

  volatile size_t _hwm;          // High water mark within the reserved space.
  char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)];

  // Allocate a new chunk from the reserved memory, using the high water mark. Returns
  // NULL if out of memory.
  TaskQueueEntryChunk* allocate_new_chunk();

  // Atomically add the given chunk to the list.
  void add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem);
  // Atomically remove and return a chunk from the given list. Returns NULL if the
  // list is empty.
  TaskQueueEntryChunk* remove_chunk_from_list(TaskQueueEntryChunk* volatile* list);

  void add_chunk_to_chunk_list(TaskQueueEntryChunk* elem);
  void add_chunk_to_free_list(TaskQueueEntryChunk* elem);

  TaskQueueEntryChunk* remove_chunk_from_chunk_list();
  TaskQueueEntryChunk* remove_chunk_from_free_list();

  // Resizes the mark stack to the given new capacity. Releases any previous
  // memory if successful.
  bool resize(size_t new_capacity);

public:
  G1CMMarkStack();
  ~G1CMMarkStack();

  // Alignment and minimum capacity of this mark stack in number of oops.
  static size_t capacity_alignment();

  // Allocate and initialize the mark stack with the given number of oops.
  bool initialize(size_t initial_capacity, size_t max_capacity);

  // Pushes the given buffer containing at most EntriesPerChunk elements on the mark
  // stack. If fewer than EntriesPerChunk elements are to be pushed, the array must
  // be terminated with a NULL.
  // Returns whether the buffer contents were successfully pushed to the global mark
  // stack.
  bool par_push_chunk(G1TaskQueueEntry* buffer);

  // Pops a chunk from this mark stack, copying its contents into the given buffer. This
  // chunk may contain up to EntriesPerChunk elements. If there are fewer, the last
  // element in the array is a NULL pointer.
  bool par_pop_chunk(G1TaskQueueEntry* buffer);

  // Return whether the chunk list is empty. Racy due to unsynchronized access to
  // _chunk_list.
  bool is_empty() const { return _chunk_list == NULL; }

  size_t capacity() const { return _chunk_capacity; }

  // Expand the stack, typically in response to an overflow condition.
  void expand();

  // Return the approximate number of oops on this mark stack. Racy due to
  // unsynchronized access to _chunks_in_chunk_list.
  size_t size() const { return _chunks_in_chunk_list * EntriesPerChunk; }

  void set_empty();

  // Apply Fn to every oop on the mark stack. The mark stack must not
  // be modified while iterating.
  template<typename Fn> void iterate(Fn fn) const PRODUCT_RETURN;
};
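
// Illustrative sketch (not part of this header), assuming a hypothetical
// fill_from_local_queue() helper: transfers happen a whole chunk at a time,
// and a partially filled buffer must carry the NULL terminator described above.
//
//   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
//   size_t n = fill_from_local_queue(buffer);          // n <= EntriesPerChunk
//   if (n < G1CMMarkStack::EntriesPerChunk) {
//     buffer[n] = G1TaskQueueEntry();                  // NULL-terminate short chunk
//   }
//   if (!stack->par_push_chunk(buffer)) {
//     // Global mark stack is full; callers typically flag overflow here.
//   }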

// Root Regions are regions that are not empty at the beginning of a
// marking cycle and which we might collect during an evacuation pause
// while the cycle is active. Given that, during evacuation pauses, we
// do not copy objects that are explicitly marked, what we have to do
// for the root regions is to scan them and mark all objects reachable
// from them. According to the SATB assumptions, we only need to visit
// each object once during marking. So, as long as we finish this scan
// before the next evacuation pause, we can copy the objects from the
// root regions without having to mark them or do anything else to them.
//
// Currently, we only support root region scanning once (at the start
// of the marking cycle) and the root regions are all the survivor
// regions populated during the initial-mark pause.
class G1CMRootRegions VALUE_OBJ_CLASS_SPEC {
private:
  const G1SurvivorRegions* _survivors;
  G1ConcurrentMark*        _cm;

  volatile bool            _scan_in_progress;
  volatile bool            _should_abort;
  volatile int             _claimed_survivor_index;

  void notify_scan_done();

public:
  G1CMRootRegions();
  // We actually do most of the initialization in this method.
  void init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm);

  // Reset the claiming / scanning of the root regions.
  void prepare_for_scan();

  // Forces get_next() to return NULL so that the iteration aborts early.
  void abort() { _should_abort = true; }

  // Return true if the CM thread is actively scanning root regions,
  // false otherwise.
  bool scan_in_progress() { return _scan_in_progress; }

  // Claim the next root region to scan atomically, or return NULL if
  // all have been claimed.
  HeapRegion* claim_next();

  // The number of root regions to scan.
  uint num_root_regions() const;

  void cancel_scan();

  // Flag that we're done with root region scanning and notify anyone
  // who's waiting on it. If aborted is false, assume that all regions
  // have been claimed.
  void scan_finished();

  // If CM threads are still scanning root regions, wait until they
  // are done. Return true if we had to wait, false otherwise.
  bool wait_until_scan_finished();
};
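
// Illustrative sketch (not part of this header): a scanning worker claims root
// regions until claim_next() returns NULL, which covers both "all claimed" and
// an abort() requested by a starting evacuation pause. Scanning each claimed
// region is left as a placeholder; scan_finished() is then called once all
// workers are done so that waiters are released.
//
//   HeapRegion* hr = root_regions->claim_next();
//   while (hr != NULL) {
//     // ... mark everything reachable from objects in hr ...
//     hr = root_regions->claim_next();
//   }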

class ConcurrentMarkThread;

class G1ConcurrentMark: public CHeapObj<mtGC> {
  friend class ConcurrentMarkThread;
  friend class G1ParNoteEndTask;
  friend class G1VerifyLiveDataClosure;
  friend class G1CMRefProcTaskProxy;
  friend class G1CMRefProcTaskExecutor;
  friend class G1CMKeepAliveAndDrainClosure;
  friend class G1CMDrainMarkingStackClosure;
  friend class G1CMBitMapClosure;
  friend class G1CMConcurrentMarkingTask;
  friend class G1CMRemarkTask;
  friend class G1CMTask;

protected:
  ConcurrentMarkThread* _cmThread;   // The thread doing the work
  G1CollectedHeap*      _g1h;        // The heap
  uint                  _parallel_marking_threads;     // The number of marking
                                                       // threads we're using
  uint                  _max_parallel_marking_threads; // Max number of marking
                                                       // threads we'll ever use
  double                _sleep_factor; // How much we have to sleep, with
                                       // respect to the work we just did, to
                                       // meet the marking overhead goal
  double                _marking_task_overhead; // Marking target overhead for
                                                // a single task

  FreeRegionList        _cleanup_list;

  // Concurrent marking support structures
  G1CMBitMap            _markBitMap1;
  G1CMBitMap            _markBitMap2;
  G1CMBitMap*           _prevMarkBitMap; // Completed mark bitmap
  G1CMBitMap*           _nextMarkBitMap; // Under-construction mark bitmap

  // Heap bounds
  HeapWord*             _heap_start;
  HeapWord*             _heap_end;

  // Root region tracking and claiming
  G1CMRootRegions       _root_regions;

  // For gray objects
  G1CMMarkStack         _global_mark_stack; // Grey objects behind global finger
  HeapWord* volatile    _finger;            // The global finger, region aligned,
                                            // always points to the end of the
                                            // last claimed region

  // Marking tasks
  uint                  _max_worker_id; // Maximum worker id
  uint                  _active_tasks;  // Number of tasks currently active
  G1CMTask**            _tasks;         // Task array (max_worker_id length)
  G1CMTaskQueueSet*     _task_queues;   // Task queue set
  ParallelTaskTerminator _terminator;   // For termination

  // Two sync barriers that are used to synchronize tasks when an
  // overflow occurs. The algorithm is the following. All tasks enter
  // the first one to ensure that they have all stopped manipulating
  // the global data structures. After they exit it, they re-initialize
  // their data structures and task 0 re-initializes the global data
  // structures. Then, they enter the second sync barrier. This
  // ensures that no task starts doing work before all data
  // structures (local and global) have been re-initialized. When they
  // exit it, they are free to start working again.
  WorkGangBarrierSync   _first_overflow_barrier_sync;
  WorkGangBarrierSync   _second_overflow_barrier_sync;
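
  // Illustrative sketch (not part of this header) of the two-barrier protocol
  // described above; enter_first/second_sync_barrier() below wrap these members:
  //
  //   enter_first_sync_barrier(worker_id);   // everyone stops touching global state
  //   // all tasks reset local structures; task 0 also resets the global ones
  //   enter_second_sync_barrier(worker_id);  // nobody restarts until resets finish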

  // This is set by any task, when an overflow on the global data
  // structures is detected
  volatile bool         _has_overflown;
  // True: marking is concurrent, false: we're in remark
  volatile bool         _concurrent;
  // Set at the end of a Full GC so that marking aborts
  volatile bool         _has_aborted;

  // Used when remark aborts due to an overflow to indicate that
  // another concurrent marking phase should start
  volatile bool         _restart_for_overflow;

  // This is true from the very start of concurrent marking until the
  // point when all the tasks complete their work. It is really used
  // to determine the points between the end of concurrent marking and
  // time of remark.
  volatile bool         _concurrent_marking_in_progress;

  ConcurrentGCTimer*    _gc_timer_cm;

  G1OldTracer*          _gc_tracer_cm;

  // All of these times are in ms
  NumberSeq _init_times;
  NumberSeq _remark_times;
  NumberSeq _remark_mark_times;
  NumberSeq _remark_weak_ref_times;
  NumberSeq _cleanup_times;
  double    _total_counting_time;
  double    _total_rs_scrub_time;

  double*   _accum_task_vtime;   // Accumulated task vtime

  WorkGang* _parallel_workers;

  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
  void weakRefsWork(bool clear_all_soft_refs);

  void swapMarkBitMaps();

  // It resets the global marking data structures, as well as the
  // task local ones; should be called during initial mark.
  void reset();

  // Resets all the marking data structures. Called when we have to restart
  // marking or when marking completes (via set_non_marking_state below).
  void reset_marking_state();

  // We do this after we're done with marking so that the marking data
  // structures are initialized to a sensible and predictable state.
  void set_non_marking_state();

  // Called to indicate how many threads are currently active.
  void set_concurrency(uint active_tasks);

  // It should be called to indicate which phase we're in (concurrent
  // mark or remark) and how many threads are currently active.
  void set_concurrency_and_phase(uint active_tasks, bool concurrent);

  // Prints all gathered CM-related statistics
  void print_stats();

  bool cleanup_list_is_empty() {
    return _cleanup_list.is_empty();
  }

  // Accessor methods
  uint parallel_marking_threads() const     { return _parallel_marking_threads; }
  uint max_parallel_marking_threads() const { return _max_parallel_marking_threads; }
  double sleep_factor()                     { return _sleep_factor; }
  double marking_task_overhead()            { return _marking_task_overhead; }

  HeapWord*               finger()       { return _finger; }
  bool                    concurrent()   { return _concurrent; }
  uint                    active_tasks() { return _active_tasks; }
  ParallelTaskTerminator* terminator()   { return &_terminator; }

  // It claims the next available region to be scanned by a marking
  // task/thread. It might return NULL if the next region is empty or
  // we have run out of regions. In the latter case, out_of_regions()
  // determines whether we've really run out of regions or the task
  // should call claim_region() again. This might seem a bit
  // awkward. Originally, the code was written so that claim_region()
  // either successfully returned with a non-empty region or there
  // were no more regions to be claimed. The problem with this was
  // that, in certain circumstances, it iterated over large chunks of
  // the heap finding only empty regions and, while it was working, it
  // was preventing the calling task from calling its regular clock
  // method. So, this way, each task will spend very little time in
  // claim_region() and is allowed to call the regular clock method
  // frequently.
  HeapRegion* claim_region(uint worker_id);
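
  // Illustrative sketch (not part of this header): because claim_region() may
  // return NULL while regions are still left, callers loop roughly like this
  // (the real version lives in G1CMTask::do_marking_step()):
  //
  //   while (!out_of_regions()) {
  //     HeapRegion* hr = claim_region(worker_id);
  //     if (hr != NULL) {
  //       // ... set up for and scan hr ...
  //     }
  //     // ... call the task's regular clock method between attempts ...
  //   }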

  // It determines whether we've run out of regions to scan. Note that
  // the finger can point past the heap end in case the heap was expanded
  // to satisfy an allocation without doing a GC. This is fine, because all
  // objects in those regions will be considered live anyway because of
  // SATB guarantees (i.e. their TAMS will be equal to bottom).
  bool out_of_regions() { return _finger >= _heap_end; }

  // Returns the task with the given id
  G1CMTask* task(int id) {
    assert(0 <= id && id < (int) _active_tasks,
           "task id not within active bounds");
    return _tasks[id];
  }

  // Returns the task queue with the given id
  G1CMTaskQueue* task_queue(int id) {
    assert(0 <= id && id < (int) _active_tasks,
           "task queue id not within active bounds");
    return (G1CMTaskQueue*) _task_queues->queue(id);
  }

  // Returns the task queue set
  G1CMTaskQueueSet* task_queues() { return _task_queues; }

  // Access / manipulation of the overflow flag which is set to
  // indicate that the global stack has overflown
  bool has_overflown()       { return _has_overflown; }
  void set_has_overflown()   { _has_overflown = true; }
  void clear_has_overflown() { _has_overflown = false; }
  bool restart_for_overflow() { return _restart_for_overflow; }

  // Methods to enter the two overflow sync barriers
  void enter_first_sync_barrier(uint worker_id);
  void enter_second_sync_barrier(uint worker_id);

  // Card index of the bottom of the G1 heap. Used for biasing indices into
  // the card bitmaps.
  intptr_t _heap_bottom_card_num;

  // Set to true when initialization is complete
  bool _completed_initialization;

  // If end_timer is true, ends the GC timer after ending the concurrent phase.
  void register_concurrent_phase_end_common(bool end_timer);

  // Clear the given bitmap in parallel using the given WorkGang. If may_yield is
  // true, periodically insert checks to see if this method should exit prematurely.
  void clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield);
public:
  // Manipulation of the global mark stack.
  // The push and pop operations are used by tasks for transfers
  // between task-local queues and the global mark stack.
  bool mark_stack_push(G1TaskQueueEntry* arr) {
    if (!_global_mark_stack.par_push_chunk(arr)) {
      set_has_overflown();
      return false;
    }
    return true;
  }
  bool mark_stack_pop(G1TaskQueueEntry* arr) {
    return _global_mark_stack.par_pop_chunk(arr);
  }
  size_t mark_stack_size()                { return _global_mark_stack.size(); }
  size_t partial_mark_stack_size_target() { return _global_mark_stack.capacity() / 3; }
  bool mark_stack_empty()                 { return _global_mark_stack.is_empty(); }
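
  // Illustrative sketch (not part of this header): a task flushing part of its
  // local queue to the global stack checks the result, since a failed push also
  // raises the overflow flag (G1CMTask::move_entries_to_global_stack() is the
  // real version of this):
  //
  //   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
  //   // ... fill buffer from the task-local queue, NULL-terminating if short ...
  //   if (!cm->mark_stack_push(buffer)) {
  //     // Overflow: the task should abort and join the restart protocol.
  //   }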

  G1CMRootRegions* root_regions() { return &_root_regions; }

  bool concurrent_marking_in_progress() {
    return _concurrent_marking_in_progress;
  }
  void set_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = true;
  }
  void clear_concurrent_marking_in_progress() {
    _concurrent_marking_in_progress = false;
  }

  void concurrent_cycle_start();
  void concurrent_cycle_end();

  void update_accum_task_vtime(int i, double vtime) {
    _accum_task_vtime[i] += vtime;
  }

  double all_task_accum_vtime() {
    double ret = 0.0;
    for (uint i = 0; i < _max_worker_id; ++i)
      ret += _accum_task_vtime[i];
    return ret;
  }

  // Attempts to steal an object from the task queues of other tasks
  bool try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry);
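
  // Illustrative sketch (not part of this header): once a task's own queue and
  // the global stack are empty, do_marking_step() keeps stealing until nothing
  // is left, roughly:
  //
  //   G1TaskQueueEntry entry;
  //   while (try_stealing(worker_id, &hash_seed, entry)) {
  //     // ... scan the stolen entry, then drain the local queue again ...
  //   }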

  G1ConcurrentMark(G1CollectedHeap* g1h,
                   G1RegionToSpaceMapper* prev_bitmap_storage,
                   G1RegionToSpaceMapper* next_bitmap_storage);
  ~G1ConcurrentMark();

  ConcurrentMarkThread* cmThread() { return _cmThread; }

  const G1CMBitMap* const prevMarkBitMap() const { return _prevMarkBitMap; }
  G1CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; }

  // Returns the number of GC threads to be used in a concurrent
  // phase based on the number of GC threads being used in a STW
  // phase.
  uint scale_parallel_threads(uint n_par_threads);

  // Calculates the number of GC threads to be used in a concurrent phase.
  uint calc_parallel_marking_threads();

  // Prepare internal data structures for the next mark cycle. This includes clearing
  // the next mark bitmap and some internal data structures. This method is intended
  // to be called concurrently with the mutator. It will yield to safepoint requests.
  void cleanup_for_next_mark();

  // Clear the previous marking bitmap during a safepoint.
  void clear_prev_bitmap(WorkGang* workers);

  // Return whether the next mark bitmap has no marks set. To be used for assertions
  // only. Will not yield to pause requests.
  bool nextMarkBitmapIsClear();

  // These two do the work that needs to be done before and after the
  // initial root checkpoint. Since this checkpoint can be done at two
  // different points (i.e. an explicit pause or piggy-backed on a
  // young collection), it's nice to be able to easily share the
  // pre/post code. It might be the case that we can put everything in
  // the post method. TP
  void checkpointRootsInitialPre();
  void checkpointRootsInitialPost();

  // Scan all the root regions and mark everything reachable from
  // them.
  void scan_root_regions();

  // Scan a single root region and mark everything reachable from it.
  void scanRootRegion(HeapRegion* hr);

  // Do concurrent phase of marking, to a tentative transitive closure.
  void mark_from_roots();

  void checkpointRootsFinal(bool clear_all_soft_refs);
  void checkpointRootsFinalWork();
  void cleanup();
  void complete_cleanup();

  // Mark in the previous bitmap. NB: this is usually read-only, so use
  // this carefully!
  inline void markPrev(oop p);

  // Clears marks for all objects in the given range, for the prev
  // bitmap. NB: the previous bitmap is usually
  // read-only, so use this carefully!
  void clearRangePrevBitmap(MemRegion mr);

  // Verify that there are no CSet oops on the stacks (taskqueues /
  // global mark stack) and fingers (global / per-task).
  // If marking is not in progress, it's a no-op.
  void verify_no_cset_oops() PRODUCT_RETURN;

  inline bool isPrevMarked(oop p) const;

  inline bool do_yield_check();

  // Abandon current marking iteration due to a Full GC.
  void abort();

  bool has_aborted() { return _has_aborted; }

  void print_summary_info();

  void print_worker_threads_on(outputStream* st) const;
  void threads_do(ThreadClosure* tc) const;

  void print_on_error(outputStream* st) const;

  // Mark the given object on the next bitmap if it is below nTAMS.
  inline bool mark_in_next_bitmap(HeapRegion* const hr, oop const obj);
  inline bool mark_in_next_bitmap(oop const obj);

  // Returns true if initialization was successfully completed.
  bool completed_initialization() const {
    return _completed_initialization;
  }

  ConcurrentGCTimer* gc_timer_cm() const { return _gc_timer_cm; }
  G1OldTracer* gc_tracer_cm() const { return _gc_tracer_cm; }

private:
  // Clear (Reset) all liveness count data.
  void clear_live_data(WorkGang* workers);

#ifdef ASSERT
  // Verify that all of the above data structures are in their initial state.
  void verify_live_data_clear();
#endif

  // Aggregates the per-card liveness data based on the current marking. Also sets
  // the amount of marked bytes for each region.
  void create_live_data();

  void finalize_live_data();

  void verify_live_data();
};

// A class representing a marking task.
class G1CMTask : public TerminatorTerminator {
private:
  enum PrivateConstants {
    // The regular clock call is called once the number of scanned
    // words reaches this limit
    words_scanned_period = 12*1024,
    // The regular clock call is called once the number of visited
    // references reaches this limit
    refs_reached_period = 1024,
    // Initial value for the hash seed, used in the work stealing code
    init_hash_seed = 17
  };

  G1CMObjArrayProcessor _objArray_processor;

  uint                  _worker_id;
  G1CollectedHeap*      _g1h;
  G1ConcurrentMark*     _cm;
  G1CMBitMap*           _nextMarkBitMap;
  // the task queue of this task
  G1CMTaskQueue*        _task_queue;
private:
  // the task queue set---needed for stealing
  G1CMTaskQueueSet*     _task_queues;
  // indicates whether the task has been claimed---this is only for
  // debugging purposes
  bool                  _claimed;

  // number of calls to this task
  int                   _calls;

  // when the virtual timer reaches this time, the marking step should
  // exit
  double                _time_target_ms;
  // the start time of the current marking step
  double                _start_time_ms;

  // the oop closure used for iterations over oops
  G1CMOopClosure*       _cm_oop_closure;

  // the region this task is scanning, NULL if we're not scanning any
  HeapRegion*           _curr_region;
  // the local finger of this task, NULL if we're not scanning a region
  HeapWord*             _finger;
  // limit of the region this task is scanning, NULL if we're not scanning one
  HeapWord*             _region_limit;

  // the number of words this task has scanned
  size_t                _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                _words_scanned_limit;
  // the initial value of _words_scanned_limit (i.e. what it was
  // before it was decreased).
  size_t                _real_words_scanned_limit;

  // the number of references this task has visited
  size_t                _refs_reached;
  // When _refs_reached reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                _refs_reached_limit;
  // the initial value of _refs_reached_limit (i.e. what it was before
  // it was decreased).
  size_t                _real_refs_reached_limit;

  // used by the work stealing code
  int                   _hash_seed;
  // if this is true, then the task has aborted for some reason
  bool                  _has_aborted;
  // set when the task aborts because it has met its time quota
  bool                  _has_timed_out;
  // true when we're draining SATB buffers; this avoids the task
  // aborting due to SATB buffers being available (as we're already
  // dealing with them)
  bool                  _draining_satb_buffers;

  // number sequence of past step times
  NumberSeq             _step_times_ms;
  // elapsed time of this task
  double                _elapsed_time_ms;
  // termination time of this task
  double                _termination_time_ms;
  // when this task got into the termination protocol
  double                _termination_start_time_ms;

  // true when the task is during a concurrent phase, false when it is
  // in the remark phase (so, in the latter case, we do not have to
  // check all the things that we have to check during the concurrent
  // phase, i.e. SATB buffer availability...)
  bool                  _concurrent;

  TruncatedSeq          _marking_step_diffs_ms;

  // it updates the local fields after this task has claimed
  // a new region to scan
  void setup_for_region(HeapRegion* hr);
  // it brings up-to-date the limit of the region
  void update_region_limit();

  // called when either the words scanned or the refs visited limit
  // has been reached
  void reached_limit();
  // recalculates the words scanned and refs visited limits
  void recalculate_limits();
  // decreases the words scanned and refs visited limits when we reach
  // an expensive operation
  void decrease_limits();
  // it checks whether the words scanned or refs visited reached their
  // respective limit and calls reached_limit() if they have
  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit) {
      reached_limit();
    }
  }
  // this is supposed to be called regularly during a marking step as
  // it checks a bunch of conditions that might cause the marking step
  // to abort
  void regular_clock_call();
  bool concurrent() { return _concurrent; }

  // Test whether obj might have already been passed over by the
  // mark bitmap scan, and so needs to be pushed onto the mark stack.
  bool is_below_finger(oop obj, HeapWord* global_finger) const;

  template<bool scan> void process_grey_task_entry(G1TaskQueueEntry task_entry);
public:
  // Apply the closure on the given area of the objArray. Return the number of words
  // scanned.
  inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
  // It resets the task; it should be called right at the beginning of
  // a marking phase.
  void reset(G1CMBitMap* _nextMarkBitMap);
  // it clears all the fields that correspond to a claimed region.
  void clear_region_fields();

  void set_concurrent(bool concurrent) { _concurrent = concurrent; }

  // The main method of this class which performs a marking step
  // trying not to exceed the given duration. However, it might exit
  // prematurely, according to some conditions (i.e. SATB buffers are
  // available for processing).
  void do_marking_step(double target_ms,
                       bool do_termination,
                       bool is_serial);
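
  // Illustrative sketch (not part of this header): a concurrent marking worker
  // drives the task in repeated, time-bounded steps; the 10.0 ms target is just
  // an example value, not the tuned one:
  //
  //   task->record_start_time();
  //   do {
  //     task->do_marking_step(10.0 /* target_ms */,
  //                           true /* do_termination */,
  //                           false /* is_serial */);
  //   } while (task->has_aborted() && !cm->has_aborted());
  //   task->record_end_time();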

  // These two calls start and stop the timer
  void record_start_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0;
  }
  void record_end_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
  }

  // returns the worker ID associated with this task.
  uint worker_id() { return _worker_id; }

  // From TerminatorTerminator. It determines whether this task should
  // exit the termination protocol after it's entered it.
  virtual bool should_exit_termination();

  // Resets the local region fields after a task has finished scanning a
  // region; or when they have become stale as a result of the region
  // being evacuated.
  void giveup_current_region();

  HeapWord* finger() { return _finger; }

  bool has_aborted()       { return _has_aborted; }
  void set_has_aborted()   { _has_aborted = true; }
  void clear_has_aborted() { _has_aborted = false; }
  bool has_timed_out()     { return _has_timed_out; }
  bool claimed()           { return _claimed; }

  void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);

  // Increment the number of references this task has visited.
  void increment_refs_reached() { ++_refs_reached; }

  // Grey the object by marking it. If not already marked, push it on
  // the local queue if below the finger.
  // obj is below its region's NTAMS.
  inline void make_reference_grey(oop obj);

  // Grey the object (by calling make_reference_grey) if required,
  // e.g. obj is below its containing region's NTAMS.
  // Precondition: obj is a valid heap object.
  inline void deal_with_reference(oop obj);

  // It scans an object and visits its children.
  inline void scan_task_entry(G1TaskQueueEntry task_entry);

  // It pushes an object on the local queue.
  inline void push(G1TaskQueueEntry task_entry);

  // Move entries to the global stack.
  void move_entries_to_global_stack();
  // Move entries from the global stack, return true if we were successful in doing so.
  bool get_entries_from_global_stack();

  // It pops and scans objects from the local queue. If partially is
  // true, then it stops when the queue size reaches a given limit. If
  // partially is false, then it stops when the queue is empty.
  void drain_local_queue(bool partially);
  // It moves entries from the global stack to the local queue and
  // drains the local queue. If partially is true, then it stops when
  // both the global stack and the local queue reach a given size. If
  // partially is false, it tries to empty them totally.
  void drain_global_stack(bool partially);
  // It keeps picking SATB buffers and processing them until no SATB
  // buffers are available.
  void drain_satb_buffers();

  // moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
    _finger = new_finger;
  }

  G1CMTask(uint worker_id,
           G1ConcurrentMark* cm,
           G1CMTaskQueue* task_queue,
           G1CMTaskQueueSet* task_queues);

  // it prints statistics associated with this task
  void print_stats();
};

// Class that's used to print out per-region liveness
// information. It's currently used at the end of marking and also
// after we sort the old regions at the end of the cleanup operation.
class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
private:
  // Accumulators for these values.
  size_t _total_used_bytes;
  size_t _total_capacity_bytes;
  size_t _total_prev_live_bytes;
  size_t _total_next_live_bytes;

  // Accumulator for the remembered set size
  size_t _total_remset_bytes;

  // Accumulator for strong code roots memory size
  size_t _total_strong_code_roots_bytes;

  static double perc(size_t val, size_t total) {
    if (total == 0) {
      return 0.0;
    } else {
      return 100.0 * ((double) val / (double) total);
    }
  }

  static double bytes_to_mb(size_t val) {
    return (double) val / (double) M;
  }

public:
  // The header and footer are printed in the constructor and
  // destructor respectively.
  G1PrintRegionLivenessInfoClosure(const char* phase_name);
  virtual bool doHeapRegion(HeapRegion* r);
  ~G1PrintRegionLivenessInfoClosure();
};

#endif // SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP