49 &_to_space_root_closure, gen_, &_old_gen_root_closure, |
49 &_to_space_root_closure, gen_, &_old_gen_root_closure, |
50 work_queue_set_, &term_), |
50 work_queue_set_, &term_), |
51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), |
51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), |
52 _keep_alive_closure(&_scan_weak_ref_closure), |
52 _keep_alive_closure(&_scan_weak_ref_closure), |
53 _promotion_failure_size(0), |
53 _promotion_failure_size(0), |
54 _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), |
|
55 _strong_roots_time(0.0), _term_time(0.0) |
54 _strong_roots_time(0.0), _term_time(0.0) |
56 { |
55 { |
|
56 #if TASKQUEUE_STATS |
|
57 _term_attempts = 0; |
|
58 _overflow_refills = 0; |
|
59 _overflow_refill_objs = 0; |
|
60 #endif // TASKQUEUE_STATS |
|
61 |
57 _survivor_chunk_array = |
62 _survivor_chunk_array = |
58 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
63 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
59 _hash_seed = 17; // Might want to take time-based random value. |
64 _hash_seed = 17; // Might want to take time-based random value. |
60 _start = os::elapsedTime(); |
65 _start = os::elapsedTime(); |
61 _old_gen_closure.set_generation(old_gen_); |
66 _old_gen_closure.set_generation(old_gen_); |
98 end = start + ParGCArrayScanChunk; |
103 end = start + ParGCArrayScanChunk; |
99 arrayOop(old)->set_length(end); |
104 arrayOop(old)->set_length(end); |
100 // Push remainder. |
105 // Push remainder. |
101 bool ok = work_queue()->push(old); |
106 bool ok = work_queue()->push(old); |
102 assert(ok, "just popped, push must be okay"); |
107 assert(ok, "just popped, push must be okay"); |
103 note_push(); |
|
104 } else { |
108 } else { |
105 // Restore length so that it can be used if there |
109 // Restore length so that it can be used if there |
106 // is a promotion failure and forwarding pointers |
110 // is a promotion failure and forwarding pointers |
107 // must be removed. |
111 // must be removed. |
108 arrayOop(old)->set_length(end); |
112 arrayOop(old)->set_length(end); |
124 ObjToScanQueue* queue = work_queue(); |
128 ObjToScanQueue* queue = work_queue(); |
125 do { |
129 do { |
126 while (queue->size() > (juint)max_size) { |
130 while (queue->size() > (juint)max_size) { |
127 oop obj_to_scan; |
131 oop obj_to_scan; |
128 if (queue->pop_local(obj_to_scan)) { |
132 if (queue->pop_local(obj_to_scan)) { |
129 note_pop(); |
|
130 if ((HeapWord *)obj_to_scan < young_old_boundary()) { |
133 if ((HeapWord *)obj_to_scan < young_old_boundary()) { |
131 if (obj_to_scan->is_objArray() && |
134 if (obj_to_scan->is_objArray() && |
132 obj_to_scan->is_forwarded() && |
135 obj_to_scan->is_forwarded() && |
133 obj_to_scan->forwardee() != obj_to_scan) { |
136 obj_to_scan->forwardee() != obj_to_scan) { |
134 scan_partial_array_and_push_remainder(obj_to_scan); |
137 scan_partial_array_and_push_remainder(obj_to_scan); |
269 Generation& old_gen, |
272 Generation& old_gen, |
270 ObjToScanQueueSet& queue_set, |
273 ObjToScanQueueSet& queue_set, |
271 GrowableArray<oop>** overflow_stacks_, |
274 GrowableArray<oop>** overflow_stacks_, |
272 size_t desired_plab_sz, |
275 size_t desired_plab_sz, |
273 ParallelTaskTerminator& term); |
276 ParallelTaskTerminator& term); |
|
277 |
|
278 ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); } |
|
279 |
274 inline ParScanThreadState& thread_state(int i); |
280 inline ParScanThreadState& thread_state(int i); |
275 int pushes() { return _pushes; } |
281 |
276 int pops() { return _pops; } |
|
277 int steals() { return _steals; } |
|
278 void reset(bool promotion_failed); |
282 void reset(bool promotion_failed); |
279 void flush(); |
283 void flush(); |
|
284 |
|
285 #if TASKQUEUE_STATS |
|
286 static void |
|
287 print_termination_stats_hdr(outputStream* const st = gclog_or_tty); |
|
288 void print_termination_stats(outputStream* const st = gclog_or_tty); |
|
289 static void |
|
290 print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); |
|
291 void print_taskqueue_stats(outputStream* const st = gclog_or_tty); |
|
292 void reset_stats(); |
|
293 #endif // TASKQUEUE_STATS |
|
294 |
280 private: |
295 private: |
281 ParallelTaskTerminator& _term; |
296 ParallelTaskTerminator& _term; |
282 ParNewGeneration& _gen; |
297 ParNewGeneration& _gen; |
283 Generation& _next_gen; |
298 Generation& _next_gen; |
284 // statistics |
|
285 int _pushes; |
|
286 int _pops; |
|
287 int _steals; |
|
288 }; |
299 }; |
289 |
300 |
290 |
301 |
291 ParScanThreadStateSet::ParScanThreadStateSet( |
302 ParScanThreadStateSet::ParScanThreadStateSet( |
292 int num_threads, Space& to_space, ParNewGeneration& gen, |
303 int num_threads, Space& to_space, ParNewGeneration& gen, |
293 Generation& old_gen, ObjToScanQueueSet& queue_set, |
304 Generation& old_gen, ObjToScanQueueSet& queue_set, |
294 GrowableArray<oop>** overflow_stack_set_, |
305 GrowableArray<oop>** overflow_stack_set_, |
295 size_t desired_plab_sz, ParallelTaskTerminator& term) |
306 size_t desired_plab_sz, ParallelTaskTerminator& term) |
296 : ResourceArray(sizeof(ParScanThreadState), num_threads), |
307 : ResourceArray(sizeof(ParScanThreadState), num_threads), |
297 _gen(gen), _next_gen(old_gen), _term(term), |
308 _gen(gen), _next_gen(old_gen), _term(term) |
298 _pushes(0), _pops(0), _steals(0) |
|
299 { |
309 { |
300 assert(num_threads > 0, "sanity check!"); |
310 assert(num_threads > 0, "sanity check!"); |
301 // Initialize states. |
311 // Initialize states. |
302 for (int i = 0; i < num_threads; ++i) { |
312 for (int i = 0; i < num_threads; ++i) { |
303 new ((ParScanThreadState*)_data + i) |
313 new ((ParScanThreadState*)_data + i) |
321 thread_state(i).print_and_clear_promotion_failure_size(); |
331 thread_state(i).print_and_clear_promotion_failure_size(); |
322 } |
332 } |
323 } |
333 } |
324 } |
334 } |
325 |
335 |
|
336 #if TASKQUEUE_STATS |
|
337 void |
|
338 ParScanThreadState::reset_stats() |
|
339 { |
|
340 taskqueue_stats().reset(); |
|
341 _term_attempts = 0; |
|
342 _overflow_refills = 0; |
|
343 _overflow_refill_objs = 0; |
|
344 } |
|
345 |
|
// Resets the statistics of every thread state in the set by calling
// reset_stats() on thread_state(i) for each i in [0, length()).
346 void ParScanThreadStateSet::reset_stats() |

347 { |

348 for (int i = 0; i < length(); ++i) { |

349 thread_state(i).reset_stats(); |

350 } |

351 } |
|
352 |
|
// Writes the title and the column headings of the termination statistics
// table to 'st'. The headings label the per-thread rows emitted by
// print_termination_stats(): thread index, elapsed ms, strong-roots ms and
// percentage, termination ms and percentage, and termination attempts.
353 void |

354 ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) |

355 { |

356 st->print_raw_cr("GC Termination Stats"); |

357 st->print_raw_cr(" elapsed --strong roots-- " |

358 "-------termination-------"); |

359 st->print_raw_cr("thr ms ms % " |

360 " ms % attempts"); |

361 st->print_raw_cr("--- --------- --------- ------ " |

362 "--------- ------ --------"); |

363 } |
|
364 |
|
// Prints the termination statistics table to 'st': first the header, then
// one row per thread state in the set. Each row shows the thread index, the
// elapsed time, the strong-roots time (absolute and as a percentage of
// elapsed), the termination time (absolute and as a percentage of elapsed),
// and the number of termination attempts.
365 void ParScanThreadStateSet::print_termination_stats(outputStream* const st) |

366 { |

367 print_termination_stats_hdr(st); |

368 |

369 for (int i = 0; i < length(); ++i) { |

370 const ParScanThreadState & pss = thread_state(i); |

// The accessors' values are scaled by 1000.0 and reported as "ms";
// presumably they return seconds -- TODO confirm against the accessors.
371 const double elapsed_ms = pss.elapsed_time() * 1000.0; |

372 const double s_roots_ms = pss.strong_roots_time() * 1000.0; |

373 const double term_ms = pss.term_time() * 1000.0; |

// NOTE(review): both percentages divide by elapsed_ms, which is not
// guarded against being zero here.
374 st->print_cr("%3d %9.2f %9.2f %6.2f " |

375 "%9.2f %6.2f " SIZE_FORMAT_W(8), |

376 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, |

377 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts()); |

378 } |

379 } |
|
380 |
|
381 // Print stats related to work queue activity. |
|
// Writes the "GC Task Stats" title to 'st', followed by two header lines.
// Each header line starts with the thread-column label and is completed by
// TaskQueueStats::print_header(); the first argument (1, then 2) presumably
// selects which header line to print -- TODO confirm in TaskQueueStats.
382 void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) |

383 { |

384 st->print_raw_cr("GC Task Stats"); |

385 st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); |

386 st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); |

387 } |
|
388 |
|
// Prints the task queue statistics table to 'st': the header, one row per
// thread state (thread index followed by that thread's TaskQueueStats), and
// a final "tot" row holding the sum of all per-thread stats. For a thread
// with at least one overflow refill, an extra line reports its overflow
// refill count and overflow object count.
389 void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) |

390 { |

391 print_taskqueue_stats_hdr(st); |

392 |

// Running sum of the per-thread stats, printed as the "tot" row below.
393 TaskQueueStats totals; |

394 for (int i = 0; i < length(); ++i) { |

395 const ParScanThreadState & pss = thread_state(i); |

396 const TaskQueueStats & stats = pss.taskqueue_stats(); |

397 st->print("%3d ", i); stats.print(st); st->cr(); |

398 totals += stats; |

399 |

400 if (pss.overflow_refills() > 0) { |

401 st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills " |

402 SIZE_FORMAT_W(10) " overflow objects", |

403 pss.overflow_refills(), pss.overflow_refill_objs()); |

404 } |

405 } |

406 st->print("tot "); totals.print(st); st->cr(); |

407 |

// The accumulated totals are passed to verify() only where DEBUG_ONLY
// expands its argument.
408 DEBUG_ONLY(totals.verify()); |

409 } |
|
410 #endif // TASKQUEUE_STATS |
|
411 |
326 void ParScanThreadStateSet::flush() |
412 void ParScanThreadStateSet::flush() |
327 { |
413 { |
328 // Work in this loop should be kept as lightweight as |
414 // Work in this loop should be kept as lightweight as |
329 // possible since this might otherwise become a bottleneck |
415 // possible since this might otherwise become a bottleneck |
330 // to scaling. Should we add heavy-weight work into this |
416 // to scaling. Should we add heavy-weight work into this |
344 _gen.age_table()->merge(local_table); |
430 _gen.age_table()->merge(local_table); |
345 |
431 |
346 // Inform old gen that we're done. |
432 // Inform old gen that we're done. |
347 _next_gen.par_promote_alloc_done(i); |
433 _next_gen.par_promote_alloc_done(i); |
348 _next_gen.par_oop_since_save_marks_iterate_done(i); |
434 _next_gen.par_oop_since_save_marks_iterate_done(i); |
349 |
435 } |
350 // Flush stats related to work queue activity (push/pop/steal) |
436 |
351 // This could conceivably become a bottleneck; if so, we'll put the |
|
352 // stats gathering under the flag. |
|
353 if (PAR_STATS_ENABLED) { |
|
354 _pushes += par_scan_state.pushes(); |
|
355 _pops += par_scan_state.pops(); |
|
356 _steals += par_scan_state.steals(); |
|
357 if (ParallelGCVerbose) { |
|
358 gclog_or_tty->print("Thread %d complete:\n" |
|
359 " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n", |
|
360 i, par_scan_state.pushes(), par_scan_state.pops(), |
|
361 par_scan_state.steals(), par_scan_state.steal_attempts()); |
|
362 if (par_scan_state.overflow_pushes() > 0 || |
|
363 par_scan_state.overflow_refills() > 0) { |
|
364 gclog_or_tty->print(" Overflow pushes: %7d " |
|
365 "Overflow refills: %7d for %d objs.\n", |
|
366 par_scan_state.overflow_pushes(), |
|
367 par_scan_state.overflow_refills(), |
|
368 par_scan_state.overflow_refill_objs()); |
|
369 } |
|
370 |
|
371 double elapsed = par_scan_state.elapsed(); |
|
372 double strong_roots = par_scan_state.strong_roots_time(); |
|
373 double term = par_scan_state.term_time(); |
|
374 gclog_or_tty->print( |
|
375 " Elapsed: %7.2f ms.\n" |
|
376 " Strong roots: %7.2f ms (%6.2f%%)\n" |
|
377 " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", |
|
378 elapsed * 1000.0, |
|
379 strong_roots * 1000.0, (strong_roots*100.0/elapsed), |
|
380 term * 1000.0, (term*100.0/elapsed), |
|
381 par_scan_state.term_attempts()); |
|
382 } |
|
383 } |
|
384 } |
|
385 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { |
437 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { |
386 // We need to call this even when ResizeOldPLAB is disabled |
438 // We need to call this even when ResizeOldPLAB is disabled |
387 // so as to avoid breaking some asserts. While we may be able |
439 // so as to avoid breaking some asserts. While we may be able |
388 // to avoid this by reorganizing the code a bit, I am loath |
440 // to avoid this by reorganizing the code a bit, I am loath |
389 // to do that unless we find cases where ergo leads to bad |
441 // to do that unless we find cases where ergo leads to bad |
454 par_scan_state()->trim_queues(0); |
506 par_scan_state()->trim_queues(0); |
455 |
507 |
456 // We have no local work, attempt to steal from other threads. |
508 // We have no local work, attempt to steal from other threads. |
457 |
509 |
458 // attempt to steal work from promoted. |
510 // attempt to steal work from promoted. |
459 par_scan_state()->note_steal_attempt(); |
|
460 if (task_queues()->steal(par_scan_state()->thread_num(), |
511 if (task_queues()->steal(par_scan_state()->thread_num(), |
461 par_scan_state()->hash_seed(), |
512 par_scan_state()->hash_seed(), |
462 obj_to_scan)) { |
513 obj_to_scan)) { |
463 par_scan_state()->note_steal(); |
|
464 bool res = work_q->push(obj_to_scan); |
514 bool res = work_q->push(obj_to_scan); |
465 assert(res, "Empty queue should have room for a push."); |
515 assert(res, "Empty queue should have room for a push."); |
466 |
516 |
467 par_scan_state()->note_push(); |
|
468 // if successful, goto Start. |
517 // if successful, goto Start. |
469 continue; |
518 continue; |
470 |
519 |
471 // try global overflow list. |
520 // try global overflow list. |
472 } else if (par_gen()->take_from_overflow_list(par_scan_state())) { |
521 } else if (par_gen()->take_from_overflow_list(par_scan_state())) { |
839 } else { |
888 } else { |
840 GenCollectedHeap::StrongRootsScope srs(gch); |
889 GenCollectedHeap::StrongRootsScope srs(gch); |
841 tsk.work(0); |
890 tsk.work(0); |
842 } |
891 } |
843 thread_state_set.reset(promotion_failed()); |
892 thread_state_set.reset(promotion_failed()); |
844 |
|
845 if (PAR_STATS_ENABLED && ParallelGCVerbose) { |
|
846 gclog_or_tty->print("Thread totals:\n" |
|
847 " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", |
|
848 thread_state_set.pushes(), thread_state_set.pops(), |
|
849 thread_state_set.steals(), |
|
850 thread_state_set.pops()+thread_state_set.steals()); |
|
851 } |
|
852 assert(thread_state_set.pushes() == thread_state_set.pops() |
|
853 + thread_state_set.steals(), |
|
854 "Or else the queues are leaky."); |
|
855 |
893 |
856 // Process (weak) reference objects found during scavenge. |
894 // Process (weak) reference objects found during scavenge. |
857 ReferenceProcessor* rp = ref_processor(); |
895 ReferenceProcessor* rp = ref_processor(); |
858 IsAliveClosure is_alive(this); |
896 IsAliveClosure is_alive(this); |
859 ScanWeakRefClosure scan_weak_ref(this); |
897 ScanWeakRefClosure scan_weak_ref(this); |
1102 // Add stats for overflow pushes. |
1143 // Add stats for overflow pushes. |
1103 if (Verbose && PrintGCDetails) { |
1144 if (Verbose && PrintGCDetails) { |
1104 gclog_or_tty->print("queue overflow!\n"); |
1145 gclog_or_tty->print("queue overflow!\n"); |
1105 } |
1146 } |
1106 push_on_overflow_list(old, par_scan_state); |
1147 push_on_overflow_list(old, par_scan_state); |
1107 par_scan_state->note_overflow_push(); |
1148 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); |
1108 } |
1149 } |
1109 par_scan_state->note_push(); |
|
1110 |
1150 |
1111 return new_obj; |
1151 return new_obj; |
1112 } |
1152 } |
1113 |
1153 |
1114 // Oops. Someone beat us to it. Undo the allocation. Where did we |
1154 // Oops. Someone beat us to it. Undo the allocation. Where did we |