template<class E, MEMFLAGS F, unsigned int N>
GenericTaskQueue<E, F, N>::GenericTaskQueue() {
  // The Age tag is read, stored and cmpxchg'ed as one machine word (see
  // _age.cmpxchg in pop_global/pop_local_slow); the algorithm depends on
  // Age being exactly word-sized.
  assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
}
317 |
310 |
template<class E, MEMFLAGS F, unsigned int N>
void GenericTaskQueue<E, F, N>::initialize() {
  // Allocate the fixed-size (N-element) backing array for the queue.
  // Done here rather than in the constructor; callers must invoke
  // initialize() before using the queue.
  _elems = _array_allocator.allocate(N);
}
|
322 |
|
template<class E, MEMFLAGS F, unsigned int N>
void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) {
  // Apply closure "f" to every element currently in the queue, walking
  // downward from _bottom for size() entries.
  // NOTE(review): _bottom and the elements are read without any
  // synchronization here — presumably callers guarantee the queue is
  // quiescent (e.g. at a pause) when this runs; confirm at call sites.
  // tty->print_cr("START OopTaskQueue::oops_do");
  uint iters = size();
  uint index = _bottom;
  for (uint i = 0; i < iters; ++i) {
    index = decrement_index(index);
    // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
    //            index, &_elems[index], _elems[index]);
    E* t = (E*)&_elems[index];      // cast away volatility
    oop* p = (oop*)t;
    assert((*t)->is_oop_or_null(), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(*t)));
    f->do_oop(p);
  }
  // tty->print_cr("END OopTaskQueue::oops_do");
}
|
339 |
|
// Slow path of push(), reached when the fast path saw a "dirty" element
// count at its maximum.  A dirty count of exactly N - 1 denotes an empty
// queue (it can only arise transiently from pop_local's decrement of
// _bottom — see the comment in pop_local), so the push proceeds; any
// other value means the queue is genuinely full and the push fails.
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
  if (dirty_n_elems == N - 1) {
    // Actually means 0, so do the push.
    uint localBot = _bottom;
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away. We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment. However, casting to E& means that we trigger an
    // unused-value warning. So, we cast the E& to void.
    (void)const_cast<E&>(_elems[localBot] = t);
    // Release store: the element write above must be visible before the
    // new _bottom is visible to concurrent pop_global callers.
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  }
  return false;
}
|
357 |
|
// pop_local_slow() is done by the owning thread and is trying to
// get the last task in the queue.  It will compete with pop_global()
// that will be used by other threads.  The tag age is incremented
// whenever the queue goes empty which it will do here if this thread
// gets the last task or in pop_global() if the queue wraps (top == 0
// and pop_global() succeeds, see pop_global()).
//
// Returns true if this thread claimed the final element, false if a
// competing pop_global won it.  Either way _age is left in the
// canonical empty-queue form with the tag incremented.
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
  // This queue was observed to contain exactly one element; either this
  // thread will claim it, or a competing "pop_global".  In either case,
  // the queue will be logically empty afterwards.  Create a new Age value
  // that represents the empty queue for the given value of "_bottom".  (We
  // must also increment "tag" because of the case where "bottom == 1",
  // "top == 0".  A pop_global could read the queue element in that case,
  // then have the owner thread do a pop followed by another push.  Without
  // the incrementing of "tag", the pop_global's CAS could succeed,
  // allowing it to believe it has claimed the stale element.)
  Age newAge((idx_t)localBot, oldAge.tag() + 1);
  // Perhaps a competing pop_global has already incremented "top", in which
  // case it wins the element.
  if (localBot == oldAge.top()) {
    // No competing pop_global has yet incremented "top"; we'll try to
    // install new_age, thus claiming the element.
    Age tempAge = _age.cmpxchg(newAge, oldAge);
    if (tempAge == oldAge) {
      // We win.
      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
      return true;
    }
  }
  // We lose; a completing pop_global gets the element.  But the queue is empty
  // and top is greater than bottom.  Fix this representation of the empty queue
  // to become the canonical one.
  _age.set(newAge);
  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
  return false;
}
|
396 |
|
// Steal the element at "top", on behalf of a thread that does NOT own
// this queue.  On success, stores the element into "t" and returns true.
// Returns false if the queue appeared empty, or if a racing pop_global /
// pop_local_slow advanced _age first (detected by the CAS below).
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
  Age oldAge = _age.get();
  // Architectures with weak memory model require a barrier here
  // to guarantee that bottom is not older than age,
  // which is crucial for the correctness of the algorithm.
#if !(defined SPARC || defined IA32 || defined AMD64)
  OrderAccess::fence();
#endif
  uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom);
  uint n_elems = size(localBot, oldAge.top());
  if (n_elems == 0) {
    return false;
  }

  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away. We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment. However, casting to E& means that we trigger an
  // unused-value warning. So, we cast the E& to void.
  // Read the element speculatively; the CAS on _age below decides
  // whether this thread actually claimed it.
  (void) const_cast<E&>(t = _elems[oldAge.top()]);
  Age newAge(oldAge);
  newAge.increment();
  Age resAge = _age.cmpxchg(newAge, oldAge);

  // Note that using "_bottom" here might fail, since a pop_local might
  // have decremented it.
  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
  return resAge == oldAge;
}
|
427 |
|
template<class E, MEMFLAGS F, unsigned int N>
GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
  // NOTE(review): _elems is obtained via _array_allocator.allocate(N) in
  // initialize(), but is released here with FREE_C_HEAP_ARRAY.  If the
  // array allocator can back the storage with anything other than the C
  // heap, this is a mismatched free — confirm the two paths agree, or
  // release through the allocator instead.
  FREE_C_HEAP_ARRAY(E, _elems);
}
|
432 |
|
// OverflowTaskQueue is a TaskQueue that also includes an overflow stack for
// elements that do not fit in the TaskQueue.
//
// This class hides two methods from super classes:
//
537 } |
391 } |
538 |
392 |
// Returns the i-th queue in the set.  No bounds checking is performed;
// the caller is responsible for passing i < n.
template<class T, MEMFLAGS F> T*
GenericTaskQueueSet<T, F>::queue(uint i) {
  return _queues[i];
}
|
543 |
|
544 template<class T, MEMFLAGS F> bool |
|
545 GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) { |
|
546 for (uint i = 0; i < 2 * _n; i++) { |
|
547 if (steal_best_of_2(queue_num, seed, t)) { |
|
548 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true)); |
|
549 return true; |
|
550 } |
|
551 } |
|
552 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false)); |
|
553 return false; |
|
554 } |
|
555 |
|
556 template<class T, MEMFLAGS F> bool |
|
557 GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) { |
|
558 if (_n > 2) { |
|
559 uint k1 = queue_num; |
|
560 while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
|
561 uint k2 = queue_num; |
|
562 while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
|
563 // Sample both and try the larger. |
|
564 uint sz1 = _queues[k1]->size(); |
|
565 uint sz2 = _queues[k2]->size(); |
|
566 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
|
567 else return _queues[k1]->pop_global(t); |
|
568 } else if (_n == 2) { |
|
569 // Just try the other one. |
|
570 uint k = (queue_num + 1) % 2; |
|
571 return _queues[k]->pop_global(t); |
|
572 } else { |
|
573 assert(_n == 1, "can't be zero."); |
|
574 return false; |
|
575 } |
|
576 } |
396 } |
577 |
397 |
578 template<class T, MEMFLAGS F> |
398 template<class T, MEMFLAGS F> |
579 bool GenericTaskQueueSet<T, F>::peek() { |
399 bool GenericTaskQueueSet<T, F>::peek() { |
580 // Try all the queues. |
400 // Try all the queues. |
647 static uint total_peeks() { return _total_peeks; } |
467 static uint total_peeks() { return _total_peeks; } |
648 static void print_termination_counts(); |
468 static void print_termination_counts(); |
649 #endif |
469 #endif |
650 }; |
470 }; |
651 |
471 |
// Fast-path push by the owning thread: write the element at _bottom and
// publish it with a release store of the incremented index.  Falls back
// to push_slow() when the dirty size has reached max_elems() (which may
// still mean the queue is actually empty — see push_slow).
template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::push(E t) {
  uint localBot = _bottom;
  assert(localBot < N, "_bottom out of range.");
  idx_t top = _age.top();
  uint dirty_n_elems = dirty_size(localBot, top);
  assert(dirty_n_elems < N, "n_elems out of range.");
  if (dirty_n_elems < max_elems()) {
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away. We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment. However, casting to E& means that we trigger an
    // unused-value warning. So, we cast the E& to void.
    (void) const_cast<E&>(_elems[localBot] = t);
    // Release store: element must be visible before the new _bottom.
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  } else {
    return push_slow(t, dirty_n_elems);
  }
}
|
673 |
|
// Fast-path pop by the owning thread, from the _bottom end of the queue.
// Stores the popped element into "t" and returns true on success;
// returns false if the queue is empty.  When only one element remains,
// defers to pop_local_slow() to arbitrate with concurrent pop_global().
template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::pop_local(volatile E& t) {
  uint localBot = _bottom;
  // This value cannot be N-1. That can only occur as a result of
  // the assignment to bottom in this method. If it does, this method
  // resets the size to 0 before the next call (which is sequential,
  // since this is pop_local.)
  uint dirty_n_elems = dirty_size(localBot, _age.top());
  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
  if (dirty_n_elems == 0) return false;
  localBot = decrement_index(localBot);
  _bottom = localBot;
  // This is necessary to prevent any read below from being reordered
  // before the store just above.
  OrderAccess::fence();
  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away. We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment. However, casting to E& means that we trigger an
  // unused-value warning. So, we cast the E& to void.
  (void) const_cast<E&>(t = _elems[localBot]);
  // This is a second read of "age"; the "size()" above is the first.
  // If there's still at least one element in the queue, based on the
  // "_bottom" and "age" we've read, then there can be no interference with
  // a "pop_global" operation, and we're done.
  idx_t tp = _age.top();    // XXX
  if (size(localBot, tp) > 0) {
    assert(dirty_size(localBot, tp) != N - 1, "sanity");
    TASKQUEUE_STATS_ONLY(stats.record_pop());
    return true;
  } else {
    // Otherwise, the queue contained exactly one element; we take the slow
    // path.
    return pop_local_slow(localBot, _age.get());
  }
}
|
710 |
|
// Convenience instantiations for queues of oops used by the GC.
typedef GenericTaskQueue<oop, mtGC>             OopTaskQueue;
typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
713 |
474 |
714 #ifdef _MSC_VER |
475 #ifdef _MSC_VER |
715 #pragma warning(push) |
476 #pragma warning(push) |