// -----------------------------------------------------------------------------
// Hash Code handling
//
// Performance concern:
// OrderAccess::storestore() calls release() which at one time stored 0
// into the global volatile OrderAccess::dummy variable. This store was
// unnecessary for correctness. Many threads storing into a common location
// causes considerable cache migration or "sloshing" on large SMP systems.
// As such, I avoided using OrderAccess::storestore(). In some cases
// OrderAccess::fence() -- which incurs local latency on the executing
// processor -- is a better choice as it scales on SMP systems.
//
// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
// a discussion of coherency costs. Note that all our current reference
// platforms provide strong ST-ST order, so the issue is moot on IA32,
// x64, and SPARC.
//
// As a general policy we use "volatile" to control compiler-based reordering
// and explicit fences (barriers) to control for architectural reordering
// performed by the CPU(s) or platform.

409 struct SharedGlobals { |
412 struct SharedGlobals { |
410 // These are highly shared mostly-read variables. |
413 // These are highly shared mostly-read variables. |
411 // To avoid false-sharing they need to be the sole occupants of a $ line. |
414 // To avoid false-sharing they need to be the sole occupants of a $ line. |
412 double padPrefix[8]; |
415 double padPrefix[8]; |
1594 Thread::muxRelease(&ListLock); |
1597 Thread::muxRelease(&ListLock); |
1595 THREAD->clear_pending_exception(); |
1598 THREAD->clear_pending_exception(); |
1596 } |
1599 } |
1597 |
1600 |
//------------------------------------------------------------------------------
// Debugging code

1604 void ObjectSynchronizer::sanity_checks(const bool verbose, |
|
1605 const uint cache_line_size, |
|
1606 int *error_cnt_ptr, |
|
1607 int *warning_cnt_ptr) { |
|
1608 u_char *addr_begin = (u_char*)&GVars; |
|
1609 u_char *addr_stwRandom = (u_char*)&GVars.stwRandom; |
|
1610 u_char *addr_hcSequence = (u_char*)&GVars.hcSequence; |
|
1611 |
|
1612 if (verbose) { |
|
1613 tty->print_cr("INFO: sizeof(SharedGlobals)=" SIZE_FORMAT, |
|
1614 sizeof(SharedGlobals)); |
|
1615 } |
|
1616 |
|
1617 uint offset_stwRandom = (uint)(addr_stwRandom - addr_begin); |
|
1618 if (verbose) tty->print_cr("INFO: offset(stwRandom)=%u", offset_stwRandom); |
|
1619 |
|
1620 uint offset_hcSequence = (uint)(addr_hcSequence - addr_begin); |
|
1621 if (verbose) { |
|
1622 tty->print_cr("INFO: offset(_hcSequence)=%u", offset_hcSequence); |
|
1623 } |
|
1624 |
|
1625 if (cache_line_size != 0) { |
|
1626 // We were able to determine the L1 data cache line size so |
|
1627 // do some cache line specific sanity checks |
|
1628 |
|
1629 if (offset_stwRandom < cache_line_size) { |
|
1630 tty->print_cr("WARNING: the SharedGlobals.stwRandom field is closer " |
|
1631 "to the struct beginning than a cache line which permits " |
|
1632 "false sharing."); |
|
1633 (*warning_cnt_ptr)++; |
|
1634 } |
|
1635 |
|
1636 if ((offset_hcSequence - offset_stwRandom) < cache_line_size) { |
|
1637 tty->print_cr("WARNING: the SharedGlobals.stwRandom and " |
|
1638 "SharedGlobals.hcSequence fields are closer than a cache " |
|
1639 "line which permits false sharing."); |
|
1640 (*warning_cnt_ptr)++; |
|
1641 } |
|
1642 |
|
1643 if ((sizeof(SharedGlobals) - offset_hcSequence) < cache_line_size) { |
|
1644 tty->print_cr("WARNING: the SharedGlobals.hcSequence field is closer " |
|
1645 "to the struct end than a cache line which permits false " |
|
1646 "sharing."); |
|
1647 (*warning_cnt_ptr)++; |
|
1648 } |
|
1649 } |
|
1650 } |
1600 |
1651 |
1601 #ifndef PRODUCT |
1652 #ifndef PRODUCT |
1602 |
1653 |
1603 // Verify all monitors in the monitor cache, the verification is weak. |
1654 // Verify all monitors in the monitor cache, the verification is weak. |
1604 void ObjectSynchronizer::verify() { |
1655 void ObjectSynchronizer::verify() { |