src/hotspot/share/opto/macro.cpp
changeset 51806 1ecc914fb707
parent 51078 fc6cfe40e32a
child 51826 e777e997e7c1
comparison of parent 51805:eb2adb0a9b09 with 51806:1ecc914fb707
@@ -1305 +1305 @@
     // Fast path modifies only raw memory.
     if (mem->is_MergeMem()) {
       mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
     }
 
-    Node* eden_top_adr;
-    Node* eden_end_adr;
-
-    set_eden_pointers(eden_top_adr, eden_end_adr);
-
-    // Load Eden::end.  Loop invariant and hoisted.
-    //
-    // Note: We set the control input on "eden_end" and "old_eden_top" when using
-    //       a TLAB to work around a bug where these values were being moved across
-    //       a safepoint.  These are not oops, so they cannot be include in the oop
-    //       map, but they can be changed by a GC.   The proper way to fix this would
-    //       be to set the raw memory state when generating a  SafepointNode.  However
-    //       this will require extensive changes to the loop optimization in order to
-    //       prevent a degradation of the optimization.
-    //       See comment in memnode.hpp, around line 227 in class LoadPNode.
-    Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
-
     // allocate the Region and Phi nodes for the result
     result_region = new RegionNode(3);
     result_phi_rawmem = new PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
     result_phi_rawoop = new PhiNode(result_region, TypeRawPtr::BOTTOM);
     result_phi_i_o    = new PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
 
-    // We need a Region for the loop-back contended case.
-    enum { fall_in_path = 1, contended_loopback_path = 2 };
-    Node *contended_region;
-    Node *contended_phi_rawmem;
-    if (UseTLAB) {
-      contended_region = toobig_false;
-      contended_phi_rawmem = mem;
-    } else {
-      contended_region = new RegionNode(3);
-      contended_phi_rawmem = new PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
-      // Now handle the passing-too-big test.  We fall into the contended
-      // loop-back merge point.
-      contended_region    ->init_req(fall_in_path, toobig_false);
-      contended_phi_rawmem->init_req(fall_in_path, mem);
-      transform_later(contended_region);
-      transform_later(contended_phi_rawmem);
-    }
-
-    // Load(-locked) the heap top.
-    // See note above concerning the control input when using a TLAB
-    Node *old_eden_top = UseTLAB
-      ? new LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered)
-      : new LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, MemNode::acquire);
-
-    transform_later(old_eden_top);
-    // Add to heap top to get a new heap top
-    Node *new_eden_top = new AddPNode(top(), old_eden_top, size_in_bytes);
-    transform_later(new_eden_top);
-    // Check for needing a GC; compare against heap end
-    Node *needgc_cmp = new CmpPNode(new_eden_top, eden_end);
-    transform_later(needgc_cmp);
-    Node *needgc_bol = new BoolNode(needgc_cmp, BoolTest::ge);
-    transform_later(needgc_bol);
-    IfNode *needgc_iff = new IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
-    transform_later(needgc_iff);
-
-    // Plug the failing-heap-space-need-gc test into the slow-path region
-    Node *needgc_true = new IfTrueNode(needgc_iff);
-    transform_later(needgc_true);
+    // Grab regular I/O before optional prefetch may change it.
+    // Slow-path does no I/O so just set it to the original I/O.
+    result_phi_i_o->init_req(slow_result_path, i_o);
+
+    Node* needgc_ctrl = NULL;
+    // Name successful fast-path variables
+    Node* fast_oop_ctrl;
+    Node* fast_oop_rawmem;
+
+    intx prefetch_lines = length != NULL ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
+
+    BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+    Node* fast_oop = bs->obj_allocate(this, ctrl, mem, toobig_false, size_in_bytes, i_o, needgc_ctrl,
+                                      fast_oop_ctrl, fast_oop_rawmem,
+                                      prefetch_lines);
+
     if (initial_slow_test) {
-      slow_region->init_req(need_gc_path, needgc_true);
+      slow_region->init_req(need_gc_path, needgc_ctrl);
       // This completes all paths into the slow merge point
       transform_later(slow_region);
     } else {                      // No initial slow path needed!
       // Just fall from the need-GC path straight into the VM call.
-      slow_region = needgc_true;
+      slow_region = needgc_ctrl;
-    }
-    // No need for a GC.  Setup for the Store-Conditional
-    Node *needgc_false = new IfFalseNode(needgc_iff);
-    transform_later(needgc_false);
-
-    // Grab regular I/O before optional prefetch may change it.
-    // Slow-path does no I/O so just set it to the original I/O.
-    result_phi_i_o->init_req(slow_result_path, i_o);
-
-    i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
-                              old_eden_top, new_eden_top, length);
-
-    // Name successful fast-path variables
-    Node* fast_oop = old_eden_top;
-    Node* fast_oop_ctrl;
-    Node* fast_oop_rawmem;
-
-    // Store (-conditional) the modified eden top back down.
-    // StorePConditional produces flags for a test PLUS a modified raw
-    // memory state.
-    if (UseTLAB) {
-      Node* store_eden_top =
-        new StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                              TypeRawPtr::BOTTOM, new_eden_top, MemNode::unordered);
-      transform_later(store_eden_top);
-      fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
-      fast_oop_rawmem = store_eden_top;
-    } else {
-      Node* store_eden_top =
-        new StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                                         new_eden_top, fast_oop/*old_eden_top*/);
-      transform_later(store_eden_top);
-      Node *contention_check = new BoolNode(store_eden_top, BoolTest::ne);
-      transform_later(contention_check);
-      store_eden_top = new SCMemProjNode(store_eden_top);
-      transform_later(store_eden_top);
-
-      // If not using TLABs, check to see if there was contention.
-      IfNode *contention_iff = new IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
-      transform_later(contention_iff);
-      Node *contention_true = new IfTrueNode(contention_iff);
-      transform_later(contention_true);
-      // If contention, loopback and try again.
-      contended_region->init_req(contended_loopback_path, contention_true);
-      contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
-
-      // Fast-path succeeded with no contention!
-      Node *contention_false = new IfFalseNode(contention_iff);
-      transform_later(contention_false);
-      fast_oop_ctrl = contention_false;
-
-      // Bump total allocated bytes for this thread
-      Node* thread = new ThreadLocalNode();
-      transform_later(thread);
-      Node* alloc_bytes_adr = basic_plus_adr(top()/*not oop*/, thread,
-                                             in_bytes(JavaThread::allocated_bytes_offset()));
-      Node* alloc_bytes = make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
-                                    0, TypeLong::LONG, T_LONG);
-#ifdef _LP64
-      Node* alloc_size = size_in_bytes;
-#else
-      Node* alloc_size = new ConvI2LNode(size_in_bytes);
-      transform_later(alloc_size);
-#endif
-      Node* new_alloc_bytes = new AddLNode(alloc_bytes, alloc_size);
-      transform_later(new_alloc_bytes);
-      fast_oop_rawmem = make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
-                                   0, new_alloc_bytes, T_LONG);
     }
 
     InitializeNode* init = alloc->initialization();
     fast_oop_rawmem = initialize_object(alloc,
                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
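
Note: the hunk above deletes the hand-rolled allocation fast path (eden pointer loads, the contended loop-back region, the heap-top compare and store(-conditional), and the per-thread allocated-bytes accounting) and replaces it with a single call through the GC interface. The sketch below shows the shape that the call site implies for the new hook; the parameter names and by-reference output roles are inferred from the call and from how needgc_ctrl, fast_oop_ctrl and fast_oop_rawmem are consumed afterwards, not copied from the actual BarrierSetC2 header, which may differ.

    // Sketch only, inferred from the call site above; not the real declaration.
    // Reference parameters behave as outputs: needgc_ctrl feeds slow_region,
    // fast_oop_ctrl/fast_oop_rawmem feed initialize_object(), and i_o picks up
    // any prefetch I/O the hook emits.  The return value is the raw oop of the
    // newly allocated object (fast_oop above).
    class BarrierSetC2 /* relevant fragment only */ {
    public:
      virtual Node* obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem,
                                 Node* toobig_false, Node* size_in_bytes,
                                 Node*& i_o, Node*& needgc_ctrl,
                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
                                 intx prefetch_lines);
    };

Routing the expansion through the barrier set presumably lets each collector's C2 support supply its own allocation IR rather than sharing the single bump-pointer fast path hard-wired here.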
@@ -1772 +1664 @@
 
 // Generate prefetch instructions for next allocations.
 Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
                                         Node*& contended_phi_rawmem,
                                         Node* old_eden_top, Node* new_eden_top,
-                                        Node* length) {
+                                        intx lines) {
    enum { fall_in_path = 1, pf_path = 2 };
    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
       // Generate prefetch allocation with watermark check.
       // As an allocation hits the watermark, we will prefetch starting
       // at a "distance" away from watermark.
@@ -1830 +1722 @@
       // adding prefetches
       pf_phi_abio->init_req( fall_in_path, i_o );
 
       Node *prefetch_adr;
       Node *prefetch;
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = 0;
 
       for ( uint i = 0; i < lines; i++ ) {
         prefetch_adr = new AddPNode( old_pf_wm, new_pf_wmt,
@@ -1863 +1754 @@
    } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
       // Insert a prefetch instruction for each allocation.
       // This code is used to generate 1 prefetch instruction per cache line.
 
       // Generate several prefetch instructions.
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = AllocatePrefetchDistance;
 
       // Next cache address.
       Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
@@ -1902 +1792 @@
    } else if( AllocatePrefetchStyle > 0 ) {
       // Insert a prefetch for each allocation only on the fast-path
       Node *prefetch_adr;
       Node *prefetch;
       // Generate several prefetch instructions.
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = AllocatePrefetchDistance;
       for ( uint i = 0; i < lines; i++ ) {
         prefetch_adr = new AddPNode( old_eden_top, new_eden_top,
                                             _igvn.MakeConX(distance) );
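
Note: all three prefetch styles above used to re-derive the prefetch line count from length inside prefetch_allocation; with the new intx lines parameter the count now arrives precomputed (see the prefetch_lines computation in the first hunk). Below is a minimal sketch of the adjusted call, reusing the variable names of the deleted call site purely for illustration; the actual new caller sits behind the GC interface and its surrounding names may differ.

    // Sketch, not the actual new call site: the caller picks the line count once
    // (arrays vs. instances) and passes it down instead of passing 'length'.
    intx prefetch_lines = length != NULL ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
    i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
                              old_eden_top, new_eden_top, prefetch_lines);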