346 //------------------------------PhaseCFG--------------------------------------- |
346 //------------------------------PhaseCFG--------------------------------------- |
347 // Build an array of Basic Block pointers, one per Node. |
347 // Build an array of Basic Block pointers, one per Node. |
348 class PhaseCFG : public Phase { |
348 class PhaseCFG : public Phase { |
349 friend class VMStructs; |
349 friend class VMStructs; |
350 private: |
350 private: |
|
351 |
|
352 // Root of whole program |
|
353 RootNode* _root; |
|
354 |
|
355 // The block containing the root node |
|
356 Block* _root_block; |
|
357 |
|
358 // List of basic blocks that are created during CFG creation |
|
359 Block_List _blocks; |
|
360 |
|
361 // Count of basic blocks |
|
362 uint _number_of_blocks; |
|
363 |
351 // Arena for the blocks to be stored in |
364 // Arena for the blocks to be stored in |
352 Arena* _block_arena; |
365 Arena* _block_arena; |
353 |
366 |
|
367 // The matcher for this compilation |
|
368 Matcher& _matcher; |
|
369 |
354 // Map nodes to owning basic block |
370 // Map nodes to owning basic block |
355 Block_Array _node_to_block_mapping; |
371 Block_Array _node_to_block_mapping; |
356 |
372 |
|
373 // Loop from the root |
|
374 CFGLoop* _root_loop; |
|
375 |
|
376 // Outmost loop frequency |
|
377 float _outer_loop_frequency; |
|
378 |
|
379 // Per node latency estimation, valid only during GCM |
|
380 GrowableArray<uint>* _node_latency; |
|
381 |
357 // Build a proper looking cfg. Return count of basic blocks |
382 // Build a proper looking cfg. Return count of basic blocks |
358 uint build_cfg(); |
383 uint build_cfg(); |
359 |
384 |
360 // Perform DFS search. |
385 // Build the dominator tree so that we know where we can move instructions |
|
386 void build_dominator_tree(); |
|
387 |
|
388 // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions |
|
389 void estimate_block_frequency(); |
|
390 |
|
391 // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific |
|
392 // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block. |
|
393 // Move nodes to ensure correctness from GVN and also try to move nodes out of loops. |
|
394 void global_code_motion(); |
|
395 |
|
396 // Schedule Nodes early in their basic blocks. |
|
397 bool schedule_early(VectorSet &visited, Node_List &roots); |
|
398 |
|
399 // For each node, find the latest block it can be scheduled into |
|
400 // and then select the cheapest block between the latest and earliest |
|
401 // block to place the node. |
|
402 void schedule_late(VectorSet &visited, Node_List &stack); |
|
403 |
|
404 // Compute the (backwards) latency of a node from a single use |
|
405 int latency_from_use(Node *n, const Node *def, Node *use); |
|
406 |
|
407 // Compute the (backwards) latency of a node from the uses of this instruction |
|
408 void partial_latency_of_defs(Node *n); |
|
409 |
|
410 // Compute the instruction global latency with a backwards walk |
|
411 void compute_latencies_backwards(VectorSet &visited, Node_List &stack); |
|
412 |
|
413 // Pick a block between early and late that is a cheaper alternative |
|
414 // to late. Helper for schedule_late. |
|
415 Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); |
|
416 |
|
417 // Perform a Depth First Search (DFS). |
361 // Setup 'vertex' as DFS to vertex mapping. |
418 // Setup 'vertex' as DFS to vertex mapping. |
362 // Setup 'semi' as vertex to DFS mapping. |
419 // Setup 'semi' as vertex to DFS mapping. |
363 // Set 'parent' to DFS parent. |
420 // Set 'parent' to DFS parent. |
364 uint DFS( Tarjan *tarjan ); |
421 uint do_DFS(Tarjan* tarjan, uint rpo_counter); |
365 |
422 |
366 // Helper function to insert a node into a block |
423 // Helper function to insert a node into a block |
367 void schedule_node_into_block( Node *n, Block *b ); |
424 void schedule_node_into_block( Node *n, Block *b ); |
368 |
425 |
369 void replace_block_proj_ctrl( Node *n ); |
426 void replace_block_proj_ctrl( Node *n ); |
370 |
427 |
371 // Set the basic block for pinned Nodes |
428 // Set the basic block for pinned Nodes |
372 void schedule_pinned_nodes( VectorSet &visited ); |
429 void schedule_pinned_nodes( VectorSet &visited ); |
373 |
430 |
374 // I'll need a few machine-specific GotoNodes. Clone from this one. |
431 // I'll need a few machine-specific GotoNodes. Clone from this one. |
375 MachNode *_goto; |
432 // Used when building the CFG and creating end nodes for blocks. |
|
433 MachNode* _goto; |
376 |
434 |
377 Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false); |
435 Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false); |
378 void verify_anti_dependences(Block* LCA, Node* load) { |
436 void verify_anti_dependences(Block* LCA, Node* load) { |
379 assert(LCA == get_block_for_node(load), "should already be scheduled"); |
437 assert(LCA == get_block_for_node(load), "should already be scheduled"); |
380 insert_anti_dependences(LCA, load, true); |
438 insert_anti_dependences(LCA, load, true); |
381 } |
439 } |
382 |
440 |
383 public: |
|
384 PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher); |
|
385 |
|
386 uint _num_blocks; // Count of basic blocks |
|
387 Block_List _blocks; // List of basic blocks |
|
388 RootNode *_root; // Root of whole program |
|
389 Block *_broot; // Basic block of root |
|
390 uint _rpo_ctr; |
|
391 CFGLoop* _root_loop; |
|
392 float _outer_loop_freq; // Outmost loop frequency |
|
393 |
|
394 |
|
395 // set which block this node should reside in |
|
396 void map_node_to_block(const Node* node, Block* block) { |
|
397 _node_to_block_mapping.map(node->_idx, block); |
|
398 } |
|
399 |
|
400 // removes the mapping from a node to a block |
|
401 void unmap_node_from_block(const Node* node) { |
|
402 _node_to_block_mapping.map(node->_idx, NULL); |
|
403 } |
|
404 |
|
405 // get the block in which this node resides |
|
406 Block* get_block_for_node(const Node* node) const { |
|
407 return _node_to_block_mapping[node->_idx]; |
|
408 } |
|
409 |
|
410 // does this node reside in a block; return true |
|
411 bool has_block(const Node* node) const { |
|
412 return (_node_to_block_mapping.lookup(node->_idx) != NULL); |
|
413 } |
|
414 |
|
415 // Per node latency estimation, valid only during GCM |
|
416 GrowableArray<uint> *_node_latency; |
|
417 |
|
418 #ifndef PRODUCT |
|
419 bool _trace_opto_pipelining; // tracing flag |
|
420 #endif |
|
421 |
|
422 #ifdef ASSERT |
|
423 Unique_Node_List _raw_oops; |
|
424 #endif |
|
425 |
|
426 // Build dominators |
|
427 void Dominators(); |
|
428 |
|
429 // Estimate block frequencies based on IfNode probabilities |
|
430 void Estimate_Block_Frequency(); |
|
431 |
|
432 // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific |
|
433 // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block. |
|
434 void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list ); |
|
435 |
|
436 // Compute the (backwards) latency of a node from the uses |
|
437 void latency_from_uses(Node *n); |
|
438 |
|
439 // Compute the (backwards) latency of a node from a single use |
|
440 int latency_from_use(Node *n, const Node *def, Node *use); |
|
441 |
|
442 // Compute the (backwards) latency of a node from the uses of this instruction |
|
443 void partial_latency_of_defs(Node *n); |
|
444 |
|
445 // Schedule Nodes early in their basic blocks. |
|
446 bool schedule_early(VectorSet &visited, Node_List &roots); |
|
447 |
|
448 // For each node, find the latest block it can be scheduled into |
|
449 // and then select the cheapest block between the latest and earliest |
|
450 // block to place the node. |
|
451 void schedule_late(VectorSet &visited, Node_List &stack); |
|
452 |
|
453 // Pick a block between early and late that is a cheaper alternative |
|
454 // to late. Helper for schedule_late. |
|
455 Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); |
|
456 |
|
457 // Compute the instruction global latency with a backwards walk |
|
458 void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack); |
|
459 |
|
460 // Set loop alignment |
|
461 void set_loop_alignment(); |
|
462 |
|
463 // Remove empty basic blocks |
|
464 void remove_empty(); |
|
465 void fixup_flow(); |
|
466 bool move_to_next(Block* bx, uint b_index); |
441 bool move_to_next(Block* bx, uint b_index); |
467 void move_to_end(Block* bx, uint b_index); |
442 void move_to_end(Block* bx, uint b_index); |
|
443 |
468 void insert_goto_at(uint block_no, uint succ_no); |
444 void insert_goto_at(uint block_no, uint succ_no); |
469 |
445 |
470 // Check for NeverBranch at block end. This needs to become a GOTO to the |
446 // Check for NeverBranch at block end. This needs to become a GOTO to the |
471 // true target. NeverBranch are treated as a conditional branch that always |
447 // true target. NeverBranch are treated as a conditional branch that always |
472 // goes the same direction for most of the optimizer and are used to give a |
448 // goes the same direction for most of the optimizer and are used to give a |
474 // into Goto's so that when you enter the infinite loop you indeed hang. |
450 // into Goto's so that when you enter the infinite loop you indeed hang. |
475 void convert_NeverBranch_to_Goto(Block *b); |
451 void convert_NeverBranch_to_Goto(Block *b); |
476 |
452 |
477 CFGLoop* create_loop_tree(); |
453 CFGLoop* create_loop_tree(); |
478 |
454 |
479 // Insert a node into a block, and update the _bbs |
455 #ifndef PRODUCT |
480 void insert( Block *b, uint idx, Node *n ) { |
456 bool _trace_opto_pipelining; // tracing flag |
|
457 #endif |
|
458 |
|
459 public: |
|
460 PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher); |
|
461 |
|
462 void set_latency_for_node(Node* node, int latency) { |
|
463 _node_latency->at_put_grow(node->_idx, latency); |
|
464 } |
|
465 |
|
466 uint get_latency_for_node(Node* node) { |
|
467 return _node_latency->at_grow(node->_idx); |
|
468 } |
|
469 |
|
470 // Get the outer most frequency |
|
471 float get_outer_loop_frequency() const { |
|
472 return _outer_loop_frequency; |
|
473 } |
|
474 |
|
475 // Get the root node of the CFG |
|
476 RootNode* get_root_node() const { |
|
477 return _root; |
|
478 } |
|
479 |
|
480 // Get the block of the root node |
|
481 Block* get_root_block() const { |
|
482 return _root_block; |
|
483 } |
|
484 |
|
485 // Add a block at a position and moves the later ones one step |
|
486 void add_block_at(uint pos, Block* block) { |
|
487 _blocks.insert(pos, block); |
|
488 _number_of_blocks++; |
|
489 } |
|
490 |
|
491 // Adds a block to the top of the block list |
|
492 void add_block(Block* block) { |
|
493 _blocks.push(block); |
|
494 _number_of_blocks++; |
|
495 } |
|
496 |
|
497 // Clear the list of blocks |
|
498 void clear_blocks() { |
|
499 _blocks.reset(); |
|
500 _number_of_blocks = 0; |
|
501 } |
|
502 |
|
503 // Get the block at position pos in _blocks |
|
504 Block* get_block(uint pos) const { |
|
505 return _blocks[pos]; |
|
506 } |
|
507 |
|
508 // Number of blocks |
|
509 uint number_of_blocks() const { |
|
510 return _number_of_blocks; |
|
511 } |
|
512 |
|
513 // set which block this node should reside in |
|
514 void map_node_to_block(const Node* node, Block* block) { |
|
515 _node_to_block_mapping.map(node->_idx, block); |
|
516 } |
|
517 |
|
518 // removes the mapping from a node to a block |
|
519 void unmap_node_from_block(const Node* node) { |
|
520 _node_to_block_mapping.map(node->_idx, NULL); |
|
521 } |
|
522 |
|
523 // get the block in which this node resides |
|
524 Block* get_block_for_node(const Node* node) const { |
|
525 return _node_to_block_mapping[node->_idx]; |
|
526 } |
|
527 |
|
528 // does this node reside in a block; return true |
|
529 bool has_block(const Node* node) const { |
|
530 return (_node_to_block_mapping.lookup(node->_idx) != NULL); |
|
531 } |
|
532 |
|
533 #ifdef ASSERT |
|
534 Unique_Node_List _raw_oops; |
|
535 #endif |
|
536 |
|
537 // Do global code motion by first building dominator tree and estimate block frequency |
|
538 // Returns true on success |
|
539 bool do_global_code_motion(); |
|
540 |
|
541 // Compute the (backwards) latency of a node from the uses |
|
542 void latency_from_uses(Node *n); |
|
543 |
|
544 // Set loop alignment |
|
545 void set_loop_alignment(); |
|
546 |
|
547 // Remove empty basic blocks |
|
548 void remove_empty_blocks(); |
|
549 void fixup_flow(); |
|
550 |
|
551 // Insert a node into a block at index and map the node to the block |
|
552 void insert(Block *b, uint idx, Node *n) { |
481 b->_nodes.insert( idx, n ); |
553 b->_nodes.insert( idx, n ); |
482 map_node_to_block(n, b); |
554 map_node_to_block(n, b); |
483 } |
555 } |
484 |
556 |
485 #ifndef PRODUCT |
557 #ifndef PRODUCT |