8026796: Make replace_in_map() on parent maps generic
authorroland
Tue, 10 Jun 2014 13:37:16 +0200
changeset 24946 24b68ccf3fc4
parent 24945 6df48e563632
child 24947 65444bb18da8
child 24948 bc074f400757
child 24950 002a4742a989
8026796: Make replace_in_map() on parent maps generic Summary: propagate node replacements along control flow edges to callers Reviewed-by: kvn, vlivanov
hotspot/src/share/vm/opto/c2_globals.hpp
hotspot/src/share/vm/opto/callGenerator.cpp
hotspot/src/share/vm/opto/callGenerator.hpp
hotspot/src/share/vm/opto/callnode.cpp
hotspot/src/share/vm/opto/callnode.hpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/compile.hpp
hotspot/src/share/vm/opto/doCall.cpp
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/graphKit.hpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/node.cpp
hotspot/src/share/vm/opto/parse.hpp
hotspot/src/share/vm/opto/parse1.cpp
hotspot/src/share/vm/opto/replacednodes.cpp
hotspot/src/share/vm/opto/replacednodes.hpp
hotspot/src/share/vm/runtime/arguments.cpp
hotspot/src/share/vm/utilities/growableArray.hpp
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -650,9 +650,6 @@
   product(bool, UseMathExactIntrinsics, true,                               \
           "Enables intrinsification of various java.lang.Math functions")   \
                                                                             \
-  experimental(bool, ReplaceInParentMaps, false,                            \
-          "Propagate type improvements in callers of inlinee if possible")  \
-                                                                            \
   product(bool, UseTypeSpeculation, true,                                   \
           "Speculatively propagate types from profiles")                    \
                                                                             \
--- a/hotspot/src/share/vm/opto/callGenerator.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -63,12 +63,12 @@
   }
 
   virtual bool      is_parse() const           { return true; }
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
   int is_osr() { return _is_osr; }
 
 };
 
-JVMState* ParseGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* ParseGenerator::generate(JVMState* jvms) {
   Compile* C = Compile::current();
   C->print_inlining_update(this);
 
@@ -81,7 +81,7 @@
     return NULL;  // bailing out of the compile; do not try to parse
   }
 
-  Parse parser(jvms, method(), _expected_uses, parent_parser);
+  Parse parser(jvms, method(), _expected_uses);
   // Grab signature for matching/allocation
 #ifdef ASSERT
   if (parser.tf() != (parser.depth() == 1 ? C->tf() : tf())) {
@@ -120,12 +120,12 @@
       _separate_io_proj(separate_io_proj)
   {
   }
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 
   CallStaticJavaNode* call_node() const { return _call_node; }
 };
 
-JVMState* DirectCallGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* DirectCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   kit.C->print_inlining_update(this);
   bool is_static = method()->is_static();
@@ -173,10 +173,10 @@
            vtable_index >= 0, "either invalid or usable");
   }
   virtual bool      is_virtual() const          { return true; }
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 };
 
-JVMState* VirtualCallGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* VirtualCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   Node* receiver = kit.argument(0);
 
@@ -283,7 +283,7 @@
   // Convert the CallStaticJava into an inline
   virtual void do_late_inline();
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
+  virtual JVMState* generate(JVMState* jvms) {
     Compile *C = Compile::current();
 
     C->log_inline_id(this);
@@ -298,7 +298,7 @@
     // that the late inlining logic can distinguish between fall
     // through and exceptional uses of the memory and io projections
     // as is done for allocations and macro expansion.
-    return DirectCallGenerator::generate(jvms, parent_parser);
+    return DirectCallGenerator::generate(jvms);
   }
 
   virtual void print_inlining_late(const char* msg) {
@@ -399,7 +399,7 @@
   }
 
   // Now perform the inlining using the synthesized JVMState
-  JVMState* new_jvms = _inline_cg->generate(jvms, NULL);
+  JVMState* new_jvms = _inline_cg->generate(jvms);
   if (new_jvms == NULL)  return;  // no change
   if (C->failing())      return;
 
@@ -417,7 +417,7 @@
   C->env()->notice_inlined_method(_inline_cg->method());
   C->set_inlining_progress(true);
 
-  kit.replace_call(call, result);
+  kit.replace_call(call, result, true);
 }
 
 
@@ -439,8 +439,8 @@
 
   virtual bool is_mh_late_inline() const { return true; }
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
-    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms, parent_parser);
+  virtual JVMState* generate(JVMState* jvms) {
+    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
 
     Compile* C = Compile::current();
     if (_input_not_const) {
@@ -486,14 +486,14 @@
   LateInlineStringCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
     LateInlineCallGenerator(method, inline_cg) {}
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
+  virtual JVMState* generate(JVMState* jvms) {
     Compile *C = Compile::current();
 
     C->log_inline_id(this);
 
     C->add_string_late_inline(this);
 
-    JVMState* new_jvms =  DirectCallGenerator::generate(jvms, parent_parser);
+    JVMState* new_jvms =  DirectCallGenerator::generate(jvms);
     return new_jvms;
   }
 
@@ -510,14 +510,14 @@
   LateInlineBoxingCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
     LateInlineCallGenerator(method, inline_cg) {}
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) {
+  virtual JVMState* generate(JVMState* jvms) {
     Compile *C = Compile::current();
 
     C->log_inline_id(this);
 
     C->add_boxing_late_inline(this);
 
-    JVMState* new_jvms =  DirectCallGenerator::generate(jvms, parent_parser);
+    JVMState* new_jvms =  DirectCallGenerator::generate(jvms);
     return new_jvms;
   }
 };
@@ -553,7 +553,7 @@
   virtual bool      is_virtual() const          { return _is_virtual; }
   virtual bool      is_deferred() const         { return true; }
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 };
 
 
@@ -563,14 +563,14 @@
   return new WarmCallGenerator(ci, if_cold, if_hot);
 }
 
-JVMState* WarmCallGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* WarmCallGenerator::generate(JVMState* jvms) {
   Compile* C = Compile::current();
   C->print_inlining_update(this);
 
   if (C->log() != NULL) {
     C->log()->elem("warm_call bci='%d'", jvms->bci());
   }
-  jvms = _if_cold->generate(jvms, parent_parser);
+  jvms = _if_cold->generate(jvms);
   if (jvms != NULL) {
     Node* m = jvms->map()->control();
     if (m->is_CatchProj()) m = m->in(0);  else m = C->top();
@@ -631,7 +631,7 @@
   virtual bool      is_inline()    const    { return _if_hit->is_inline(); }
   virtual bool      is_deferred()  const    { return _if_hit->is_deferred(); }
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 };
 
 
@@ -643,14 +643,13 @@
 }
 
 
-JVMState* PredictedCallGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* PredictedCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   kit.C->print_inlining_update(this);
   PhaseGVN& gvn = kit.gvn();
   // We need an explicit receiver null_check before checking its type.
   // We share a map with the caller, so his JVMS gets adjusted.
   Node* receiver = kit.argument(0);
-
   CompileLog* log = kit.C->log();
   if (log != NULL) {
     log->elem("predicted_call bci='%d' klass='%d'",
@@ -662,6 +661,10 @@
     return kit.transfer_exceptions_into_jvms();
   }
 
+  // Make a copy of the replaced nodes in case we need to restore them
+  ReplacedNodes replaced_nodes = kit.map()->replaced_nodes();
+  replaced_nodes.clone();
+
   Node* exact_receiver = receiver;  // will get updated in place...
   Node* slow_ctl = kit.type_check_receiver(receiver,
                                            _predicted_receiver, _hit_prob,
@@ -672,7 +675,7 @@
   { PreserveJVMState pjvms(&kit);
     kit.set_control(slow_ctl);
     if (!kit.stopped()) {
-      slow_jvms = _if_missed->generate(kit.sync_jvms(), parent_parser);
+      slow_jvms = _if_missed->generate(kit.sync_jvms());
       if (kit.failing())
         return NULL;  // might happen because of NodeCountInliningCutoff
       assert(slow_jvms != NULL, "must be");
@@ -693,12 +696,12 @@
   kit.replace_in_map(receiver, exact_receiver);
 
   // Make the hot call:
-  JVMState* new_jvms = _if_hit->generate(kit.sync_jvms(), parent_parser);
+  JVMState* new_jvms = _if_hit->generate(kit.sync_jvms());
   if (new_jvms == NULL) {
     // Inline failed, so make a direct call.
     assert(_if_hit->is_inline(), "must have been a failed inline");
     CallGenerator* cg = CallGenerator::for_direct_call(_if_hit->method());
-    new_jvms = cg->generate(kit.sync_jvms(), parent_parser);
+    new_jvms = cg->generate(kit.sync_jvms());
   }
   kit.add_exception_states_from(new_jvms);
   kit.set_jvms(new_jvms);
@@ -715,6 +718,11 @@
     return kit.transfer_exceptions_into_jvms();
   }
 
+  // There are 2 branches and the replaced nodes are only valid on
+  // one: restore the replaced nodes to what they were before the
+  // branch.
+  kit.map()->set_replaced_nodes(replaced_nodes);
+
   // Finish the diamond.
   kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
   RegionNode* region = new RegionNode(3);
@@ -901,7 +909,7 @@
   virtual bool      is_inlined()   const    { return true; }
   virtual bool      is_intrinsic() const    { return true; }
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 };
 
 
@@ -911,7 +919,7 @@
 }
 
 
-JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
 
@@ -932,7 +940,7 @@
     PreserveJVMState pjvms(&kit);
     kit.set_control(slow_ctl);
     if (!kit.stopped()) {
-      slow_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
+      slow_jvms = _cg->generate(kit.sync_jvms());
       if (kit.failing())
         return NULL;  // might happen because of NodeCountInliningCutoff
       assert(slow_jvms != NULL, "must be");
@@ -950,12 +958,12 @@
   }
 
   // Generate intrinsic code:
-  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
+  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
   if (new_jvms == NULL) {
     // Intrinsic failed, so use slow code or make a direct call.
     if (slow_map == NULL) {
       CallGenerator* cg = CallGenerator::for_direct_call(method());
-      new_jvms = cg->generate(kit.sync_jvms(), parent_parser);
+      new_jvms = cg->generate(kit.sync_jvms());
     } else {
       kit.set_jvms(slow_jvms);
       return kit.transfer_exceptions_into_jvms();
@@ -1025,7 +1033,7 @@
   virtual bool      is_virtual() const          { ShouldNotReachHere(); return false; }
   virtual bool      is_trap() const             { return true; }
 
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
 };
 
 
@@ -1037,7 +1045,7 @@
 }
 
 
-JVMState* UncommonTrapCallGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* UncommonTrapCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
   kit.C->print_inlining_update(this);
   // Take the trap with arguments pushed on the stack.  (Cf. null_check_receiver).
--- a/hotspot/src/share/vm/opto/callGenerator.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/callGenerator.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -31,8 +31,6 @@
 #include "opto/type.hpp"
 #include "runtime/deoptimization.hpp"
 
-class Parse;
-
 //---------------------------CallGenerator-------------------------------------
 // The subclasses of this class handle generation of ideal nodes for
 // call sites and method entry points.
@@ -114,7 +112,7 @@
   //
   // If the result is NULL, it means that this CallGenerator was unable
   // to handle the given call, and another CallGenerator should be consulted.
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser) = 0;
+  virtual JVMState* generate(JVMState* jvms) = 0;
 
   // How to generate a call site that is inlined:
   static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
--- a/hotspot/src/share/vm/opto/callnode.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/callnode.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -1090,6 +1090,7 @@
 #ifndef PRODUCT
 void SafePointNode::dump_spec(outputStream *st) const {
   st->print(" SafePoint ");
+  _replaced_nodes.dump(st);
 }
 #endif
 
--- a/hotspot/src/share/vm/opto/callnode.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/callnode.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -30,6 +30,7 @@
 #include "opto/multnode.hpp"
 #include "opto/opcodes.hpp"
 #include "opto/phaseX.hpp"
+#include "opto/replacednodes.hpp"
 #include "opto/type.hpp"
 
 // Portions of code courtesy of Clifford Click
@@ -335,6 +336,7 @@
   OopMap*         _oop_map;   // Array of OopMap info (8-bit char) for GC
   JVMState* const _jvms;      // Pointer to list of JVM State objects
   const TypePtr*  _adr_type;  // What type of memory does this node produce?
+  ReplacedNodes   _replaced_nodes; // During parsing: list of pair of nodes from calls to GraphKit::replace_in_map()
 
   // Many calls take *all* of memory as input,
   // but some produce a limited subset of that memory as output.
@@ -426,6 +428,37 @@
   void               set_next_exception(SafePointNode* n);
   bool                   has_exceptions() const { return next_exception() != NULL; }
 
+  // Helper methods to operate on replaced nodes
+  ReplacedNodes replaced_nodes() const {
+    return _replaced_nodes;
+  }
+
+  void set_replaced_nodes(ReplacedNodes replaced_nodes) {
+    _replaced_nodes = replaced_nodes;
+  }
+
+  void clone_replaced_nodes() {
+    _replaced_nodes.clone();
+  }
+  void record_replaced_node(Node* initial, Node* improved) {
+    _replaced_nodes.record(initial, improved);
+  }
+  void transfer_replaced_nodes_from(SafePointNode* sfpt, uint idx = 0) {
+    _replaced_nodes.transfer_from(sfpt->_replaced_nodes, idx);
+  }
+  void delete_replaced_nodes() {
+    _replaced_nodes.reset();
+  }
+  void apply_replaced_nodes() {
+    _replaced_nodes.apply(this);
+  }
+  void merge_replaced_nodes_with(SafePointNode* sfpt) {
+    _replaced_nodes.merge_with(sfpt->_replaced_nodes);
+  }
+  bool has_replaced_nodes() const {
+    return !_replaced_nodes.is_empty();
+  }
+
   // Standard Node stuff
   virtual int            Opcode() const;
   virtual bool           pinned() const { return true; }
--- a/hotspot/src/share/vm/opto/compile.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/compile.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -392,6 +392,11 @@
   uint next = 0;
   while (next < useful.size()) {
     Node *n = useful.at(next++);
+    if (n->is_SafePoint()) {
+      // We're done with a parsing phase. Replaced nodes are not valid
+      // beyond that point.
+      n->as_SafePoint()->delete_replaced_nodes();
+    }
     // Use raw traversal of out edges since this code removes out edges
     int max = n->outcnt();
     for (int j = 0; j < max; ++j) {
@@ -673,7 +678,6 @@
                   _print_inlining_stream(NULL),
                   _print_inlining_idx(0),
                   _print_inlining_output(NULL),
-                  _preserve_jvm_state(0),
                   _interpreter_frame_size(0) {
   C = this;
 
@@ -783,7 +787,7 @@
       return;
     }
     JVMState* jvms = build_start_state(start(), tf());
-    if ((jvms = cg->generate(jvms, NULL)) == NULL) {
+    if ((jvms = cg->generate(jvms)) == NULL) {
       record_method_not_compilable("method parse failed");
       return;
     }
@@ -980,7 +984,6 @@
     _print_inlining_stream(NULL),
     _print_inlining_idx(0),
     _print_inlining_output(NULL),
-    _preserve_jvm_state(0),
     _allowed_reasons(0),
     _interpreter_frame_size(0) {
   C = this;
@@ -1914,6 +1917,8 @@
     for_igvn()->clear();
     gvn->replace_with(&igvn);
 
+    _late_inlines_pos = _late_inlines.length();
+
     while (_boxing_late_inlines.length() > 0) {
       CallGenerator* cg = _boxing_late_inlines.pop();
       cg->do_late_inline();
@@ -1977,8 +1982,8 @@
     if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
       if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
         // PhaseIdealLoop is expensive so we only try it once we are
-        // out of loop and we only try it again if the previous helped
-        // got the number of nodes down significantly
+        // out of live nodes and we only try it again if the previous
+        // helped got the number of nodes down significantly
         PhaseIdealLoop ideal_loop( igvn, false, true );
         if (failing())  return;
         low_live_nodes = live_nodes();
@@ -2072,6 +2077,10 @@
     // Inline valueOf() methods now.
     inline_boxing_calls(igvn);
 
+    if (AlwaysIncrementalInline) {
+      inline_incrementally(igvn);
+    }
+
     print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);
 
     if (failing())  return;
--- a/hotspot/src/share/vm/opto/compile.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/compile.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -431,9 +431,6 @@
   // Remove the speculative part of types and clean up the graph
   void remove_speculative_types(PhaseIterGVN &igvn);
 
-  // Are we within a PreserveJVMState block?
-  int _preserve_jvm_state;
-
   void* _replay_inline_data; // Pointer to data loaded from file
 
   void print_inlining_init();
@@ -1198,21 +1195,6 @@
 
   // Auxiliary method for randomized fuzzing/stressing
   static bool randomized_select(int count);
-
-  // enter a PreserveJVMState block
-  void inc_preserve_jvm_state() {
-    _preserve_jvm_state++;
-  }
-
-  // exit a PreserveJVMState block
-  void dec_preserve_jvm_state() {
-    _preserve_jvm_state--;
-    assert(_preserve_jvm_state >= 0, "_preserve_jvm_state shouldn't be negative");
-  }
-
-  bool has_preserve_jvm_state() const {
-    return _preserve_jvm_state > 0;
-  }
 };
 
 #endif // SHARE_VM_OPTO_COMPILE_HPP
--- a/hotspot/src/share/vm/opto/doCall.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -525,7 +525,7 @@
   // because exceptions don't return to the call site.)
   profile_call(receiver);
 
-  JVMState* new_jvms = cg->generate(jvms, this);
+  JVMState* new_jvms = cg->generate(jvms);
   if (new_jvms == NULL) {
     // When inlining attempt fails (e.g., too many arguments),
     // it may contaminate the current compile state, making it
@@ -539,7 +539,7 @@
     // intrinsic was expecting to optimize. Should always be possible to
     // get a normal java call that may inline in that case
     cg = C->call_generator(cg->method(), vtable_index, call_does_dispatch, jvms, try_inline, prof_factor(), speculative_receiver_type, /* allow_intrinsics= */ false);
-    new_jvms = cg->generate(jvms, this);
+    new_jvms = cg->generate(jvms);
     if (new_jvms == NULL) {
       guarantee(failing(), "call failed to generate:  calls should work");
       return;
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -432,6 +432,7 @@
       }
     }
   }
+  phi_map->merge_replaced_nodes_with(ex_map);
 }
 
 //--------------------------use_exception_state--------------------------------
@@ -645,7 +646,6 @@
   _map    = kit->map();   // preserve the map
   _sp     = kit->sp();
   kit->set_map(clone_map ? kit->clone_map() : NULL);
-  Compile::current()->inc_preserve_jvm_state();
 #ifdef ASSERT
   _bci    = kit->bci();
   Parse* parser = kit->is_Parse();
@@ -663,7 +663,6 @@
 #endif
   kit->set_map(_map);
   kit->set_sp(_sp);
-  Compile::current()->dec_preserve_jvm_state();
 }
 
 
@@ -1403,60 +1402,17 @@
   // on the map.  This includes locals, stack, and monitors
   // of the current (innermost) JVM state.
 
-  if (!ReplaceInParentMaps) {
-    return;
-  }
-
-  // PreserveJVMState doesn't do a deep copy so we can't modify
-  // parents
-  if (Compile::current()->has_preserve_jvm_state()) {
+  // don't let inconsistent types from profiling escape this
+  // method
+
+  const Type* told = _gvn.type(old);
+  const Type* tnew = _gvn.type(neww);
+
+  if (!tnew->higher_equal(told)) {
     return;
   }
 
-  Parse* parser = is_Parse();
-  bool progress = true;
-  Node* ctrl = map()->in(0);
-  // Follow the chain of parsers and see whether the update can be
-  // done in the map of callers. We can do the replace for a caller if
-  // the current control post dominates the control of a caller.
-  while (parser != NULL && parser->caller() != NULL && progress) {
-    progress = false;
-    Node* parent_map = parser->caller()->map();
-    assert(parser->exits().map()->jvms()->depth() == parser->caller()->depth(), "map mismatch");
-
-    Node* parent_ctrl = parent_map->in(0);
-
-    while (parent_ctrl->is_Region()) {
-      Node* n = parent_ctrl->as_Region()->is_copy();
-      if (n == NULL) {
-        break;
-      }
-      parent_ctrl = n;
-    }
-
-    for (;;) {
-      if (ctrl == parent_ctrl) {
-        // update the map of the exits which is the one that will be
-        // used when compilation resume after inlining
-        parser->exits().map()->replace_edge(old, neww);
-        progress = true;
-        break;
-      }
-      if (ctrl->is_Proj() && ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) {
-        ctrl = ctrl->in(0)->in(0);
-      } else if (ctrl->is_Region()) {
-        Node* n = ctrl->as_Region()->is_copy();
-        if (n == NULL) {
-          break;
-        }
-        ctrl = n;
-      } else {
-        break;
-      }
-    }
-
-    parser = parser->parent_parser();
-  }
+  map()->record_replaced_node(old, neww);
 }
 
 
@@ -1864,12 +1820,16 @@
 
 
 // Replace the call with the current state of the kit.
-void GraphKit::replace_call(CallNode* call, Node* result) {
+void GraphKit::replace_call(CallNode* call, Node* result, bool do_replaced_nodes) {
   JVMState* ejvms = NULL;
   if (has_exceptions()) {
     ejvms = transfer_exceptions_into_jvms();
   }
 
+  ReplacedNodes replaced_nodes = map()->replaced_nodes();
+  ReplacedNodes replaced_nodes_exception;
+  Node* ex_ctl = top();
+
   SafePointNode* final_state = stop();
 
   // Find all the needed outputs of this call
@@ -1886,6 +1846,10 @@
     C->gvn_replace_by(callprojs.fallthrough_catchproj, final_ctl);
   }
   if (callprojs.fallthrough_memproj != NULL) {
+    if (final_mem->is_MergeMem()) {
+      // Parser's exits MergeMem was not transformed but may be optimized
+      final_mem = _gvn.transform(final_mem);
+    }
     C->gvn_replace_by(callprojs.fallthrough_memproj,   final_mem);
   }
   if (callprojs.fallthrough_ioproj != NULL) {
@@ -1917,10 +1881,13 @@
 
     // Load my combined exception state into the kit, with all phis transformed:
     SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states();
+    replaced_nodes_exception = ex_map->replaced_nodes();
 
     Node* ex_oop = ekit.use_exception_state(ex_map);
+
     if (callprojs.catchall_catchproj != NULL) {
       C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
+      ex_ctl = ekit.control();
     }
     if (callprojs.catchall_memproj != NULL) {
       C->gvn_replace_by(callprojs.catchall_memproj,   ekit.reset_memory());
@@ -1953,6 +1920,13 @@
       _gvn.transform(wl.pop());
     }
   }
+
+  if (callprojs.fallthrough_catchproj != NULL && !final_ctl->is_top() && do_replaced_nodes) {
+    replaced_nodes.apply(C, final_ctl);
+  }
+  if (!ex_ctl->is_top() && do_replaced_nodes) {
+    replaced_nodes_exception.apply(C, ex_ctl);
+  }
 }
 
 
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -690,7 +690,7 @@
   // Replace the call with the current state of the kit.  Requires
   // that the call was generated with separate io_projs so that
   // exceptional control flow can be handled properly.
-  void replace_call(CallNode* call, Node* result);
+  void replace_call(CallNode* call, Node* result, bool do_replaced_nodes = false);
 
   // helper functions for statistics
   void increment_counter(address counter_addr);   // increment a debug counter
--- a/hotspot/src/share/vm/opto/library_call.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -69,7 +69,7 @@
   virtual bool is_virtual()   const { return _is_virtual; }
   virtual bool is_predicted()   const { return _is_predicted; }
   virtual bool does_virtual_dispatch()   const { return _does_virtual_dispatch; }
-  virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
+  virtual JVMState* generate(JVMState* jvms);
   virtual Node* generate_predicate(JVMState* jvms);
   vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
 };
@@ -591,7 +591,7 @@
   // Nothing to do here.
 }
 
-JVMState* LibraryIntrinsic::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
   LibraryCallKit kit(jvms, this);
   Compile* C = kit.C;
   int nodes = C->unique();
--- a/hotspot/src/share/vm/opto/node.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/node.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -514,6 +514,9 @@
   if (n->is_Call()) {
     n->as_Call()->clone_jvms(C);
   }
+  if (n->is_SafePoint()) {
+    n->as_SafePoint()->clone_replaced_nodes();
+  }
   return n;                     // Return the clone
 }
 
@@ -609,6 +612,9 @@
   if (is_expensive()) {
     compile->remove_expensive_node(this);
   }
+  if (is_SafePoint()) {
+    as_SafePoint()->delete_replaced_nodes();
+  }
 #ifdef ASSERT
   // We will not actually delete the storage, but we'll make the node unusable.
   *(address*)this = badAddress;  // smash the C++ vtbl, probably
--- a/hotspot/src/share/vm/opto/parse.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/parse.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -359,12 +359,13 @@
   int _est_switch_depth;        // Debugging SwitchRanges.
 #endif
 
-  // parser for the caller of the method of this object
-  Parse* const _parent;
+  bool         _first_return;                  // true if return is the first to be parsed
+  bool         _replaced_nodes_for_exceptions; // needs processing of replaced nodes in exception paths?
+  uint         _new_idx;                       // any node with _idx above were new during this parsing. Used to trim the replaced nodes list.
 
  public:
   // Constructor
-  Parse(JVMState* caller, ciMethod* parse_method, float expected_uses, Parse* parent);
+  Parse(JVMState* caller, ciMethod* parse_method, float expected_uses);
 
   virtual Parse* is_Parse() const { return (Parse*)this; }
 
@@ -425,8 +426,6 @@
     return block()->successor_for_bci(bci);
   }
 
-  Parse* parent_parser() const { return _parent; }
-
  private:
   // Create a JVMS & map for the initial state of this method.
   SafePointNode* create_entry_map();
--- a/hotspot/src/share/vm/opto/parse1.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -383,8 +383,8 @@
 
 //------------------------------Parse------------------------------------------
 // Main parser constructor.
-Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses, Parse* parent)
-  : _exits(caller), _parent(parent)
+Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
+  : _exits(caller)
 {
   // Init some variables
   _caller = caller;
@@ -399,6 +399,9 @@
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
   _block = NULL;
+  _first_return = true;
+  _replaced_nodes_for_exceptions = false;
+  _new_idx = C->unique();
   debug_only(_block_count = -1);
   debug_only(_blocks = (Block*)-1);
 #ifndef PRODUCT
@@ -901,6 +904,10 @@
   for (uint i = 0; i < TypeFunc::Parms; i++) {
     caller.map()->set_req(i, ex_map->in(i));
   }
+  if (ex_map->has_replaced_nodes()) {
+    _replaced_nodes_for_exceptions = true;
+  }
+  caller.map()->transfer_replaced_nodes_from(ex_map, _new_idx);
   // ...and the exception:
   Node*          ex_oop        = saved_ex_oop(ex_map);
   SafePointNode* caller_ex_map = caller.make_exception_state(ex_oop);
@@ -991,7 +998,7 @@
   bool do_synch = method()->is_synchronized() && GenerateSynchronizationCode;
 
   // record exit from a method if compiled while Dtrace is turned on.
-  if (do_synch || C->env()->dtrace_method_probes()) {
+  if (do_synch || C->env()->dtrace_method_probes() || _replaced_nodes_for_exceptions) {
     // First move the exception list out of _exits:
     GraphKit kit(_exits.transfer_exceptions_into_jvms());
     SafePointNode* normal_map = kit.map();  // keep this guy safe
@@ -1016,6 +1023,9 @@
       if (C->env()->dtrace_method_probes()) {
         kit.make_dtrace_method_exit(method());
       }
+      if (_replaced_nodes_for_exceptions) {
+        kit.map()->apply_replaced_nodes();
+      }
       // Done with exception-path processing.
       ex_map = kit.make_exception_state(ex_oop);
       assert(ex_jvms->same_calls_as(ex_map->jvms()), "sanity");
@@ -1035,6 +1045,7 @@
       _exits.add_exception_state(ex_map);
     }
   }
+  _exits.map()->apply_replaced_nodes();
 }
 
 //-----------------------------create_entry_map-------------------------------
@@ -1049,6 +1060,9 @@
     return NULL;
   }
 
+  // clear current replaced nodes that are of no use from here on (map was cloned in build_exits).
+  _caller->map()->delete_replaced_nodes();
+
   // If this is an inlined method, we may have to do a receiver null check.
   if (_caller->has_method() && is_normal_parse() && !method()->is_static()) {
     GraphKit kit(_caller);
@@ -1072,6 +1086,8 @@
 
   SafePointNode* inmap = _caller->map();
   assert(inmap != NULL, "must have inmap");
+  // In case of null check on receiver above
+  map()->transfer_replaced_nodes_from(inmap, _new_idx);
 
   uint i;
 
@@ -1701,6 +1717,8 @@
       set_control(r->nonnull_req());
     }
 
+    map()->merge_replaced_nodes_with(newin);
+
     // newin has been subsumed into the lazy merge, and is now dead.
     set_block(save_block);
 
@@ -2130,6 +2148,13 @@
     phi->add_req(value);
   }
 
+  if (_first_return) {
+    _exits.map()->transfer_replaced_nodes_from(map(), _new_idx);
+    _first_return = false;
+  } else {
+    _exits.map()->merge_replaced_nodes_with(map());
+  }
+
   stop_and_kill_map();          // This CFG path dies here
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/opto/replacednodes.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/cfgnode.hpp"
+#include "opto/phaseX.hpp"
+#include "opto/replacednodes.hpp"
+
+void ReplacedNodes::allocate_if_necessary() {
+  if (_replaced_nodes == NULL) {
+    _replaced_nodes = new GrowableArray<ReplacedNode>();
+  }
+}
+
+bool ReplacedNodes::is_empty() const {
+  return _replaced_nodes == NULL || _replaced_nodes->length() == 0;
+}
+
+bool ReplacedNodes::has_node(const ReplacedNode& r) const {
+  return _replaced_nodes->find(r) != -1;
+}
+
+bool ReplacedNodes::has_target_node(Node* n) const {
+  for (int i = 0; i < _replaced_nodes->length(); i++) {
+    if (_replaced_nodes->at(i).improved() == n) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Record replaced node if not seen before
+void ReplacedNodes::record(Node* initial, Node* improved) {
+  allocate_if_necessary();
+  ReplacedNode r(initial, improved);
+  if (!has_node(r)) {
+    _replaced_nodes->push(r);
+  }
+}
+
+// Copy replaced nodes from one map to another. idx is used to
+// identify nodes that are too new to be of interest in the target
+// node list.
+void ReplacedNodes::transfer_from(const ReplacedNodes& other, uint idx) {
+  if (other.is_empty()) {
+    return;
+  }
+  allocate_if_necessary();
+  for (int i = 0; i < other._replaced_nodes->length(); i++) {
+    ReplacedNode replaced = other._replaced_nodes->at(i);
+    // Only transfer the nodes that can actually be useful
+    if (!has_node(replaced) && (replaced.initial()->_idx < idx || has_target_node(replaced.initial()))) {
+      _replaced_nodes->push(replaced);
+    }
+  }
+}
+
+void ReplacedNodes::clone() {
+  if (_replaced_nodes != NULL) {
+    GrowableArray<ReplacedNode>* replaced_nodes_clone = new GrowableArray<ReplacedNode>();
+    replaced_nodes_clone->appendAll(_replaced_nodes);
+    _replaced_nodes = replaced_nodes_clone;
+  }
+}
+
+void ReplacedNodes::reset() {
+  if (_replaced_nodes != NULL) {
+    _replaced_nodes->clear();
+  }
+}
+
+// Perfom node replacement (used when returning to caller)
+void ReplacedNodes::apply(Node* n) {
+  if (is_empty()) {
+    return;
+  }
+  for (int i = 0; i < _replaced_nodes->length(); i++) {
+    ReplacedNode replaced = _replaced_nodes->at(i);
+    n->replace_edge(replaced.initial(), replaced.improved());
+  }
+}
+
+static void enqueue_use(Node* n, Node* use, Unique_Node_List& work) {
+  if (use->is_Phi()) {
+    Node* r = use->in(0);
+    assert(r->is_Region(), "Phi should have Region");
+    for (uint i = 1; i < use->req(); i++) {
+      if (use->in(i) == n) {
+        work.push(r->in(i));
+      }
+    }
+  } else {
+    work.push(use);
+  }
+}
+
+// Perfom node replacement following late inlining
+void ReplacedNodes::apply(Compile* C, Node* ctl) {
+  // ctl is the control on exit of the method that was late inlined
+  if (is_empty()) {
+    return;
+  }
+  for (int i = 0; i < _replaced_nodes->length(); i++) {
+    ReplacedNode replaced = _replaced_nodes->at(i);
+    Node* initial = replaced.initial();
+    Node* improved = replaced.improved();
+    assert (ctl != NULL && !ctl->is_top(), "replaced node should have actual control");
+
+    ResourceMark rm;
+    Unique_Node_List work;
+    // Go over all the uses of the node that is considered for replacement...
+    for (DUIterator j = initial->outs(); initial->has_out(j); j++) {
+      Node* use = initial->out(j);
+
+      if (use == improved || use->outcnt() == 0) {
+        continue;
+      }
+      work.clear();
+      enqueue_use(initial, use, work);
+      bool replace = true;
+      // Check that this use is dominated by ctl. Go ahead with the
+      // replacement if it is.
+      while (work.size() != 0 && replace) {
+        Node* n = work.pop();
+        if (use->outcnt() == 0) {
+          continue;
+        }
+        if (n->is_CFG() || (n->in(0) != NULL && !n->in(0)->is_top())) {
+          int depth = 0;
+          Node *m = n;
+          if (!n->is_CFG()) {
+            n = n->in(0);
+          }
+          assert(n->is_CFG(), "should be CFG now");
+          while(n != ctl) {
+            n = IfNode::up_one_dom(n);
+            depth++;
+            // limit search depth
+            if (depth >= 100 || n == NULL) {
+              replace = false;
+              break;
+            }
+          }
+        } else {
+          for (DUIterator k = n->outs(); n->has_out(k); k++) {
+            enqueue_use(n, n->out(k), work);
+          }
+        }
+      }
+      if (replace) {
+        bool is_in_table = C->initial_gvn()->hash_delete(use);
+        int replaced = use->replace_edge(initial, improved);
+        if (is_in_table) {
+          C->initial_gvn()->hash_find_insert(use);
+        }
+        C->record_for_igvn(use);
+
+        assert(replaced > 0, "inconsistent");
+        --j;
+      }
+    }
+  }
+}
+
+void ReplacedNodes::dump(outputStream *st) const {
+  if (!is_empty()) {
+    tty->print("replaced nodes: ");
+    for (int i = 0; i < _replaced_nodes->length(); i++) {
+      tty->print("%d->%d", _replaced_nodes->at(i).initial()->_idx, _replaced_nodes->at(i).improved()->_idx);
+      if (i < _replaced_nodes->length()-1) {
+        tty->print(",");
+      }
+    }
+  }
+}
+
+// Merge 2 list of replaced node at a point where control flow paths merge
+void ReplacedNodes::merge_with(const ReplacedNodes& other) {
+  if (is_empty()) {
+    return;
+  }
+  if (other.is_empty()) {
+    reset();
+    return;
+  }
+  int shift = 0;
+  int len = _replaced_nodes->length();
+  for (int i = 0; i < len; i++) {
+    if (!other.has_node(_replaced_nodes->at(i))) {
+      shift++;
+    } else if (shift > 0) {
+      _replaced_nodes->at_put(i-shift, _replaced_nodes->at(i));
+    }
+  }
+  if (shift > 0) {
+    _replaced_nodes->trunc_to(len - shift);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/opto/replacednodes.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_OPTO_REPLACEDNODES_HPP
+#define SHARE_VM_OPTO_REPLACEDNODES_HPP
+
+#include "opto/connode.hpp"
+
+// During parsing, when a node is "improved",
+// GraphKit::replace_in_map() is called to update the current map so
+// that the improved node is used from that point
+// on. GraphKit::replace_in_map() doesn't operate on the callers maps
+// and so some optimization opportunities may be lost. The
+// ReplacedNodes class addresses that problem.
+//
+// A ReplacedNodes object is a list of pair of nodes. Every
+// SafePointNode carries a ReplacedNodes object. Every time
+// GraphKit::replace_in_map() is called, a new pair of nodes is pushed
+// on the list of replaced nodes. When control flow paths merge, their
+// replaced nodes are also merged. When parsing exits a method to
+// return to a caller, the replaced nodes on the exit path are used to
+// update the caller's map.
+class ReplacedNodes VALUE_OBJ_CLASS_SPEC {
+ private:
+  class ReplacedNode VALUE_OBJ_CLASS_SPEC {
+  private:
+    Node* _initial;
+    Node* _improved;
+  public:
+    ReplacedNode() : _initial(NULL), _improved(NULL) {}
+    ReplacedNode(Node* initial, Node* improved) : _initial(initial), _improved(improved) {}
+    Node* initial() const  { return _initial; }
+    Node* improved() const { return _improved; }
+
+    bool operator==(const ReplacedNode& other) {
+      return _initial == other._initial && _improved == other._improved;
+    }
+  };
+  GrowableArray<ReplacedNode>* _replaced_nodes;
+
+  void allocate_if_necessary();
+  bool has_node(const ReplacedNode& r) const;
+  bool has_target_node(Node* n) const;
+
+ public:
+  ReplacedNodes()
+    : _replaced_nodes(NULL) {}
+
+  void clone();
+  void record(Node* initial, Node* improved);
+  void transfer_from(const ReplacedNodes& other, uint idx);
+  void reset();
+  void apply(Node* n);
+  void merge_with(const ReplacedNodes& other);
+  bool is_empty() const;
+  void dump(outputStream *st) const;
+  void apply(Compile* C, Node* ctl);
+};
+
+#endif // SHARE_VM_OPTO_REPLACEDNODES_HPP
--- a/hotspot/src/share/vm/runtime/arguments.cpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/runtime/arguments.cpp	Tue Jun 10 13:37:16 2014 +0200
@@ -3834,10 +3834,6 @@
     // nothing to use the profiling, turn if off
     FLAG_SET_DEFAULT(TypeProfileLevel, 0);
   }
-  if (UseTypeSpeculation && FLAG_IS_DEFAULT(ReplaceInParentMaps)) {
-    // Doing the replace in parent maps helps speculation
-    FLAG_SET_DEFAULT(ReplaceInParentMaps, true);
-  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
--- a/hotspot/src/share/vm/utilities/growableArray.hpp	Tue Jun 10 10:00:59 2014 +0000
+++ b/hotspot/src/share/vm/utilities/growableArray.hpp	Tue Jun 10 13:37:16 2014 +0200
@@ -349,6 +349,7 @@
 
   // inserts the given element before the element at index i
   void insert_before(const int idx, const E& elem) {
+    assert(0 <= idx && idx <= _len, "illegal index");
     check_nesting();
     if (_len == _max) grow(_len);
     for (int j = _len - 1; j >= idx; j--) {
@@ -360,7 +361,7 @@
 
   void appendAll(const GrowableArray<E>* l) {
     for (int i = 0; i < l->_len; i++) {
-      raw_at_put_grow(_len, l->_data[i], 0);
+      raw_at_put_grow(_len, l->_data[i], E());
     }
   }