6912064: type profiles need to be exploited more for dynamic language support
author jrose
Fri, 20 Aug 2010 23:40:30 -0700
changeset 6416 d49b65c9425a
parent 6272 94a20ad0e9de
child 6417 82e8f98f22d8
6912064: type profiles need to be exploited more for dynamic language support Reviewed-by: kvn
hotspot/src/share/vm/includeDB_compiler2
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/graphKit.hpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/parse.hpp
hotspot/src/share/vm/opto/parse2.cpp
hotspot/src/share/vm/opto/parseHelper.cpp
hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/share/vm/includeDB_compiler2	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/includeDB_compiler2	Fri Aug 20 23:40:30 2010 -0700
@@ -504,6 +504,7 @@
 graphKit.hpp                            callnode.hpp
 graphKit.hpp                            cfgnode.hpp
 graphKit.hpp                            ciEnv.hpp
+graphKit.hpp                            ciMethodData.hpp
 graphKit.hpp                            divnode.hpp
 graphKit.hpp                            compile.hpp
 graphKit.hpp                            deoptimization.hpp
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Fri Aug 20 23:40:30 2010 -0700
@@ -2451,11 +2451,79 @@
 }
 
 
+//------------------------------seems_never_null-------------------------------
+// Use null_seen information if it is available from the profile.
+// If we see an unexpected null at a type check we record it and force a
+// recompile; the offending check will be recompiled to handle NULLs.
+// If we see several offending BCIs, then all checks in the
+// method will be recompiled.
+bool GraphKit::seems_never_null(Node* obj, ciProfileData* data) {
+  if (UncommonNullCast               // Cutout for this technique
+      && obj != null()               // And not the -Xcomp stupid case?
+      && !too_many_traps(Deoptimization::Reason_null_check)
+      ) {
+    if (data == NULL)
+      // Edge case:  no mature data.  Be optimistic here.
+      return true;
+    // If the profile has not seen a null, assume it won't happen.
+    assert(java_bc() == Bytecodes::_checkcast ||
+           java_bc() == Bytecodes::_instanceof ||
+           java_bc() == Bytecodes::_aastore, "MDO must collect null_seen bit here");
+    return !data->as_BitData()->null_seen();
+  }
+  return false;
+}
+
+//------------------------maybe_cast_profiled_receiver-------------------------
+// If the profile has seen exactly one type, narrow to exactly that type.
+// Subsequent type checks will always fold up.
+Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj,
+                                             ciProfileData* data,
+                                             ciKlass* require_klass) {
+  if (!UseTypeProfile || !TypeProfileCasts) return NULL;
+  if (data == NULL)  return NULL;
+
+  // Make sure we haven't already deoptimized from this tactic.
+  if (too_many_traps(Deoptimization::Reason_class_check))
+    return NULL;
+
+  // (No, this isn't a call, but it's enough like a virtual call
+  // to use the same ciMethod accessor to get the profile info...)
+  ciCallProfile profile = method()->call_profile_at_bci(bci());
+  if (profile.count() >= 0 &&         // no cast failures here
+      profile.has_receiver(0) &&
+      profile.morphism() == 1) {
+    ciKlass* exact_kls = profile.receiver(0);
+    if (require_klass == NULL ||
+        static_subtype_check(require_klass, exact_kls) == SSC_always_true) {
+      // If we narrow the type to match what the type profile sees,
+      // we can then remove the rest of the cast.
+      // This is a win, even if the exact_kls is very specific,
+      // because downstream operations, such as method calls,
+      // will often benefit from the sharper type.
+      Node* exact_obj = not_null_obj; // will get updated in place...
+      Node* slow_ctl  = type_check_receiver(exact_obj, exact_kls, 1.0,
+                                            &exact_obj);
+      { PreserveJVMState pjvms(this);
+        set_control(slow_ctl);
+        uncommon_trap(Deoptimization::Reason_class_check,
+                      Deoptimization::Action_maybe_recompile);
+      }
+      replace_in_map(not_null_obj, exact_obj);
+      return exact_obj;
+    }
+    // assert(ssc == SSC_always_true)... except maybe the profile lied to us.
+  }
+
+  return NULL;
+}
+
+
 //-------------------------------gen_instanceof--------------------------------
 // Generate an instance-of idiom.  Used by both the instance-of bytecode
 // and the reflective instance-of call.
-Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) {
-  C->set_has_split_ifs(true); // Has chance for split-if optimization
+Node* GraphKit::gen_instanceof(Node* obj, Node* superklass) {
+  kill_dead_locals();           // Benefit all the uncommon traps
   assert( !stopped(), "dead parse path should be checked in callers" );
   assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()),
          "must check for not-null not-dead klass in callers");
@@ -2466,9 +2534,16 @@
   Node*       phi    = new(C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
   C->set_has_split_ifs(true); // Has chance for split-if optimization
 
+  ciProfileData* data = NULL;
+  if (java_bc() == Bytecodes::_instanceof) {  // Only for the bytecode
+    data = method()->method_data()->bci_to_data(bci());
+  }
+  bool never_see_null = (ProfileDynamicTypes  // aggressive use of profile
+                         && seems_never_null(obj, data));
+
   // Null check; get casted pointer; set region slot 3
   Node* null_ctl = top();
-  Node* not_null_obj = null_check_oop(subobj, &null_ctl);
+  Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null);
 
   // If not_null_obj is dead, only null-path is taken
   if (stopped()) {              // Doing instance-of on a NULL?
@@ -2477,6 +2552,23 @@
   }
   region->init_req(_null_path, null_ctl);
   phi   ->init_req(_null_path, intcon(0)); // Set null path value
+  if (null_ctl == top()) {
+    // Do this eagerly, so that pattern matches like is_diamond_phi
+    // will work even during parsing.
+    assert(_null_path == PATH_LIMIT-1, "delete last");
+    region->del_req(_null_path);
+    phi   ->del_req(_null_path);
+  }
+
+  if (ProfileDynamicTypes && data != NULL) {
+    Node* cast_obj = maybe_cast_profiled_receiver(not_null_obj, data, NULL);
+    if (stopped()) {            // Profile disagrees with this path.
+      set_control(null_ctl);    // Null is the only remaining possibility.
+      return intcon(0);
+    }
+    if (cast_obj != NULL)
+      not_null_obj = cast_obj;
+  }
 
   // Load the object's klass
   Node* obj_klass = load_object_klass(not_null_obj);
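
The eager del_req calls above matter because idioms such as PhiNode::is_diamond_phi only match a merge with exactly two live control inputs; a stale dead null path would defeat the match until later cleanup. A toy illustration, with hypothetical stand-ins for the region/phi input lists:

    #include <cassert>
    #include <vector>

    // Hypothetical stand-ins for RegionNode/PhiNode input lists.
    struct ToyMerge {
      std::vector<int> region_in;  // control inputs (path ids)
      std::vector<int> phi_in;     // matching data inputs
      void del_last() { region_in.pop_back(); phi_in.pop_back(); }
    };

    // A diamond phi merges exactly the two projections of one If.
    bool looks_like_diamond(const ToyMerge& m) { return m.region_in.size() == 2; }

    int main() {
      // instanceof on a value the profile says is never null: the null
      // path was wired in, then found dead (null_ctl == top()).
      ToyMerge m;
      m.region_in = {1, 2, 3};  // obj path, subtype path, dead null path
      m.phi_in    = {1, 0, 0};
      assert(!looks_like_diamond(m));  // stale dead input defeats the match
      m.del_last();                    // eager pruning, as in the patch
      assert(looks_like_diamond(m));   // now downstream idioms can fold
      return 0;
    }
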
@@ -2546,20 +2638,8 @@
   C->set_has_split_ifs(true); // Has chance for split-if optimization
 
   // Use null-cast information if it is available
-  bool never_see_null = false;
-  // If we see an unexpected null at a check-cast we record it and force a
-  // recompile; the offending check-cast will be compiled to handle NULLs.
-  // If we see several offending BCIs, then all checkcasts in the
-  // method will be compiled to handle NULLs.
-  if (UncommonNullCast            // Cutout for this technique
-      && failure_control == NULL  // regular case
-      && obj != null()            // And not the -Xcomp stupid case?
-      && !too_many_traps(Deoptimization::Reason_null_check)) {
-    // Finally, check the "null_seen" bit from the interpreter.
-    if (data == NULL || !data->as_BitData()->null_seen()) {
-      never_see_null = true;
-    }
-  }
+  bool never_see_null = ((failure_control == NULL)  // regular case only
+                         && seems_never_null(obj, data));
 
   // Null check; get casted pointer; set region slot 3
   Node* null_ctl = top();
@@ -2572,47 +2652,26 @@
   }
   region->init_req(_null_path, null_ctl);
   phi   ->init_req(_null_path, null());  // Set null path value
-
-  Node* cast_obj = NULL;        // the casted version of the object
-
-  // If the profile has seen exactly one type, narrow to that type.
-  // (The subsequent subtype check will always fold up.)
-  if (UseTypeProfile && TypeProfileCasts && data != NULL &&
+  if (null_ctl == top()) {
+    // Do this eagerly, so that pattern matches like is_diamond_phi
+    // will work even during parsing.
+    assert(_null_path == PATH_LIMIT-1, "delete last");
+    region->del_req(_null_path);
+    phi   ->del_req(_null_path);
+  }
+
+  Node* cast_obj = NULL;
+  if (data != NULL &&
       // Counter has never been decremented (due to cast failure).
       // ...This is a reasonable thing to expect.  It is true of
       // all casts inserted by javac to implement generic types.
-      data->as_CounterData()->count() >= 0 &&
-      !too_many_traps(Deoptimization::Reason_class_check)) {
-    // (No, this isn't a call, but it's enough like a virtual call
-    // to use the same ciMethod accessor to get the profile info...)
-    ciCallProfile profile = method()->call_profile_at_bci(bci());
-    if (profile.count() >= 0 &&         // no cast failures here
-        profile.has_receiver(0) &&
-        profile.morphism() == 1) {
-      ciKlass* exact_kls = profile.receiver(0);
-      int ssc = static_subtype_check(tk->klass(), exact_kls);
-      if (ssc == SSC_always_true) {
-        // If we narrow the type to match what the type profile sees,
-        // we can then remove the rest of the cast.
-        // This is a win, even if the exact_kls is very specific,
-        // because downstream operations, such as method calls,
-        // will often benefit from the sharper type.
-        Node* exact_obj = not_null_obj; // will get updated in place...
-        Node* slow_ctl  = type_check_receiver(exact_obj, exact_kls, 1.0,
-                                              &exact_obj);
-        { PreserveJVMState pjvms(this);
-          set_control(slow_ctl);
-          uncommon_trap(Deoptimization::Reason_class_check,
-                        Deoptimization::Action_maybe_recompile);
-        }
-        if (failure_control != NULL) // failure is now impossible
-          (*failure_control) = top();
-        replace_in_map(not_null_obj, exact_obj);
-        // adjust the type of the phi to the exact klass:
-        phi->raise_bottom_type(_gvn.type(exact_obj)->meet(TypePtr::NULL_PTR));
-        cast_obj = exact_obj;
-      }
-      // assert(cast_obj != NULL)... except maybe the profile lied to us.
+      data->as_CounterData()->count() >= 0) {
+    cast_obj = maybe_cast_profiled_receiver(not_null_obj, data, tk->klass());
+    if (cast_obj != NULL) {
+      if (failure_control != NULL) // failure is now impossible
+        (*failure_control) = top();
+      // adjust the type of the phi to the exact klass:
+      phi->raise_bottom_type(_gvn.type(cast_obj)->meet(TypePtr::NULL_PTR));
     }
   }
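
gen_checkcast now funnels through the same maybe_cast_profiled_receiver helper: narrow only when the site has never seen a cast failure (count >= 0), the profile is monomorphic, and the observed klass statically satisfies require_klass. A hedged sketch of that decision, with a hypothetical ToyProfile standing in for ciCallProfile and a deliberately simplistic subtype rule:

    #include <cstdio>
    #include <string>

    // Hypothetical mini call-profile, standing in for ciCallProfile.
    struct ToyProfile {
      int count;            // goes negative once a cast failure decrements it
      int morphism;         // number of distinct receiver types seen
      std::string receiver0;
    };

    // Stand-in for static_subtype_check(require, observed) == SSC_always_true;
    // an empty 'require' models the gen_instanceof caller (require_klass == NULL).
    bool always_subtype(const std::string& require, const std::string& observed) {
      return require.empty() || observed == require;  // toy rule only
    }

    // Shape of the decision in maybe_cast_profiled_receiver: narrow only when
    // the site is monomorphic, has never failed, and satisfies require_klass.
    const char* narrow_to(const ToyProfile& p, const std::string& require) {
      if (p.count < 0)   return nullptr;  // a cast failed here before
      if (p.morphism != 1) return nullptr;  // polymorphic: cannot narrow
      if (!always_subtype(require, p.receiver0)) return nullptr;
      return p.receiver0.c_str();           // sharpen the type; trap if wrong
    }

    int main() {
      ToyProfile mono = { 10, 1, "java/lang/String" };
      ToyProfile poly = { 10, 2, "java/lang/String" };
      std::printf("%s\n", narrow_to(mono, ""));               // narrows
      std::printf("%p\n", (void*)narrow_to(poly, ""));        // 0: skip
      return 0;
    }
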
 
--- a/hotspot/src/share/vm/opto/graphKit.hpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.hpp	Fri Aug 20 23:40:30 2010 -0700
@@ -341,6 +341,14 @@
   Node* null_check_oop(Node* value, Node* *null_control,
                        bool never_see_null = false);
 
+  // Check the null_seen bit.
+  bool seems_never_null(Node* obj, ciProfileData* data);
+
+  // Use the type profile to narrow an object type.
+  Node* maybe_cast_profiled_receiver(Node* not_null_obj,
+                                     ciProfileData* data,
+                                     ciKlass* require_klass);
+
   // Cast obj to not-null on this path
   Node* cast_not_null(Node* obj, bool do_replace_in_map = true);
   // Replace all occurrences of one node by another.
--- a/hotspot/src/share/vm/opto/library_call.cpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Fri Aug 20 23:40:30 2010 -0700
@@ -906,7 +906,8 @@
   const int count_offset = java_lang_String::count_offset_in_bytes();
   const int offset_offset = java_lang_String::offset_offset_in_bytes();
 
-  _sp += 2;
+  int nargs = 2;
+  _sp += nargs;
   Node* argument = pop();  // pop non-receiver first:  it was pushed second
   Node* receiver = pop();
 
@@ -914,11 +915,11 @@
   // null check technically happens in the wrong place, which can lead to
   // invalid stack traces when string compare is inlined into a method
   // which handles NullPointerExceptions.
-  _sp += 2;
+  _sp += nargs;
   receiver = do_null_check(receiver, T_OBJECT);
   //should not do null check for argument for String.equals(), because spec
   //allows to specify NULL as argument.
-  _sp -= 2;
+  _sp -= nargs;
 
   if (stopped()) {
     return true;
@@ -943,7 +944,9 @@
   ciInstanceKlass* klass = env()->String_klass();
 
   if (!stopped()) {
+    _sp += nargs;          // gen_instanceof might do an uncommon trap
     Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
+    _sp -= nargs;
     Node* cmp  = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
     Node* bol  = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::ne));
 
@@ -2935,7 +2938,9 @@
   switch (id) {
   case vmIntrinsics::_isInstance:
     // nothing is an instance of a primitive type
+    _sp += nargs;          // gen_instanceof might do an uncommon trap
     query_value = gen_instanceof(obj, kls);
+    _sp -= nargs;
     break;
 
   case vmIntrinsics::_getModifiers:
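
The recurring _sp += nargs / _sp -= nargs pairs exist because an uncommon trap reconstructs interpreter frames from the recorded JVM state, so the arguments must still be logically on the expression stack whenever gen_instanceof can trap. A sketch of the pattern as an RAII guard (hypothetical; the real code adjusts _sp inline):

    // Hypothetical RAII guard mimicking the _sp += nargs ... _sp -= nargs
    // pattern: while a potentially trapping helper runs, the arguments must
    // remain logically on the expression stack so deoptimization can replay
    // the bytecode in the interpreter.
    struct ArgsOnStack {
      int& sp;
      int  nargs;
      ArgsOnStack(int& sp_, int nargs_) : sp(sp_), nargs(nargs_) { sp += nargs; }
      ~ArgsOnStack() { sp -= nargs; }
    };

    int g_sp = 0;  // toy stand-in for GraphKit::_sp

    int gen_instanceof_stub() { return g_sp; }  // pretend it may uncommon-trap

    int main() {
      int observed;
      {
        ArgsOnStack guard(g_sp, 2);   // receiver + argument stay visible
        observed = gen_instanceof_stub();
      }
      return (observed == 2 && g_sp == 0) ? 0 : 1;
    }
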
--- a/hotspot/src/share/vm/opto/parse.hpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/parse.hpp	Fri Aug 20 23:40:30 2010 -0700
@@ -494,6 +494,7 @@
   float   dynamic_branch_prediction(float &cnt);
   float   branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
   bool    seems_never_taken(float prob);
+  bool    seems_stable_comparison(BoolTest::mask btest, Node* c);
 
   void    do_ifnull(BoolTest::mask btest, Node* c);
   void    do_if(BoolTest::mask btest, Node* c);
--- a/hotspot/src/share/vm/opto/parse2.cpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/parse2.cpp	Fri Aug 20 23:40:30 2010 -0700
@@ -892,6 +892,62 @@
   return prob < PROB_MIN;
 }
 
+// True if the comparison seems to be the kind that will not change its
+// statistics from true to false.  See comments in adjust_map_after_if.
+// This question is only asked along paths which are already
+// classified as untaken (by seems_never_taken), so really,
+// if a path is never taken, its controlling comparison is
+// already acting in a stable fashion.  If the comparison
+// seems stable, we will put an expensive uncommon trap
+// on the untaken path.  To be conservative, and to allow
+// partially executed counted loops to be compiled fully,
+// we will plant uncommon traps only after pointer comparisons.
+bool Parse::seems_stable_comparison(BoolTest::mask btest, Node* cmp) {
+  for (int depth = 4; depth > 0; depth--) {
+    // The following switch can find CmpP here over half the time for
+    // dynamic language code rich with type tests.
+    // Code using counted loops or array manipulations (typical
+    // of benchmarks) will have many (>80%) CmpI instructions.
+    switch (cmp->Opcode()) {
+    case Op_CmpP:
+      // A never-taken null check looks like CmpP/BoolTest::eq.
+      // These certainly should be closed off as uncommon traps.
+      if (btest == BoolTest::eq)
+        return true;
+      // A never-failed type check looks like CmpP/BoolTest::ne.
+      // Let's put traps on those, too, so that we don't have to compile
+      // unused paths with indeterminate dynamic type information.
+      if (ProfileDynamicTypes)
+        return true;
+      return false;
+
+    case Op_CmpI:
+      // A small minority (< 10%) of CmpP are masked as CmpI,
+      // as if by boolean conversion ((p == q? 1: 0) != 0).
+      // Detect that here, even if it hasn't optimized away yet.
+      // Specifically, this covers the 'instanceof' operator.
+      if (btest == BoolTest::ne || btest == BoolTest::eq) {
+        if (_gvn.type(cmp->in(2))->singleton() &&
+            cmp->in(1)->is_Phi()) {
+          PhiNode* phi = cmp->in(1)->as_Phi();
+          int true_path = phi->is_diamond_phi();
+          if (true_path > 0 &&
+              _gvn.type(phi->in(1))->singleton() &&
+              _gvn.type(phi->in(2))->singleton()) {
+            // phi->region->if_proj->ifnode->bool->cmp
+            BoolNode* bol = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
+            btest = bol->_test._test;
+            cmp = bol->in(1);
+            continue;
+          }
+        }
+      }
+      return false;
+    }
+  }
+  return false;
+}
+
 //-------------------------------repush_if_args--------------------------------
 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
 inline int Parse::repush_if_args() {
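
Setting aside the depth-limited CmpI unwrapping, the new predicate is a small classifier over the compare opcode and branch test. A reduced standalone model (hypothetical toy enums; the diamond-phi following loop is elided, so a masked CmpP is conservatively treated as unstable here):

    #include <cassert>

    // Hypothetical reduced model of Parse::seems_stable_comparison.
    enum ToyOpcode { Op_CmpP, Op_CmpI, Op_CmpOther };
    enum ToyBTest  { BT_eq, BT_ne, BT_other };

    bool seems_stable_comparison(ToyOpcode op, ToyBTest btest,
                                 bool profile_dynamic_types) {
      switch (op) {
      case Op_CmpP:
        if (btest == BT_eq) return true;  // never-taken null check: trap it
        return profile_dynamic_types;     // never-failed type check (ne)
      case Op_CmpI:
      default:
        // Integer compares (e.g. counted-loop exit tests) are left alone so
        // that partially executed loops can still be compiled in full.
        return false;
      }
    }

    int main() {
      assert( seems_stable_comparison(Op_CmpP, BT_eq, false));
      assert( seems_stable_comparison(Op_CmpP, BT_ne, true));
      assert(!seems_stable_comparison(Op_CmpI, BT_eq, true));
      return 0;
    }
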
@@ -1137,19 +1193,22 @@
 
   bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
 
-  int cop = c->Opcode();
-  if (seems_never_taken(prob) && cop == Op_CmpP && btest == BoolTest::eq) {
-    // (An earlier version of do_if omitted '&& btest == BoolTest::eq'.)
-    //
+  if (seems_never_taken(prob) && seems_stable_comparison(btest, c)) {
     // If this might possibly turn into an implicit null check,
     // and the null has never yet been seen, we need to generate
     // an uncommon trap, so as to recompile instead of suffering
     // with very slow branches.  (We'll get the slow branches if
     // the program ever changes phase and starts seeing nulls here.)
     //
-    // The tests we worry about are of the form (p == null).
-    // We do not simply inspect for a null constant, since a node may
+    // We do not inspect for a null constant, since a node may
     // optimize to 'null' later on.
+    //
+    // Null checks, and other tests which expect inequality,
+    // show btest == BoolTest::eq along the non-taken branch.
+    // On the other hand, type tests, must-be-null tests,
+    // and other tests which expect pointer equality,
+    // show btest == BoolTest::ne along the non-taken branch.
+    // We prune both types of branches if they look unused.
     repush_if_args();
     // We need to mark this branch as taken so that if we recompile we will
     // see that it is possible. In the tiered system the interpreter doesn't
--- a/hotspot/src/share/vm/opto/parseHelper.cpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/opto/parseHelper.cpp	Fri Aug 20 23:40:30 2010 -0700
@@ -119,7 +119,11 @@
   }
 
   // Push the bool result back on stack
-  push( gen_instanceof( pop(), makecon(TypeKlassPtr::make(klass)) ) );
+  Node* res = gen_instanceof(peek(), makecon(TypeKlassPtr::make(klass)));
+
+  // Pop from stack AFTER gen_instanceof because it can uncommon trap.
+  pop();
+  push(res);
 }
 
 //------------------------------array_store_check------------------------------
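
The peek-then-pop reordering serves the same purpose as the _sp adjustments in library_call.cpp: if gen_instanceof uncommon-traps, the interpreter re-executes the instanceof bytecode and must find its operand still on the stack. A toy model of the ordering, with a hypothetical expression-stack type:

    #include <cassert>
    #include <vector>

    // Hypothetical expression stack; models peek() vs pop() around a helper
    // that may deoptimize (uncommon trap) back to the interpreter.
    struct ToyStack {
      std::vector<int> s;
      int  peek() const { return s.back(); }
      int  pop()        { int v = s.back(); s.pop_back(); return v; }
      void push(int v)  { s.push_back(v); }
    };

    bool g_trapped = false;

    int gen_instanceof_model(ToyStack& st, int obj) {
      // If we "trap" here, the interpreter replays the bytecode and expects
      // obj to still be on the expression stack.
      if (g_trapped) assert(!st.s.empty());
      (void)obj;
      return 1;
    }

    int main() {
      ToyStack st;
      st.push(42);
      g_trapped = true;
      // Patched order: peek first, pop only after the trapping call.
      int res = gen_instanceof_model(st, st.peek());
      st.pop();
      st.push(res);
      return 0;
    }
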
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Aug 19 14:51:47 2010 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Fri Aug 20 23:40:30 2010 -0700
@@ -2476,6 +2476,9 @@
   develop(bool, MonomorphicArrayCheck, true,                                \
           "Uncommon-trap array store checks that require full type check")  \
                                                                             \
+  diagnostic(bool, ProfileDynamicTypes, true,                               \
+          "do extra type profiling and use it more aggressively")           \
+                                                                            \
   develop(bool, DelayCompilationDuringStartup, true,                        \
           "Delay invoking the compiler until main application class is "    \
           "loaded")                                                         \