8154473: Update for CompilerDirectives to control stub generation and intrinsics
author vdeshpande
Fri, 20 May 2016 14:17:55 -0700
changeset 38699 f8bec5f6b09c
parent 38698 2f5f325d4e6d
child 38700 6aa6444c1b48
8154473: Update for CompilerDirectives to control stub generation and intrinsics
Summary: Use -XX:DisableIntrinsic flag to control intrinsics usage in Interpreter.
Reviewed-by: twisti, neliasso, kvn, simonis
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp
hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp
hotspot/src/share/vm/classfile/vmSymbols.cpp
hotspot/src/share/vm/classfile/vmSymbols.hpp
hotspot/src/share/vm/compiler/compilerDirectives.hpp
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 20 14:17:55 2016 -0700
@@ -2584,6 +2584,11 @@
 
 }
 
+// Use this method when the MacroAssembler version of call_VM_leaf_base() must be called from the interpreter.
+void MacroAssembler::call_VM_leaf0(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 0);  // explicitly qualified call; second argument is passed as 0
+}
+
 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
   call_VM_leaf_base(entry_point, number_of_arguments);
 }
@@ -5629,235 +5634,6 @@
 #endif
 }
 
-void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
-  pusha();
-
-  // if we are coming from c1, xmm registers may be live
-  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
-  if (UseAVX > 2) {
-    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
-  }
-
-  if (UseSSE == 1)  {
-    subptr(rsp, sizeof(jdouble)*8);
-    for (int n = 0; n < 8; n++) {
-      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
-    }
-  } else if (UseSSE >= 2)  {
-    if (UseAVX > 2) {
-      push(rbx);
-      movl(rbx, 0xffff);
-      kmovwl(k1, rbx);
-      pop(rbx);
-    }
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      if(UseAVX > 2) {
-        // Save upper half of ZMM registers
-        subptr(rsp, 32*num_xmm_regs);
-        for (int n = 0; n < num_xmm_regs; n++) {
-          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
-        }
-      }
-      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
-      // Save upper half of YMM registers
-      subptr(rsp, 16*num_xmm_regs);
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#endif
-    // Save whole 128bit (16 bytes) XMM registers
-    subptr(rsp, 16*num_xmm_regs);
-#ifdef _LP64
-    if (VM_Version::supports_evex()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(Address(rsp, n*16), as_XMMRegister(n));
-    }
-#endif
-  }
-
-  // Preserve registers across runtime call
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
-    // FPU state, but can not trust C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument(s) to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin, dcos etc.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    for (int i = nb_args-1; i >= 0; i--) {
-      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
-    }
-  }
-
-  subptr(rsp, nb_args*sizeof(jdouble));
-  for (int i = 0; i < nb_args; i++) {
-    fstp_d(Address(rsp, i*sizeof(jdouble)));
-  }
-
-#ifdef _LP64
-  if (nb_args > 0) {
-    movdbl(xmm0, Address(rsp, 0));
-  }
-  if (nb_args > 1) {
-    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
-  }
-  assert(nb_args <= 2, "unsupported number of args");
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
-
-  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
-
-#ifdef _LP64
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble)*nb_args);
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU
-    // stack except incoming arguments
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
-    addptr(rsp, sizeof(jdouble)*nb_args);
-  }
-
-  if (UseSSE == 1)  {
-    for (int n = 0; n < 8; n++) {
-      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
-    }
-    addptr(rsp, sizeof(jdouble)*8);
-  } else if (UseSSE >= 2)  {
-    // Restore whole 128bit (16 bytes) XMM registers
-#ifdef _LP64
-  if (VM_Version::supports_evex()) {
-    for (int n = 0; n < num_xmm_regs; n++) {
-      vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
-    }
-  } else {
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(as_XMMRegister(n), Address(rsp, n*16));
-    }
-  }
-#else
-  for (int n = 0; n < num_xmm_regs; n++) {
-    movdqu(as_XMMRegister(n), Address(rsp, n*16));
-  }
-#endif
-    addptr(rsp, 16*num_xmm_regs);
-
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registers.
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
-      }
-      addptr(rsp, 16*num_xmm_regs);
-      if(UseAVX > 2) {
-        for (int n = 0; n < num_xmm_regs; n++) {
-          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
-        }
-        addptr(rsp, 32*num_xmm_regs);
-      }
-    }
-#endif
-  }
-  popa();
-}
-
-static const double     pi_4 =  0.7853981633974483;
-
-void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
-  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
-  // was attempted in this code; unfortunately it appears that the
-  // switch to 80-bit precision and back causes this to be
-  // unprofitable compared with simply performing a runtime call if
-  // the argument is out of the (-pi/4, pi/4) range.
-
-  Register tmp = noreg;
-  if (!VM_Version::supports_cmov()) {
-    // fcmp needs a temporary so preserve rbx,
-    tmp = rbx;
-    push(tmp);
-  }
-
-  Label slow_case, done;
-  if (trig == 't') {
-    ExternalAddress pi4_adr = (address)&pi_4;
-    if (reachable(pi4_adr)) {
-      // x ?<= pi/4
-      fld_d(pi4_adr);
-      fld_s(1);                // Stack:  X  PI/4  X
-      fabs();                  // Stack: |X| PI/4  X
-      fcmp(tmp);
-      jcc(Assembler::above, slow_case);
-
-      // fastest case: -pi/4 <= x <= pi/4
-      ftan();
-
-      jmp(done);
-    }
-  }
-  // slow case: runtime call
-  bind(slow_case);
-
-  switch(trig) {
-  case 's':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 'c':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 't':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
-    }
-    break;
-  default:
-    assert(false, "bad intrinsic");
-    break;
-  }
-
-  // Come here with result in F-TOS
-  bind(done);
-
-  if (tmp != noreg) {
-    pop(tmp);
-  }
-}
-
 // Look up the method for a megamorphic invokeinterface call.
 // The target method is determined by <intf_klass, itable_index>.
 // The receiver klass is in recv_klass.
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri May 20 14:17:55 2016 -0700
@@ -259,6 +259,7 @@
   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
   void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
 
+  void call_VM_leaf0(address entry_point);
   void call_VM_leaf(address entry_point,
                     int number_of_arguments = 0);
   void call_VM_leaf(address entry_point,
@@ -453,15 +454,6 @@
   void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
   void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
 
-  // Inlined sin/cos generator for Java; must not use CPU instruction
-  // directly on Intel as it does not have high enough precision
-  // outside of the range [-pi/4, pi/4]. Extra argument indicate the
-  // number of FPU stack slots in use; all but the topmost will
-  // require saving if a slow case is necessary. Assumes argument is
-  // on FP TOS; result is on FP TOS.  No cpu registers are changed by
-  // this code.
-  void trigfunc(char trig, int num_fpu_regs_in_use = 1);
-
   // branch to L if FPU flag C2 is set/not set
   // tmp is a temporary register, if none is available use noreg
   void jC2 (Register tmp, Label& L);
@@ -1036,9 +1028,6 @@
 
 private:
 
-  // call runtime as a fallback for trig functions and pow/exp.
-  void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use);
-
   // these are private because users should be doing movflt/movdbl
 
   void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri May 20 14:17:55 2016 -0700
@@ -3858,21 +3858,46 @@
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
     if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
-      StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
-      StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
-      StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
-      StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
-      StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
-      StubRoutines::_dexp = generate_libmExp();
-      StubRoutines::_dlog = generate_libmLog();
-      StubRoutines::_dlog10 = generate_libmLog10();
-      StubRoutines::_dpow = generate_libmPow();
-      StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
-      StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
-      StubRoutines::_dsin = generate_libmSin();
-      StubRoutines::_dcos = generate_libmCos();
-      StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
-      StubRoutines::_dtan = generate_libmTan();
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
+          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
+          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
+        StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
+        StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
+        StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
+        StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
+        StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
+        StubRoutines::_dexp = generate_libmExp();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
+        StubRoutines::_dlog = generate_libmLog();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
+        StubRoutines::_dlog10 = generate_libmLog10();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
+        StubRoutines::_dpow = generate_libmPow();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
+        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
+        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
+        StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
+        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
+        StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
+        StubRoutines::_dsin = generate_libmSin();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
+        StubRoutines::_dcos = generate_libmCos();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
+        StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
+        StubRoutines::_dtan = generate_libmTan();
+      }
     }
   }
 
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri May 20 14:17:55 2016 -0700
@@ -5128,27 +5128,45 @@
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
     if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
-      StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
-      StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
-      StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
-      StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
-      StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
-      StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
-      StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
-      StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
-      StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
-      StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
-      StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
-      StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
-      StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
-      StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
-      StubRoutines::_dexp = generate_libmExp();
-      StubRoutines::_dlog = generate_libmLog();
-      StubRoutines::_dlog10 = generate_libmLog10();
-      StubRoutines::_dpow = generate_libmPow();
-      StubRoutines::_dtan = generate_libmTan();
-      StubRoutines::_dsin = generate_libmSin();
-      StubRoutines::_dcos = generate_libmCos();
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
+          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
+          vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
+        StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
+        StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
+        StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
+        StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
+        StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
+        StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
+        StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
+        StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
+        StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
+        StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
+        StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
+        StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
+        StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
+        StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
+        StubRoutines::_dexp = generate_libmExp();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
+        StubRoutines::_dlog = generate_libmLog();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
+        StubRoutines::_dlog10 = generate_libmLog10();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
+        StubRoutines::_dpow = generate_libmPow();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
+        StubRoutines::_dsin = generate_libmSin();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
+        StubRoutines::_dcos = generate_libmCos();
+      }
+      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
+        StubRoutines::_dtan = generate_libmTan();
+      }
     }
   }
 
--- a/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp	Fri May 20 14:17:55 2016 -0700
@@ -350,7 +350,7 @@
         if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
         } else {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
+          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
         }
         __ addptr(rsp, 2 * wordSize);
         break;
@@ -360,7 +360,7 @@
         if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
         } else {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
+          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
         }
         __ addptr(rsp, 2 * wordSize);
         break;
@@ -370,7 +370,7 @@
         if (StubRoutines::dtan() != NULL) {
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
         } else {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
+          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
         }
         __ addptr(rsp, 2 * wordSize);
         break;
@@ -386,7 +386,7 @@
         if (StubRoutines::dlog() != NULL) {
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
         } else {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
+          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
         }
         __ addptr(rsp, 2 * wordSize);
         break;
@@ -396,7 +396,7 @@
         if (StubRoutines::dlog10() != NULL) {
           __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
         } else {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
+          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
         }
         __ addptr(rsp, 2 * wordSize);
         break;
@@ -408,7 +408,7 @@
       if (StubRoutines::dpow() != NULL) {
         __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
       } else {
-        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
+        __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
       }
       __ addptr(rsp, 4 * wordSize);
       break;
@@ -418,7 +418,7 @@
       if (StubRoutines::dexp() != NULL) {
         __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
       } else {
-        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
+        __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
       }
       __ addptr(rsp, 2*wordSize);
     break;
--- a/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Fri May 20 14:17:55 2016 -0700
@@ -377,35 +377,35 @@
     if (StubRoutines::dexp() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
     }
   } else if (kind == Interpreter::java_lang_math_log) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     if (StubRoutines::dlog() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
     }
   } else if (kind == Interpreter::java_lang_math_log10) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     if (StubRoutines::dlog10() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
     }
   } else if (kind == Interpreter::java_lang_math_sin) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     if (StubRoutines::dsin() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
     }
   } else if (kind == Interpreter::java_lang_math_cos) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     if (StubRoutines::dcos() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
     }
   } else if (kind == Interpreter::java_lang_math_pow) {
     __ movdbl(xmm1, Address(rsp, wordSize));
@@ -413,23 +413,23 @@
     if (StubRoutines::dpow() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
     }
   } else if (kind == Interpreter::java_lang_math_tan) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     if (StubRoutines::dtan() != NULL) {
       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
     } else {
-      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
+      __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
     }
   } else {
     __ fld_d(Address(rsp, wordSize));
     switch (kind) {
-      case Interpreter::java_lang_math_abs:
-          __ fabs();
-          break;
-      default                              :
-          ShouldNotReachHere();
+    case Interpreter::java_lang_math_abs:
+      __ fabs();
+      break;
+    default:
+      ShouldNotReachHere();
     }
 
     // return double result in xmm0 for interpreter and compilers.
@@ -447,3 +447,4 @@
 
   return entry_point;
 }
+
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp	Fri May 20 14:17:55 2016 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "compiler/compilerDirectives.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
@@ -418,9 +419,43 @@
   }
 }
 
+bool vmIntrinsics::is_intrinsic_available(vmIntrinsics::ID id) {  // true only if neither check below reports 'id' as disabled
+  return !vmIntrinsics::is_intrinsic_disabled(id) &&
+    !vmIntrinsics::is_disabled_by_flags(id);
+}
+
+bool vmIntrinsics::is_intrinsic_disabled(vmIntrinsics::ID id) {  // true if the name of 'id' is a token of DisableIntrinsic
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  // Canonicalize DisableIntrinsic to contain only ',' as a separator.
+  // Note, DirectiveSet may not be created at this point yet since this code
+  // is called from initial stub generation code.
+  char* local_list = (char*)DirectiveSet::canonicalize_disableintrinsic(DisableIntrinsic);
+
+  bool found = false;
+  char* token = strtok(local_list, ",");  // strtok() writes into local_list while splitting it
+  while (token != NULL) {
+    if (strcmp(token, vmIntrinsics::name_at(id)) == 0) {  // exact, case-sensitive match on the intrinsic name
+      found = true;
+      break;
+    } else {
+      token = strtok(NULL, ",");
+    }
+  }
+
+  FREE_C_HEAP_ARRAY(char, local_list);  // release the list obtained from canonicalize_disableintrinsic()
+  return found;
+}
+
+
 bool vmIntrinsics::is_disabled_by_flags(const methodHandle& method) {
   vmIntrinsics::ID id = method->intrinsic_id();
   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  return is_disabled_by_flags(id);
+}
+
+bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 
   // -XX:-InlineNatives disables nearly all intrinsics except the ones listed in
   // the following switch statement.
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Fri May 20 14:17:55 2016 -0700
@@ -1583,6 +1583,9 @@
   // Returns true if a compiler intrinsic is disabled by command-line flags
   // and false otherwise.
   static bool is_disabled_by_flags(const methodHandle& method);
+  static bool is_disabled_by_flags(vmIntrinsics::ID id);
+  static bool is_intrinsic_disabled(vmIntrinsics::ID id);
+  static bool is_intrinsic_available(vmIntrinsics::ID id);
 };
 
 #endif // SHARE_VM_CLASSFILE_VMSYMBOLS_HPP
--- a/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Fri May 20 18:27:03 2016 +0300
+++ b/hotspot/src/share/vm/compiler/compilerDirectives.hpp	Fri May 20 14:17:55 2016 -0700
@@ -101,8 +101,6 @@
   InlineMatcher* _inlinematchers;
   CompilerDirectives* _directive;
 
-  static ccstrlist canonicalize_disableintrinsic(ccstrlist option_value);
-
 public:
   DirectiveSet(CompilerDirectives* directive);
   ~DirectiveSet();
@@ -117,6 +115,7 @@
   bool matches_inline(methodHandle method, int inline_action);
   static DirectiveSet* clone(DirectiveSet const* src);
   bool is_intrinsic_disabled(methodHandle method);
+  static ccstrlist canonicalize_disableintrinsic(ccstrlist option_value);
   void finalize(outputStream* st);
 
   typedef enum {