8132207: update for x86 exp in the math lib
author iveresov
Mon, 05 Oct 2015 20:02:40 -0700
changeset 33089 f4e956ed8b43
parent 33088 34fe49ecee13
child 33090 141ffb78d088
child 33156 e17908b59e82
8132207: update for x86 exp in the math lib
Summary: Add new java.lang.Math() intrinsics from x86
Reviewed-by: kvn, iveresov
Contributed-by: vivek.r.deshpande@intel.com
hotspot/src/cpu/x86/vm/assembler_x86.cpp
hotspot/src/cpu/x86/vm/assembler_x86.hpp
hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp
hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp
hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
hotspot/src/cpu/x86/vm/macroAssembler_x86_libm.cpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/share/vm/adlc/formssel.cpp
hotspot/src/share/vm/c1/c1_LIR.cpp
hotspot/src/share/vm/c1/c1_LIR.hpp
hotspot/src/share/vm/c1/c1_LIRAssembler.cpp
hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
hotspot/src/share/vm/c1/c1_LinearScan.cpp
hotspot/src/share/vm/c1/c1_Runtime1.cpp
hotspot/src/share/vm/opto/classes.hpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/subnode.cpp
hotspot/src/share/vm/opto/subnode.hpp
hotspot/src/share/vm/runtime/stubRoutines.cpp
hotspot/src/share/vm/runtime/stubRoutines.hpp
hotspot/src/share/vm/runtime/vmStructs.cpp
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -770,6 +770,7 @@
     case 0x55: // andnps
     case 0x56: // orps
     case 0x57: // xorps
+    case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@@ -3030,6 +3031,15 @@
   emit_int8(imm8);
 }
 
+void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sse2(), "");
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
+                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  emit_int8(0x15);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
@@ -3048,6 +3058,15 @@
   emit_int8(imm8);
 }
 
+void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
+  assert(VM_Version::supports_sse2(), "");
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
+                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  emit_int8((unsigned char)0xC4);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   if (VM_Version::supports_evex()) {
@@ -4063,6 +4082,16 @@
   }
 }
 
+void Assembler::mulpd(XMMRegister dst, Address src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
   _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4251,6 +4280,26 @@
   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
 }
 
+void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
+  }
+}
+
+void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   if (VM_Version::supports_avx512dq()) {
@@ -4871,8 +4920,9 @@
 }
 
 
-// AND packed integers
+// logical operations packed integers
 void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
 }
@@ -4893,6 +4943,17 @@
   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
 }
 
+void Assembler::pandn(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::por(XMMRegister dst, XMMRegister src) {
   _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -1679,10 +1679,14 @@
   // SSE 4.1 extract
   void pextrd(Register dst, XMMRegister src, int imm8);
   void pextrq(Register dst, XMMRegister src, int imm8);
+  // SSE 2 extract
+  void pextrw(Register dst, XMMRegister src, int imm8);
 
   // SSE 4.1 insert
   void pinsrd(XMMRegister dst, Register src, int imm8);
   void pinsrq(XMMRegister dst, Register src, int imm8);
+  // SSE 2 insert
+  void pinsrw(XMMRegister dst, Register src, int imm8);
 
   // SSE4.1 packed move
   void pmovzxbw(XMMRegister dst, XMMRegister src);
@@ -1933,6 +1937,7 @@
 
   // Multiply Packed Floating-Point Values
   void mulpd(XMMRegister dst, XMMRegister src);
+  void mulpd(XMMRegister dst, Address src);
   void mulps(XMMRegister dst, XMMRegister src);
   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1959,6 +1964,9 @@
   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
+  void unpckhpd(XMMRegister dst, XMMRegister src);
+  void unpcklpd(XMMRegister dst, XMMRegister src);
+
   // Bitwise Logical XOR of Packed Floating-Point Values
   void xorpd(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, XMMRegister src);
@@ -2054,6 +2062,9 @@
   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
+  // Andn packed integers
+  void pandn(XMMRegister dst, XMMRegister src);
+
   // Or packed integers
   void por(XMMRegister dst, XMMRegister src);
   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -2457,9 +2457,6 @@
         // Should consider not saving rbx, if not necessary
         __ trigfunc('t', op->as_Op2()->fpu_stack_size());
         break;
-      case lir_exp :
-        __ exp_with_fallback(op->as_Op2()->fpu_stack_size());
-        break;
       case lir_pow :
         __ pow_with_fallback(op->as_Op2()->fpu_stack_size());
         break;
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -808,6 +808,12 @@
 
 void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
+
+  if (x->id() == vmIntrinsics::_dexp) {
+    do_ExpIntrinsic(x);
+    return;
+  }
+
   LIRItem value(x->argument_at(0), this);
 
   bool use_fpu = false;
@@ -818,7 +824,6 @@
       case vmIntrinsics::_dtan:
       case vmIntrinsics::_dlog:
       case vmIntrinsics::_dlog10:
-      case vmIntrinsics::_dexp:
       case vmIntrinsics::_dpow:
         use_fpu = true;
     }
@@ -870,7 +875,6 @@
     case vmIntrinsics::_dtan:   __ tan  (calc_input, calc_result, tmp1, tmp2);              break;
     case vmIntrinsics::_dlog:   __ log  (calc_input, calc_result, tmp1);                    break;
     case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1);                    break;
-    case vmIntrinsics::_dexp:   __ exp  (calc_input, calc_result,              tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     case vmIntrinsics::_dpow:   __ pow  (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     default:                    ShouldNotReachHere();
   }
@@ -880,6 +884,32 @@
   }
 }
 
+void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
+  LIRItem value(x->argument_at(0), this);
+  value.set_destroys_register();
+
+  LIR_Opr calc_result = rlock_result(x);
+  LIR_Opr result_reg = result_register_for(x->type());
+
+  BasicTypeList signature(1);
+  signature.append(T_DOUBLE);
+  CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+  value.load_item_force(cc->at(0));
+
+#ifndef _LP64
+  LIR_Opr tmp = FrameMap::fpu0_double_opr;
+  result_reg = tmp;
+  if (VM_Version::supports_sse2()) {
+    __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+  } else {
+    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
+  }
+#else
+  __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+#endif
+  __ move(result_reg, calc_result);
+}
 
 void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
   assert(x->number_of_arguments() == 5, "wrong type");
--- a/hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -814,8 +814,7 @@
 
     case lir_tan:
     case lir_sin:
-    case lir_cos:
-    case lir_exp: {
+    case lir_cos: {
       // sin, cos and exp need two temporary fpu stack slots, so there are two temporary
       // registers (stored in right and temp of the operation).
       // the stack allocator must guarantee that the stack slots are really free,
--- a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -151,11 +151,15 @@
       __ pop_fTOS();
       break;
     case Interpreter::java_lang_math_exp:
-      __ exp_with_fallback(0);
-      // Store to stack to convert 80bit precision back to 64bits
-      __ push_fTOS();
-      __ pop_fTOS();
-      break;
+      __ subptr(rsp, 2*wordSize);
+      __ fstp_d(Address(rsp, 0));
+      if (VM_Version::supports_sse2()) {
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
+      } else {
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
+      }
+      __ addptr(rsp, 2*wordSize);
+      break;
     default                              :
         ShouldNotReachHere();
   }
--- a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -252,6 +252,9 @@
 
   if (kind == Interpreter::java_lang_math_sqrt) {
     __ sqrtsd(xmm0, Address(rsp, wordSize));
+  } else if (kind == Interpreter::java_lang_math_exp) {
+    __ movdbl(xmm0, Address(rsp, wordSize));
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
   } else {
     __ fld_d(Address(rsp, wordSize));
     switch (kind) {
@@ -278,9 +281,6 @@
                                               // empty stack slot)
           __ pow_with_fallback(0);
           break;
-      case Interpreter::java_lang_math_exp:
-          __ exp_with_fallback(0);
-           break;
       default                              :
           ShouldNotReachHere();
     }
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -3033,6 +3033,15 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulpd(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::pow_exp_core_encoding() {
   // kills rax, rcx, rdx
   subptr(rsp,sizeof(jdouble));
@@ -3105,19 +3114,7 @@
   BLOCK_COMMENT("} fast_pow");
 }
 
-void MacroAssembler::fast_exp() {
-  // computes exp(X) = 2^(X * log2(e))
-  // if fast computation is not possible, result is NaN. Requires
-  // fallback from user of this macro.
-  // increase precision for intermediate steps of the computation
-  increase_precision();
-  fldl2e();                // Stack: log2(e) X ...
-  fmulp(1);                // Stack: (X*log2(e)) ...
-  pow_exp_core_encoding(); // Stack: exp(X) ...
-  restore_precision();
-}
-
-void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
   // kills rax, rcx, rdx
   // pow and exp needs 2 extra registers on the fpu stack.
   Label slow_case, done;
@@ -3129,182 +3126,164 @@
   Register tmp2 = rax;
   Register tmp3 = rcx;
 
-  if (is_exp) {
-    // Stack: X
-    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
-    fast_exp();                 // Stack: exp(X) X
-    fcmp(tmp, 0, false, false); // Stack: exp(X) X
-    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate argument. Stack: exp(X)
-    if (num_fpu_regs_in_use > 0) {
-      fxch();
-      fpop();
-    } else {
-      ffree(1);
-    }
-    jmp(done);
-  } else {
-    // Stack: X Y
-    Label x_negative, y_not_2;
-
-    static double two = 2.0;
-    ExternalAddress two_addr((address)&two);
-
-    // constant maybe too far on 64 bit
-    lea(tmp2, two_addr);
-    fld_d(Address(tmp2, 0));    // Stack: 2 X Y
-    fcmp(tmp, 2, true, false);  // Stack: X Y
-    jcc(Assembler::parity, y_not_2);
-    jcc(Assembler::notEqual, y_not_2);
-
-    fxch(); fpop();             // Stack: X
-    fmul(0);                    // Stack: X*X
-
-    jmp(done);
-
-    bind(y_not_2);
-
-    fldz();                     // Stack: 0 X Y
-    fcmp(tmp, 1, true, false);  // Stack: X Y
-    jcc(Assembler::above, x_negative);
-
-    // X >= 0
-
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fast_pow();                 // Stack: X^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
-    // X^Y not equal to itself: X^Y is NaN go to slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-    jmp(done);
-
-    // X <= 0
-    bind(x_negative);
-
-    fld_s(1);                   // Stack: Y X Y
-    frndint();                  // Stack: int(Y) X Y
-    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
-    jcc(Assembler::notEqual, slow_case);
-
-    subptr(rsp, 8);
-
-    // For X^Y, when X < 0, Y has to be an integer and the final
-    // result depends on whether it's odd or even. We just checked
-    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
-    // integer to test its parity. If int(Y) is huge and doesn't fit
-    // in the 64 bit integer range, the integer indefinite value will
-    // end up in the gp registers. Huge numbers are all even, the
-    // integer indefinite number is even so it's fine.
+  // Stack: X Y
+  Label x_negative, y_not_2;
+
+  static double two = 2.0;
+  ExternalAddress two_addr((address)&two);
+
+  // constant maybe too far on 64 bit
+  lea(tmp2, two_addr);
+  fld_d(Address(tmp2, 0));    // Stack: 2 X Y
+  fcmp(tmp, 2, true, false);  // Stack: X Y
+  jcc(Assembler::parity, y_not_2);
+  jcc(Assembler::notEqual, y_not_2);
+
+  fxch(); fpop();             // Stack: X
+  fmul(0);                    // Stack: X*X
+
+  jmp(done);
+
+  bind(y_not_2);
+
+  fldz();                     // Stack: 0 X Y
+  fcmp(tmp, 1, true, false);  // Stack: X Y
+  jcc(Assembler::above, x_negative);
+
+  // X >= 0
+
+  fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+  fld_s(1);                   // Stack: X Y X Y
+  fast_pow();                 // Stack: X^Y X Y
+  fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+  // X^Y not equal to itself: X^Y is NaN go to slow case.
+  jcc(Assembler::parity, slow_case);
+  // get rid of duplicate arguments. Stack: X^Y
+  if (num_fpu_regs_in_use > 0) {
+    fxch(); fpop();
+    fxch(); fpop();
+  } else {
+    ffree(2);
+    ffree(1);
+  }
+  jmp(done);
+
+  // X <= 0
+  bind(x_negative);
+
+  fld_s(1);                   // Stack: Y X Y
+  frndint();                  // Stack: int(Y) X Y
+  fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+  jcc(Assembler::notEqual, slow_case);
+
+  subptr(rsp, 8);
+
+  // For X^Y, when X < 0, Y has to be an integer and the final
+  // result depends on whether it's odd or even. We just checked
+  // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
+  // integer to test its parity. If int(Y) is huge and doesn't fit
+  // in the 64 bit integer range, the integer indefinite value will
+  // end up in the gp registers. Huge numbers are all even, the
+  // integer indefinite number is even so it's fine.
 
 #ifdef ASSERT
-    // Let's check we don't end up with an integer indefinite number
-    // when not expected. First test for huge numbers: check whether
-    // int(Y)+1 == int(Y) which is true for very large numbers and
-    // those are all even. A 64 bit integer is guaranteed to not
-    // overflow for numbers where y+1 != y (when precision is set to
-    // double precision).
-    Label y_not_huge;
-
-    fld1();                     // Stack: 1 int(Y) X Y
-    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+  // Let's check we don't end up with an integer indefinite number
+  // when not expected. First test for huge numbers: check whether
+  // int(Y)+1 == int(Y) which is true for very large numbers and
+  // those are all even. A 64 bit integer is guaranteed to not
+  // overflow for numbers where y+1 != y (when precision is set to
+  // double precision).
+  Label y_not_huge;
+
+  fld1();                     // Stack: 1 int(Y) X Y
+  fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
 
 #ifdef _LP64
-    // trip to memory to force the precision down from double extended
-    // precision
-    fstp_d(Address(rsp, 0));
-    fld_d(Address(rsp, 0));
+  // trip to memory to force the precision down from double extended
+  // precision
+  fstp_d(Address(rsp, 0));
+  fld_d(Address(rsp, 0));
 #endif
 
-    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+  fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
 #endif
 
-    // move int(Y) as 64 bit integer to thread's stack
-    fistp_d(Address(rsp,0));    // Stack: X Y
+  // move int(Y) as 64 bit integer to thread's stack
+  fistp_d(Address(rsp,0));    // Stack: X Y
 
 #ifdef ASSERT
-    jcc(Assembler::notEqual, y_not_huge);
-
-    // Y is huge so we know it's even. It may not fit in a 64 bit
-    // integer and we don't want the debug code below to see the
-    // integer indefinite value so overwrite int(Y) on the thread's
-    // stack with 0.
-    movl(Address(rsp, 0), 0);
-    movl(Address(rsp, 4), 0);
-
-    bind(y_not_huge);
+  jcc(Assembler::notEqual, y_not_huge);
+
+  // Y is huge so we know it's even. It may not fit in a 64 bit
+  // integer and we don't want the debug code below to see the
+  // integer indefinite value so overwrite int(Y) on the thread's
+  // stack with 0.
+  movl(Address(rsp, 0), 0);
+  movl(Address(rsp, 4), 0);
+
+  bind(y_not_huge);
 #endif
 
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fabs();                     // Stack: abs(X) Y X Y
-    fast_pow();                 // Stack: abs(X)^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
-    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
-
-    pop(tmp2);
-    NOT_LP64(pop(tmp3));
-    jcc(Assembler::parity, slow_case);
+  fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+  fld_s(1);                   // Stack: X Y X Y
+  fabs();                     // Stack: abs(X) Y X Y
+  fast_pow();                 // Stack: abs(X)^Y X Y
+  fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+  // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
+
+  pop(tmp2);
+  NOT_LP64(pop(tmp3));
+  jcc(Assembler::parity, slow_case);
 
 #ifdef ASSERT
-    // Check that int(Y) is not integer indefinite value (int
-    // overflow). Shouldn't happen because for values that would
-    // overflow, 1+int(Y)==Y which was tested earlier.
+  // Check that int(Y) is not integer indefinite value (int
+  // overflow). Shouldn't happen because for values that would
+  // overflow, 1+int(Y)==Y which was tested earlier.
 #ifndef _LP64
-    {
-      Label integer;
-      testl(tmp2, tmp2);
-      jcc(Assembler::notZero, integer);
-      cmpl(tmp3, 0x80000000);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
+  {
+    Label integer;
+    testl(tmp2, tmp2);
+    jcc(Assembler::notZero, integer);
+    cmpl(tmp3, 0x80000000);
+    jcc(Assembler::notZero, integer);
+    STOP("integer indefinite value shouldn't be seen here");
+    bind(integer);
+  }
 #else
-    {
-      Label integer;
-      mov(tmp3, tmp2); // preserve tmp2 for parity check below
-      shlq(tmp3, 1);
-      jcc(Assembler::carryClear, integer);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
+  {
+    Label integer;
+    mov(tmp3, tmp2); // preserve tmp2 for parity check below
+    shlq(tmp3, 1);
+    jcc(Assembler::carryClear, integer);
+    jcc(Assembler::notZero, integer);
+    STOP("integer indefinite value shouldn't be seen here");
+    bind(integer);
+  }
 #endif
 #endif
 
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-
-    testl(tmp2, 1);
-    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
-    // X <= 0, Y even: X^Y = -abs(X)^Y
-
-    fchs();                     // Stack: -abs(X)^Y Y
-    jmp(done);
-  }
+  // get rid of duplicate arguments. Stack: X^Y
+  if (num_fpu_regs_in_use > 0) {
+    fxch(); fpop();
+    fxch(); fpop();
+  } else {
+    ffree(2);
+    ffree(1);
+  }
+
+  testl(tmp2, 1);
+  jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+  // X <= 0, Y even: X^Y = -abs(X)^Y
+
+  fchs();                     // Stack: -abs(X)^Y Y
+  jmp(done);
 
   // slow case: runtime call
   bind(slow_case);
 
   fpop();                       // pop incorrect result or int(Y)
 
-  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
-                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+  fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
 
   // Come here with result in F-TOS
   bind(done);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -907,14 +907,14 @@
   // all corner cases and may result in NaN and require fallback to a
   // runtime call.
   void fast_pow();
-  void fast_exp();
+  void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
+                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
+                Register rax, Register rcx, Register rdx, Register tmp);
   void increase_precision();
   void restore_precision();
 
-  // computes exp(x). Fallback to runtime call included.
-  void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
   // computes pow(x,y). Fallback to runtime call included.
-  void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
+  void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(num_fpu_regs_in_use); }
 
 private:
 
@@ -925,7 +925,7 @@
   void pow_exp_core_encoding();
 
   // computes pow(x,y) or exp(x). Fallback to runtime call included.
-  void pow_or_exp(bool is_exp, int num_fpu_regs_in_use);
+  void pow_or_exp(int num_fpu_regs_in_use);
 
   // these are private because users should be doing movflt/movdbl
 
@@ -971,6 +971,10 @@
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, AddressLiteral src);
 
+  void mulpd(XMMRegister dst, XMMRegister src)    { Assembler::mulpd(dst, src); }
+  void mulpd(XMMRegister dst, Address src)        { Assembler::mulpd(dst, src); }
+  void mulpd(XMMRegister dst, AddressLiteral src);
+
   void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
   void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
   void mulsd(XMMRegister dst, AddressLiteral src);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86_libm.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -0,0 +1,677 @@
+/*
+ * Copyright (c) 2015, Intel Corporation.
+ * Intel Math Library (LIBM) Source Code
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/******************************************************************************/
+//                     ALGORITHM DESCRIPTION
+//                     ---------------------
+//
+// Description:
+//  Let K = 64 (table size).
+//        x    x/log(2)     n
+//       e  = 2          = 2 * T[j] * (1 + P(y))
+//  where
+//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
+//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
+//                  j/K
+//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
+//
+//       P(y) is a minimax polynomial approximation of exp(x)-1
+//       on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
+//
+//  To avoid problems with arithmetic overflow and underflow,
+//            n                        n1  n2
+//  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
+//  where BIAS is a value of exponent bias.
+//
+// Special cases:
+//  exp(NaN) = NaN
+//  exp(+INF) = +INF
+//  exp(-INF) = 0
+//  exp(x) = 1 for subnormals
+//  for finite argument, only exp(0)=1 is exact
+//  For IEEE double
+//    if x >  709.782712893383973096 then exp(x) overflow
+//    if x < -745.133219101941108420 then exp(x) underflow
+//
+/******************************************************************************/
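+//
+// For orientation only: a rough scalar sketch of the same table-based scheme
+// in plain C (not the SSE2 code generated below, which uses the split
+// T_hi/T_lo table and a minimax polynomial). exp_sketch and T are
+// illustrative names; assume T[j] = pow(2.0, j/64.0) for j in [0..63] and
+// <math.h> for rint/ldexp.
+//
+//   double exp_sketch(double x) {
+//     const int K = 64;
+//     double m  = rint(x * K / log(2.0));     // m = n*K + j
+//     int    mi = (int) m;
+//     int    n  = mi >> 6, j = mi & 63;       // exponent part / table index
+//     double y  = x - m * (log(2.0) / K);     // reduced argument
+//     double p  = y + y*y/2 + y*y*y/6;        // ~ exp(y) - 1 (truncated)
+//     return ldexp(T[j] * (1.0 + p), n);      // 2^n * T[j] * (1 + P(y))
+//   }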
+
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "macroAssembler_x86.hpp"
+
+#ifdef _MSC_VER
+#define ALIGNED_(x) __declspec(align(x))
+#else
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+#endif
+
+#ifdef _LP64
+
+ALIGNED_(16) juint _cv[] =
+{
+    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
+    0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
+    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
+    0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
+    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
+};
+
+ALIGNED_(16) juint _shifter[] =
+{
+    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
+};
+
+ALIGNED_(16) juint _mmask[] =
+{
+    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
+};
+
+ALIGNED_(16) juint _bias[] =
+{
+    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
+};
+
+ALIGNED_(16) juint _Tbl_addr[] =
+{
+    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
+    0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
+    0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
+    0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
+    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
+    0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
+    0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
+    0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
+    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
+    0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
+    0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
+    0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
+    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
+    0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
+    0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
+    0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
+    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
+    0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
+    0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
+    0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
+    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
+    0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
+    0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
+    0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
+    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
+    0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
+    0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
+    0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
+    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
+    0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
+    0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
+    0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
+    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
+    0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
+    0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
+    0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
+    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
+    0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
+    0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
+    0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
+    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
+    0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
+    0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
+    0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
+    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
+    0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
+    0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
+    0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
+    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
+    0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
+    0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
+    0x000fa7c1UL
+};
+
+ALIGNED_(16) juint _ALLONES[] =
+{
+    0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
+};
+
+ALIGNED_(16) juint _ebias[] =
+{
+    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
+};
+
+ALIGNED_(4) juint _XMAX[] =
+{
+    0xffffffffUL, 0x7fefffffUL
+};
+
+ALIGNED_(4) juint _XMIN[] =
+{
+    0x00000000UL, 0x00100000UL
+};
+
+ALIGNED_(4) juint _INF[] =
+{
+    0x00000000UL, 0x7ff00000UL
+};
+
+ALIGNED_(4) juint _ZERO[] =
+{
+    0x00000000UL, 0x00000000UL
+};
+
+ALIGNED_(4) juint _ONE_val[] =
+{
+    0x00000000UL, 0x3ff00000UL
+};
+
+
+// Registers:
+// input: xmm0
+// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+//          rax, rdx, rcx, tmp - r11
+
+// Code generated by Intel C compiler for LIBM library
+
+void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
+  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
+  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
+  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
+  Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
+
+  assert_different_registers(tmp, eax, ecx, edx);
+  jmp(start);
+  address cv = (address)_cv;
+  address Shifter = (address)_shifter;
+  address mmask = (address)_mmask;
+  address bias = (address)_bias;
+  address Tbl_addr = (address)_Tbl_addr;
+  address ALLONES = (address)_ALLONES;
+  address ebias = (address)_ebias;
+  address XMAX = (address)_XMAX;
+  address XMIN = (address)_XMIN;
+  address INF = (address)_INF;
+  address ZERO = (address)_ZERO;
+  address ONE_val = (address)_ONE_val;
+
+  bind(start);
+  subq(rsp, 24);
+  movsd(Address(rsp, 8), xmm0);
+  unpcklpd(xmm0, xmm0);
+  movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
+  movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
+  movdqu(xmm2, ExternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
+  movdqu(xmm3, ExternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
+  pextrw(eax, xmm0, 3);
+  andl(eax, 32767);
+  movl(edx, 16527);
+  subl(edx, eax);
+  subl(eax, 15504);
+  orl(edx, eax);
+  cmpl(edx, INT_MIN);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
+  mulpd(xmm1, xmm0);
+  addpd(xmm1, xmm6);
+  movapd(xmm7, xmm1);
+  subpd(xmm1, xmm6);
+  mulpd(xmm2, xmm1);
+  movdqu(xmm4, ExternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
+  mulpd(xmm3, xmm1);
+  movdqu(xmm5, ExternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
+  subpd(xmm0, xmm2);
+  movdl(eax, xmm7);
+  movl(ecx, eax);
+  andl(ecx, 63);
+  shll(ecx, 4);
+  sarl(eax, 6);
+  movl(edx, eax);
+  movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
+  pand(xmm7, xmm6);
+  movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
+  paddq(xmm7, xmm6);
+  psllq(xmm7, 46);
+  subpd(xmm0, xmm3);
+  lea(tmp, ExternalAddress(Tbl_addr));
+  movdqu(xmm2, Address(ecx,tmp));
+  mulpd(xmm4, xmm0);
+  movapd(xmm6, xmm0);
+  movapd(xmm1, xmm0);
+  mulpd(xmm6, xmm6);
+  mulpd(xmm0, xmm6);
+  addpd(xmm5, xmm4);
+  mulsd(xmm0, xmm6);
+  mulpd(xmm6, ExternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
+  addsd(xmm1, xmm2);
+  unpckhpd(xmm2, xmm2);
+  mulpd(xmm0, xmm5);
+  addsd(xmm1, xmm0);
+  por(xmm2, xmm7);
+  unpckhpd(xmm0, xmm0);
+  addsd(xmm0, xmm1);
+  addsd(xmm0, xmm6);
+  addl(edx, 894);
+  cmpl(edx, 1916);
+  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
+  mulsd(xmm0, xmm2);
+  addsd(xmm0, xmm2);
+  jmp (B1_5);
+
+  bind(L_2TAG_PACKET_1_0_2);
+  xorpd(xmm3, xmm3);
+  movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
+  movl(edx, -1022);
+  subl(edx, eax);
+  movdl(xmm5, edx);
+  psllq(xmm4, xmm5);
+  movl(ecx, eax);
+  sarl(eax, 1);
+  pinsrw(xmm3, eax, 3);
+  movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
+  psllq(xmm3, 4);
+  psubd(xmm2, xmm3);
+  mulsd(xmm0, xmm2);
+  cmpl(edx, 52);
+  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
+  pand(xmm4, xmm2);
+  paddd(xmm3, xmm6);
+  subsd(xmm2, xmm4);
+  addsd(xmm0, xmm2);
+  cmpl(ecx, 1023);
+  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
+  pextrw(ecx, xmm0, 3);
+  andl(ecx, 32768);
+  orl(edx, ecx);
+  cmpl(edx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
+  movapd(xmm6, xmm0);
+  addsd(xmm0, xmm4);
+  mulsd(xmm0, xmm3);
+  pextrw(ecx, xmm0, 3);
+  andl(ecx, 32752);
+  cmpl(ecx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_5_0_2);
+  mulsd(xmm6, xmm3);
+  mulsd(xmm4, xmm3);
+  movdqu(xmm0, xmm6);
+  pxor(xmm6, xmm4);
+  psrad(xmm6, 31);
+  pshufd(xmm6, xmm6, 85);
+  psllq(xmm0, 1);
+  psrlq(xmm0, 1);
+  pxor(xmm0, xmm6);
+  psrlq(xmm6, 63);
+  paddq(xmm0, xmm6);
+  paddq(xmm0, xmm4);
+  movl(Address(rsp,0), 15);
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_4_0_2);
+  addsd(xmm0, xmm4);
+  mulsd(xmm0, xmm3);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_3_0_2);
+  addsd(xmm0, xmm4);
+  mulsd(xmm0, xmm3);
+  pextrw(ecx, xmm0, 3);
+  andl(ecx, 32752);
+  cmpl(ecx, 32752);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_2_0_2);
+  paddd(xmm3, xmm6);
+  addpd(xmm0, xmm2);
+  mulsd(xmm0, xmm3);
+  movl(Address(rsp,0), 15);
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_8_0_2);
+  cmpl(eax, 2146435072);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
+  movl(eax, Address(rsp,12));
+  cmpl(eax, INT_MIN);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
+  movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
+  mulsd(xmm0, xmm0);
+
+  bind(L_2TAG_PACKET_7_0_2);
+  movl(Address(rsp,0), 14);
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_10_0_2);
+  movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
+  mulsd(xmm0, xmm0);
+  movl(Address(rsp,0), 15);
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_9_0_2);
+  movl(edx, Address(rsp,8));
+  cmpl(eax, 2146435072);
+  jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
+  cmpl(edx, 0);
+  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
+  movl(eax, Address(rsp,12));
+  cmpl(eax, 2146435072);
+  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
+  movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_12_0_2);
+  movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_11_0_2);
+  movsd(xmm0, Address(rsp, 8));
+  addsd(xmm0, xmm0);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_0_0_2);
+  movl(eax, Address(rsp, 12));
+  andl(eax, 2147483647);
+  cmpl(eax, 1083179008);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
+  movsd(Address(rsp, 8), xmm0);
+  addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_6_0_2);
+  movq(Address(rsp, 16), xmm0);
+
+  bind(B1_3);
+  movq(xmm0, Address(rsp, 16));
+
+  bind(B1_5);
+  addq(rsp, 24);
+}
+#endif
+
+#ifndef _LP64
+
+ALIGNED_(16) juint _static_const_table[] =
+{
+    0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
+    0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
+    0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
+    0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
+    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
+    0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
+    0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
+    0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
+    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
+    0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
+    0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
+    0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
+    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
+    0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
+    0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
+    0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
+    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
+    0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
+    0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
+    0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
+    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
+    0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
+    0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
+    0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
+    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
+    0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
+    0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
+    0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
+    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
+    0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
+    0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
+    0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
+    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
+    0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
+    0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
+    0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
+    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
+    0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
+    0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
+    0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
+    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
+    0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
+    0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
+    0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
+    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
+    0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
+    0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
+    0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
+    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
+    0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
+    0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
+    0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
+    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
+    0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
+    0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
+    0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
+    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
+    0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
+    0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
+    0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
+    0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
+    0x00100000UL
+};
+
+// Registers:
+// input: (rbp + 8)
+// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+//          rax, rdx, rcx, rbx (tmp)
+
+// Code generated by Intel C compiler for LIBM library
+
+void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
+  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
+  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
+  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
+  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
+
+  assert_different_registers(tmp, eax, ecx, edx);
+  jmp(start);
+  address static_const_table = (address)_static_const_table;
+
+  bind(start);
+  subl(rsp, 120);
+  movl(Address(rsp, 64), tmp);
+  lea(tmp, ExternalAddress(static_const_table));
+  movdqu(xmm0, Address(rsp, 128));
+  unpcklpd(xmm0, xmm0);
+  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
+  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
+  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
+  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
+  pextrw(eax, xmm0, 3);
+  andl(eax, 32767);
+  movl(edx, 16527);
+  subl(edx, eax);
+  subl(eax, 15504);
+  orl(edx, eax);
+  cmpl(edx, INT_MIN);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
+  mulpd(xmm1, xmm0);
+  addpd(xmm1, xmm6);
+  movapd(xmm7, xmm1);
+  subpd(xmm1, xmm6);
+  mulpd(xmm2, xmm1);
+  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
+  mulpd(xmm3, xmm1);
+  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
+  subpd(xmm0, xmm2);
+  movdl(eax, xmm7);
+  movl(ecx, eax);
+  andl(ecx, 63);
+  shll(ecx, 4);
+  sarl(eax, 6);
+  movl(edx, eax);
+  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
+  pand(xmm7, xmm6);
+  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
+  paddq(xmm7, xmm6);
+  psllq(xmm7, 46);
+  subpd(xmm0, xmm3);
+  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
+  mulpd(xmm4, xmm0);
+  movapd(xmm6, xmm0);
+  movapd(xmm1, xmm0);
+  mulpd(xmm6, xmm6);
+  mulpd(xmm0, xmm6);
+  addpd(xmm5, xmm4);
+  mulsd(xmm0, xmm6);
+  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
+  addsd(xmm1, xmm2);
+  unpckhpd(xmm2, xmm2);
+  mulpd(xmm0, xmm5);
+  addsd(xmm1, xmm0);
+  por(xmm2, xmm7);
+  unpckhpd(xmm0, xmm0);
+  addsd(xmm0, xmm1);
+  addsd(xmm0, xmm6);
+  addl(edx, 894);
+  cmpl(edx, 1916);
+  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
+  mulsd(xmm0, xmm2);
+  addsd(xmm0, xmm2);
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_1_0_2);
+  fnstcw(Address(rsp, 24));
+  movzwl(edx, Address(rsp, 24));
+  orl(edx, 768);
+  movw(Address(rsp, 28), edx);
+  fldcw(Address(rsp, 28));
+  movl(edx, eax);
+  sarl(eax, 1);
+  subl(edx, eax);
+  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
+  pandn(xmm6, xmm2);
+  addl(eax, 1023);
+  movdl(xmm3, eax);
+  psllq(xmm3, 52);
+  por(xmm6, xmm3);
+  addl(edx, 1023);
+  movdl(xmm4, edx);
+  psllq(xmm4, 52);
+  movsd(Address(rsp, 8), xmm0);
+  fld_d(Address(rsp, 8));
+  movsd(Address(rsp, 16), xmm6);
+  fld_d(Address(rsp, 16));
+  fmula(1);
+  faddp(1);
+  movsd(Address(rsp, 8), xmm4);
+  fld_d(Address(rsp, 8));
+  fmulp(1);
+  fstp_d(Address(rsp, 8));
+  movsd(xmm0,Address(rsp, 8));
+  fldcw(Address(rsp, 24));
+  pextrw(ecx, xmm0, 3);
+  andl(ecx, 32752);
+  cmpl(ecx, 32752);
+  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
+  cmpl(ecx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
+  jmp(L_2TAG_PACKET_2_0_2);
+  cmpl(ecx, INT_MIN);
+  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
+  cmpl(ecx, -1064950997);
+  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
+  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
+  movl(edx, Address(rsp, 128));
+  cmpl(edx ,-17155601);
+  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
+  jmp(L_2TAG_PACKET_4_0_2);
+
+  bind(L_2TAG_PACKET_3_0_2);
+  movl(edx, 14);
+  jmp(L_2TAG_PACKET_5_0_2);
+
+  bind(L_2TAG_PACKET_4_0_2);
+  movl(edx, 15);
+
+  bind(L_2TAG_PACKET_5_0_2);
+  movsd(Address(rsp, 0), xmm0);
+  movsd(xmm0, Address(rsp, 128));
+  fld_d(Address(rsp, 0));
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_7_0_2);
+  cmpl(eax, 2146435072);
+  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
+  movl(eax, Address(rsp, 132));
+  cmpl(eax, INT_MIN);
+  jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
+  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
+  mulsd(xmm0, xmm0);
+  movl(edx, 14);
+  jmp(L_2TAG_PACKET_5_0_2);
+
+  bind(L_2TAG_PACKET_9_0_2);
+  movsd(xmm0, Address(tmp, 1216));
+  mulsd(xmm0, xmm0);
+  movl(edx, 15);
+  jmp(L_2TAG_PACKET_5_0_2);
+
+  bind(L_2TAG_PACKET_8_0_2);
+  movl(edx, Address(rsp, 128));
+  cmpl(eax, 2146435072);
+  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
+  cmpl(edx, 0);
+  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
+  movl(eax, Address(rsp, 132));
+  cmpl(eax, 2146435072);
+  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
+  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_11_0_2);
+  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_10_0_2);
+  movsd(xmm0, Address(rsp, 128));
+  addsd(xmm0, xmm0);
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_0_0_2);
+  movl(eax, Address(rsp, 132));
+  andl(eax, 2147483647);
+  cmpl(eax, 1083179008);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
+  movsd(xmm0, Address(rsp, 128));
+  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_2_0_2);
+  movsd(Address(rsp, 48), xmm0);
+  fld_d(Address(rsp, 48));
+
+  bind(L_2TAG_PACKET_6_0_2);
+  movl(tmp, Address(rsp, 64));
+}
+
+#endif
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -2135,14 +2135,6 @@
       __ ret(0);
     }
     {
-      StubCodeMark mark(this, "StubRoutines", "exp");
-      StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
-
-      __ fld_d(Address(rsp, 4));
-      __ exp_with_fallback(0);
-      __ ret(0);
-    }
-    {
       StubCodeMark mark(this, "StubRoutines", "pow");
       StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
 
@@ -3048,6 +3040,32 @@
     return start;
   }
 
+  address generate_libmExp() {
+    address start = __ pc();
+
+    const XMMRegister x0  = xmm0;
+    const XMMRegister x1  = xmm1;
+    const XMMRegister x2  = xmm2;
+    const XMMRegister x3  = xmm3;
+
+    const XMMRegister x4  = xmm4;
+    const XMMRegister x5  = xmm5;
+    const XMMRegister x6  = xmm6;
+    const XMMRegister x7  = xmm7;
+
+    const Register tmp   = rbx;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+
+  }
+
+
   // Safefetch stubs.
   void generate_safefetch(const char* name, int size, address* entry,
                           address* fault_pc, address* continuation_pc) {
@@ -3268,6 +3286,9 @@
       StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
+    if (VM_Version::supports_sse2()) {
+      StubRoutines::_dexp = generate_libmExp();
+    }
   }
 
 
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -3039,19 +3039,6 @@
       __ ret(0);
     }
     {
-      StubCodeMark mark(this, "StubRoutines", "exp");
-      StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
-
-      __ subq(rsp, 8);
-      __ movdbl(Address(rsp, 0), xmm0);
-      __ fld_d(Address(rsp, 0));
-      __ exp_with_fallback(0);
-      __ fstp_d(Address(rsp, 0));
-      __ movdbl(xmm0, Address(rsp, 0));
-      __ addq(rsp, 8);
-      __ ret(0);
-    }
-    {
       StubCodeMark mark(this, "StubRoutines", "pow");
       StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
 
@@ -4180,6 +4167,44 @@
     return start;
   }
 
+  address generate_libmExp() {
+    address start = __ pc();
+
+    const XMMRegister x0  = xmm0;
+    const XMMRegister x1  = xmm1;
+    const XMMRegister x2  = xmm2;
+    const XMMRegister x3  = xmm3;
+
+    const XMMRegister x4  = xmm4;
+    const XMMRegister x5  = xmm5;
+    const XMMRegister x6  = xmm6;
+    const XMMRegister x7  = xmm7;
+
+    const Register tmp   = r11;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-7
+    __ movdqu(xmm_save(6), as_XMMRegister(6));
+    __ movdqu(xmm_save(7), as_XMMRegister(7));
+#endif
+      __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    __ movdqu(as_XMMRegister(6), xmm_save(6));
+    __ movdqu(as_XMMRegister(7), xmm_save(7));
+#endif
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+
+  }
+
 
 #undef __
 #define __ masm->
@@ -4367,6 +4392,7 @@
       StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
+    StubRoutines::_dexp = generate_libmExp();
   }
 
   void generate_all() {
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Mon Oct 05 20:02:40 2015 -0700
@@ -9911,35 +9911,6 @@
   ins_pipe( pipe_slow );
 %}
 
-
-instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
-  predicate (UseSSE<=1);
-  match(Set dpr1 (ExpD dpr1));
-  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
-  format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
-  ins_encode %{
-    __ fast_exp();
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
-  predicate (UseSSE>=2);
-  match(Set dst (ExpD src));
-  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
-  format %{ "fast_exp $dst -> $src  // KILL $rax, $rcx, $rdx" %}
-  ins_encode %{
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-    __ fast_exp();
-    __ fstp_d(Address(rsp, 0));
-    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
-    __ addptr(rsp, 8);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   // The source Double operand on FPU stack
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Oct 05 20:02:40 2015 -0700
@@ -9898,22 +9898,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
-  match(Set dst (ExpD src));
-  effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
-  format %{ "fast_exp $dst -> $src  // KILL $rax, $rcx, $rdx" %}
-  ins_encode %{
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-    __ fast_exp();
-    __ fstp_d(Address(rsp, 0));
-    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
-    __ addptr(rsp, 8);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 //----------Arithmetic Conversion Instructions---------------------------------
 
 instruct roundFloat_nop(regF dst)
--- a/hotspot/src/share/vm/adlc/formssel.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/adlc/formssel.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -4006,7 +4006,6 @@
         strcmp(opType,"DivD")==0 ||
         strcmp(opType,"DivF")==0 ||
         strcmp(opType,"DivI")==0 ||
-        strcmp(opType,"ExpD")==0 ||
         strcmp(opType,"LogD")==0 ||
         strcmp(opType,"Log10D")==0 ||
         strcmp(opType,"ModD")==0 ||
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -732,8 +732,7 @@
     case lir_sin:
     case lir_cos:
     case lir_log:
-    case lir_log10:
-    case lir_exp: {
+    case lir_log10: {
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
 
@@ -743,9 +742,6 @@
       // overlap with the input.
       assert(op2->_info == NULL, "not used");
       assert(op2->_tmp5->is_illegal(), "not used");
-      assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used");
-      assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used");
-      assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used");
       assert(op2->_opr1->is_valid(), "used");
       do_input(op2->_opr1); do_temp(op2->_opr1);
 
@@ -1775,7 +1771,6 @@
      case lir_tan:                   s = "tan";           break;
      case lir_log:                   s = "log";           break;
      case lir_log10:                 s = "log10";         break;
-     case lir_exp:                   s = "exp";           break;
      case lir_pow:                   s = "pow";           break;
      case lir_logic_and:             s = "logic_and";     break;
      case lir_logic_or:              s = "logic_or";      break;
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -961,7 +961,6 @@
       , lir_tan
       , lir_log
       , lir_log10
-      , lir_exp
       , lir_pow
       , lir_logic_and
       , lir_logic_or
@@ -2199,7 +2198,6 @@
   void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
   void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
   void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
-  void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5)                { append(new LIR_Op2(lir_exp , from, tmp1, to, tmp2, tmp3, tmp4, tmp5)); }
   void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
 
   void add (LIR_Opr left, LIR_Opr right, LIR_Opr res)      { append(new LIR_Op2(lir_add, left, right, res)); }
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -739,7 +739,6 @@
     case lir_cos:
     case lir_log:
     case lir_log10:
-    case lir_exp:
     case lir_pow:
       intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
       break;
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -244,6 +244,7 @@
   void do_getClass(Intrinsic* x);
   void do_currentThread(Intrinsic* x);
   void do_MathIntrinsic(Intrinsic* x);
+  void do_ExpIntrinsic(Intrinsic* x);
   void do_ArrayCopy(Intrinsic* x);
   void do_CompareAndSwap(Intrinsic* x, ValueType* type);
   void do_NIOCheckIndex(Intrinsic* x);
--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -6588,7 +6588,6 @@
         case lir_log10:
         case lir_log:
         case lir_pow:
-        case lir_exp:
         case lir_logic_and:
         case lir_logic_or:
         case lir_logic_xor:
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -317,6 +317,7 @@
   FUNCTION_CASE(entry, TRACE_TIME_METHOD);
 #endif
   FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
+  FUNCTION_CASE(entry, StubRoutines::dexp());
 
 #undef FUNCTION_CASE
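
The new FUNCTION_CASE entry lets C1 map the raw stub address back to a printable name in its debug/trace output when compiled code calls into StubRoutines::dexp(). A standalone analogue of what that lookup accomplishes (assumed intent, hypothetical names):

    typedef unsigned char* address;

    // Resolve a runtime-call target to a readable name, the way the
    // FUNCTION_CASE table does for entries such as StubRoutines::dexp().
    static const char* routine_name(address entry, address dexp_entry) {
      if (entry == dexp_entry) return "StubRoutines::dexp()";
      return "<unknown function>";
    }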
 
--- a/hotspot/src/share/vm/opto/classes.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/opto/classes.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -131,7 +131,6 @@
 macro(EncodeISOArray)
 macro(EncodeP)
 macro(EncodePKlass)
-macro(ExpD)
 macro(FastLock)
 macro(FastUnlock)
 macro(Goto)
--- a/hotspot/src/share/vm/opto/library_call.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -222,7 +222,6 @@
   bool inline_math_negateExactL();
   bool inline_math_subtractExactI(bool is_decrement);
   bool inline_math_subtractExactL(bool is_decrement);
-  bool inline_exp();
   bool inline_pow();
   Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_min_max(vmIntrinsics::ID id);
@@ -1535,20 +1534,6 @@
   }
 }
 
-//------------------------------inline_exp-------------------------------------
-// Inline exp instructions, if possible.  The Intel hardware only misses
-// really odd corner cases (+/- Infinity).  Just uncommon-trap them.
-bool LibraryCallKit::inline_exp() {
-  Node* arg = round_double_node(argument(0));
-  Node* n   = _gvn.transform(new ExpDNode(C, control(), arg));
-
-  n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
-  set_result(n);
-
-  C->set_has_split_ifs(true); // Has chance for split-if optimization
-  return true;
-}
-
 //------------------------------inline_pow-------------------------------------
 // Inline power instructions, if possible.
 bool LibraryCallKit::inline_pow() {
@@ -1776,8 +1761,9 @@
   case vmIntrinsics::_dsqrt:  return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
   case vmIntrinsics::_dabs:   return Matcher::has_match_rule(Op_AbsD)   ? inline_math(id) : false;
 
-  case vmIntrinsics::_dexp:   return Matcher::has_match_rule(Op_ExpD)   ? inline_exp()    :
-    runtime_math(OptoRuntime::Math_D_D_Type(),  FN_PTR(SharedRuntime::dexp),  "EXP");
+  case vmIntrinsics::_dexp:
+    return (UseSSE >= 2) ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(),  "dexp") :
+                           runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp),  "EXP");
   case vmIntrinsics::_dpow:   return Matcher::has_match_rule(Op_PowD)   ? inline_pow()    :
     runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow),  "POW");
 #undef FN_PTR
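
With inline_exp() and the ExpD match rule gone, C2 no longer expands Math.exp in the IR; _dexp is always lowered to a direct runtime call, and the only remaining decision is which entry to call. Reduced to a sketch (hypothetical helper, mirroring the UseSSE >= 2 test above):

    typedef unsigned char* address;

    // Pick the call target for Math.exp: the generated libm-derived stub when
    // the VM runs with UseSSE >= 2, otherwise the shared C runtime entry.
    static address dexp_target(int use_sse, address stub_dexp, address shared_dexp) {
      return (use_sse >= 2) ? stub_dexp : shared_dexp;
    }
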
--- a/hotspot/src/share/vm/opto/subnode.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/opto/subnode.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -1532,18 +1532,6 @@
 
 //=============================================================================
 //------------------------------Value------------------------------------------
-// Compute exp
-const Type *ExpDNode::Value( PhaseTransform *phase ) const {
-  const Type *t1 = phase->type( in(1) );
-  if( t1 == Type::TOP ) return Type::TOP;
-  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
-  double d = t1->getd();
-  return TypeD::make( StubRoutines::intrinsic_exp( d ) );
-}
-
-
-//=============================================================================
-//------------------------------Value------------------------------------------
 // Compute pow
 const Type *PowDNode::Value( PhaseTransform *phase ) const {
   const Type *t1 = phase->type( in(1) );
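
Removing ExpDNode::Value() also removes the compile-time constant folding of exp over double constants, which relied on StubRoutines::intrinsic_exp. In simplified form, the folding that goes away looked like this (illustrative only):

    #include <cmath>

    // If the operand is a known double constant, produce exp(c) at compile time;
    // otherwise the node keeps type DOUBLE.  (The removed code called
    // StubRoutines::intrinsic_exp(c) rather than std::exp.)
    static bool try_fold_exp(bool input_is_constant, double c, double* folded) {
      if (!input_is_constant) return false;
      *folded = std::exp(c);
      return true;
    }
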
--- a/hotspot/src/share/vm/opto/subnode.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/opto/subnode.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -477,20 +477,6 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
 };
 
-//------------------------------ExpDNode---------------------------------------
-//  Exponentiate a double
-class ExpDNode : public Node {
-public:
-  ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
-    init_flags(Flag_is_expensive);
-    C->add_expensive_node(this);
-  }
-  virtual int Opcode() const;
-  const Type *bottom_type() const { return Type::DOUBLE; }
-  virtual uint ideal_reg() const { return Op_RegD; }
-  virtual const Type *Value( PhaseTransform *phase ) const;
-};
-
 //------------------------------LogDNode---------------------------------------
 // Log_e of a double
 class LogDNode : public Node {
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -148,9 +148,10 @@
 address StubRoutines::_montgomeryMultiply = NULL;
 address StubRoutines::_montgomerySquare = NULL;
 
+address StubRoutines::_dexp = NULL;
+
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
-double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
 double (* StubRoutines::_intrinsic_pow   )(double, double) = NULL;
 double (* StubRoutines::_intrinsic_sin   )(double) = NULL;
 double (* StubRoutines::_intrinsic_cos   )(double) = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp	Mon Oct 05 20:02:40 2015 -0700
@@ -207,6 +207,8 @@
   static address _montgomeryMultiply;
   static address _montgomerySquare;
 
+  static address _dexp;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the
@@ -215,7 +217,6 @@
   // SharedRuntime.
   static double (*_intrinsic_log)(double);
   static double (*_intrinsic_log10)(double);
-  static double (*_intrinsic_exp)(double);
   static double (*_intrinsic_pow)(double, double);
   static double (*_intrinsic_sin)(double);
   static double (*_intrinsic_cos)(double);
@@ -375,6 +376,8 @@
   static address montgomeryMultiply()  { return _montgomeryMultiply; }
   static address montgomerySquare()    { return _montgomerySquare; }
 
+  static address dexp()                { return _dexp; }
+
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
   static address zero_aligned_words()   { return _zero_aligned_words; }
@@ -387,10 +390,6 @@
     assert(_intrinsic_log != NULL, "must be defined");
     return _intrinsic_log10(d);
   }
-  static double  intrinsic_exp(double d) {
-    assert(_intrinsic_exp != NULL, "must be defined");
-    return _intrinsic_exp(d);
-  }
   static double  intrinsic_pow(double d, double d2) {
     assert(_intrinsic_pow != NULL, "must be defined");
     return _intrinsic_pow(d, d2);
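
Two different hooks are being exchanged here: _intrinsic_exp was a C function pointer used only for compile-time constant folding (kept equivalent to the intrinsic, per the comment above), while _dexp is the address of generated code that compiled frames call at run time. A condensed sketch of the surviving piece (field and accessor names as in the patch, surrounding scaffolding illustrative):

    typedef unsigned char* address;

    struct StubRoutinesSketch {
      static address _dexp;                      // run-time call target for Math.exp
      static address dexp() { return _dexp; }    // accessor used by the JIT compilers
    };
    address StubRoutinesSketch::_dexp = 0;
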
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Sep 30 04:35:39 2015 -0400
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Mon Oct 05 20:02:40 2015 -0700
@@ -837,6 +837,7 @@
      static_field(StubRoutines,                _multiplyToLen,                                address)                               \
      static_field(StubRoutines,                _squareToLen,                                  address)                               \
      static_field(StubRoutines,                _mulAdd,                                       address)                               \
+     static_field(StubRoutines,                _dexp,                                         address)                               \
                                                                                                                                      \
   /*****************/                                                                                                                \
   /* SharedRuntime */                                                                                                                \
@@ -1992,7 +1993,6 @@
   declare_c2_type(TanDNode, Node)                                         \
   declare_c2_type(AtanDNode, Node)                                        \
   declare_c2_type(SqrtDNode, Node)                                        \
-  declare_c2_type(ExpDNode, Node)                                         \
   declare_c2_type(LogDNode, Node)                                         \
   declare_c2_type(Log10DNode, Node)                                       \
   declare_c2_type(PowDNode, Node)                                         \