8226721: Missing intrinsics for Math.ceil, floor, rint
authorneliasso
Tue, 01 Oct 2019 11:43:10 +0200
changeset 58421 6fc57e391539
parent 58420 35cbd86e088a
child 58422 d7dbabd226ff
8226721: Missing intrinsics for Math.ceil, floor, rint Reviewed-by: neliasso, vlivanov, ecaspole Contributed-by: jatin.bhateja@intel.com
src/hotspot/cpu/x86/assembler_x86.cpp
src/hotspot/cpu/x86/assembler_x86.hpp
src/hotspot/cpu/x86/macroAssembler_x86.cpp
src/hotspot/cpu/x86/macroAssembler_x86.hpp
src/hotspot/cpu/x86/x86.ad
src/hotspot/share/adlc/formssel.cpp
src/hotspot/share/classfile/vmSymbols.cpp
src/hotspot/share/classfile/vmSymbols.hpp
src/hotspot/share/opto/c2compiler.cpp
src/hotspot/share/opto/classes.hpp
src/hotspot/share/opto/convertnode.cpp
src/hotspot/share/opto/convertnode.hpp
src/hotspot/share/opto/library_call.cpp
src/hotspot/share/opto/superword.cpp
src/hotspot/share/opto/vectornode.cpp
src/hotspot/share/opto/vectornode.hpp
src/java.base/share/classes/java/lang/Math.java
test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java
test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -4742,6 +4742,25 @@
   emit_int8((unsigned char)0xA5);
 }
 
+void Assembler::roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) {
+  assert(VM_Version::supports_sse4_1(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x0B);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)rmode);
+}
+
+void Assembler::roundsd(XMMRegister dst, Address src, int32_t rmode) {
+  assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x0B);
+  emit_operand(dst, src);
+  emit_int8((unsigned char)rmode);
+}
+
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -5539,6 +5558,49 @@
   emit_operand(dst, src);
 }
 
+void Assembler::vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x09);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)(rmode));
+}
+
+void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode,  int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x09);
+  emit_operand(dst, src);
+  emit_int8((unsigned char)(rmode));
+}
+
+void Assembler::vrndscalepd(XMMRegister dst,  XMMRegister src,  int32_t rmode, int vector_len) {
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0x09);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)(rmode));
+}
+
+void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0x09);
+  emit_operand(dst, src);
+  emit_int8((unsigned char)(rmode));
+}
+
+
 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -1856,6 +1856,9 @@
   void sqrtsd(XMMRegister dst, Address src);
   void sqrtsd(XMMRegister dst, XMMRegister src);
 
+  void roundsd(XMMRegister dst, Address src, int32_t rmode);
+  void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);
+
   // Compute Square Root of Scalar Single-Precision Floating-Point Value
   void sqrtss(XMMRegister dst, Address src);
   void sqrtss(XMMRegister dst, XMMRegister src);
@@ -2020,6 +2023,12 @@
   void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
   void vsqrtps(XMMRegister dst, Address src, int vector_len);
 
+  // Round Packed Double precision value.
+  void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
+  void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
+  void vrndscalepd(XMMRegister dst,  XMMRegister src,  int32_t rmode, int vector_len);
+  void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
+
   // Bitwise Logical AND of Packed Floating-Point Values
   void andpd(XMMRegister dst, XMMRegister src);
   void andps(XMMRegister dst, XMMRegister src);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -3661,6 +3661,15 @@
   }
 }
 
+void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) {
+  if (reachable(src)) {
+    Assembler::roundsd(dst, as_Address(src), rmode);
+  } else {
+    lea(scratch_reg, src);
+    Assembler::roundsd(dst, Address(scratch_reg, 0), rmode);
+  }
+}
+
 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::subss(dst, as_Address(src));
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -1180,6 +1180,10 @@
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, AddressLiteral src);
 
+  void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode)    { Assembler::roundsd(dst, src, rmode); }
+  void roundsd(XMMRegister dst, Address src, int32_t rmode)        { Assembler::roundsd(dst, src, rmode); }
+  void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
+
   void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
   void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
   void sqrtss(XMMRegister dst, AddressLiteral src);
--- a/src/hotspot/cpu/x86/x86.ad	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/cpu/x86/x86.ad	Tue Oct 01 11:43:10 2019 +0200
@@ -1485,6 +1485,10 @@
         ret_value = false;
       }
       break;
+    case Op_RoundDoubleMode:
+      if (UseSSE < 4)
+         ret_value = false;
+      break;
   }
 
   return ret_value;  // Per default match rules are supported.
@@ -1536,6 +1540,10 @@
         if (vlen != 4)
           ret_value  = false;
         break;
+      case Op_RoundDoubleModeV:
+        if (VM_Version::supports_avx() == false)
+          ret_value = false;
+        break;
     }
   }
 
@@ -2854,6 +2862,108 @@
   ins_pipe(pipe_slow);
 %}
 
+
+instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode src rmode));
+  format %{ "roundsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode (LoadD src) rmode));
+  format %{ "roundsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
+  predicate(UseSSE>=4);
+  match(Set dst (RoundDoubleMode con rmode));
+  effect(TEMP scratch_reg);
+  format %{ "roundsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vround2D_reg(legVecX dst, legVecX src, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vroundpd  $dst, $src, $rmode\t! round packed2D" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround2D_mem(legVecX dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vroundpd $dst, $mem, $rmode\t! round packed2D" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround4D_reg(legVecY dst, legVecY src, legVecY rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vroundpd  $dst, $src, $rmode\t! round packed4D" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround4D_mem(legVecY dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vroundpd $dst, $mem, $rmode\t! round packed4D" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct vround8D_reg(vecZ dst, vecZ src, immU8 rmode) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RoundDoubleModeV src rmode));
+  format %{ "vrndscalepd $dst, $src, $rmode\t! round packed8D" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vround8D_mem(vecZ dst, memory mem, immU8 rmode) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
+  format %{ "vrndscalepd $dst, $mem, $rmode\t! round packed8D" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct onspinwait() %{
   match(OnSpinWait);
   ins_cost(200);
--- a/src/hotspot/share/adlc/formssel.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/adlc/formssel.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -4047,6 +4047,7 @@
         strcmp(opType,"FmaD") == 0 ||
         strcmp(opType,"FmaF") == 0 ||
         strcmp(opType,"RoundDouble")==0 ||
+        strcmp(opType,"RoundDoubleMode")==0 ||
         strcmp(opType,"RoundFloat")==0 ||
         strcmp(opType,"ReverseBytesI")==0 ||
         strcmp(opType,"ReverseBytesL")==0 ||
@@ -4175,7 +4176,7 @@
     "URShiftVB","URShiftVS","URShiftVI","URShiftVL",
     "MaxReductionV", "MinReductionV",
     "ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
-    "LoadVector","StoreVector",
+    "RoundDoubleModeV","LoadVector","StoreVector",
     "FmaVD", "FmaVF","PopCountVI",
     // Next are not supported currently.
     "PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
--- a/src/hotspot/share/classfile/vmSymbols.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/classfile/vmSymbols.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -575,6 +575,9 @@
   case vmIntrinsics::_intBitsToFloat:
   case vmIntrinsics::_doubleToRawLongBits:
   case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_ceil:
+  case vmIntrinsics::_floor:
+  case vmIntrinsics::_rint:
   case vmIntrinsics::_dabs:
   case vmIntrinsics::_fabs:
   case vmIntrinsics::_iabs:
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -766,6 +766,7 @@
   do_name(tan_name,"tan")       do_name(atan2_name,"atan2")     do_name(sqrt_name,"sqrt")                               \
   do_name(log_name,"log")       do_name(log10_name,"log10")     do_name(pow_name,"pow")                                 \
   do_name(exp_name,"exp")       do_name(min_name,"min")         do_name(max_name,"max")                                 \
+  do_name(floor_name, "floor")  do_name(ceil_name, "ceil")      do_name(rint_name, "rint")                              \
                                                                                                                         \
   do_name(addExact_name,"addExact")                                                                                     \
   do_name(decrementExact_name,"decrementExact")                                                                         \
@@ -781,6 +782,9 @@
   do_intrinsic(_iabs,                     java_lang_Math,         abs_name,   int_int_signature,           F_S)   \
   do_intrinsic(_labs,                     java_lang_Math,         abs_name,   long_long_signature,           F_S)   \
   do_intrinsic(_dsin,                     java_lang_Math,         sin_name,   double_double_signature,           F_S)   \
+  do_intrinsic(_floor,                    java_lang_Math,         floor_name, double_double_signature,           F_S)   \
+  do_intrinsic(_ceil,                     java_lang_Math,         ceil_name,  double_double_signature,           F_S)   \
+  do_intrinsic(_rint,                     java_lang_Math,         rint_name,  double_double_signature,           F_S)   \
   do_intrinsic(_dcos,                     java_lang_Math,         cos_name,   double_double_signature,           F_S)   \
   do_intrinsic(_dtan,                     java_lang_Math,         tan_name,   double_double_signature,           F_S)   \
   do_intrinsic(_datan2,                   java_lang_Math,         atan2_name, double2_double_signature,          F_S)   \
--- a/src/hotspot/share/opto/c2compiler.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/c2compiler.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -462,6 +462,11 @@
   case vmIntrinsics::_writebackPostSync0:
     if (!Matcher::match_rule_supported(Op_CacheWBPostSync)) return false;
     break;
+  case vmIntrinsics::_rint:
+  case vmIntrinsics::_ceil:
+  case vmIntrinsics::_floor:
+    if (!Matcher::match_rule_supported(Op_RoundDoubleMode)) return false;
+    break;
   case vmIntrinsics::_hashCode:
   case vmIntrinsics::_identityHashCode:
   case vmIntrinsics::_getClass:
--- a/src/hotspot/share/opto/classes.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/classes.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -274,6 +274,8 @@
 macro(Return)
 macro(Root)
 macro(RoundDouble)
+macro(RoundDoubleMode)
+macro(RoundDoubleModeV)
 macro(RoundFloat)
 macro(SafePoint)
 macro(SafePointScalarObject)
--- a/src/hotspot/share/opto/convertnode.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/convertnode.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -531,4 +531,16 @@
   return phase->type( in(1) );
 }
 
-
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Remove redundant roundings.
+Node* RoundDoubleModeNode::Identity(PhaseGVN* phase) {
+  int op = in(1)->Opcode();
+  // Redundant rounding e.g. floor(ceil(n)) -> ceil(n)
+  if(op == Op_RoundDoubleMode) return in(1);
+  return this;
+}
+const Type* RoundDoubleModeNode::Value(PhaseGVN* phase) const {
+  return Type::DOUBLE;
+}
+//=============================================================================
--- a/src/hotspot/share/opto/convertnode.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/convertnode.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -212,5 +212,16 @@
   virtual const Type* Value(PhaseGVN* phase) const;
 };
 
+//-----------------------------RoundDoubleModeNode-----------------------------
+class RoundDoubleModeNode: public Node {
+  public:
+  RoundDoubleModeNode(Node *in1, Node * rmode): Node(0, in1, rmode) {}
+  virtual int   Opcode() const;
+  virtual const Type *bottom_type() const { return Type::DOUBLE; }
+  virtual uint  ideal_reg() const { return Op_RegD; }
+  virtual Node* Identity(PhaseGVN* phase);
+  virtual const Type* Value(PhaseGVN* phase) const;
+};
+
 
 #endif // SHARE_OPTO_CONVERTNODE_HPP
--- a/src/hotspot/share/opto/library_call.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/library_call.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -534,6 +534,9 @@
   case vmIntrinsics::_identityHashCode:         return inline_native_hashcode(/*!virtual*/ false,         is_static);
   case vmIntrinsics::_getClass:                 return inline_native_getClass();
 
+  case vmIntrinsics::_ceil:
+  case vmIntrinsics::_floor:
+  case vmIntrinsics::_rint:
   case vmIntrinsics::_dsin:
   case vmIntrinsics::_dcos:
   case vmIntrinsics::_dtan:
@@ -1818,6 +1821,9 @@
   switch (id) {
   case vmIntrinsics::_dabs:   n = new AbsDNode(                arg);  break;
   case vmIntrinsics::_dsqrt:  n = new SqrtDNode(C, control(),  arg);  break;
+  case vmIntrinsics::_ceil:   n = new RoundDoubleModeNode(arg, makecon(TypeInt::make(2))); break;
+  case vmIntrinsics::_floor:  n = new RoundDoubleModeNode(arg, makecon(TypeInt::make(1))); break;
+  case vmIntrinsics::_rint:   n = new RoundDoubleModeNode(arg, makecon(TypeInt::make(0))); break;
   default:  fatal_unexpected_iid(id);  break;
   }
   set_result(_gvn.transform(n));
@@ -1891,6 +1897,9 @@
       runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
 
     // These intrinsics are supported on all hardware
+  case vmIntrinsics::_ceil:
+  case vmIntrinsics::_floor:
+  case vmIntrinsics::_rint:   return Matcher::match_rule_supported(Op_RoundDoubleMode) ? inline_double_math(id) : false;
   case vmIntrinsics::_dsqrt:  return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
   case vmIntrinsics::_dabs:   return Matcher::has_match_rule(Op_AbsD)   ? inline_double_math(id) : false;
   case vmIntrinsics::_fabs:   return Matcher::match_rule_supported(Op_AbsF)   ? inline_math(id) : false;
--- a/src/hotspot/share/opto/superword.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/superword.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -2401,6 +2401,12 @@
         const TypePtr* atyp = n->adr_type();
         vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
         vlen_in_bytes = vn->as_StoreVector()->memory_size();
+      } else if (VectorNode::is_roundopD(n)) {
+        Node* in1 = vector_opd(p, 1);
+        Node* in2 = low_adr->in(2);
+        assert(in2->is_Con(), "Constant rounding mode expected.");
+        vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
+        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
       } else if (VectorNode::is_muladds2i(n)) {
         assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
         Node* in1 = vector_opd(p, 1);
--- a/src/hotspot/share/opto/vectornode.cpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/vectornode.cpp	Tue Oct 01 11:43:10 2019 +0200
@@ -128,6 +128,9 @@
   case Op_NegD:
     assert(bt == T_DOUBLE, "must be");
     return Op_NegVD;
+  case Op_RoundDoubleMode:
+    assert(bt == T_DOUBLE, "must be");
+    return Op_RoundDoubleModeV;
   case Op_SqrtF:
     assert(bt == T_FLOAT, "must be");
     return Op_SqrtVF;
@@ -259,6 +262,13 @@
   return false;
 }
 
+bool VectorNode::is_roundopD(Node *n) {
+  if (n->Opcode() == Op_RoundDoubleMode) {
+    return true;
+  }
+  return false;
+}
+
 bool VectorNode::is_shift(Node* n) {
   switch (n->Opcode()) {
   case Op_LShiftI:
@@ -407,6 +417,8 @@
   case Op_MinV: return new MinVNode(n1, n2, vt);
   case Op_MaxV: return new MaxVNode(n1, n2, vt);
 
+  case Op_RoundDoubleModeV: return new RoundDoubleModeVNode(n1, n2, vt);
+
   case Op_MulAddVS2VI: return new MulAddVS2VINode(n1, n2, vt);
   default:
     fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
--- a/src/hotspot/share/opto/vectornode.hpp	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/hotspot/share/opto/vectornode.hpp	Tue Oct 01 11:43:10 2019 +0200
@@ -70,6 +70,7 @@
   static bool is_type_transition_short_to_int(Node* n);
   static bool is_type_transition_to_int(Node* n);
   static bool is_muladds2i(Node* n);
+  static bool is_roundopD(Node * n);
   static bool is_invariant_vector(Node* n);
   // [Start, end) half-open range defining which operands are vectors
   static void vector_operands(Node* n, uint* start, uint* end);
@@ -447,6 +448,13 @@
   SqrtVFNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
   virtual int Opcode() const;
 };
+//------------------------------RoundDoubleVNode--------------------------------
+// Vector round double
+class RoundDoubleModeVNode : public VectorNode {
+ public:
+  RoundDoubleModeVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
+  virtual int Opcode() const;
+};
 
 //------------------------------SqrtVDNode--------------------------------------
 // Vector Sqrt double
--- a/src/java.base/share/classes/java/lang/Math.java	Thu Sep 26 13:51:43 2019 +0200
+++ b/src/java.base/share/classes/java/lang/Math.java	Tue Oct 01 11:43:10 2019 +0200
@@ -440,6 +440,7 @@
      *          floating-point value that is greater than or equal to
      *          the argument and is equal to a mathematical integer.
      */
+    @HotSpotIntrinsicCandidate
     public static double ceil(double a) {
         return StrictMath.ceil(a); // default impl. delegates to StrictMath
     }
@@ -459,6 +460,7 @@
      *          floating-point value that less than or equal to the argument
      *          and is equal to a mathematical integer.
      */
+    @HotSpotIntrinsicCandidate
     public static double floor(double a) {
         return StrictMath.floor(a); // default impl. delegates to StrictMath
     }
@@ -478,6 +480,7 @@
      * @return  the closest floating-point value to {@code a} that is
      *          equal to a mathematical integer.
      */
+    @HotSpotIntrinsicCandidate
     public static double rint(double a) {
         return StrictMath.rint(a); // default impl. delegates to StrictMath
     }
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java	Thu Sep 26 13:51:43 2019 +0200
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java	Tue Oct 01 11:43:10 2019 +0200
@@ -87,6 +87,9 @@
       test_divv(a0, a1, -VALUE);
       test_diva(a0, a1, a3);
       test_negc(a0, a1);
+      test_rint(a0, a1);
+      test_ceil(a0, a1);
+      test_floor(a0, a1);
     }
     // Test and verify results
     System.out.println("Verification");
@@ -351,6 +354,56 @@
         errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
       }
 
+      // To test -ve and +ve Zero scenarios.
+      double [] other_corner_cases     = { -0.0, 0.0, 9.007199254740992E15 };
+      double [] other_corner_cases_res = new double[3];
+      test_floor(a0, a1);
+      errn += verify("test_floor: ", 0, a0[0], Double.NaN);
+      errn += verify("test_floor: ", 1, a0[1], Double.POSITIVE_INFINITY);
+      errn += verify("test_floor: ", 2, a0[2], Double.NEGATIVE_INFINITY);
+      errn += verify("test_floor: ", 3, a0[3], Double.MAX_VALUE);
+      errn += verify("test_floor: ", 4, a0[4], 0.0);
+      errn += verify("test_floor: ", 5, a0[5], 0.0);
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_floor: ", i, a0[i], ((double)(ADD_INIT+i)));
+      }
+      test_floor_cc(other_corner_cases_res, other_corner_cases);
+      errn += verify("test_floor_cc: ", 0, other_corner_cases_res[0], -0.0);
+      errn += verify("test_floor_cc: ", 1, other_corner_cases_res[1], 0.0);
+      errn += verify("test_floor_cc: ", 2, other_corner_cases_res[2], 9.007199254740992E15);
+
+      test_ceil(a0, a1);
+      errn += verify("test_ceil: ", 0, a0[0], Double.NaN);
+      errn += verify("test_ceil: ", 1, a0[1], Double.POSITIVE_INFINITY);
+      errn += verify("test_ceil: ", 2, a0[2], Double.NEGATIVE_INFINITY);
+      errn += verify("test_ceil: ", 3, a0[3], Double.MAX_VALUE);
+      errn += verify("test_ceil: ", 4, a0[4], 1.0);
+      errn += verify("test_ceil: ", 5, a0[5], 1.0);
+      for (int i=6; i<ARRLEN; i++) {
+        errn += verify("test_ceil: ", i, a0[i], ((double)(ADD_INIT+i+1.0)));
+      }
+      test_ceil_cc(other_corner_cases_res, other_corner_cases);
+      errn += verify("test_ceil_cc: ", 0, other_corner_cases_res[0], -0.0);
+      errn += verify("test_ceil_cc: ", 1, other_corner_cases_res[1], 0.0);
+      errn += verify("test_ceil_cc: ", 2, other_corner_cases_res[2], 9.007199254740992E15);
+
+      test_rint(a0, a1);
+      errn += verify("test_rint: ", 0, a0[0], Double.NaN);
+      errn += verify("test_rint: ", 1, a0[1], Double.POSITIVE_INFINITY);
+      errn += verify("test_rint: ", 2, a0[2], Double.NEGATIVE_INFINITY);
+      errn += verify("test_rint: ", 3, a0[3], Double.MAX_VALUE);
+      errn += verify("test_rint: ", 4, a0[4], 0.0);
+      errn += verify("test_rint: ", 5, a0[5], 0.0);
+      for (int i=6; i<ARRLEN; i++) {
+        if ( i <= 500 )
+           errn += verify("test_rint: ", i, a0[i], ((double)(ADD_INIT+i)));
+        else
+           errn += verify("test_rint: ", i, a0[i], ((double)(ADD_INIT+i+1.0)));
+      }
+      test_rint_cc(other_corner_cases_res, other_corner_cases);
+      errn += verify("test_rint_cc: ", 0, other_corner_cases_res[0], -0.0);
+      errn += verify("test_rint_cc: ", 1, other_corner_cases_res[1], 0.0);
+      errn += verify("test_rint_cc: ", 2, other_corner_cases_res[2], 9.007199254740992E15);
     }
 
     if (errn > 0)
@@ -577,6 +630,37 @@
     }
   }
 
+  static void test_rint(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.rint(a1[i] + ((double)(i))/1000);
+    }
+  }
+  static void test_ceil(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.ceil(a1[i] + ((double)(i))/1000);
+    }
+  }
+  static void test_floor(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.floor(a1[i] + ((double)(i))/1000);
+    }
+  }
+  static void test_rint_cc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.rint(a1[i]);
+    }
+  }
+  static void test_ceil_cc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.ceil(a1[i]);
+    }
+  }
+  static void test_floor_cc(double[] a0, double[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = Math.floor(a1[i]);
+    }
+  }
+
   static int verify(String text, int i, double elem, double val) {
     if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java	Tue Oct 01 11:43:10 2019 +0200
@@ -0,0 +1,78 @@
+//
+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+package org.openjdk.bench.java.math;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+public class FpRoundingBenchmark {
+
+  @Param({"1024"})
+  public int TESTSIZE;
+
+  public double[] DargV1;
+
+  public double[] Res;
+
+  public final double[] DspecialVals = {
+      0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY};
+
+  @Setup(Level.Trial)
+  public void BmSetup() {
+    int i = 0;
+    Random r = new Random(1024);
+    DargV1 = new double[TESTSIZE];
+    Res = new double[TESTSIZE];
+
+    for (; i < DspecialVals.length; i++) {
+      DargV1[i] = DspecialVals[i];
+    }
+
+    for (; i < TESTSIZE; i++) {
+      DargV1[i] = r.nextDouble()*TESTSIZE;
+    }
+  }
+
+  @Benchmark
+  public void testceil(Blackhole bh) {
+    for (int i = 0; i < TESTSIZE; i++)
+      Res[i] = Math.ceil(DargV1[i]);
+  }
+
+  @Benchmark
+  public void testfloor(Blackhole bh) {
+    for (int i = 0; i < TESTSIZE; i++)
+      Res[i] = Math.floor(DargV1[i]);
+  }
+
+  @Benchmark
+  public void testrint(Blackhole bh) {
+    for (int i = 0; i < TESTSIZE; i++)
+      Res[i] = Math.rint(DargV1[i]);
+  }
+}