8160425: Vectorization with signalling NaN returns wrong result
authorthartmann
Thu, 30 Jun 2016 08:24:51 +0200
changeset 40040 7644f470d923
parent 40039 47f35984ec67
child 40041 c6da347e21a8
8160425: Vectorization with signalling NaN returns wrong result Summary: Should not use doubles/floats for vector constants in the C code. Reviewed-by: kvn, vlivanov
hotspot/src/cpu/sparc/vm/sparc.ad
hotspot/src/cpu/x86/vm/x86.ad
hotspot/src/share/vm/asm/assembler.hpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/compile.hpp
hotspot/test/compiler/vectorization/TestNaNVector.java
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Jun 29 18:04:04 2016 +0300
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Thu Jun 30 08:24:51 2016 +0200
@@ -714,7 +714,7 @@
   return offset;
 }
 
-static inline jdouble replicate_immI(int con, int count, int width) {
+static inline jlong replicate_immI(int con, int count, int width) {
   // Load a constant replicated "count" times with width "width"
   assert(count*width == 8 && width <= 4, "sanity");
   int bit_width = width * 8;
@@ -723,17 +723,15 @@
   for (int i = 0; i < count - 1; i++) {
     val |= (val << bit_width);
   }
-  jdouble dval = *((jdouble*) &val);  // coerce to double type
-  return dval;
-}
-
-static inline jdouble replicate_immF(float con) {
+  return val;
+}
+
+static inline jlong replicate_immF(float con) {
   // Replicate float con 2 times and pack into vector.
   int val = *((int*)&con);
   jlong lval = val;
   lval = (lval << 32) | (lval & 0xFFFFFFFFl);
-  jdouble dval = *((jdouble*) &lval);  // coerce to double type
-  return dval;
+  return lval;
 }
 
 // Standard Sparc opcode form2 field breakdown
--- a/hotspot/src/cpu/x86/vm/x86.ad	Wed Jun 29 18:04:04 2016 +0300
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Thu Jun 30 08:24:51 2016 +0200
@@ -2131,7 +2131,7 @@
   return size+offset_size;
 }
 
-static inline jfloat replicate4_imm(int con, int width) {
+static inline jint replicate4_imm(int con, int width) {
   // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
   assert(width == 1 || width == 2, "only byte or short types here");
   int bit_width = width * 8;
@@ -2141,11 +2141,10 @@
     val |= (val << bit_width);
     bit_width <<= 1;
   }
-  jfloat fval = *((jfloat*) &val);  // coerce to float type
-  return fval;
+  return val;
 }
 
-static inline jdouble replicate8_imm(int con, int width) {
+static inline jlong replicate8_imm(int con, int width) {
   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
   int bit_width = width * 8;
@@ -2155,8 +2154,7 @@
     val |= (val << bit_width);
     bit_width <<= 1;
   }
-  jdouble dval = *((jdouble*) &val);  // coerce to double type
-  return dval;
+  return val;
 }
 
 #ifndef PRODUCT
--- a/hotspot/src/share/vm/asm/assembler.hpp	Wed Jun 29 18:04:04 2016 +0300
+++ b/hotspot/src/share/vm/asm/assembler.hpp	Thu Jun 30 08:24:51 2016 +0200
@@ -337,6 +337,15 @@
   //
   // We must remember the code section (insts or stubs) in c1
   // so we can reset to the proper section in end_a_const().
+  address int_constant(jint c) {
+    CodeSection* c1 = _code_section;
+    address ptr = start_a_const(sizeof(c), sizeof(c));
+    if (ptr != NULL) {
+      emit_int32(c);
+      end_a_const(c1);
+    }
+    return ptr;
+  }
   address long_constant(jlong c) {
     CodeSection* c1 = _code_section;
     address ptr = start_a_const(sizeof(c), sizeof(c));
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Jun 29 18:04:04 2016 +0300
+++ b/hotspot/src/share/vm/opto/compile.cpp	Thu Jun 30 08:24:51 2016 +0200
@@ -3814,6 +3814,7 @@
   if (can_be_reused() != other.can_be_reused())  return false;
   // For floating point values we compare the bit pattern.
   switch (type()) {
+  case T_INT:
   case T_FLOAT:   return (_v._value.i == other._v._value.i);
   case T_LONG:
   case T_DOUBLE:  return (_v._value.j == other._v._value.j);
@@ -3828,6 +3829,7 @@
 
 static int type_to_size_in_bytes(BasicType t) {
   switch (t) {
+  case T_INT:     return sizeof(jint   );
   case T_LONG:    return sizeof(jlong  );
   case T_FLOAT:   return sizeof(jfloat );
   case T_DOUBLE:  return sizeof(jdouble);
@@ -3896,6 +3898,7 @@
     Constant con = _constants.at(i);
     address constant_addr = NULL;
     switch (con.type()) {
+    case T_INT:    constant_addr = _masm.int_constant(   con.get_jint()   ); break;
     case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
     case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
     case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
--- a/hotspot/src/share/vm/opto/compile.hpp	Wed Jun 29 18:04:04 2016 +0300
+++ b/hotspot/src/share/vm/opto/compile.hpp	Thu Jun 30 08:24:51 2016 +0200
@@ -264,6 +264,7 @@
 
     BasicType type()      const    { return _type; }
 
+    jint    get_jint()    const    { return _v._value.i; }
     jlong   get_jlong()   const    { return _v._value.j; }
     jfloat  get_jfloat()  const    { return _v._value.f; }
     jdouble get_jdouble() const    { return _v._value.d; }
@@ -320,6 +321,14 @@
     Constant add(MachConstantNode* n, BasicType type, jvalue value);
     Constant add(Metadata* metadata);
     Constant add(MachConstantNode* n, MachOper* oper);
+    Constant add(MachConstantNode* n, jint i) {
+      jvalue value; value.i = i;
+      return add(n, T_INT, value);
+    }
+    Constant add(MachConstantNode* n, jlong j) {
+      jvalue value; value.j = j;
+      return add(n, T_LONG, value);
+    }
     Constant add(MachConstantNode* n, jfloat f) {
       jvalue value; value.f = f;
       return add(n, T_FLOAT, value);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/vectorization/TestNaNVector.java	Thu Jun 30 08:24:51 2016 +0200
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8160425
+ * @summary Test vectorization with a signalling NaN.
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill TestNaNVector
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill -XX:MaxVectorSize=4 TestNaNVector
+ */
+public class TestNaNVector {
+    private char[] array;
+    private static final int LEN = 1024;
+
+    public static void main(String args[]) {
+        TestNaNVector test = new TestNaNVector();
+        // Check double precision NaN
+        for (int i = 0; i < 10_000; ++i) {
+          test.vectorizeNaNDP();
+        }
+        System.out.println("Checking double precision Nan");
+        test.checkResult(0xfff7);
+
+        // Check single precision NaN
+        for (int i = 0; i < 10_000; ++i) {
+          test.vectorizeNaNSP();
+        }
+        System.out.println("Checking single precision Nan");
+        test.checkResult(0xff80);
+    }
+
+    public TestNaNVector() {
+        array = new char[LEN];
+    }
+
+    public void vectorizeNaNDP() {
+        // This loop will be vectorized and the array store will be replaced by
+        // a 64-bit vector store to four subsequent array elements. The vector
+        // should look like this '0xfff7fff7fff7fff7' and is read from the constant
+        // table. However, in floating point arithmetic this is a signalling NaN
+        // which may be converted to a quiet NaN when processed by the x87 FPU.
+        // If the signalling bit is set, the vector ends up in the constant table
+        // as '0xfffffff7fff7fff7' which leads to an incorrect result.
+        for (int i = 0; i < LEN; ++i) {
+            array[i] = 0xfff7;
+        }
+    }
+
+    public void vectorizeNaNSP() {
+        // Same as above but with single precision
+        for (int i = 0; i < LEN; ++i) {
+            array[i] = 0xff80;
+        }
+    }
+
+    public void checkResult(int expected) {
+        for (int i = 0; i < LEN; ++i) {
+            if (array[i] != expected) {
+                throw new RuntimeException("Invalid result: array[" + i + "] = " + (int)array[i] + " != " + expected);
+            }
+        }
+    }
+}
+