8183103: Post loop vectorization produces incorrect results
author thartmann
Wed, 05 Jul 2017 09:06:59 +0200
changeset 45804 41b19cb98a96
parent 45803 8ee2c9c86cfe
child 45805 24c9d900fadf
8183103: Post loop vectorization produces incorrect results
Summary: Disable AVX=3 and PostLoopMultiversioning by default and make them experimental in JDK 9.
Reviewed-by: kvn
hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp
hotspot/src/cpu/arm/vm/c2_globals_arm.hpp
hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp
hotspot/src/cpu/s390/vm/c2_globals_s390.hpp
hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp
hotspot/src/cpu/x86/vm/c2_globals_x86.hpp
hotspot/src/cpu/x86/vm/globals_x86.hpp
hotspot/src/share/vm/opto/c2_globals.hpp
hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp
hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp
hotspot/test/compiler/rangechecks/TestRangeCheckEliminationDisabled.java
--- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -55,7 +55,6 @@
 define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
 define_pd_global(intx, LoopUnrollLimit,              60);
 define_pd_global(intx, LoopPercentProfileLimit,      10);
-define_pd_global(intx, PostLoopMultiversioning,      false);
 // InitialCodeCacheSize derived from specjbb2000 run.
 define_pd_global(intx, InitialCodeCacheSize,         2496*K); // Integral multiple of CodeCacheExpansionSize
 define_pd_global(intx, CodeCacheExpansionSize,       64*K);
--- a/hotspot/src/cpu/arm/vm/c2_globals_arm.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/arm/vm/c2_globals_arm.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -70,7 +70,6 @@
 define_pd_global(bool, ResizeTLAB,                   true);
 define_pd_global(intx, LoopUnrollLimit,              60); // Design center runs on 1.3.1
 define_pd_global(intx, LoopPercentProfileLimit,      10);
-define_pd_global(intx, PostLoopMultiversioning,      false);
 define_pd_global(intx, MinJumpTableSize,             16);
 
 // Peephole and CISC spilling both break the graph, and so makes the
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -55,7 +55,6 @@
 define_pd_global(bool, ResizeTLAB,                   true);
 define_pd_global(intx, LoopUnrollLimit,              60);
 define_pd_global(intx, LoopPercentProfileLimit,      10);
-define_pd_global(intx, PostLoopMultiversioning,      false);
 
 // Peephole and CISC spilling both break the graph, and so make the
 // scheduler sick.
--- a/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -56,7 +56,6 @@
 define_pd_global(bool, ResizeTLAB,                   true);
 define_pd_global(intx, LoopUnrollLimit,              60);
 define_pd_global(intx, LoopPercentProfileLimit,      10);
-define_pd_global(intx, PostLoopMultiversioning,      false);
 define_pd_global(intx, MinJumpTableSize,             18);
 
 // Peephole and CISC spilling both break the graph, and so makes the
--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -53,7 +53,6 @@
 define_pd_global(bool, ResizeTLAB,                   true);
 define_pd_global(intx, LoopUnrollLimit,              60); // Design center runs on 1.3.1
 define_pd_global(intx, LoopPercentProfileLimit,      10);
-define_pd_global(intx, PostLoopMultiversioning,      false);
 define_pd_global(intx, MinJumpTableSize,             5);
 
 // Peephole and CISC spilling both break the graph, and so makes the
--- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -47,7 +47,6 @@
 define_pd_global(intx, FreqInlineSize,               325);
 define_pd_global(intx, MinJumpTableSize,             10);
 define_pd_global(intx, LoopPercentProfileLimit,      30);
-define_pd_global(intx, PostLoopMultiversioning,      true);
 #ifdef AMD64
 define_pd_global(intx, INTPRESSURE,                  13);
 define_pd_global(intx, FLOATPRESSURE,                14);
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -116,9 +116,10 @@
   product(bool, UseStoreImmI16, true,                                       \
           "Use store immediate 16-bits value instruction on x86")           \
                                                                             \
-  product(intx, UseAVX, 99,                                                 \
+  product(intx, UseAVX, 2,                                                  \
           "Highest supported AVX instructions set on x86/x64")              \
           range(0, 99)                                                      \
+          constraint(UseAVXConstraintFunc, AtParse)                         \
                                                                             \
   product(bool, UseCLMUL, false,                                            \
           "Control whether CLMUL instructions can be used on x86/x64")      \
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -181,7 +181,7 @@
            "Map number of unrolls for main loop via "                       \
            "Superword Level Parallelism analysis")                          \
                                                                             \
-  diagnostic_pd(bool, PostLoopMultiversioning,                              \
+  experimental(bool, PostLoopMultiversioning, false,                        \
            "Multi versioned post loops to eliminate range checks")          \
                                                                             \
   notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false,                 \
--- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Wed Jul 05 09:06:59 2017 +0200
@@ -332,6 +332,17 @@
   }
 }
 
+// Flag constraint: reject UseAVX values above 2 unless UnlockExperimentalVMOptions is set.
+Flag::Error UseAVXConstraintFunc(intx value, bool verbose) {
+  if (value > 2 && !UnlockExperimentalVMOptions) {
+    CommandLineError::print(verbose,  // value is a signed intx, so format it with INTX_FORMAT
+                            "UseAVX (" INTX_FORMAT ") is experimental and must be "
+                            "enabled via -XX:+UnlockExperimentalVMOptions \n", value);
+    return Flag::VIOLATES_CONSTRAINT;
+  }
+  return Flag::SUCCESS;  // values 0..2, or any value once experimental options are unlocked
+}
+
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) {
   if (InteriorEntryAlignment > CodeEntryAlignment) {
--- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Wed Jul 05 09:06:59 2017 +0200
@@ -64,6 +64,8 @@
 
 Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose);
 
+Flag::Error UseAVXConstraintFunc(intx value, bool verbose);
+
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose);
 
--- a/hotspot/test/compiler/rangechecks/TestRangeCheckEliminationDisabled.java	Tue Jul 04 11:45:09 2017 -0700
+++ b/hotspot/test/compiler/rangechecks/TestRangeCheckEliminationDisabled.java	Wed Jul 05 09:06:59 2017 +0200
@@ -26,7 +26,7 @@
  * @bug 8154763
  * @summary Tests PostLoopMultiversioning with RangeCheckElimination disabled.
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
- *                   -XX:+PostLoopMultiversioning -XX:-RangeCheckElimination
+ *                   -XX:+UnlockExperimentalVMOptions -XX:+PostLoopMultiversioning -XX:-RangeCheckElimination
  *                   compiler.rangechecks.TestRangeCheckEliminationDisabled
  */