8135231: aarch64: add support for vectorizing double precision sqrt
Reviewed-by: roland, aph
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Sep 28 16:18:15 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed Sep 23 12:39:30 2015 -0400
@@ -15209,6 +15209,20 @@
ins_pipe(pipe_class_default);
%}
+// --------------------------------- SQRT -------------------------------------
+
+instruct vsqrt2D(vecX dst, vecX src)  // vector square-root rule taking a vecX source and producing a vecX result
+%{
+ predicate(n->as_Vector()->length() == 2);  // rule applies only when the matched vector node has length 2
+ match(Set dst (SqrtVD src));  // pattern: dst is set to SqrtVD applied to src
+ format %{ "fsqrt $dst, $src\t# vector (2D)" %}
+ ins_encode %{
+ __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,  // emit fsqrt using the T2D arrangement, destination register first
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// --------------------------------- AND --------------------------------------
instruct vand8B(vecD dst, vecD src1, vecD src2)
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Sep 28 16:18:15 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Sep 23 12:39:30 2015 -0400
@@ -2311,6 +2311,10 @@
#define MSG "invalid arrangement"
+#define ASSERTION (T == T2S || T == T4S || T == T2D)  // arrangements listed for fsqrt: T2S, T4S, T2D
+ INSN(fsqrt, 1, 0b11111);  // NOTE(review): INSN body is outside this hunk; presumably it checks ASSERTION and reports MSG on failure -- confirm
+#undef ASSERTION
+
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
INSN(rev64, 0, 0b00000);
#undef ASSERTION
--- a/hotspot/test/compiler/loopopts/superword/SumRedSqrt_Double.java Mon Sep 28 16:18:15 2015 +0000
+++ b/hotspot/test/compiler/loopopts/superword/SumRedSqrt_Double.java Wed Sep 23 12:39:30 2015 -0400
@@ -26,7 +26,7 @@
* @test
* @bug 8135028
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test
-* @requires os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64"
+* @requires os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double