--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Mar 07 10:03:06 2016 -0300
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Mar 07 15:03:48 2016 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -5699,8 +5699,9 @@
}
-void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@@ -5709,11 +5710,12 @@
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5721,26 +5723,29 @@
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 256 bits
- emit_int8(value & 0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
// swap src<->dst for encoding
- vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1A);
emit_operand(dst, src);
// 0x00 - insert into lower 256 bits
- // 0x01 - insert into upper 128 bits
- emit_int8(value & 0x01);
-}
-
-void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+ // 0x01 - insert into upper 256 bits
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5750,57 +5755,64 @@
// 0x01 - insert into q1 128 bits (128..255)
// 0x02 - insert into q2 128 bits (256..383)
// 0x03 - insert into q3 128 bits (384..511)
- emit_int8(value & 0x3);
-}
-
-void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
+ emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding
- vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_operand(dst, src);
// 0x00 - insert into q0 128 bits (0..127)
// 0x01 - insert into q1 128 bits (128..255)
// 0x02 - insert into q2 128 bits (256..383)
// 0x03 - insert into q3 128 bits (384..511)
- emit_int8(value & 0x3);
-}
-
-void Assembler::vinsertf128h(XMMRegister dst, Address src) {
+ emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(dst != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding
- vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x18);
emit_operand(dst, src);
+ // 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
- // 0x00 - insert into lower 128 bits
- // 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vextractf128h(Address dst, XMMRegister src) {
+ // 0x00 - extract from lower 128 bits
+ // 0x01 - extract from upper 128 bits
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
assert(src != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -5808,12 +5820,14 @@
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_operand(src, dst);
+ // 0x00 - extract from lower 128 bits
// 0x01 - extract from upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@@ -5822,11 +5836,12 @@
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5834,39 +5849,44 @@
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - insert into lower 256 bits
// 0x01 - insert into upper 256 bits
- emit_int8(value & 0x01);
-}
-
-void Assembler::vinserti128h(XMMRegister dst, Address src) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), "");
assert(dst != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
// swap src<->dst for encoding
- vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x38);
emit_operand(dst, src);
+ // 0x00 - insert into lower 128 bits
// 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
- // 0x00 - insert into lower 128 bits
- // 0x01 - insert into upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vextracti128h(Address dst, XMMRegister src) {
+ // 0x00 - extract from lower 128 bits
+ // 0x01 - extract from upper 128 bits
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx2(), "");
assert(src != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -5874,47 +5894,53 @@
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_operand(src, dst);
+ // 0x00 - extract from lower 128 bits
// 0x01 - extract from upper 128 bits
- emit_int8(0x01);
-}
-
-void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x3B);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits
- emit_int8(value & 0x01);
-}
-
-void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x39);
emit_int8((unsigned char)(0xC0 | encode));
+ // 0x00 - extract from bits 127:0
// 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384
- emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x1B);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits
- emit_int8(value & 0x1);
-}
-
-void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
+ assert(imm8 <= 0x01, "imm8: %u", imm8);
InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit);
@@ -5923,11 +5949,12 @@
emit_operand(src, dst);
// 0x00 - extract from lower 256 bits
// 0x01 - extract from upper 256 bits
- emit_int8(value & 0x01);
-}
-
-void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_avx(), "");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5937,12 +5964,13 @@
// 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384
- emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionMark im(this);
InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
@@ -5953,19 +5981,21 @@
// 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384
- emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
+ emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
assert(VM_Version::supports_evex(), "");
+ assert(imm8 <= 0x03, "imm8: %u", imm8);
InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
+ // 0x00 - extract from bits 127:0
// 0x01 - extract from bits 255:128
// 0x02 - extract from bits 383:256
// 0x03 - extract from bits 511:384
- emit_int8(value & 0x3);
+ emit_int8(imm8 & 0x03);
}
// duplicate 4-bytes integer data from src into 8 locations in dest
--- a/hotspot/src/cpu/x86/vm/x86.ad Mon Mar 07 10:03:06 2016 -0300
+++ b/hotspot/src/cpu/x86/vm/x86.ad Mon Mar 07 15:03:48 2016 -0800
@@ -3179,13 +3179,13 @@
"punpcklbw $dst,$dst\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+ "vinserti128_high $dst,$dst\t! replicate32B" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3196,12 +3196,12 @@
format %{ "punpcklbw $dst,$mem\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+ "vinserti128_high $dst,$dst\t! replicate32B" %}
ins_encode %{
__ punpcklbw($dst$$XMMRegister, $mem$$Address);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3223,11 +3223,11 @@
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+ "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %}
ins_encode %{
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3298,12 +3298,12 @@
format %{ "movd $dst,$src\n\t"
"pshuflw $dst,$dst,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+ "vinserti128_high $dst,$dst\t! replicate16S" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3313,11 +3313,11 @@
match(Set dst (ReplicateS (LoadS mem)));
format %{ "pshuflw $dst,$mem,0x00\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+ "vinserti128_high $dst,$dst\t! replicate16S" %}
ins_encode %{
__ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3327,11 +3327,11 @@
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+ "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
ins_encode %{
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3363,11 +3363,11 @@
match(Set dst (ReplicateI src));
format %{ "movd $dst,$src\n\t"
"pshufd $dst,$dst,0x00\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+ "vinserti128_high $dst,$dst\t! replicate8I" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3376,10 +3376,10 @@
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateI (LoadI mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+ "vinserti128_high $dst,$dst\t! replicate8I" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3401,11 +3401,11 @@
match(Set dst (ReplicateI con));
format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst" %}
+ "vinserti128_high $dst,$dst" %}
ins_encode %{
__ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3430,11 +3430,11 @@
match(Set dst (ReplicateL src));
format %{ "movdq $dst,$src\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+ "vinserti128_high $dst,$dst\t! replicate4L" %}
ins_encode %{
__ movdq($dst$$XMMRegister, $src$$Register);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3447,13 +3447,13 @@
"movdl $tmp,$src.hi\n\t"
"punpckldq $dst,$tmp\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+ "vinserti128_high $dst,$dst\t! replicate4L" %}
ins_encode %{
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3464,11 +3464,11 @@
match(Set dst (ReplicateL con));
format %{ "movq $dst,[$constantaddress]\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+ "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
ins_encode %{
__ movq($dst$$XMMRegister, $constantaddress($con));
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3478,11 +3478,11 @@
match(Set dst (ReplicateL (LoadL mem)));
format %{ "movq $dst,$mem\n\t"
"punpcklqdq $dst,$dst\n\t"
- "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+ "vinserti128_high $dst,$dst\t! replicate4L" %}
ins_encode %{
__ movq($dst$$XMMRegister, $mem$$Address);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
- __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3511,10 +3511,10 @@
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF src));
format %{ "pshufd $dst,$src,0x00\n\t"
- "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+ "vinsertf128_high $dst,$dst\t! replicate8F" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3523,10 +3523,10 @@
predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateF (LoadF mem)));
format %{ "pshufd $dst,$mem,0x00\n\t"
- "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+ "vinsertf128_high $dst,$dst\t! replicate8F" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
- __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3576,10 +3576,10 @@
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD src));
format %{ "pshufd $dst,$src,0x44\n\t"
- "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+ "vinsertf128_high $dst,$dst\t! replicate4D" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
- __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -3588,10 +3588,10 @@
predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD (LoadD mem)));
format %{ "pshufd $dst,$mem,0x44\n\t"
- "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+ "vinsertf128_high $dst,$dst\t! replicate4D" %}
ins_encode %{
__ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
- __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@@ -4791,7 +4791,7 @@
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
- "vextracti128 $tmp2,$tmp\n\t"
+ "vextracti128_high $tmp2,$tmp\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
@@ -4800,7 +4800,7 @@
int vector_len = 1;
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
- __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+ __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4813,7 +4813,7 @@
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
"vpaddd $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4824,7 +4824,7 @@
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
int vector_len = 0;
- __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -4841,9 +4841,9 @@
predicate(UseAVX > 2);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp3,$src2\n\t"
"vpaddd $tmp3,$tmp3,$src2\n\t"
- "vextracti128 $tmp,$tmp3\n\t"
+ "vextracti128_high $tmp,$tmp3\n\t"
"vpaddd $tmp,$tmp,$tmp3\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
@@ -4853,9 +4853,9 @@
"vpaddd $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
- __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
- __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -4892,7 +4892,7 @@
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
"vpaddq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4900,7 +4900,7 @@
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction4L" %}
ins_encode %{
- __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4915,9 +4915,9 @@
predicate(UseAVX > 2);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp2,$src2\n\t"
"vpaddq $tmp2,$tmp2,$src2\n\t"
- "vextracti128 $tmp,$tmp2\n\t"
+ "vextracti128_high $tmp,$tmp2\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpaddq $tmp2,$tmp2,$tmp\n\t"
@@ -4925,9 +4925,9 @@
"vpaddq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! add reduction8L" %}
ins_encode %{
- __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
- __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5026,7 +5026,7 @@
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
- "vextractf128 $tmp2,$src2\n\t"
+ "vextractf128_high $tmp2,$src2\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
@@ -5042,7 +5042,7 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5065,7 +5065,7 @@
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
@@ -5073,7 +5073,7 @@
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
@@ -5081,7 +5081,7 @@
"vaddss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vaddss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vaddss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vaddss $dst,$dst,$tmp\n\t"
@@ -5097,7 +5097,7 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5105,7 +5105,7 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5113,7 +5113,7 @@
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5162,7 +5162,7 @@
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4h $tmp2,$src2, 0x1\n\t"
+ "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5170,7 +5170,7 @@
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5185,15 +5185,15 @@
format %{ "vaddsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vaddsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
@@ -5201,15 +5201,15 @@
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5307,7 +5307,7 @@
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5318,7 +5318,7 @@
"movd $dst,$tmp2\t! mul reduction8I" %}
ins_encode %{
int vector_len = 0;
- __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -5335,9 +5335,9 @@
predicate(UseAVX > 2);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
- format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp3,$src2\n\t"
"vpmulld $tmp3,$tmp3,$src2\n\t"
- "vextracti128 $tmp,$tmp3\n\t"
+ "vextracti128_high $tmp,$tmp3\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
@@ -5347,9 +5347,9 @@
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction16I" %}
ins_encode %{
- __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
- __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -5386,7 +5386,7 @@
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti128 $tmp,$src2\n\t"
+ format %{ "vextracti128_high $tmp,$src2\n\t"
"vpmullq $tmp2,$tmp,$src2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
@@ -5394,7 +5394,7 @@
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction4L" %}
ins_encode %{
- __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5409,9 +5409,9 @@
predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
match(Set dst (MulReductionVL src1 src2));
effect(TEMP tmp, TEMP tmp2);
- format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t"
+ format %{ "vextracti64x4_high $tmp2,$src2\n\t"
"vpmullq $tmp2,$tmp2,$src2\n\t"
- "vextracti128 $tmp,$tmp2\n\t"
+ "vextracti128_high $tmp,$tmp2\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vpmullq $tmp2,$tmp2,$tmp\n\t"
@@ -5419,9 +5419,9 @@
"vpmullq $tmp2,$tmp2,$tmp\n\t"
"movdq $dst,$tmp2\t! mul reduction8L" %}
ins_encode %{
- __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+ __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
- __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5520,7 +5520,7 @@
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
- "vextractf128 $tmp2,$src2\n\t"
+ "vextractf128_high $tmp2,$src2\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
@@ -5536,7 +5536,7 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5559,7 +5559,7 @@
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
@@ -5567,7 +5567,7 @@
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
@@ -5575,7 +5575,7 @@
"vmulss $dst,$dst,$tmp\n\t"
"pshufd $tmp,$tmp2,0x03\n\t"
"vmulss $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulss $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0x01\n\t"
"vmulss $dst,$dst,$tmp\n\t"
@@ -5591,7 +5591,7 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5599,7 +5599,7 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5607,7 +5607,7 @@
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5656,7 +5656,7 @@
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
- "vextractf128 $tmp2,$src2\n\t"
+ "vextractf128_high $tmp2,$src2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
@@ -5664,7 +5664,7 @@
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+ __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5679,15 +5679,15 @@
format %{ "vmulsd $dst,$dst,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x1\n\t"
+ "vextractf32x4 $tmp2,$src2,0x1\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x2\n\t"
+ "vextractf32x4 $tmp2,$src2,0x2\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\n\t"
- "vextractf32x4 $tmp2,$src2, 0x3\n\t"
+ "vextractf32x4 $tmp2,$src2,0x3\n\t"
"vmulsd $dst,$dst,$tmp2\n\t"
"pshufd $tmp,$tmp2,0xE\n\t"
"vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
@@ -5695,15 +5695,15 @@
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
- __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+ __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);