--- a/hotspot/src/cpu/sparc/vm/sparc.ad Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
//
-// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -678,18 +678,26 @@
static inline jdouble replicate_immI(int con, int count, int width) {
// Load a constant replicated "count" times with width "width"
+ assert(count*width == 8 && width <= 4, "sanity");
int bit_width = width * 8;
- jlong elt_val = con;
- elt_val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
- jlong val = elt_val;
+ jlong val = con;
+ val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
for (int i = 0; i < count - 1; i++) {
- val <<= bit_width;
- val |= elt_val;
+ val |= (val << bit_width);
}
jdouble dval = *((jdouble*) &val); // coerce to double type
return dval;
}
+static inline jdouble replicate_immF(float con) {
+ // Replicate float con 2 times and pack into vector.
+ int val = *((int*)&con);
+ jlong lval = val;
+ lval = (lval << 32) | (lval & 0xFFFFFFFFl);
+ jdouble dval = *((jdouble*) &lval); // coerce to double type
+ return dval;
+}
+
// Standard Sparc opcode form2 field breakdown
static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) {
f0 &= (1<<19)-1; // Mask displacement to 19 bits
@@ -791,6 +799,7 @@
case Assembler::stdf_op3: st_op = Op_StoreD; break;
case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
+ case Assembler::ldub_op3: ld_op = Op_LoadUB; break;
case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
case Assembler::ldx_op3: // may become LoadP or stay LoadI
@@ -799,7 +808,6 @@
case Assembler::ldd_op3: ld_op = Op_LoadL; break;
case Assembler::ldf_op3: ld_op = Op_LoadF; break;
case Assembler::lddf_op3: ld_op = Op_LoadD; break;
- case Assembler::ldub_op3: ld_op = Op_LoadB; break;
case Assembler::prefetch_op3: ld_op = Op_LoadI; break;
default: ShouldNotReachHere();
@@ -840,10 +848,7 @@
!(n->ideal_Opcode()==Op_PrefetchRead && ld_op==Op_LoadI) &&
!(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
!(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
- !(n->ideal_Opcode()==Op_Load2I && ld_op==Op_LoadD) &&
- !(n->ideal_Opcode()==Op_Load4C && ld_op==Op_LoadD) &&
- !(n->ideal_Opcode()==Op_Load4S && ld_op==Op_LoadD) &&
- !(n->ideal_Opcode()==Op_Load8B && ld_op==Op_LoadD) &&
+ !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) &&
!(n->rule() == loadUB_rule)) {
verify_oops_warning(n, n->ideal_Opcode(), ld_op);
}
@@ -855,9 +860,7 @@
!(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
!(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
!(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
- !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
- !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
- !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
+ !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) &&
!(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
verify_oops_warning(n, n->ideal_Opcode(), st_op);
}
@@ -1849,16 +1852,45 @@
address last_rethrow = NULL; // debugging aid for Rethrow encoding
#endif
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+ Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+ Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+ 0, Op_RegD, 0, 0, /* Vectors */
+ Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+ 0, 0/*abio*/,
+ Op_RegP /* Return address */, 0, /* the memories */
+ Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+ 0 /*bottom*/
+};
+
// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+ assert(MaxVectorSize == 8, "");
return 8;
}
// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
+const int Matcher::vector_ideal_reg(int size) {
+ assert(MaxVectorSize == 8, "");
return Op_RegD;
}
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+ assert(is_java_primitive(bt), "only primitive type vectors");
+ return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+const int Matcher::min_vector_size(const BasicType bt) {
+ return max_vector_size(bt); // Same as max.
+}
+
+// SPARC doesn't support misaligned vector stores/loads.
+const bool Matcher::misaligned_vectors_ok() {
+ return false;
+}
+
// USII supports fxtof through the whole range of number, USIII doesn't
const bool Matcher::convL2FSupported(void) {
return VM_Version::has_fast_fxtof();
@@ -3125,50 +3157,6 @@
__ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
%}
- enc_class enc_repl8b( iRegI src, iRegL dst ) %{
- MacroAssembler _masm(&cbuf);
- Register src_reg = reg_to_register_object($src$$reg);
- Register dst_reg = reg_to_register_object($dst$$reg);
- __ sllx(src_reg, 56, dst_reg);
- __ srlx(dst_reg, 8, O7);
- __ or3 (dst_reg, O7, dst_reg);
- __ srlx(dst_reg, 16, O7);
- __ or3 (dst_reg, O7, dst_reg);
- __ srlx(dst_reg, 32, O7);
- __ or3 (dst_reg, O7, dst_reg);
- %}
-
- enc_class enc_repl4b( iRegI src, iRegL dst ) %{
- MacroAssembler _masm(&cbuf);
- Register src_reg = reg_to_register_object($src$$reg);
- Register dst_reg = reg_to_register_object($dst$$reg);
- __ sll(src_reg, 24, dst_reg);
- __ srl(dst_reg, 8, O7);
- __ or3(dst_reg, O7, dst_reg);
- __ srl(dst_reg, 16, O7);
- __ or3(dst_reg, O7, dst_reg);
- %}
-
- enc_class enc_repl4s( iRegI src, iRegL dst ) %{
- MacroAssembler _masm(&cbuf);
- Register src_reg = reg_to_register_object($src$$reg);
- Register dst_reg = reg_to_register_object($dst$$reg);
- __ sllx(src_reg, 48, dst_reg);
- __ srlx(dst_reg, 16, O7);
- __ or3 (dst_reg, O7, dst_reg);
- __ srlx(dst_reg, 32, O7);
- __ or3 (dst_reg, O7, dst_reg);
- %}
-
- enc_class enc_repl2i( iRegI src, iRegL dst ) %{
- MacroAssembler _masm(&cbuf);
- Register src_reg = reg_to_register_object($src$$reg);
- Register dst_reg = reg_to_register_object($dst$$reg);
- __ sllx(src_reg, 32, dst_reg);
- __ srlx(dst_reg, 32, O7);
- __ or3 (dst_reg, O7, dst_reg);
- %}
-
%}
//----------FRAME--------------------------------------------------------------
@@ -5932,50 +5920,6 @@
ins_pipe(iload_mem);
%}
-// Load Aligned Packed Byte into a Double Register
-instruct loadA8B(regD dst, memory mem) %{
- match(Set dst (Load8B mem));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "LDDF $mem,$dst\t! packed8B" %}
- opcode(Assembler::lddf_op3);
- ins_encode(simple_form3_mem_reg( mem, dst ) );
- ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Char into a Double Register
-instruct loadA4C(regD dst, memory mem) %{
- match(Set dst (Load4C mem));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "LDDF $mem,$dst\t! packed4C" %}
- opcode(Assembler::lddf_op3);
- ins_encode(simple_form3_mem_reg( mem, dst ) );
- ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Short into a Double Register
-instruct loadA4S(regD dst, memory mem) %{
- match(Set dst (Load4S mem));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "LDDF $mem,$dst\t! packed4S" %}
- opcode(Assembler::lddf_op3);
- ins_encode(simple_form3_mem_reg( mem, dst ) );
- ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Int into a Double Register
-instruct loadA2I(regD dst, memory mem) %{
- match(Set dst (Load2I mem));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "LDDF $mem,$dst\t! packed2I" %}
- opcode(Assembler::lddf_op3);
- ins_encode(simple_form3_mem_reg( mem, dst ) );
- ins_pipe(floadD_mem);
-%}
-
// Load Range
instruct loadRange(iRegI dst, memory mem) %{
match(Set dst (LoadRange mem));
@@ -6599,17 +6543,6 @@
ins_pipe(fstoreF_mem_zero);
%}
-// Store Aligned Packed Bytes in Double register to memory
-instruct storeA8B(memory mem, regD src) %{
- match(Set mem (Store8B mem src));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STDF $src,$mem\t! packed8B" %}
- opcode(Assembler::stdf_op3);
- ins_encode(simple_form3_mem_reg( mem, src ) );
- ins_pipe(fstoreD_mem_reg);
-%}
-
// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegN dst, iRegP src) %{
predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
@@ -6654,62 +6587,6 @@
%}
-// Store Zero into Aligned Packed Bytes
-instruct storeA8B0(memory mem, immI0 zero) %{
- match(Set mem (Store8B mem zero));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STX $zero,$mem\t! packed8B" %}
- opcode(Assembler::stx_op3);
- ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
- ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Chars/Shorts in Double register to memory
-instruct storeA4C(memory mem, regD src) %{
- match(Set mem (Store4C mem src));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STDF $src,$mem\t! packed4C" %}
- opcode(Assembler::stdf_op3);
- ins_encode(simple_form3_mem_reg( mem, src ) );
- ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Chars/Shorts
-instruct storeA4C0(memory mem, immI0 zero) %{
- match(Set mem (Store4C mem (Replicate4C zero)));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STX $zero,$mem\t! packed4C" %}
- opcode(Assembler::stx_op3);
- ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
- ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Ints in Double register to memory
-instruct storeA2I(memory mem, regD src) %{
- match(Set mem (Store2I mem src));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STDF $src,$mem\t! packed2I" %}
- opcode(Assembler::stdf_op3);
- ins_encode(simple_form3_mem_reg( mem, src ) );
- ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Ints
-instruct storeA2I0(memory mem, immI0 zero) %{
- match(Set mem (Store2I mem zero));
- ins_cost(MEMORY_REF_COST);
- size(4);
- format %{ "STX $zero,$mem\t! packed2I" %}
- opcode(Assembler::stx_op3);
- ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
- ins_pipe(fstoreD_mem_zero);
-%}
-
-
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
@@ -8880,150 +8757,6 @@
ins_pipe(ialu_reg_imm);
%}
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{
- effect(DEF dst, USE src);
- format %{ "SLLX $src,56,$dst\n\t"
- "SRLX $dst, 8,O7\n\t"
- "OR $dst,O7,$dst\n\t"
- "SRLX $dst,16,O7\n\t"
- "OR $dst,O7,$dst\n\t"
- "SRLX $dst,32,O7\n\t"
- "OR $dst,O7,$dst\t! replicate8B" %}
- ins_encode( enc_repl8b(src, dst));
- ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg(stackSlotD dst, iRegI src) %{
- match(Set dst (Replicate8B src));
- expand %{
- iRegL tmp;
- Repl8B_reg_helper(tmp, src);
- regL_to_stkD(dst, tmp);
- %}
-%}
-
-// Replicate scalar constant to packed byte values in Double register
-instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
- match(Set dst (Replicate8B con));
- effect(KILL tmp);
- format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
- ins_encode %{
- // XXX This is a quick fix for 6833573.
- //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
- RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
- __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
- %}
- ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{
- effect(DEF dst, USE src);
- format %{ "SLLX $src,48,$dst\n\t"
- "SRLX $dst,16,O7\n\t"
- "OR $dst,O7,$dst\n\t"
- "SRLX $dst,32,O7\n\t"
- "OR $dst,O7,$dst\t! replicate4C" %}
- ins_encode( enc_repl4s(src, dst) );
- ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg(stackSlotD dst, iRegI src) %{
- match(Set dst (Replicate4C src));
- expand %{
- iRegL tmp;
- Repl4C_reg_helper(tmp, src);
- regL_to_stkD(dst, tmp);
- %}
-%}
-
-// Replicate scalar constant to packed char values in Double register
-instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{
- match(Set dst (Replicate4C con));
- effect(KILL tmp);
- format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %}
- ins_encode %{
- // XXX This is a quick fix for 6833573.
- //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
- RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
- __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
- %}
- ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{
- effect(DEF dst, USE src);
- format %{ "SLLX $src,48,$dst\n\t"
- "SRLX $dst,16,O7\n\t"
- "OR $dst,O7,$dst\n\t"
- "SRLX $dst,32,O7\n\t"
- "OR $dst,O7,$dst\t! replicate4S" %}
- ins_encode( enc_repl4s(src, dst) );
- ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg(stackSlotD dst, iRegI src) %{
- match(Set dst (Replicate4S src));
- expand %{
- iRegL tmp;
- Repl4S_reg_helper(tmp, src);
- regL_to_stkD(dst, tmp);
- %}
-%}
-
-// Replicate scalar constant to packed short values in Double register
-instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
- match(Set dst (Replicate4S con));
- effect(KILL tmp);
- format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
- ins_encode %{
- // XXX This is a quick fix for 6833573.
- //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
- RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
- __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
- %}
- ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{
- effect(DEF dst, USE src);
- format %{ "SLLX $src,32,$dst\n\t"
- "SRLX $dst,32,O7\n\t"
- "OR $dst,O7,$dst\t! replicate2I" %}
- ins_encode( enc_repl2i(src, dst));
- ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg(stackSlotD dst, iRegI src) %{
- match(Set dst (Replicate2I src));
- expand %{
- iRegL tmp;
- Repl2I_reg_helper(tmp, src);
- regL_to_stkD(dst, tmp);
- %}
-%}
-
-// Replicate scalar zero constant to packed int values in Double register
-instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
- match(Set dst (Replicate2I con));
- effect(KILL tmp);
- format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
- ins_encode %{
- // XXX This is a quick fix for 6833573.
- //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
- RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
- __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
- %}
- ins_pipe(loadConFD);
-%}
-
//----------Control Flow Instructions------------------------------------------
// Compare Instructions
// Compare Integers
@@ -10742,6 +10475,308 @@
ins_pipe(istore_mem_reg);
%}
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load Aligned Packed values into a Double Register
+instruct loadV8(regD dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 8);
+ match(Set dst (LoadVector mem));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "LDDF $mem,$dst\t! load vector (8 bytes)" %}
+ ins_encode %{
+ __ ldf(FloatRegisterImpl::D, $mem$$Address, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(floadD_mem);
+%}
+
+// Store Vector in Double register to memory
+instruct storeV8(memory mem, regD src) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem src));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STDF $src,$mem\t! store vector (8 bytes)" %}
+ ins_encode %{
+ __ stf(FloatRegisterImpl::D, as_DoubleFloatRegister($src$$reg), $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_reg);
+%}
+
+// Store Zero into vector in memory
+instruct storeV8B_zero(memory mem, immI0 zero) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem (ReplicateB zero)));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STX $zero,$mem\t! store zero vector (8 bytes)" %}
+ ins_encode %{
+ __ stx(G0, $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV4S_zero(memory mem, immI0 zero) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem (ReplicateS zero)));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STX $zero,$mem\t! store zero vector (4 shorts)" %}
+ ins_encode %{
+ __ stx(G0, $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2I_zero(memory mem, immI0 zero) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem (ReplicateI zero)));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STX $zero,$mem\t! store zero vector (2 ints)" %}
+ ins_encode %{
+ __ stx(G0, $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2F_zero(memory mem, immF0 zero) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem (ReplicateF zero)));
+ ins_cost(MEMORY_REF_COST);
+ size(4);
+ format %{ "STX $zero,$mem\t! store zero vector (2 floats)" %}
+ ins_encode %{
+ __ stx(G0, $mem$$Address);
+ %}
+ ins_pipe(fstoreD_mem_zero);
+%}
+
+// Replicate scalar to packed byte values into Double register
+instruct Repl8B_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 8 && UseVIS >= 3);
+ match(Set dst (ReplicateB src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,56,$tmp\n\t"
+ "SRLX $tmp, 8,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,16,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+ "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 56, Rtmp);
+ __ srlx(Rtmp, 8, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 16, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed byte values into Double stack
+instruct Repl8B_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 8 && UseVIS < 3);
+ match(Set dst (ReplicateB src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,56,$tmp\n\t"
+ "SRLX $tmp, 8,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,16,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+ "STX $tmp,$dst\t! regL to stkD" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 56, Rtmp);
+ __ srlx(Rtmp, 8, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 16, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+ __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed byte values in Double register
+instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB con));
+ effect(KILL tmp);
+ format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
+ ins_encode %{
+ // XXX This is a quick fix for 6833573.
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
+ RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
+ __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed char/short values into Double register
+instruct Repl4S_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 4 && UseVIS >= 3);
+ match(Set dst (ReplicateS src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,48,$tmp\n\t"
+ "SRLX $tmp,16,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+ "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 48, Rtmp);
+ __ srlx(Rtmp, 16, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed char/short values into Double stack
+instruct Repl4S_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 4 && UseVIS < 3);
+ match(Set dst (ReplicateS src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,48,$tmp\n\t"
+ "SRLX $tmp,16,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+ "STX $tmp,$dst\t! regL to stkD" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 48, Rtmp);
+ __ srlx(Rtmp, 16, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+ __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed char/short values in Double register
+instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS con));
+ effect(KILL tmp);
+ format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
+ ins_encode %{
+ // XXX This is a quick fix for 6833573.
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
+ RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
+ __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed int values into Double register
+instruct Repl2I_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 2 && UseVIS >= 3);
+ match(Set dst (ReplicateI src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,32,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+ "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 32, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed int values into Double stack
+instruct Repl2I_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+ predicate(n->as_Vector()->length() == 2 && UseVIS < 3);
+ match(Set dst (ReplicateI src));
+ effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+ format %{ "SLLX $src,32,$tmp\n\t"
+ "SRLX $tmp,32,$tmp2\n\t"
+ "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+ "STX $tmp,$dst\t! regL to stkD" %}
+ ins_encode %{
+ Register Rsrc = $src$$Register;
+ Register Rtmp = $tmp$$Register;
+ Register Rtmp2 = $tmp2$$Register;
+ __ sllx(Rsrc, 32, Rtmp);
+ __ srlx(Rtmp, 32, Rtmp2);
+ __ or3 (Rtmp, Rtmp2, Rtmp);
+ __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+ __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed int values in Double register
+instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateI con));
+ effect(KILL tmp);
+ format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
+ ins_encode %{
+ // XXX This is a quick fix for 6833573.
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
+ RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
+ __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed float values into Double stack
+instruct Repl2F_stk(stackSlotD dst, regF src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF src));
+ ins_cost(MEMORY_REF_COST*2);
+ format %{ "STF $src,$dst.hi\t! packed2F\n\t"
+ "STF $src,$dst.lo" %}
+ opcode(Assembler::stf_op3);
+ ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src));
+ ins_pipe(fstoreF_stk_reg);
+%}
+
+// Replicate scalar constant to packed float values in Double register
+instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF con));
+ effect(KILL tmp);
+ format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %}
+ ins_encode %{
+ // XXX This is a quick fix for 6833573.
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister);
+ RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register);
+ __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+ %}
+ ins_pipe(loadConFD);
+%}
+
//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -217,6 +217,8 @@
// Currently not supported anywhere.
FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+ MaxVectorSize = 8;
+
assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
#endif
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1637,6 +1637,13 @@
emit_byte(0xC0 | encode);
}
+void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
+ emit_byte(0x16);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::movb(Register dst, Address src) {
NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
InstructionMark im(this);
@@ -1686,6 +1693,14 @@
emit_operand(dst, src);
}
+void Assembler::movdl(Address dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionMark im(this);
+ simd_prefix(dst, src, VEX_SIMD_66);
+ emit_byte(0x7E);
+ emit_operand(src, dst);
+}
+
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
@@ -1716,6 +1731,35 @@
emit_operand(src, dst);
}
+// Move Unaligned 256bit Vector
+void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+ assert(UseAVX, "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+ emit_byte(0x6F);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmovdqu(XMMRegister dst, Address src) {
+ assert(UseAVX, "");
+ InstructionMark im(this);
+ bool vector256 = true;
+ vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+ emit_byte(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::vmovdqu(Address dst, XMMRegister src) {
+ assert(UseAVX, "");
+ InstructionMark im(this);
+ bool vector256 = true;
+ // swap src<->dst for encoding
+ assert(src != xnoreg, "sanity");
+ vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
+ emit_byte(0x7F);
+ emit_operand(src, dst);
+}
+
// Uses zero extension on 64bit
void Assembler::movl(Register dst, int32_t imm32) {
@@ -3112,6 +3156,13 @@
emit_operand(dst, src);
}
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
+ emit_byte(0x57);
+ emit_byte(0xC0 | encode);
+}
+
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
@@ -3120,6 +3171,30 @@
emit_operand(dst, src);
}
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+ assert(VM_Version::supports_avx(), "");
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
+ emit_byte(0x57);
+ emit_byte(0xC0 | encode);
+}
+
+void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+ assert(VM_Version::supports_avx(), "");
+ bool vector256 = true;
+ int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+ emit_byte(0x18);
+ emit_byte(0xC0 | encode);
+ // 0x00 - insert into lower 128 bits
+ // 0x01 - insert into upper 128 bits
+ emit_byte(0x01);
+}
+
+void Assembler::vzeroupper() {
+ assert(VM_Version::supports_avx(), "");
+ (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
+ emit_byte(0x77);
+}
+
#ifndef _LP64
// 32bit only pieces of the assembler
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -591,8 +591,9 @@
void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
VexSimdPrefix pre, bool vector256 = false) {
- vex_prefix(src, nds->encoding(), dst->encoding(),
- pre, VEX_OPCODE_0F, false, vector256);
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
}
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
@@ -600,9 +601,12 @@
bool vex_w, bool vector256);
int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
- VexSimdPrefix pre, bool vector256 = false) {
- return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
- pre, VEX_OPCODE_0F, false, vector256);
+ VexSimdPrefix pre, bool vector256 = false,
+ VexOpcode opc = VEX_OPCODE_0F) {
+ int src_enc = src->encoding();
+ int dst_enc = dst->encoding();
+ int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+ return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
}
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
@@ -1261,6 +1265,7 @@
void movdl(XMMRegister dst, Register src);
void movdl(Register dst, XMMRegister src);
void movdl(XMMRegister dst, Address src);
+ void movdl(Address dst, XMMRegister src);
// Move Double Quadword
void movdq(XMMRegister dst, Register src);
@@ -1274,6 +1279,14 @@
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
+ // Move Unaligned 256bit Vector
+ void vmovdqu(Address dst, XMMRegister src);
+ void vmovdqu(XMMRegister dst, Address src);
+ void vmovdqu(XMMRegister dst, XMMRegister src);
+
+ // Move lower 64bit to high 64bit in 128bit register
+ void movlhps(XMMRegister dst, XMMRegister src);
+
void movl(Register dst, int32_t imm32);
void movl(Address dst, int32_t imm32);
void movl(Register dst, Register src);
@@ -1615,6 +1628,17 @@
void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+ // AVX Vector instructions.
+ void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+ void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
+ // AVX instruction which is used to clear upper 128 bits of YMM registers and
+ // to avoid transition penalty between AVX and SSE states. There is no
+ // penalty if legacy SSE instructions are encoded using VEX prefix because
+ // they always clear upper 128 bits. It should be used before calling
+ // runtime code and native libraries.
+ void vzeroupper();
protected:
// Next instructions require address alignment 16 bytes SSE mode.
@@ -2529,9 +2553,13 @@
void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ // AVX Vector instructions
+
+ void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+ void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
--- a/hotspot/src/cpu/x86/vm/register_x86.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/register_x86.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
2 * FloatRegisterImpl::number_of_registers;
const int ConcreteRegisterImpl::max_xmm = ConcreteRegisterImpl::max_fpr +
- 2 * XMMRegisterImpl::number_of_registers;
+ 8 * XMMRegisterImpl::number_of_registers;
const char* RegisterImpl::name() const {
const char* names[number_of_registers] = {
#ifndef AMD64
--- a/hotspot/src/cpu/x86/vm/register_x86.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/register_x86.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -158,7 +158,7 @@
XMMRegister successor() const { return as_XMMRegister(encoding() + 1); }
// accessors
- int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int encoding() const { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
const char* name() const;
};
@@ -216,7 +216,7 @@
RegisterImpl::number_of_registers + // "H" half of a 64bit register
#endif // AMD64
2 * FloatRegisterImpl::number_of_registers +
- 2 * XMMRegisterImpl::number_of_registers +
+ 8 * XMMRegisterImpl::number_of_registers +
1 // eflags
};
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -467,6 +467,32 @@
if (!supports_avx ()) // Drop to 0 if no AVX support
UseAVX = 0;
+#ifdef COMPILER2
+ if (UseFPUForSpilling) {
+ if (UseSSE < 2) {
+ // Only supported with SSE2+
+ FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+ }
+ }
+ if (MaxVectorSize > 0) {
+ if (!is_power_of_2(MaxVectorSize)) {
+ warning("MaxVectorSize must be a power of 2");
+ FLAG_SET_DEFAULT(MaxVectorSize, 32);
+ }
+ if (MaxVectorSize > 32) {
+ FLAG_SET_DEFAULT(MaxVectorSize, 32);
+ }
+ if (MaxVectorSize > 16 && UseAVX == 0) {
+ // Only supported with AVX+
+ FLAG_SET_DEFAULT(MaxVectorSize, 16);
+ }
+ if (UseSSE < 2) {
+ // Only supported with SSE2+
+ FLAG_SET_DEFAULT(MaxVectorSize, 0);
+ }
+ }
+#endif
+
// On new cpus instructions which update whole XMM register should be used
// to prevent partial register stall due to dependencies on high half.
//
@@ -544,6 +570,12 @@
}
}
+#ifdef COMPILER2
+ if (MaxVectorSize > 16) {
+ // Limit vector size to 16 bytes on current AMD cpus.
+ FLAG_SET_DEFAULT(MaxVectorSize, 16);
+ }
+#endif // COMPILER2
}
if( is_intel() ) { // Intel cpus specific settings
@@ -606,15 +638,6 @@
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
}
-#ifdef COMPILER2
- if (UseFPUForSpilling) {
- if (UseSSE < 2) {
- // Only supported with SSE2+
- FLAG_SET_DEFAULT(UseFPUForSpilling, false);
- }
- }
-#endif
-
assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
--- a/hotspot/src/cpu/x86/vm/vmreg_x86.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/vmreg_x86.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,8 +48,9 @@
XMMRegister xreg = ::as_XMMRegister(0);
for ( ; i < ConcreteRegisterImpl::max_xmm ; ) {
- regName[i++] = xreg->name();
- regName[i++] = xreg->name();
+ for (int j = 0 ; j < 8 ; j++) {
+ regName[i++] = xreg->name();
+ }
xreg = xreg->successor();
}
for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
--- a/hotspot/src/cpu/x86/vm/vmreg_x86.inline.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/vmreg_x86.inline.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,7 @@
}
inline VMReg XMMRegisterImpl::as_VMReg() {
- return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr);
+ return VMRegImpl::as_VMReg((encoding() << 3) + ConcreteRegisterImpl::max_fpr);
}
@@ -75,7 +75,7 @@
inline XMMRegister VMRegImpl::as_XMMRegister() {
assert( is_XMMRegister() && is_even(value()), "must be" );
// Yuk
- return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 1);
+ return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 3);
}
inline bool VMRegImpl::is_concrete() {
--- a/hotspot/src/cpu/x86/vm/x86.ad Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/x86.ad Thu Jun 28 10:35:28 2012 -0700
@@ -24,6 +24,456 @@
// X86 Common Architecture Description File
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def" name ( register save type, C convention save type,
+// ideal register type, encoding );
+// Register Save Types:
+//
+// NS = No-Save: The register allocator assumes that these registers
+// can be used without saving upon entry to the method, &
+// that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call: The register allocator assumes that these registers
+// can be used without saving upon entry to the method,
+// but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, but they do not need to be saved at call
+// sites.
+//
+// AS = Always-Save: The register allocator assumes that these registers
+// must be saved before using them upon entry to the
+// method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
+// Word a in each register holds a Float, words ab hold a Double.
+// The whole registers are used in SSE4.2 version intrinsics,
+// array copy stubs and superword operations (see UseSSE42Intrinsics,
+// UseXMMForArrayCopy and UseSuperword flags).
+// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
+// Linux ABI: No register preserved across function calls
+// XMM0-XMM7 might hold parameters
+// Windows ABI: XMM6-XMM15 preserved across function calls
+// XMM0-XMM3 might hold parameters
+
+reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
+reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
+reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
+reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
+reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
+reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
+reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
+reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
+reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
+reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
+reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
+reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
+reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
+reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
+reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
+reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
+reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
+reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
+reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
+reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
+reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
+reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
+reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
+reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
+reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
+reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
+reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
+reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
+reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
+reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
+reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#ifdef _WIN64
+
+reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
+reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
+reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
+reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
+reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
+reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
+reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
+reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
+reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
+reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
+reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
+reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
+reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
+reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
+reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
+reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
+reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
+reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
+reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
+reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
+reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
+reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
+reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
+reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
+reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
+reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
+reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
+reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
+reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
+reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
+reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
+reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
+reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
+reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
+reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
+reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
+reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
+reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
+reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
+reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
+reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#else // _WIN64
+
+reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
+reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
+reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
+reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
+reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
+reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
+reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
+reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
+reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#ifdef _LP64
+
+reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
+reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
+reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
+reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
+reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
+reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
+reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
+reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
+reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
+reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
+reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
+reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
+reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
+reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
+reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
+reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
+reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
+reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
+reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
+reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
+reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
+reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
+reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
+reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
+reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
+reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
+reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
+reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
+reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
+reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
+reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
+reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
+reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#endif // _LP64
+
+#endif // _WIN64
+
+#ifdef _LP64
+reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
+#else
+reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
+#endif // _LP64
+
+alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+ );
+
+// flags allocation class should be last.
+alloc_class chunk2(RFLAGS);
+
+// Singleton class for condition codes
+reg_class int_flags(RFLAGS);
+
+// Class for all float registers
+reg_class float_reg(XMM0,
+ XMM1,
+ XMM2,
+ XMM3,
+ XMM4,
+ XMM5,
+ XMM6,
+ XMM7
+#ifdef _LP64
+ ,XMM8,
+ XMM9,
+ XMM10,
+ XMM11,
+ XMM12,
+ XMM13,
+ XMM14,
+ XMM15
+#endif
+ );
+
+// Class for all double registers
+reg_class double_reg(XMM0, XMM0b,
+ XMM1, XMM1b,
+ XMM2, XMM2b,
+ XMM3, XMM3b,
+ XMM4, XMM4b,
+ XMM5, XMM5b,
+ XMM6, XMM6b,
+ XMM7, XMM7b
+#ifdef _LP64
+ ,XMM8, XMM8b,
+ XMM9, XMM9b,
+ XMM10, XMM10b,
+ XMM11, XMM11b,
+ XMM12, XMM12b,
+ XMM13, XMM13b,
+ XMM14, XMM14b,
+ XMM15, XMM15b
+#endif
+ );
+
+// Class for all 32bit vector registers
+reg_class vectors_reg(XMM0,
+ XMM1,
+ XMM2,
+ XMM3,
+ XMM4,
+ XMM5,
+ XMM6,
+ XMM7
+#ifdef _LP64
+ ,XMM8,
+ XMM9,
+ XMM10,
+ XMM11,
+ XMM12,
+ XMM13,
+ XMM14,
+ XMM15
+#endif
+ );
+
+// Class for all 64bit vector registers
+reg_class vectord_reg(XMM0, XMM0b,
+ XMM1, XMM1b,
+ XMM2, XMM2b,
+ XMM3, XMM3b,
+ XMM4, XMM4b,
+ XMM5, XMM5b,
+ XMM6, XMM6b,
+ XMM7, XMM7b
+#ifdef _LP64
+ ,XMM8, XMM8b,
+ XMM9, XMM9b,
+ XMM10, XMM10b,
+ XMM11, XMM11b,
+ XMM12, XMM12b,
+ XMM13, XMM13b,
+ XMM14, XMM14b,
+ XMM15, XMM15b
+#endif
+ );
+
+// Class for all 128bit vector registers
+reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
+ XMM1, XMM1b, XMM1c, XMM1d,
+ XMM2, XMM2b, XMM2c, XMM2d,
+ XMM3, XMM3b, XMM3c, XMM3d,
+ XMM4, XMM4b, XMM4c, XMM4d,
+ XMM5, XMM5b, XMM5c, XMM5d,
+ XMM6, XMM6b, XMM6c, XMM6d,
+ XMM7, XMM7b, XMM7c, XMM7d
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d,
+ XMM9, XMM9b, XMM9c, XMM9d,
+ XMM10, XMM10b, XMM10c, XMM10d,
+ XMM11, XMM11b, XMM11c, XMM11d,
+ XMM12, XMM12b, XMM12c, XMM12d,
+ XMM13, XMM13b, XMM13c, XMM13d,
+ XMM14, XMM14b, XMM14c, XMM14d,
+ XMM15, XMM15b, XMM15c, XMM15d
+#endif
+ );
+
+// Class for all 256bit vector registers
+reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
+ XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
+ XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
+ XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
+ XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
+ XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
+ XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
+ XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
+#ifdef _LP64
+ ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
+ XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
+ XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+ XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+ XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+ XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+ XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+ XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+ );
+
+%}
+
source %{
// Float masks come from different places depending on platform.
#ifdef _LP64
@@ -38,6 +488,252 @@
static address double_signflip() { return (address)double_signflip_pool; }
#endif
+// Map Types to machine register types (Op_* ideal register opcodes). The table has Type::lastype entries; the inline comments below name each group. NOTE(review): presumably indexed by the Type base kind -- confirm against the enum order.
+const int Matcher::base2reg[Type::lastype] = {
+ Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+ Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+ Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
+ Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+ 0, 0/*abio*/,
+ Op_RegP /* Return address */, 0, /* the memories */
+ Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+ 0 /*bottom*/
+};
+
+// Max vector size in bytes for element type bt. 0 if not supported.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+ assert(is_java_primitive(bt), "only primitive type vectors");
+ if (UseSSE < 2) return 0;  // no vectors at all below SSE2
+ // SSE2 supports 128bit vectors for all types.
+ // AVX2 supports 256bit vectors for all types.
+ int size = (UseAVX > 1) ? 32 : 16;
+ // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
+ if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
+ size = 32;
+ // Use flag to limit vector size.
+ size = MIN2(size,(int)MaxVectorSize);
+ // Minimum 2 values in vector (or 4 for bytes). The case groups below fall through, so wider types also run the narrower checks.
+ switch (bt) {
+ case T_DOUBLE:
+ case T_LONG:
+ if (size < 16) return 0;  // fall through
+ case T_FLOAT:
+ case T_INT:
+ if (size < 8) return 0;  // fall through
+ case T_BOOLEAN:
+ case T_BYTE:
+ case T_CHAR:
+ case T_SHORT:
+ if (size < 4) return 0;
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return size;
+}
+
+// Limits on vector size (number of elements) loaded into vector. The max is the vector width in bytes divided by the element size, so it is 0 when vector_width_in_bytes() returns 0.
+const int Matcher::max_vector_size(const BasicType bt) {
+ return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  const int upper = max_vector_size(bt);  // the minimum never exceeds the per-type maximum
+  // A vector holds at least 4 bytes: 4 elements for one-byte types, 2 elements otherwise.
+  const int wanted = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(wanted, upper);
+}
+
+// Vector ideal reg corresponding to specified size in bytes
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  // Only the widths 4, 8, 16 and 32 bytes have a vector register kind.
+  if (size ==  4) return Op_VecS;
+  if (size ==  8) return Op_VecD;
+  if (size == 16) return Op_VecX;
+  if (size == 32) return Op_VecY;
+  // Any other width is a caller error.
+  ShouldNotReachHere();
+  return 0;
+}
+
+// x86 supports misaligned vector loads/stores; they are reported as OK unless the AlignVector flag is set.
+const bool Matcher::misaligned_vectors_ok() {
+ return !AlignVector; // can be changed by flag
+}
+
+// Helper methods for MachSpillCopyNode::implementation(). vec_mov_helper handles an XMM-to-XMM vector copy: it emits it when cbuf is non-NULL, prints it when cbuf is NULL and !do_size (non-PRODUCT builds), and returns the size in bytes.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, uint ireg, outputStream* st) {
+ // In 64-bit VM size calculation is very complex. Emitting instructions
+ // into scratch buffer is used to get size in 64-bit VM.
+ LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+ assert(ireg == Op_VecS || // 32bit vector
+ (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
+ (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
+ "no non-adjacent vector moves" );
+ if (cbuf) {  // emit the move and return the number of bytes actually emitted
+ MacroAssembler _masm(cbuf);
+ int offset = __ offset();  // buffer position before emission
+ switch (ireg) {
+ case Op_VecS: // copy whole register
+ case Op_VecD:
+ case Op_VecX:
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ break;
+ case Op_VecY:
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ int size = __ offset() - offset;  // bytes emitted by the move above
+#ifdef ASSERT
+ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+ assert(!do_size || size == 4, "incorrect size calculattion");
+#endif
+ return size;
+#ifndef PRODUCT
+ } else if (!do_size) {  // no buffer and not sizing: print the instruction text to st
+ switch (ireg) {
+ case Op_VecS:
+ case Op_VecD:
+ case Op_VecX:
+ st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ break;
+ case Op_VecY:
+ st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+#endif
+ }
+ // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
+ return 4;  // fixed size reported whenever nothing was emitted
+}
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+ int stack_offset, int reg, uint ireg, outputStream* st) {
+ // Spill helper: moves a vector register from (is_load) or to [rsp + stack_offset]; emits when cbuf is non-NULL, prints when cbuf is NULL and !do_size (non-PRODUCT), and returns the size in bytes.
+ // In 64-bit VM size calculation is very complex, so there the size is obtained by emitting instructions into a scratch buffer.
+ LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+ if (cbuf) {  // emit the load/store and return the number of bytes actually emitted
+ MacroAssembler _masm(cbuf);
+ int offset = __ offset();  // buffer position before emission
+ if (is_load) {
+ switch (ireg) {
+ case Op_VecS:
+ __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ break;
+ case Op_VecD:
+ __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ break;
+ case Op_VecX:
+ __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ break;
+ case Op_VecY:
+ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else { // store
+ switch (ireg) {
+ case Op_VecS:
+ __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ break;
+ case Op_VecD:
+ __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ break;
+ case Op_VecX:
+ __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ break;
+ case Op_VecY:
+ __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+ int size = __ offset() - offset;  // bytes emitted by the load/store above
+#ifdef ASSERT
+ int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);  // displacement bytes: 0, 1 or 4
+ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+ assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
+#endif
+ return size;
+#ifndef PRODUCT
+ } else if (!do_size) {  // no buffer and not sizing: print the instruction text to st
+ if (is_load) {
+ switch (ireg) {
+ case Op_VecS:
+ st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+ break;
+ case Op_VecD:
+ st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+ break;
+ case Op_VecX:
+ st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+ break;
+ case Op_VecY:
+ st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ } else { // store
+ switch (ireg) {
+ case Op_VecS:
+ st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+ break;
+ case Op_VecD:
+ st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+ break;
+ case Op_VecX:
+ st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+ break;
+ case Op_VecY:
+ st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+#endif
+ }
+ int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);  // displacement bytes: none for 0, 1 below 0x80, else 4
+ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+ return 5+offset_size;  // fixed size reported whenever nothing was emitted
+}
+
+static inline jfloat replicate4_imm(int con, int width) {
+  // Build a 32-bit pattern made of copies of the low "width" bytes of con.
+  assert(width == 1 || width == 2, "only byte or short types here");
+  const int elem_bits = width * 8;
+  // Keep only the element's own bits; this drops any sign extension in con.
+  jint bits = con & ((1 << elem_bits) - 1);
+  // Each pass doubles the filled span until all 32 bits are covered.
+  for (int filled = elem_bits; filled < 32; filled <<= 1) {
+    bits |= (bits << filled);
+  }
+  // Reinterpret the bit pattern as a float; no numeric conversion happens.
+  return *((jfloat*) &bits);
+}
+
+static inline jdouble replicate8_imm(int con, int width) {
+  // Build a 64-bit pattern made of copies of the low "width" bytes of con.
+  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
+  const int elem_bits = width * 8;
+  // Keep only the element's own bits; this drops any sign extension of con.
+  jlong bits = ((jlong) con) & ((((jlong) 1) << elem_bits) - 1);
+  // Each pass doubles the filled span until all 64 bits are covered.
+  for (int filled = elem_bits; filled < 64; filled <<= 1) {
+    bits |= (bits << filled);
+  }
+  // Reinterpret the bit pattern as a double; no numeric conversion happens.
+  return *((jdouble*) &bits);
+}
+
#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
st->print("nop \t# %d bytes pad for loops and calls", _count);
@@ -103,6 +799,46 @@
%}
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+// Vectors
+operand vecS() %{
+ constraint(ALLOC_IN_RC(vectors_reg));
+ match(VecS);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vecD() %{
+ constraint(ALLOC_IN_RC(vectord_reg));
+ match(VecD);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vecX() %{
+ constraint(ALLOC_IN_RC(vectorx_reg));
+ match(VecX);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+operand vecY() %{
+ constraint(ALLOC_IN_RC(vectory_reg));
+ match(VecY);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
+
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
// ============================================================================
@@ -852,3 +1588,797 @@
ins_pipe(pipe_slow);
%}
+
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load vectors (4 bytes long)
+instruct loadV4(vecS dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 4);
+ match(Set dst (LoadVector mem));
+ ins_cost(125);
+ format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $mem$$Address);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Load vectors (8 bytes long)
+instruct loadV8(vecD dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 8);
+ match(Set dst (LoadVector mem));
+ ins_cost(125);
+ format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Load vectors (16 bytes long)
+instruct loadV16(vecX dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(125);
+ format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
+ ins_encode %{
+ __ movdqu($dst$$XMMRegister, $mem$$Address);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Load vectors (32 bytes long)
+instruct loadV32(vecY dst, memory mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 32);
+ match(Set dst (LoadVector mem));
+ ins_cost(125);
+ format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
+ ins_encode %{
+ __ vmovdqu($dst$$XMMRegister, $mem$$Address);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Store vectors
+instruct storeV4(memory mem, vecS src) %{
+ predicate(n->as_StoreVector()->memory_size() == 4);
+ match(Set mem (StoreVector mem src));
+ ins_cost(145);
+ format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
+ ins_encode %{
+ __ movdl($mem$$Address, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeV8(memory mem, vecD src) %{
+ predicate(n->as_StoreVector()->memory_size() == 8);
+ match(Set mem (StoreVector mem src));
+ ins_cost(145);
+ format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeV16(memory mem, vecX src) %{
+ predicate(n->as_StoreVector()->memory_size() == 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(145);
+ format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
+ ins_encode %{
+ __ movdqu($mem$$Address, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeV32(memory mem, vecY src) %{
+ predicate(n->as_StoreVector()->memory_size() == 32);
+ match(Set mem (StoreVector mem src));
+ ins_cost(145);
+ format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
+ ins_encode %{
+ __ vmovdqu($mem$$Address, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar to be vector
+instruct Repl4B(vecS dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\t! replicate4B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B(vecD dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\t! replicate8B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B(vecX dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "movlhps $dst,$dst\t! replicate16B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B(vecY dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 32);
+ match(Set dst (ReplicateB src));
+ format %{ "movd $dst,$src\n\t"
+ "punpcklbw $dst,$dst\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar immediate to be vector by loading from const table.
+instruct Repl4B_imm(vecS dst, immI con) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateB con));
+ format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B_imm(vecD dst, immI con) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_imm(vecX dst, immI con) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateB con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t"
+ "movlhps $dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB con));
+  format %{ "movsd $dst,[$constantaddress]\t! replicate32B($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));  // low 64 bits: the byte constant replicated 8 times
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);  // copy the low 64 bits into bits 64..127
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);  // fill the upper 128 bits from the lower half
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar zero to be vector
+instruct Repl4B_zero(vecS dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateB zero));
+ format %{ "pxor $dst,$dst\t! replicate4B zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8B_zero(vecD dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateB zero));
+ format %{ "pxor $dst,$dst\t! replicate8B zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16B_zero(vecX dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateB zero));
+ format %{ "pxor $dst,$dst\t! replicate16B zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl32B_zero(vecY dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 32);
+ match(Set dst (ReplicateB zero));
+ format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+ bool vector256 = true;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate char/short (2 byte) scalar to be vector
+instruct Repl2S(vecS dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateS src));
+ format %{ "movd $dst,$src\n\t"
+ "pshuflw $dst,$dst,0x00\t! replicate2S" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S(vecD dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS src));
+ format %{ "movd $dst,$src\n\t"
+ "pshuflw $dst,$dst,0x00\t! replicate4S" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S(vecX dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateS src));
+ format %{ "movd $dst,$src\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "movlhps $dst,$dst\t! replicate8S" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S(vecY dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateS src));
+ format %{ "movd $dst,$src\n\t"
+ "pshuflw $dst,$dst,0x00\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2S_imm(vecS dst, immI con) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateS con));
+ format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_imm(vecD dst, immI con) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_imm(vecX dst, immI con) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateS con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t"
+ "movlhps $dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_imm(vecY dst, immI con) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateS con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar zero to be vector
+instruct Repl2S_zero(vecS dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateS zero));
+ format %{ "pxor $dst,$dst\t! replicate2S zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_zero(vecD dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateS zero));
+ format %{ "pxor $dst,$dst\t! replicate4S zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_zero(vecX dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateS zero));
+ format %{ "pxor $dst,$dst\t! replicate8S zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16S_zero(vecY dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (ReplicateS zero));
+ format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+ bool vector256 = true;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate integer (4 byte) scalar to be vector
+instruct Repl2I(vecD dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\t! replicate2I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I(vecX dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\t! replicate4I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I(vecY dst, rRegI src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateI src));
+ format %{ "movd $dst,$src\n\t"
+ "pshufd $dst,$dst,0x00\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2I_imm(vecD dst, immI con) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateI con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_imm(vecX dst, immI con) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateI con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+ "movlhps $dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_imm(vecY dst, immI con) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateI con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// A scalar int that comes from memory (LoadI) can be loaded into the xmm register directly, without going through a general register.
+instruct Repl2I_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd $dst,$mem\n\t"
+            "pshufd $dst,$dst,0x00\t! replicate2I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);  // 4-byte scalar load, hence the LoadI (not LoadVector) input
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);  // copy dword 0 into every dword
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd $dst,$mem\n\t"
+            "pshufd $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);  // 4-byte scalar load, hence the LoadI (not LoadVector) input
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);  // copy dword 0 into every dword
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "movd $dst,$mem\n\t"
+            "pshufd $dst,$dst,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);  // 4-byte scalar load, hence the LoadI (not LoadVector) input
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);  // copy dword 0 into every dword of the low 128 bits
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);  // fill the upper 128 bits from the lower half
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar zero to be vector
+instruct Repl2I_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // xor the register with itself: all bits become zero
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // xor the register with itself: all bits become zero
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8I_zero(vecY dst, immI0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateI zero));
+ format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+ bool vector256 = true;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate long (8 byte) scalar to be vector
+#ifdef _LP64
+instruct Repl2L(vecX dst, rRegL src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL src));
+ format %{ "movdq $dst,$src\n\t"
+ "movlhps $dst,$dst\t! replicate2L" %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, rRegL src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateL src));
+ format %{ "movdq $dst,$src\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+#else // _LP64
+instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL src));
+ effect(TEMP dst, USE src, TEMP tmp);
+ format %{ "movdl $dst,$src.lo\n\t"
+ "movdl $tmp,$src.hi\n\t"
+ "punpckldq $dst,$tmp\n\t"
+ "movlhps $dst,$dst\t! replicate2L"%}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateL src));
+ effect(TEMP dst, USE src, TEMP tmp);
+ format %{ "movdl $dst,$src.lo\n\t"
+ "movdl $tmp,$src.hi\n\t"
+ "punpckldq $dst,$tmp\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif // _LP64
+
+// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2L_imm(vecX dst, immL con) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t"
+ "movlhps $dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress($con));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_imm(vecY dst, immL con) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateL con));
+ format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t"
+ "movlhps $dst,$dst\n\t"
+ "vinsertf128h $dst,$dst,$dst" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $constantaddress($con));
+ __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// A scalar long that comes from memory (LoadL) can be loaded into the xmm register directly, without going through general registers.
+instruct Repl2L_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq $dst,$mem\n\t"
+            "movlhps $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);  // 8-byte scalar load, hence the LoadL (not LoadVector) input
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);  // copy the low 64 bits into bits 64..127
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq $dst,$mem\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);  // 8-byte scalar load, hence the LoadL (not LoadVector) input
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);  // copy the low 64 bits into bits 64..127
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);  // fill the upper 128 bits from the lower half
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate long (8 byte) scalar zero to be vector
+instruct Repl2L_zero(vecX dst, immL0 zero) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateL zero));
+ format %{ "pxor $dst,$dst\t! replicate2L zero" %}
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4L_zero(vecY dst, immL0 zero) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateL zero));
+ format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %}
+ ins_encode %{
+ // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+ bool vector256 = true;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate float (4 byte) scalar to be vector
+instruct Repl2F(vecD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);  // copy dword 0 of $src into every dword of $dst
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F(vecX dst, regF src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);  // copy dword 0 of $src into every dword of $dst
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8F(vecY dst, regF src) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateF src));
+ format %{ "pshufd $dst,$src,0x00\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate float (4 byte) scalar zero to be vector
+instruct Repl2F_zero(vecD dst, immF0 zero) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateF zero));
+ format %{ "xorps $dst,$dst\t! replicate2F zero" %}
+ ins_encode %{
+ __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F_zero(vecX dst, immF0 zero) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateF zero));
+ format %{ "xorps $dst,$dst\t! replicate4F zero" %}
+ ins_encode %{
+ __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8F_zero(vecY dst, immF0 zero) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (ReplicateF zero));
+ format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate double (8 byte) scalar to be vector
+instruct Repl2D(vecX dst, regD src) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateD src));
+ format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct Repl4D(vecY dst, regD src) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateD src));
+ format %{ "pshufd $dst,$src,0x44\n\t"
+ "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+ __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Replicate double (8 byte) scalar zero to be vector
+instruct Repl2D_zero(vecX dst, immD0 zero) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (ReplicateD zero));
+ format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
+ ins_encode %{
+ __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4D_zero(vecY dst, immD0 zero) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (ReplicateD zero));
+ format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
+ ins_encode %{
+ bool vector256 = true;
+ __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Thu Jun 28 10:35:28 2012 -0700
@@ -74,9 +74,6 @@
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
-// Special Registers
-reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
-
// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
@@ -105,27 +102,6 @@
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
-// XMM registers. 128-bit registers or 4 words each, labeled a-d.
-// Word a in each register holds a Float, words ab hold a Double.
-// We currently do not use the SIMD capabilities, so registers cd
-// are unused at the moment.
-reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
@@ -138,15 +114,6 @@
FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
FPR6L, FPR6H, FPR7L, FPR7H );
-alloc_class chunk1( XMM0a, XMM0b,
- XMM1a, XMM1b,
- XMM2a, XMM2b,
- XMM3a, XMM3b,
- XMM4a, XMM4b,
- XMM5a, XMM5b,
- XMM6a, XMM6b,
- XMM7a, XMM7b, EFLAGS);
-
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
@@ -159,12 +126,12 @@
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
-reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
-reg_class x_reg(EBX, ECX, EDX, EAX);
+reg_class int_x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
@@ -193,8 +160,6 @@
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
-// Singleton class for condition codes
-reg_class int_flags(EFLAGS);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
@@ -206,29 +171,18 @@
// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
-reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
-
-// make a register class for SSE registers
-reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
-
-// make a double register class for SSE2 registers
-reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
- XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
-
-reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
- FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
- FPR7L,FPR7H );
-
-reg_class flt_reg0( FPR1L );
-reg_class dbl_reg0( FPR1L,FPR1H );
-reg_class dbl_reg1( FPR2L,FPR2H );
-reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
- FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
-
-// XMM6 and XMM7 could be used as temporary registers for long, float and
-// double values for SSE2.
-reg_class xdb_reg6( XMM6a,XMM6b );
-reg_class xdb_reg7( XMM7a,XMM7b );
+reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
+
+reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
+ FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
+ FPR7L,FPR7H );
+
+reg_class fp_flt_reg0( FPR1L );
+reg_class fp_dbl_reg0( FPR1L,FPR1H );
+reg_class fp_dbl_reg1( FPR2L,FPR2H );
+reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
+ FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
+
%}
@@ -412,7 +366,7 @@
}
}
- // eRegI ereg, memory mem) %{ // emit_reg_mem
+ // rRegI ereg, memory mem) %{ // emit_reg_mem
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
// There is no index & no scale, use form without SIB byte
if ((index == 0x4) &&
@@ -787,7 +741,7 @@
#endif
}
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
- // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
+ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return size+5+offset_size;
}
@@ -821,7 +775,7 @@
}
#endif
}
- // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+ // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
// Only MOVAPS SSE prefix uses 1 byte.
int sz = 4;
if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
@@ -903,6 +857,108 @@
return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
+// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+ int stack_offset, int reg, uint ireg, outputStream* st);
+
+// Copy one vector spill slot to another (mem -> mem). Emits code when cbuf != NULL, otherwise
+// (non-product builds, !do_size) prints the sequence to st. Returns the code size in bytes.
+static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
+                                     int dst_offset, uint ireg, outputStream* st) {
+  int calc_size = 0;
+  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
+  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
+  switch (ireg) {
+  case Op_VecS:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecD:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    // Size the second push/pop pair from offset+4 without modifying src_offset/dst_offset:
+    // the emit and print code below must still see the offsets of the low word.
+    src_offset_size = (src_offset+4 == 0) ? 0 : ((src_offset+4 < 0x80) ? 1 : 4);
+    dst_offset_size = (dst_offset+4 == 0) ? 0 : ((dst_offset+4 < 0x80) ? 1 : 4);
+    calc_size += 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecX:   // 128-bit and 256-bit copies use the same size estimate
+  case Op_VecY:
+    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS:
+      __ pushl(Address(rsp, src_offset));
+      __ popl (Address(rsp, dst_offset));
+      break;
+    case Op_VecD:
+      __ pushl(Address(rsp, src_offset));    // low 32 bits
+      __ popl (Address(rsp, dst_offset));
+      __ pushl(Address(rsp, src_offset+4));  // high 32 bits
+      __ popl (Address(rsp, dst_offset+4));
+      break;
+    case Op_VecX:
+      __ movdqu(Address(rsp, -16), xmm0);         // save xmm0 below rsp; it is the scratch register
+      __ movdqu(xmm0, Address(rsp, src_offset));
+      __ movdqu(Address(rsp, dst_offset), xmm0);
+      __ movdqu(xmm0, Address(rsp, -16));         // restore xmm0
+      break;
+    case Op_VecY:
+      __ vmovdqu(Address(rsp, -32), xmm0);        // save xmm0 below rsp; it is the scratch register
+      __ vmovdqu(xmm0, Address(rsp, src_offset));
+      __ vmovdqu(Address(rsp, dst_offset), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, -32));        // restore xmm0
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+    assert(size == calc_size, "incorrect size calculattion");
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
+                "popl [rsp + #%d]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecD:
+      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+                "popl [rsp + #%d]\n\t"
+                "pushl [rsp + #%d]\n\t"
+                "popl [rsp + #%d]",
+                src_offset, dst_offset, src_offset+4, dst_offset+4);
+      break;
+    case Op_VecX:
+      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
+                "movdqu xmm0, [rsp + #%d]\n\t"
+                "movdqu [rsp + #%d], xmm0\n\t"
+                "movdqu xmm0, [rsp - #16]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+                "vmovdqu xmm0, [rsp + #%d]\n\t"
+                "vmovdqu [rsp + #%d], xmm0\n\t"
+                "vmovdqu xmm0, [rsp - #32]",
+                src_offset, dst_offset);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  return calc_size;
+}
+
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
// Get registers to move
OptoReg::Name src_second = ra_->get_reg_second(in(1));
@@ -923,6 +979,29 @@
if( src_first == dst_first && src_second == dst_second )
return size; // Self copy, no move
+ if (bottom_type()->isa_vect() != NULL) {
+ uint ireg = ideal_reg();
+ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+ assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
+ assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+ if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+ // mem -> mem
+ int src_offset = ra_->reg2offset(src_first);
+ int dst_offset = ra_->reg2offset(dst_first);
+ return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
+ } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
+ return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
+ } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
+ int stack_offset = ra_->reg2offset(dst_first);
+ return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
+ } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
+ int stack_offset = ra_->reg2offset(src_first);
+ return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+
// --------------------------------------
// Check for mem-mem move. push/pop to move.
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
@@ -1313,16 +1392,6 @@
return true;
}
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
- return UseSSE >= 2 ? 8 : 0;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
- return Op_RegD;
-}
-
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
@@ -1452,7 +1521,7 @@
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
if( reg == ECX_num || reg == EDX_num ) return true;
- if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
+ if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
return false;
}
@@ -1565,16 +1634,16 @@
emit_opcode(cbuf,0x66);
%}
- enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
+ enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
- enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
+ enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
emit_opcode(cbuf,$opcode$$constant);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
- enc_class mov_r32_imm0( eRegI dst ) %{
+ enc_class mov_r32_imm0( rRegI dst ) %{
emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
emit_d32 ( cbuf, 0x0 ); // imm32==0x0
%}
@@ -1621,7 +1690,7 @@
%}
// Dense encoding for older common ops
- enc_class Opc_plus(immI opcode, eRegI reg) %{
+ enc_class Opc_plus(immI opcode, rRegI reg) %{
emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
%}
@@ -1637,7 +1706,7 @@
}
%}
- enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
+ enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
// Emit primary opcode and set sign-extend bit
// Check for 8-bit immediate, and set sign extend bit in opcode
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
@@ -1682,7 +1751,7 @@
else emit_d32(cbuf,con);
%}
- enc_class OpcSReg (eRegI dst) %{ // BSWAP
+ enc_class OpcSReg (rRegI dst) %{ // BSWAP
emit_cc(cbuf, $secondary, $dst$$reg );
%}
@@ -1700,7 +1769,7 @@
emit_rm(cbuf, 0x3, destlo, desthi);
%}
- enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
+ enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
emit_rm(cbuf, 0x3, $secondary, $div$$reg );
%}
@@ -1891,20 +1960,20 @@
// runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}
- enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
+ enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR
$$$emit8$primary;
emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
$$$emit8$shift$$constant;
%}
- enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
+ enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate
// Load immediate does not have a zero or sign extended version
// for 8-bit immediates
emit_opcode(cbuf, 0xB8 + $dst$$reg);
$$$emit32$src$$constant;
%}
- enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
+ enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate
// Load immediate does not have a zero or sign extended version
// for 8-bit immediates
emit_opcode(cbuf, $primary + $dst$$reg);
@@ -1943,15 +2012,15 @@
// Encode a reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_Copy( eRegI dst, eRegI src ) %{
+ enc_class enc_Copy( rRegI dst, rRegI src ) %{
encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}
- enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
+ enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}
- enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
+ enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
@@ -1973,7 +2042,7 @@
emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}
- enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
+ enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}
@@ -2068,7 +2137,7 @@
cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
%}
- enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
+ enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem
int reg_encoding = $ereg$$reg;
int base = $mem$$base;
int index = $mem$$index;
@@ -2132,7 +2201,7 @@
// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
- enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
+ enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
emit_opcode(cbuf,$opcode$$constant);
int reg_encoding = $rm_reg$$reg;
int base = $mem$$base;
@@ -2168,7 +2237,7 @@
encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
%}
- enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
+ enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea
int reg_encoding = $dst$$reg;
int base = $src0$$reg; // 0xFFFFFFFF indicates no base
int index = 0x04; // 0x04 indicates no index
@@ -2178,7 +2247,7 @@
encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
%}
- enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
+ enc_class min_enc (rRegI dst, rRegI src) %{ // MIN
// Compare dst,src
emit_opcode(cbuf,0x3B);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2190,7 +2259,7 @@
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}
- enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
+ enc_class max_enc (rRegI dst, rRegI src) %{ // MAX
// Compare dst,src
emit_opcode(cbuf,0x3B);
emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2221,7 +2290,7 @@
encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
%}
- enc_class neg_reg(eRegI dst) %{
+ enc_class neg_reg(rRegI dst) %{
// NEG $dst
emit_opcode(cbuf,0xF7);
emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
@@ -2251,7 +2320,7 @@
emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}
- enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
+ enc_class enc_cmpLTP_mem(rRegI p, rRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
int tmpReg = $tmp$$reg;
// SUB $p,$q
@@ -2390,12 +2459,12 @@
%}
// Special case for moving an integer register to a stack slot.
- enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+ enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}
// Special case for moving a register to a stack slot.
- enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+ enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
// Opcode already emitted
emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
@@ -2640,7 +2709,7 @@
// equal_result = 0;
// nan_result = -1;
- enc_class CmpF_Result(eRegI dst) %{
+ enc_class CmpF_Result(rRegI dst) %{
// fnstsw_ax();
emit_opcode( cbuf, 0xDF);
emit_opcode( cbuf, 0xE0);
@@ -2685,7 +2754,7 @@
// done:
%}
- enc_class convert_int_long( regL dst, eRegI src ) %{
+ enc_class convert_int_long( regL dst, rRegI src ) %{
// mov $dst.lo,$src
int dst_encoding = $dst$$reg;
int src_encoding = $src$$reg;
@@ -2754,7 +2823,7 @@
emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}
- enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
+ enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
// Basic idea: lo(result) = lo(x_lo * y_lo)
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
// MOV $tmp,$src.lo
@@ -2780,7 +2849,7 @@
emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}
- enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
+ enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
// Basic idea: lo(result) = lo(src * y_lo)
// hi(result) = hi(src * y_lo) + lo(src * y_hi)
// IMUL $tmp,EDX,$src
@@ -2836,7 +2905,7 @@
emit_d8(cbuf, 4*4);
%}
- enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
+ enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
// MOV $tmp,$src.lo
emit_opcode(cbuf, 0x8B);
emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
@@ -2857,7 +2926,7 @@
emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}
- enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
+ enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
// CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
emit_opcode( cbuf, 0x3B );
emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
@@ -2869,7 +2938,7 @@
emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}
- enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
+ enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
// XOR $tmp,$tmp
emit_opcode(cbuf,0x33); // XOR
emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
@@ -3762,9 +3831,9 @@
// in SSE2+ mode we want to keep the FPU stack clean so pretend
// that C functions return float and double results in XMM0.
if( ideal_reg == Op_RegD && UseSSE>=2 )
- return OptoRegPair(XMM0b_num,XMM0a_num);
+ return OptoRegPair(XMM0b_num,XMM0_num);
if( ideal_reg == Op_RegF && UseSSE>=2 )
- return OptoRegPair(OptoReg::Bad,XMM0a_num);
+ return OptoRegPair(OptoReg::Bad,XMM0_num);
return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
%}
@@ -3775,9 +3844,9 @@
static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
if( ideal_reg == Op_RegD && UseSSE>=2 )
- return OptoRegPair(XMM0b_num,XMM0a_num);
+ return OptoRegPair(XMM0b_num,XMM0_num);
if( ideal_reg == Op_RegF && UseSSE>=1 )
- return OptoRegPair(OptoReg::Bad,XMM0a_num);
+ return OptoRegPair(OptoReg::Bad,XMM0_num);
return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
%}
@@ -4147,8 +4216,8 @@
// Register Operands
// Integer Register
-operand eRegI() %{
- constraint(ALLOC_IN_RC(e_reg));
+operand rRegI() %{
+ constraint(ALLOC_IN_RC(int_reg));
match(RegI);
match(xRegI);
match(eAXRegI);
@@ -4163,8 +4232,8 @@
%}
// Subset of Integer Register
-operand xRegI(eRegI reg) %{
- constraint(ALLOC_IN_RC(x_reg));
+operand xRegI(rRegI reg) %{
+ constraint(ALLOC_IN_RC(int_x_reg));
match(reg);
match(eAXRegI);
match(eBXRegI);
@@ -4179,7 +4248,7 @@
operand eAXRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(eax_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "EAX" %}
interface(REG_INTER);
@@ -4189,7 +4258,7 @@
operand eBXRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(ebx_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "EBX" %}
interface(REG_INTER);
@@ -4198,7 +4267,7 @@
operand eCXRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(ecx_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "ECX" %}
interface(REG_INTER);
@@ -4207,7 +4276,7 @@
operand eDXRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(edx_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "EDX" %}
interface(REG_INTER);
@@ -4216,7 +4285,7 @@
operand eDIRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(edi_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "EDI" %}
interface(REG_INTER);
@@ -4263,7 +4332,7 @@
operand eSIRegI(xRegI reg) %{
constraint(ALLOC_IN_RC(esi_reg));
match(reg);
- match(eRegI);
+ match(rRegI);
format %{ "ESI" %}
interface(REG_INTER);
@@ -4284,7 +4353,7 @@
%}
operand eRegP() %{
- constraint(ALLOC_IN_RC(e_reg));
+ constraint(ALLOC_IN_RC(int_reg));
match(RegP);
match(eAXRegP);
match(eBXRegP);
@@ -4297,7 +4366,7 @@
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
- constraint(ALLOC_IN_RC(e_reg_no_rbp));
+ constraint(ALLOC_IN_RC(int_reg_no_rbp));
match(RegP);
match(eAXRegP);
match(eBXRegP);
@@ -4477,7 +4546,7 @@
// Float register operands
operand regDPR() %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(dbl_reg));
+ constraint(ALLOC_IN_RC(fp_dbl_reg));
match(RegD);
match(regDPR1);
match(regDPR2);
@@ -4487,7 +4556,7 @@
operand regDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(dbl_reg0));
+ constraint(ALLOC_IN_RC(fp_dbl_reg0));
match(reg);
format %{ "FPR1" %}
interface(REG_INTER);
@@ -4495,7 +4564,7 @@
operand regDPR2(regDPR reg) %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(dbl_reg1));
+ constraint(ALLOC_IN_RC(fp_dbl_reg1));
match(reg);
format %{ "FPR2" %}
interface(REG_INTER);
@@ -4503,45 +4572,16 @@
operand regnotDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(dbl_notreg0));
+ constraint(ALLOC_IN_RC(fp_dbl_notreg0));
match(reg);
format %{ %}
interface(REG_INTER);
%}
-// XMM Double register operands
-operand regD() %{
- predicate( UseSSE>=2 );
- constraint(ALLOC_IN_RC(xdb_reg));
- match(RegD);
- match(regD6);
- match(regD7);
- format %{ %}
- interface(REG_INTER);
-%}
-
-// XMM6 double register operands
-operand regD6(regD reg) %{
- predicate( UseSSE>=2 );
- constraint(ALLOC_IN_RC(xdb_reg6));
- match(reg);
- format %{ "XMM6" %}
- interface(REG_INTER);
-%}
-
-// XMM7 double register operands
-operand regD7(regD reg) %{
- predicate( UseSSE>=2 );
- constraint(ALLOC_IN_RC(xdb_reg7));
- match(reg);
- format %{ "XMM7" %}
- interface(REG_INTER);
-%}
-
// Float register operands
operand regFPR() %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(flt_reg));
+ constraint(ALLOC_IN_RC(fp_flt_reg));
match(RegF);
match(regFPR1);
format %{ %}
@@ -4551,21 +4591,30 @@
// Float register operands
operand regFPR1(regFPR reg) %{
predicate( UseSSE < 2 );
- constraint(ALLOC_IN_RC(flt_reg0));
+ constraint(ALLOC_IN_RC(fp_flt_reg0));
match(reg);
format %{ "FPR1" %}
interface(REG_INTER);
%}
-// XMM register operands
+// XMM Float register operands
operand regF() %{
predicate( UseSSE>=1 );
- constraint(ALLOC_IN_RC(xmm_reg));
+ constraint(ALLOC_IN_RC(float_reg));
match(RegF);
format %{ %}
interface(REG_INTER);
%}
+// XMM Double register operands: RegD values allocated from the double_reg class, only when UseSSE >= 2
+operand regD() %{
+ predicate( UseSSE>=2 );
+ constraint(ALLOC_IN_RC(double_reg));
+ match(RegD);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
@@ -4583,7 +4632,7 @@
// Indirect Memory Operand
operand indirect(eRegP reg) %{
- constraint(ALLOC_IN_RC(e_reg));
+ constraint(ALLOC_IN_RC(int_reg));
match(reg);
format %{ "[$reg]" %}
@@ -4622,7 +4671,7 @@
%}
// Indirect Memory Plus Long Offset Operand
-operand indOffset32X(eRegI reg, immP off) %{
+operand indOffset32X(rRegI reg, immP off) %{
match(AddP off reg);
format %{ "[$reg + $off]" %}
@@ -4635,7 +4684,7 @@
%}
// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
+operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
match(AddP (AddP reg ireg) off);
op_cost(10);
@@ -4649,7 +4698,7 @@
%}
// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndex(eRegP reg, eRegI ireg) %{
+operand indIndex(eRegP reg, rRegI ireg) %{
match(AddP reg ireg);
op_cost(10);
@@ -4667,7 +4716,7 @@
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
-// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
+// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
// match(AddP off (LShiftI ireg scale));
//
// op_cost(10);
@@ -4681,7 +4730,7 @@
// %}
// Indirect Memory Times Scale Plus Index Register
-operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
+operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
match(AddP reg (LShiftI ireg scale));
op_cost(10);
@@ -4695,7 +4744,7 @@
%}
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
+operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
match(AddP (AddP reg (LShiftI ireg scale)) off);
op_cost(10);
@@ -4823,7 +4872,7 @@
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
- constraint(ALLOC_IN_RC(e_reg));
+ constraint(ALLOC_IN_RC(int_reg));
match(reg);
op_cost(100);
@@ -4867,7 +4916,7 @@
%}
// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
+operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
match(AddP (AddP reg ireg) off);
@@ -4882,7 +4931,7 @@
%}
// Indirect Memory Times Scale Plus Index Register
-operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
+operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
match(AddP reg (LShiftI ireg scale));
@@ -4897,7 +4946,7 @@
%}
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
+operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
match(AddP (AddP reg (LShiftI ireg scale)) off);
@@ -5086,7 +5135,7 @@
// Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
-pipe_class ialu_reg(eRegI dst) %{
+pipe_class ialu_reg(rRegI dst) %{
single_instruction;
dst : S4(write);
dst : S3(read);
@@ -5104,7 +5153,7 @@
%}
// Integer ALU reg operation using big decoder
-pipe_class ialu_reg_fat(eRegI dst) %{
+pipe_class ialu_reg_fat(rRegI dst) %{
single_instruction;
dst : S4(write);
dst : S3(read);
@@ -5122,7 +5171,7 @@
%}
// Integer ALU reg-reg operation
-pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -5140,7 +5189,7 @@
%}
// Integer ALU reg-reg operation
-pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
+pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -5158,7 +5207,7 @@
%}
// Integer ALU reg-mem operation
-pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
single_instruction;
dst : S5(write);
mem : S3(read);
@@ -5187,7 +5236,7 @@
%}
// Integer Store to Memory
-pipe_class ialu_mem_reg(memory mem, eRegI src) %{
+pipe_class ialu_mem_reg(memory mem, rRegI src) %{
single_instruction;
mem : S3(read);
src : S5(read);
@@ -5216,7 +5265,7 @@
%}
// Integer ALU0 reg-reg operation
-pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -5225,7 +5274,7 @@
%}
// Integer ALU0 reg-mem operation
-pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
single_instruction;
dst : S5(write);
mem : S3(read);
@@ -5235,7 +5284,7 @@
%}
// Integer ALU reg-reg operation
-pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
+pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
single_instruction;
cr : S4(write);
src1 : S3(read);
@@ -5245,7 +5294,7 @@
%}
// Integer ALU reg-imm operation
-pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
+pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
single_instruction;
cr : S4(write);
src1 : S3(read);
@@ -5254,7 +5303,7 @@
%}
// Integer ALU reg-mem operation
-pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
+pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
single_instruction;
cr : S4(write);
src1 : S3(read);
@@ -5265,7 +5314,7 @@
%}
// Conditional move reg-reg
-pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
+pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
instruction_count(4);
y : S4(read);
q : S3(read);
@@ -5274,7 +5323,7 @@
%}
// Conditional move reg-reg
-pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
+pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -5283,7 +5332,7 @@
%}
// Conditional move reg-mem
-pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
+pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -5534,7 +5583,7 @@
// in the encode section of the architecture description.
//----------BSWAP-Instruction--------------------------------------------------
-instruct bytes_reverse_int(eRegI dst) %{
+instruct bytes_reverse_int(rRegI dst) %{
match(Set dst (ReverseBytesI dst));
format %{ "BSWAP $dst" %}
@@ -5555,7 +5604,7 @@
ins_pipe( ialu_reg_reg);
%}
-instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
match(Set dst (ReverseBytesUS dst));
effect(KILL cr);
@@ -5568,7 +5617,7 @@
ins_pipe( ialu_reg );
%}
-instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
match(Set dst (ReverseBytesS dst));
effect(KILL cr);
@@ -5584,7 +5633,7 @@
//---------- Zeros Count Instructions ------------------------------------------
-instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
predicate(UseCountLeadingZerosInstruction);
match(Set dst (CountLeadingZerosI src));
effect(KILL cr);
@@ -5596,7 +5645,7 @@
ins_pipe(ialu_reg);
%}
-instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
predicate(!UseCountLeadingZerosInstruction);
match(Set dst (CountLeadingZerosI src));
effect(KILL cr);
@@ -5621,7 +5670,7 @@
ins_pipe(ialu_reg);
%}
-instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
predicate(UseCountLeadingZerosInstruction);
match(Set dst (CountLeadingZerosL src));
effect(TEMP dst, KILL cr);
@@ -5644,7 +5693,7 @@
ins_pipe(ialu_reg);
%}
-instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
predicate(!UseCountLeadingZerosInstruction);
match(Set dst (CountLeadingZerosL src));
effect(TEMP dst, KILL cr);
@@ -5680,7 +5729,7 @@
ins_pipe(ialu_reg);
%}
-instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (CountTrailingZerosI src));
effect(KILL cr);
@@ -5699,7 +5748,7 @@
ins_pipe(ialu_reg);
%}
-instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
match(Set dst (CountTrailingZerosL src));
effect(TEMP dst, KILL cr);
@@ -5731,7 +5780,7 @@
//---------- Population Count Instructions -------------------------------------
-instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
effect(KILL cr);
@@ -5743,7 +5792,7 @@
ins_pipe(ialu_reg);
%}
-instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
+instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI (LoadI mem)));
effect(KILL cr);
@@ -5756,7 +5805,7 @@
%}
// Note: Long.bitCount(long) returns an int.
-instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5773,7 +5822,7 @@
%}
// Note: Long.bitCount(long) returns an int.
-instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL (LoadL mem)));
effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5877,7 +5926,7 @@
%}
// Load Short (16bit signed)
-instruct loadS(eRegI dst, memory mem) %{
+instruct loadS(rRegI dst, memory mem) %{
match(Set dst (LoadS mem));
ins_cost(125);
@@ -5891,7 +5940,7 @@
%}
// Load Short (16 bit signed) to Byte (8 bit signed)
-instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
ins_cost(125);
@@ -5922,7 +5971,7 @@
%}
// Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(eRegI dst, memory mem) %{
+instruct loadUS(rRegI dst, memory mem) %{
match(Set dst (LoadUS mem));
ins_cost(125);
@@ -5936,7 +5985,7 @@
%}
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
-instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
ins_cost(125);
@@ -5997,7 +6046,7 @@
%}
// Load Integer
-instruct loadI(eRegI dst, memory mem) %{
+instruct loadI(rRegI dst, memory mem) %{
match(Set dst (LoadI mem));
ins_cost(125);
@@ -6011,7 +6060,7 @@
%}
// Load Integer (32 bit signed) to Byte (8 bit signed)
-instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
ins_cost(125);
@@ -6023,7 +6072,7 @@
%}
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
-instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
+instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
match(Set dst (AndI (LoadI mem) mask));
ins_cost(125);
@@ -6035,7 +6084,7 @@
%}
// Load Integer (32 bit signed) to Short (16 bit signed)
-instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
+instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
ins_cost(125);
@@ -6047,7 +6096,7 @@
%}
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
-instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
+instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
match(Set dst (AndI (LoadI mem) mask));
ins_cost(125);
@@ -6208,7 +6257,7 @@
%}
// Load Range
-instruct loadRange(eRegI dst, memory mem) %{
+instruct loadRange(rRegI dst, memory mem) %{
match(Set dst (LoadRange mem));
ins_cost(125);
@@ -6305,66 +6354,6 @@
ins_pipe( fpu_reg_mem );
%}
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (Load8B mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (Load4S mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (Load4C mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (Load2I mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (Load2F mem));
- ins_cost(145);
- format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
match(Set dst mem);
@@ -6417,7 +6406,7 @@
%}
// Load Constant
-instruct loadConI(eRegI dst, immI src) %{
+instruct loadConI(rRegI dst, immI src) %{
match(Set dst src);
format %{ "MOV $dst,$src" %}
@@ -6426,7 +6415,7 @@
%}
// Load Constant zero
-instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
+instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
match(Set dst src);
effect(KILL cr);
@@ -6594,7 +6583,7 @@
%}
// Load Stack Slot
-instruct loadSSI(eRegI dst, stackSlotI src) %{
+instruct loadSSI(rRegI dst, stackSlotI src) %{
match(Set dst src);
ins_cost(125);
@@ -6821,7 +6810,7 @@
%}
// Store Char/Short
-instruct storeC(memory mem, eRegI src) %{
+instruct storeC(memory mem, rRegI src) %{
match(Set mem (StoreC mem src));
ins_cost(125);
@@ -6832,7 +6821,7 @@
%}
// Store Integer
-instruct storeI(memory mem, eRegI src) %{
+instruct storeI(memory mem, rRegI src) %{
match(Set mem (StoreI mem src));
ins_cost(125);
@@ -6976,42 +6965,6 @@
ins_pipe( ialu_mem_imm );
%}
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
- predicate(UseSSE>=1);
- match(Set mem (Store8B mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
- predicate(UseSSE>=1);
- match(Set mem (Store4C mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
- predicate(UseSSE>=1);
- match(Set mem (Store2I mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
match(Set mem (StoreCM mem src));
@@ -7073,18 +7026,6 @@
ins_pipe( pipe_slow );
%}
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
- predicate(UseSSE>=1);
- match(Set mem (Store2F mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
@@ -7146,7 +7087,7 @@
%}
// Store Integer to stack slot
-instruct storeSSI(stackSlotI dst, eRegI src) %{
+instruct storeSSI(stackSlotI dst, rRegI src) %{
match(Set dst src);
ins_cost(100);
@@ -7271,7 +7212,7 @@
ins_pipe(empty);
%}
-instruct castP2X(eRegI dst, eRegP src ) %{
+instruct castP2X(rRegI dst, eRegP src ) %{
match(Set dst (CastP2X src));
ins_cost(50);
format %{ "MOV $dst, $src\t# CastP2X" %}
@@ -7281,7 +7222,7 @@
//----------Conditional Move---------------------------------------------------
// Conditional move
-instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
+instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
predicate(!VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7298,7 +7239,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
+instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
predicate(!VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7315,7 +7256,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
+instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7325,7 +7266,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7335,7 +7276,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -7345,7 +7286,7 @@
%}
// Conditional move
-instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
+instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
ins_cost(250);
@@ -7356,7 +7297,7 @@
%}
// Conditional move
-instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
+instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
ins_cost(250);
@@ -7366,7 +7307,7 @@
ins_pipe( pipe_cmov_mem );
%}
-instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
+instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
ins_cost(250);
@@ -7620,7 +7561,7 @@
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
-instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (AddI dst src));
effect(KILL cr);
@@ -7631,7 +7572,7 @@
ins_pipe( ialu_reg_reg );
%}
-instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (AddI dst src));
effect(KILL cr);
@@ -7641,7 +7582,7 @@
ins_pipe( ialu_reg );
%}
-instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
predicate(UseIncDec);
match(Set dst (AddI dst src));
effect(KILL cr);
@@ -7653,7 +7594,7 @@
ins_pipe( ialu_reg );
%}
-instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
+instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
match(Set dst (AddI src0 src1));
ins_cost(110);
@@ -7673,7 +7614,7 @@
ins_pipe( ialu_reg_reg );
%}
-instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
+instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
predicate(UseIncDec);
match(Set dst (AddI dst src));
effect(KILL cr);
@@ -7685,7 +7626,7 @@
ins_pipe( ialu_reg );
%}
-instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
+instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
match(Set dst (AddP dst src));
effect(KILL cr);
@@ -7707,7 +7648,7 @@
ins_pipe( ialu_reg );
%}
-instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (AddI dst (LoadI src)));
effect(KILL cr);
@@ -7718,7 +7659,7 @@
ins_pipe( ialu_reg_mem );
%}
-instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
effect(KILL cr);
@@ -7780,7 +7721,7 @@
ins_pipe( empty );
%}
-instruct castII( eRegI dst ) %{
+instruct castII( rRegI dst ) %{
match(Set dst (CastII dst));
format %{ "#castII of $dst" %}
ins_encode( /*empty encoding*/ );
@@ -7814,7 +7755,7 @@
// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
-instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
+instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
match(Set cr (StoreIConditional mem (Binary oldval newval)));
effect(KILL oldval);
format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
@@ -7847,7 +7788,7 @@
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
-instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7860,7 +7801,7 @@
ins_pipe( pipe_cmpxchg );
%}
-instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
+instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7872,7 +7813,7 @@
ins_pipe( pipe_cmpxchg );
%}
-instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
+instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
effect(KILL cr, KILL oldval);
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7886,7 +7827,7 @@
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
-instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (SubI dst src));
effect(KILL cr);
@@ -7897,7 +7838,7 @@
ins_pipe( ialu_reg_reg );
%}
-instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (SubI dst src));
effect(KILL cr);
@@ -7908,7 +7849,7 @@
ins_pipe( ialu_reg );
%}
-instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (SubI dst (LoadI src)));
effect(KILL cr);
@@ -7919,7 +7860,7 @@
ins_pipe( ialu_reg_mem );
%}
-instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (SubI (LoadI dst) src)));
effect(KILL cr);
@@ -7931,7 +7872,7 @@
%}
// Subtract from a pointer
-instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
+instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
match(Set dst (AddP dst (SubI zero src)));
effect(KILL cr);
@@ -7942,7 +7883,7 @@
ins_pipe( ialu_reg_reg );
%}
-instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
+instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
match(Set dst (SubI zero dst));
effect(KILL cr);
@@ -7957,7 +7898,7 @@
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
-instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (MulI dst src));
effect(KILL cr);
@@ -7970,7 +7911,7 @@
%}
// Multiply 32-bit Immediate
-instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
+instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
match(Set dst (MulI src imm));
effect(KILL cr);
@@ -8026,7 +7967,7 @@
%}
// Multiply Memory 32-bit Immediate
-instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
+instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
match(Set dst (MulI (LoadI src) imm));
effect(KILL cr);
@@ -8038,7 +7979,7 @@
%}
// Multiply Memory
-instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
+instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (MulI dst (LoadI src)));
effect(KILL cr);
@@ -8075,7 +8016,7 @@
%}
// Multiply Register Long
-instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
ins_cost(4*100+3*400);
@@ -8093,7 +8034,7 @@
%}
// Multiply Register Long where the left operand's high 32 bits are zero
-instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
predicate(is_operand_hi32_zero(n->in(1)));
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
@@ -8114,7 +8055,7 @@
%}
// Multiply Register Long where the right operand's high 32 bits are zero
-instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
predicate(is_operand_hi32_zero(n->in(2)));
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
@@ -8150,7 +8091,7 @@
%}
// Multiply Register Long by small constant
-instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
match(Set dst (MulL dst src));
effect(KILL cr, TEMP tmp);
ins_cost(2*100+2*400);
@@ -8248,7 +8189,7 @@
%}
// Divide Register Long (no special case since divisor != -1)
-instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
match(Set dst (DivL dst imm));
effect( TEMP tmp, TEMP tmp2, KILL cr );
ins_cost(1000);
@@ -8319,7 +8260,7 @@
%}
// Remainder Register Long (remainder fit into 32 bits)
-instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
match(Set dst (ModL dst imm));
effect( TEMP tmp, TEMP tmp2, KILL cr );
ins_cost(1000);
@@ -8387,7 +8328,7 @@
// Integer Shift Instructions
// Shift Left by one
-instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@@ -8399,7 +8340,7 @@
%}
// Shift Left by 8-bit immediate
-instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@@ -8411,7 +8352,7 @@
%}
// Shift Left by variable
-instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@@ -8423,7 +8364,7 @@
%}
// Arithmetic shift right by one
-instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@@ -8445,7 +8386,7 @@
%}
// Arithmetic Shift Right by 8-bit immediate
-instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@@ -8468,7 +8409,7 @@
%}
// Arithmetic Shift Right by variable
-instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@@ -8480,7 +8421,7 @@
%}
// Logical shift right by one
-instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@@ -8492,7 +8433,7 @@
%}
// Logical Shift Right by 8-bit immediate
-instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@@ -8506,7 +8447,7 @@
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
-instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
+instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
size(3);
@@ -8519,7 +8460,7 @@
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
-instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
+instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
size(3);
@@ -8532,7 +8473,7 @@
// Logical Shift Right by variable
-instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@@ -8548,7 +8489,7 @@
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
-instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (AndI dst src));
effect(KILL cr);
@@ -8560,7 +8501,7 @@
%}
// And Register with Immediate
-instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (AndI dst src));
effect(KILL cr);
@@ -8572,7 +8513,7 @@
%}
// And Register with Memory
-instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (AndI dst (LoadI src)));
effect(KILL cr);
@@ -8584,7 +8525,7 @@
%}
// And Memory with Register
-instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (AndI (LoadI dst) src)));
effect(KILL cr);
@@ -8610,7 +8551,7 @@
// Or Instructions
// Or Register with Register
-instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (OrI dst src));
effect(KILL cr);
@@ -8621,7 +8562,7 @@
ins_pipe( ialu_reg_reg );
%}
-instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
+instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
match(Set dst (OrI dst (CastP2X src)));
effect(KILL cr);
@@ -8634,7 +8575,7 @@
// Or Register with Immediate
-instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (OrI dst src));
effect(KILL cr);
@@ -8646,7 +8587,7 @@
%}
// Or Register with Memory
-instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (OrI dst (LoadI src)));
effect(KILL cr);
@@ -8658,7 +8599,7 @@
%}
// Or Memory with Register
-instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (OrI (LoadI dst) src)));
effect(KILL cr);
@@ -8684,7 +8625,7 @@
// ROL/ROR
// ROL expand
-instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROL $dst, $shift" %}
@@ -8693,7 +8634,7 @@
ins_pipe( ialu_reg );
%}
-instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROL $dst, $shift" %}
@@ -8713,7 +8654,7 @@
// end of ROL expand
// ROL 32bit by one once
-instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
expand %{
@@ -8722,7 +8663,7 @@
%}
// ROL 32bit var by imm8 once
-instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
@@ -8750,7 +8691,7 @@
%}
// ROR expand
-instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROR $dst, $shift" %}
@@ -8759,7 +8700,7 @@
ins_pipe( ialu_reg );
%}
-instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
effect (USE_DEF dst, USE shift, KILL cr);
format %{ "ROR $dst, $shift" %}
@@ -8779,7 +8720,7 @@
// end of ROR expand
// ROR right once
-instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
expand %{
@@ -8788,7 +8729,7 @@
%}
// ROR 32bit by immI8 once
-instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
@@ -8817,7 +8758,7 @@
// Xor Instructions
// Xor Register with Register
-instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (XorI dst src));
effect(KILL cr);
@@ -8829,7 +8770,7 @@
%}
// Xor Register with Immediate -1
-instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
match(Set dst (XorI dst imm));
size(2);
@@ -8841,7 +8782,7 @@
%}
// Xor Register with Immediate
-instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (XorI dst src));
effect(KILL cr);
@@ -8853,7 +8794,7 @@
%}
// Xor Register with Memory
-instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
match(Set dst (XorI dst (LoadI src)));
effect(KILL cr);
@@ -8865,7 +8806,7 @@
%}
// Xor Memory with Register
-instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
match(Set dst (StoreI dst (XorI (LoadI dst) src)));
effect(KILL cr);
@@ -8890,14 +8831,14 @@
//----------Convert Int to Boolean---------------------------------------------
-instruct movI_nocopy(eRegI dst, eRegI src) %{
+instruct movI_nocopy(rRegI dst, rRegI src) %{
effect( DEF dst, USE src );
format %{ "MOV $dst,$src" %}
ins_encode( enc_Copy( dst, src) );
ins_pipe( ialu_reg_reg );
%}
-instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
effect( USE_DEF dst, USE src, KILL cr );
size(4);
@@ -8908,7 +8849,7 @@
ins_pipe( ialu_reg_reg_long );
%}
-instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
match(Set dst (Conv2B src));
expand %{
@@ -8917,14 +8858,14 @@
%}
%}
-instruct movP_nocopy(eRegI dst, eRegP src) %{
+instruct movP_nocopy(rRegI dst, eRegP src) %{
effect( DEF dst, USE src );
format %{ "MOV $dst,$src" %}
ins_encode( enc_Copy( dst, src) );
ins_pipe( ialu_reg_reg );
%}
-instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
effect( USE_DEF dst, USE src, KILL cr );
format %{ "NEG $dst\n\t"
"ADC $dst,$src" %}
@@ -8933,7 +8874,7 @@
ins_pipe( ialu_reg_reg_long );
%}
-instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
match(Set dst (Conv2B src));
expand %{
@@ -8958,7 +8899,7 @@
ins_pipe( pipe_slow );
%}
-instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
+instruct cmpLTMask0( rRegI dst, immI0 zero, eFlagsReg cr ) %{
match(Set dst (CmpLTMask dst zero));
effect( DEF dst, KILL cr );
ins_cost(100);
@@ -9430,7 +9371,7 @@
%}
// Compare vs zero into -1,0,1
-instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 zero));
effect(KILL cr, KILL rax);
@@ -9444,7 +9385,7 @@
%}
// Compare into -1,0,1
-instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr, KILL rax);
@@ -10222,7 +10163,7 @@
%}
// Compare vs zero into -1,0,1
-instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 zero));
effect(KILL cr, KILL rax);
@@ -10236,7 +10177,7 @@
%}
// Compare into -1,0,1
-instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr, KILL rax);
@@ -11156,7 +11097,7 @@
ins_pipe( fpu_reg_mem );
%}
-instruct convI2D_reg(regD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, rRegI src) %{
predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %}
@@ -11176,7 +11117,7 @@
ins_pipe( pipe_slow );
%}
-instruct convXI2D_reg(regD dst, eRegI src)
+instruct convXI2D_reg(regD dst, rRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2D );
match(Set dst (ConvI2D src));
@@ -11264,7 +11205,7 @@
%}
// Convert an int to a float in xmm; no rounding step needed.
-instruct convI2F_reg(regF dst, eRegI src) %{
+instruct convI2F_reg(regF dst, rRegI src) %{
predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %}
@@ -11274,7 +11215,7 @@
ins_pipe( pipe_slow );
%}
- instruct convXI2F_reg(regF dst, eRegI src)
+ instruct convXI2F_reg(regF dst, rRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2F );
match(Set dst (ConvI2F src));
@@ -11288,7 +11229,7 @@
ins_pipe(pipe_slow); // XXX
%}
-instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
+instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
match(Set dst (ConvI2L src));
effect(KILL cr);
ins_cost(375);
@@ -11300,7 +11241,7 @@
%}
// Zero-extend convert int to long
-instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
+instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
match(Set dst (AndL (ConvI2L src) mask) );
effect( KILL flags );
ins_cost(250);
@@ -11380,7 +11321,7 @@
ins_pipe( pipe_slow );
%}
-instruct convL2I_reg( eRegI dst, eRegL src ) %{
+instruct convL2I_reg( rRegI dst, eRegL src ) %{
match(Set dst (ConvL2I src));
effect( DEF dst, USE src );
format %{ "MOV $dst,$src.lo" %}
@@ -11389,7 +11330,7 @@
%}
-instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
+instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(100);
@@ -11424,7 +11365,7 @@
ins_pipe( pipe_slow );
%}
-instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
+instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
@@ -11436,7 +11377,7 @@
ins_pipe( pipe_slow );
%}
-instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
+instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
@@ -11476,7 +11417,7 @@
ins_pipe( pipe_slow );
%}
-instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
+instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
predicate(UseSSE>=2);
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
@@ -11610,186 +11551,6 @@
ins_pipe( pipe_slow );
%}
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate8B src));
- format %{ "MOVDQA $dst,$src\n\t"
- "PUNPCKLBW $dst,$dst\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode %{
- if ($dst$$reg != $src$$reg) {
- __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
- }
- __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regD dst, eRegI src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate8B src));
- format %{ "MOVD $dst,$src\n\t"
- "PUNPCKLBW $dst,$dst\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate8B zero));
- format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4S src));
- format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode %{
- __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_eRegI(regD dst, eRegI src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4S src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4S zero));
- format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4C src));
- format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode %{
- __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regD dst, eRegI src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4C src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate4C zero));
- format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2I src));
- format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regD dst, eRegI src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2I src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (2 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2I zero));
- format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2F src));
- format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2F src));
- format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
- predicate(UseSSE>=2);
- match(Set dst (Replicate2F zero));
- format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
// =======================================================================
// fast clearing of an array
@@ -11898,7 +11659,7 @@
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
-instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
+instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
match(Set cr (CmpI op1 op2));
effect( DEF cr, USE op1, USE op2 );
format %{ "CMP $op1,$op2" %}
@@ -11907,7 +11668,7 @@
ins_pipe( ialu_cr_reg_reg );
%}
-instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
+instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
match(Set cr (CmpI op1 op2));
effect( DEF cr, USE op1 );
format %{ "CMP $op1,$op2" %}
@@ -11918,7 +11679,7 @@
%}
// Cisc-spilled version of cmpI_eReg
-instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
+instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
match(Set cr (CmpI op1 (LoadI op2)));
format %{ "CMP $op1,$op2" %}
@@ -11928,7 +11689,7 @@
ins_pipe( ialu_cr_reg_mem );
%}
-instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
+instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
match(Set cr (CmpI src zero));
effect( DEF cr, USE src );
@@ -11938,7 +11699,7 @@
ins_pipe( ialu_cr_reg_imm );
%}
-instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
+instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
match(Set cr (CmpI (AndI src con) zero));
format %{ "TEST $src,$con" %}
@@ -11947,7 +11708,7 @@
ins_pipe( ialu_cr_reg_imm );
%}
-instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
+instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
match(Set cr (CmpI (AndI src mem) zero));
format %{ "TEST $src,$mem" %}
@@ -11958,7 +11719,7 @@
// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
-instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
+instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
match(Set cr (CmpU op1 op2));
format %{ "CMPu $op1,$op2" %}
@@ -11967,7 +11728,7 @@
ins_pipe( ialu_cr_reg_reg );
%}
-instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
+instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
match(Set cr (CmpU op1 op2));
format %{ "CMPu $op1,$op2" %}
@@ -11977,7 +11738,7 @@
%}
// // Cisc-spilled version of cmpU_eReg
-instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
+instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
match(Set cr (CmpU op1 (LoadI op2)));
format %{ "CMPu $op1,$op2" %}
@@ -11988,7 +11749,7 @@
%}
// // Cisc-spilled version of cmpU_eReg
-//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
+//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
// match(Set cr (CmpU (LoadI op1) op2));
//
// format %{ "CMPu $op1,$op2" %}
@@ -11997,7 +11758,7 @@
// ins_encode( OpcP, RegMem( op1, op2) );
//%}
-instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
+instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
match(Set cr (CmpU src zero));
format %{ "TESTu $src,$src" %}
@@ -12093,7 +11854,7 @@
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
-//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
// effect( USE_DEF op2, USE op1, USE cr );
// format %{ "CMOVlt $op2,$op1\t! min" %}
// opcode(0x4C,0x0F);
@@ -12102,7 +11863,7 @@
//%}
//
//// Min Register with Register (P6 version)
-//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
// predicate(VM_Version::supports_cmov() );
// match(Set op2 (MinI op1 op2));
// ins_cost(200);
@@ -12114,7 +11875,7 @@
//%}
// Min Register with Register (generic version)
-instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
match(Set dst (MinI dst src));
effect(KILL flags);
ins_cost(300);
@@ -12129,7 +11890,7 @@
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
-//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
// effect( USE_DEF op2, USE op1, USE cr );
// format %{ "CMOVgt $op2,$op1\t! max" %}
// opcode(0x4F,0x0F);
@@ -12138,7 +11899,7 @@
//%}
//
// // Max Register with Register (P6 version)
-//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
// predicate(VM_Version::supports_cmov() );
// match(Set op2 (MaxI op1 op2));
// ins_cost(200);
@@ -12150,7 +11911,7 @@
//%}
// Max Register with Register (generic version)
-instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
match(Set dst (MaxI dst src));
effect(KILL flags);
ins_cost(300);
@@ -12211,7 +11972,7 @@
// ============================================================================
// Branch Instructions
// Jump Table
-instruct jumpXtnd(eRegI switch_val) %{
+instruct jumpXtnd(rRegI switch_val) %{
match(Jump switch_val);
ins_cost(350);
format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
@@ -12629,7 +12390,7 @@
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
-instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
match( Set flags (CmpL src1 src2 ));
effect( TEMP tmp );
ins_cost(300);
@@ -12675,7 +12436,7 @@
%}
// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
@@ -12685,7 +12446,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
@@ -12746,7 +12507,7 @@
//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
-instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
match( Set flags (CmpL src zero ));
effect(TEMP tmp);
ins_cost(200);
@@ -12803,7 +12564,7 @@
%}
// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
@@ -12813,7 +12574,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
@@ -12875,7 +12636,7 @@
//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
-instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
match( Set flags (CmpL src zero ));
effect( TEMP tmp );
ins_cost(300);
@@ -12889,7 +12650,7 @@
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
-instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
match( Set flags (CmpL src1 src2 ));
effect( TEMP tmp );
ins_cost(300);
@@ -12936,7 +12697,7 @@
%}
// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
ins_cost(200);
@@ -12946,7 +12707,7 @@
ins_pipe( pipe_cmov_reg );
%}
-instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
ins_cost(250);
@@ -13275,11 +13036,11 @@
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
-// instruct movI(eRegI dst, eRegI src) %{
+// instruct movI(rRegI dst, rRegI src) %{
// match(Set dst (CopyI src));
// %}
//
-// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
// match(Set dst (AddI dst src));
// effect(KILL cr);
// %}
@@ -13324,11 +13085,11 @@
// %}
// // Change load of spilled value to only a spill
-// instruct storeI(memory mem, eRegI src) %{
+// instruct storeI(memory mem, rRegI src) %{
// match(Set mem (StoreI mem src));
// %}
//
-// instruct loadI(eRegI dst, memory mem) %{
+// instruct loadI(rRegI dst, memory mem) %{
// match(Set dst (LoadI mem));
// %}
//
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Thu Jun 28 10:35:28 2012 -0700
@@ -131,102 +131,6 @@
// Floating Point Registers
-// XMM registers. 128-bit registers or 4 words each, labeled (a)-d.
-// Word a in each register holds a Float, words ab hold a Double. We
-// currently do not use the SIMD capabilities, so registers cd are
-// unused at the moment.
-// XMM8-XMM15 must be encoded with REX.
-// Linux ABI: No register preserved across function calls
-// XMM0-XMM7 might hold parameters
-// Windows ABI: XMM6-XMM15 preserved across function calls
-// XMM0-XMM3 might hold parameters
-
-reg_def XMM0 (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0_H (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-
-reg_def XMM1 (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1_H (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-
-reg_def XMM2 (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2_H (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-
-reg_def XMM3 (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3_H (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-
-reg_def XMM4 (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4_H (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-
-reg_def XMM5 (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5_H (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-
-#ifdef _WIN64
-
-reg_def XMM6 (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
-
-reg_def XMM7 (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
-
-reg_def XMM8 (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
-
-reg_def XMM9 (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
-
-reg_def XMM10 (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11 (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12 (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13 (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14 (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15 (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#else
-
-reg_def XMM6 (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-
-reg_def XMM7 (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-
-reg_def XMM8 (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
-
-reg_def XMM9 (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
-
-reg_def XMM10 (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11 (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12 (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13 (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14 (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15 (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#endif // _WIN64
-
-reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
-
// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
@@ -252,26 +156,6 @@
R15, R15_H,
RSP, RSP_H);
-// XXX probably use 8-15 first on Linux
-alloc_class chunk1(XMM0, XMM0_H,
- XMM1, XMM1_H,
- XMM2, XMM2_H,
- XMM3, XMM3_H,
- XMM4, XMM4_H,
- XMM5, XMM5_H,
- XMM6, XMM6_H,
- XMM7, XMM7_H,
- XMM8, XMM8_H,
- XMM9, XMM9_H,
- XMM10, XMM10_H,
- XMM11, XMM11_H,
- XMM12, XMM12_H,
- XMM13, XMM13_H,
- XMM14, XMM14_H,
- XMM15, XMM15_H);
-
-alloc_class chunk2(RFLAGS);
-
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
@@ -501,46 +385,7 @@
// Singleton class for instruction pointer
// reg_class ip_reg(RIP);
-// Singleton class for condition codes
-reg_class int_flags(RFLAGS);
-
-// Class for all float registers
-reg_class float_reg(XMM0,
- XMM1,
- XMM2,
- XMM3,
- XMM4,
- XMM5,
- XMM6,
- XMM7,
- XMM8,
- XMM9,
- XMM10,
- XMM11,
- XMM12,
- XMM13,
- XMM14,
- XMM15);
-
-// Class for all double registers
-reg_class double_reg(XMM0, XMM0_H,
- XMM1, XMM1_H,
- XMM2, XMM2_H,
- XMM3, XMM3_H,
- XMM4, XMM4_H,
- XMM5, XMM5_H,
- XMM6, XMM6_H,
- XMM7, XMM7_H,
- XMM8, XMM8_H,
- XMM9, XMM9_H,
- XMM10, XMM10_H,
- XMM11, XMM11_H,
- XMM12, XMM12_H,
- XMM13, XMM13_H,
- XMM14, XMM14_H,
- XMM15, XMM15_H);
-%}
-
+%}
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
@@ -1027,12 +872,84 @@
return rc_float;
}
+// The next two functions are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+ int stack_offset, int reg, uint ireg, outputStream* st);
+
+static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, // Copy a vector stack slot from [rsp + src_offset] to [rsp + dst_offset].
+ int dst_offset, uint ireg, outputStream* st) { // ireg (Op_VecS/VecD/VecX/VecY) selects the copy width; st is only used when cbuf is NULL.
+ if (cbuf) { // a code buffer was supplied: emit the instructions into it
+ MacroAssembler _masm(cbuf);
+ switch (ireg) {
+ case Op_VecS: // 32-bit copy routed through rax
+ __ movq(Address(rsp, -8), rax); // save rax in the 8 bytes just below rsp
+ __ movl(rax, Address(rsp, src_offset));
+ __ movl(Address(rsp, dst_offset), rax);
+ __ movq(rax, Address(rsp, -8)); // restore rax
+ break;
+ case Op_VecD: // 64-bit copy: a single push/pop pair, no scratch register
+ __ pushq(Address(rsp, src_offset));
+ __ popq (Address(rsp, dst_offset));
+ break;
+ case Op_VecX: // 128-bit copy: two 64-bit push/pop pairs (low half, then +8)
+ __ pushq(Address(rsp, src_offset));
+ __ popq (Address(rsp, dst_offset));
+ __ pushq(Address(rsp, src_offset+8));
+ __ popq (Address(rsp, dst_offset+8));
+ break;
+ case Op_VecY: // 256-bit copy routed through xmm0
+ __ vmovdqu(Address(rsp, -32), xmm0); // save xmm0 in the 32 bytes just below rsp
+ __ vmovdqu(xmm0, Address(rsp, src_offset));
+ __ vmovdqu(Address(rsp, dst_offset), xmm0);
+ __ vmovdqu(xmm0, Address(rsp, -32)); // restore xmm0
+ break;
+ default: // any other ireg value is not handled by this helper
+ ShouldNotReachHere();
+ }
+#ifndef PRODUCT
+ } else { // no code buffer: print the same instruction sequences to st (non-PRODUCT builds only)
+ switch (ireg) {
+ case Op_VecS:
+ st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
+ "movl rax, [rsp + #%d]\n\t"
+ "movl [rsp + #%d], rax\n\t"
+ "movq rax, [rsp - #8]",
+ src_offset, dst_offset);
+ break;
+ case Op_VecD:
+ st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+ "popq [rsp + #%d]",
+ src_offset, dst_offset);
+ break;
+ case Op_VecX:
+ st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
+ "popq [rsp + #%d]\n\t"
+ "pushq [rsp + #%d]\n\t"
+ "popq [rsp + #%d]",
+ src_offset, dst_offset, src_offset+8, dst_offset+8);
+ break;
+ case Op_VecY:
+ st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+ "vmovdqu xmm0, [rsp + #%d]\n\t"
+ "vmovdqu [rsp + #%d], xmm0\n\t"
+ "vmovdqu xmm0, [rsp - #32]",
+ src_offset, dst_offset);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+#endif // !PRODUCT
+ }
+}
+
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
PhaseRegAlloc* ra_,
bool do_size,
- outputStream* st) const
-{
-
+ outputStream* st) const {
+ assert(cbuf != NULL || st != NULL, "sanity");
// Get registers to move
OptoReg::Name src_second = ra_->get_reg_second(in(1));
OptoReg::Name src_first = ra_->get_reg_first(in(1));
@@ -1050,7 +967,30 @@
if (src_first == dst_first && src_second == dst_second) {
// Self copy, no move
return 0;
- } else if (src_first_rc == rc_stack) {
+ }
+ if (bottom_type()->isa_vect() != NULL) {
+ uint ireg = ideal_reg();
+ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+ assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+ if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+ // mem -> mem
+ int src_offset = ra_->reg2offset(src_first);
+ int dst_offset = ra_->reg2offset(dst_first);
+ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
+ } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
+ vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
+ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
+ int stack_offset = ra_->reg2offset(dst_first);
+ vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
+ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
+ int stack_offset = ra_->reg2offset(src_first);
+ vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
+ } else {
+ ShouldNotReachHere();
+ }
+ return 0;
+ }
+ if (src_first_rc == rc_stack) {
// mem ->
if (dst_first_rc == rc_stack) {
// mem -> mem
@@ -1061,23 +1001,16 @@
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xFF);
- encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
-
- emit_opcode(*cbuf, 0x8F);
- encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
-
+ MacroAssembler _masm(cbuf);
+ __ pushq(Address(rsp, src_offset));
+ __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
- "popq [rsp + #%d]",
- src_offset,
- dst_offset);
+ "popq [rsp + #%d]",
+ src_offset, dst_offset);
#endif
}
- return
- 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
- 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1086,46 +1019,22 @@
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, Assembler::REX_W);
- emit_opcode(*cbuf, 0x89);
- emit_opcode(*cbuf, 0x44);
- emit_opcode(*cbuf, 0x24);
- emit_opcode(*cbuf, 0xF8);
-
- emit_opcode(*cbuf, 0x8B);
- encode_RegMem(*cbuf,
- RAX_enc,
- RSP_enc, 0x4, 0, src_offset,
- false);
-
- emit_opcode(*cbuf, 0x89);
- encode_RegMem(*cbuf,
- RAX_enc,
- RSP_enc, 0x4, 0, dst_offset,
- false);
-
- emit_opcode(*cbuf, Assembler::REX_W);
- emit_opcode(*cbuf, 0x8B);
- emit_opcode(*cbuf, 0x44);
- emit_opcode(*cbuf, 0x24);
- emit_opcode(*cbuf, 0xF8);
-
+ MacroAssembler _masm(cbuf);
+ __ movq(Address(rsp, -8), rax);
+ __ movl(rax, Address(rsp, src_offset));
+ __ movl(Address(rsp, dst_offset), rax);
+ __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
- "movl rax, [rsp + #%d]\n\t"
- "movl [rsp + #%d], rax\n\t"
- "movq rax, [rsp - #8]",
- src_offset,
- dst_offset);
+ "movl rax, [rsp + #%d]\n\t"
+ "movl [rsp + #%d], rax\n\t"
+ "movq rax, [rsp - #8]",
+ src_offset, dst_offset);
#endif
}
- return
- 5 + // movq
- 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
- 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
- 5; // movq
}
+ return 0;
} else if (dst_first_rc == rc_int) {
// mem -> gpr
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1133,52 +1042,32 @@
// 64-bit
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- if (Matcher::_regEncode[dst_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WR);
- }
- emit_opcode(*cbuf, 0x8B);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movq %s, [rsp + #%d]\t# spill",
Matcher::regName[dst_first],
offset);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- if (Matcher::_regEncode[dst_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x8B);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movl %s, [rsp + #%d]\t# spill",
Matcher::regName[dst_first],
offset);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] < 8)
- ? 3
- : 4); // REX
}
+ return 0;
} else if (dst_first_rc == rc_float) {
// mem-> xmm
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1189,18 +1078,13 @@
MacroAssembler _masm(cbuf);
__ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("%s %s, [rsp + #%d]\t# spill",
UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
Matcher::regName[dst_first],
offset);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] >= 8)
- ? 6
- : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1210,18 +1094,14 @@
MacroAssembler _masm(cbuf);
__ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movss %s, [rsp + #%d]\t# spill",
Matcher::regName[dst_first],
offset);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] >= 8)
- ? 6
- : (5 + ((UseAVX>0)?1:0))); // REX
}
+ return 0;
}
} else if (src_first_rc == rc_int) {
// gpr ->
@@ -1232,113 +1112,65 @@
// 64-bit
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WR);
- }
- emit_opcode(*cbuf, 0x89);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movq [rsp + #%d], %s\t# spill",
offset,
Matcher::regName[src_first]);
#endif
}
- return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x89);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movl [rsp + #%d], %s\t# spill",
offset,
Matcher::regName[src_first]);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] < 8)
- ? 3
- : 4); // REX
}
+ return 0;
} else if (dst_first_rc == rc_int) {
// gpr -> gpr
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WB);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_WR);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WRB);
- }
- }
- emit_opcode(*cbuf, 0x8B);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movq(as_Register(Matcher::_regEncode[dst_first]),
+ as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return 3; // REX
+ return 0;
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x8B);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movl(as_Register(Matcher::_regEncode[dst_first]),
+ as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movl %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 2
- : 3; // REX
+ return 0;
}
} else if (dst_first_rc == rc_float) {
// gpr -> xmm
@@ -1349,13 +1181,12 @@
MacroAssembler _masm(cbuf);
__ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movdq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return 5; // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1364,17 +1195,14 @@
MacroAssembler _masm(cbuf);
__ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movdl %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
- ? 5
- : (4 + ((UseAVX>0)?1:0)); // REX
}
+ return 0;
}
} else if (src_first_rc == rc_float) {
// xmm ->
@@ -1388,17 +1216,12 @@
MacroAssembler _masm(cbuf);
__ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movsd [rsp + #%d], %s\t# spill",
offset,
Matcher::regName[src_first]);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] >= 8)
- ? 6
- : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1408,18 +1231,14 @@
MacroAssembler _masm(cbuf);
__ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movss [rsp + #%d], %s\t# spill",
offset,
Matcher::regName[src_first]);
#endif
}
- return
- ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] >=8)
- ? 6
- : (5 + ((UseAVX>0)?1:0))); // REX
}
+ return 0;
} else if (dst_first_rc == rc_int) {
// xmm -> gpr
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1429,13 +1248,12 @@
MacroAssembler _masm(cbuf);
__ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movdq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return 5; // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1444,17 +1262,14 @@
MacroAssembler _masm(cbuf);
__ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("movdl %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
- ? 5
- : (4 + ((UseAVX>0)?1:0)); // REX
}
+ return 0;
} else if (dst_first_rc == rc_float) {
// xmm -> xmm
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1464,17 +1279,13 @@
MacroAssembler _masm(cbuf);
__ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("%s %s, %s\t# spill",
UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
- ? 5
- : (4 + ((UseAVX>0)?1:0)); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1483,42 +1294,35 @@
MacroAssembler _masm(cbuf);
__ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
- } else if (!do_size) {
+ } else {
st->print("%s %s, %s\t# spill",
UseXmmRegToRegMoveAll ? "movaps" : "movss ",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
- return ((UseAVX>0) ? 5:
- ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
- ? (UseXmmRegToRegMoveAll ? 4 : 5)
- : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
}
+ return 0;
}
}
assert(0," foo ");
Unimplemented();
-
return 0;
}
#ifndef PRODUCT
-void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
-{
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
implementation(NULL, ra_, false, st);
}
#endif
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
-{
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
implementation(&cbuf, ra_, false, NULL);
}
-uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
-{
- return implementation(NULL, ra_, true, NULL);
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+ return MachNode::size(ra_);
}
//=============================================================================
@@ -1735,16 +1539,6 @@
return true;
}
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
- return 8;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
- return Op_RegD;
-}
-
// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
@@ -1831,21 +1625,21 @@
bool Matcher::can_be_java_arg(int reg)
{
return
- reg == RDI_num || reg == RDI_H_num ||
- reg == RSI_num || reg == RSI_H_num ||
- reg == RDX_num || reg == RDX_H_num ||
- reg == RCX_num || reg == RCX_H_num ||
- reg == R8_num || reg == R8_H_num ||
- reg == R9_num || reg == R9_H_num ||
- reg == R12_num || reg == R12_H_num ||
- reg == XMM0_num || reg == XMM0_H_num ||
- reg == XMM1_num || reg == XMM1_H_num ||
- reg == XMM2_num || reg == XMM2_H_num ||
- reg == XMM3_num || reg == XMM3_H_num ||
- reg == XMM4_num || reg == XMM4_H_num ||
- reg == XMM5_num || reg == XMM5_H_num ||
- reg == XMM6_num || reg == XMM6_H_num ||
- reg == XMM7_num || reg == XMM7_H_num;
+ reg == RDI_num || reg == RDI_H_num ||
+ reg == RSI_num || reg == RSI_H_num ||
+ reg == RDX_num || reg == RDX_H_num ||
+ reg == RCX_num || reg == RCX_H_num ||
+ reg == R8_num || reg == R8_H_num ||
+ reg == R9_num || reg == R9_H_num ||
+ reg == R12_num || reg == R12_H_num ||
+ reg == XMM0_num || reg == XMM0b_num ||
+ reg == XMM1_num || reg == XMM1b_num ||
+ reg == XMM2_num || reg == XMM2b_num ||
+ reg == XMM3_num || reg == XMM3b_num ||
+ reg == XMM4_num || reg == XMM4b_num ||
+ reg == XMM5_num || reg == XMM5b_num ||
+ reg == XMM6_num || reg == XMM6b_num ||
+ reg == XMM7_num || reg == XMM7b_num;
}
bool Matcher::is_spillable_arg(int reg)
@@ -3220,10 +3014,11 @@
OptoReg::Bad, // Op_RegI
RAX_H_num, // Op_RegP
OptoReg::Bad, // Op_RegF
- XMM0_H_num, // Op_RegD
+ XMM0b_num, // Op_RegD
RAX_H_num // Op_RegL
};
- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
+ // Excluded flags and vector registers.
+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
%}
%}
@@ -3985,7 +3780,6 @@
interface(REG_INTER);
%}
-
//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
// operand direct(immP addr)
@@ -5416,61 +5210,6 @@
ins_pipe(pipe_slow); // XXX
%}
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
- match(Set dst (Load8B mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
- match(Set dst (Load4S mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
- match(Set dst (Load4C mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
- match(Set dst (Load2I mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
- match(Set dst (Load2F mem));
- ins_cost(125);
- format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode %{
- __ movq($dst$$XMMRegister, $mem$$Address);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Load Effective Address
instruct leaP8(rRegP dst, indOffset8 mem)
%{
@@ -6200,39 +5939,6 @@
ins_pipe(ialu_mem_imm);
%}
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
- match(Set mem (Store8B mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
- match(Set mem (Store4C mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
- match(Set mem (Store2I mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI0 zero)
%{
@@ -6258,17 +5964,6 @@
ins_pipe(ialu_mem_imm);
%}
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
- match(Set mem (Store2F mem src));
- ins_cost(145);
- format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode %{
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
- ins_pipe( pipe_slow );
-%}
-
// Store Float
instruct storeF(memory mem, regF src)
%{
@@ -10377,172 +10072,6 @@
ins_pipe( pipe_slow );
%}
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
- match(Set dst (Replicate8B src));
- format %{ "MOVDQA $dst,$src\n\t"
- "PUNPCKLBW $dst,$dst\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode %{
- if ($dst$$reg != $src$$reg) {
- __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
- }
- __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_rRegI(regD dst, rRegI src) %{
- match(Set dst (Replicate8B src));
- format %{ "MOVD $dst,$src\n\t"
- "PUNPCKLBW $dst,$dst\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
- match(Set dst (Replicate8B zero));
- format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
- match(Set dst (Replicate4S src));
- format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode %{
- __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_rRegI(regD dst, rRegI src) %{
- match(Set dst (Replicate4S src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
- match(Set dst (Replicate4S zero));
- format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
- match(Set dst (Replicate4C src));
- format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode %{
- __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_rRegI(regD dst, rRegI src) %{
- match(Set dst (Replicate4C src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
- match(Set dst (Replicate4C zero));
- format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
- match(Set dst (Replicate2I src));
- format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_rRegI(regD dst, rRegI src) %{
- match(Set dst (Replicate2I src));
- format %{ "MOVD $dst,$src\n\t"
- "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode %{
- __ movdl($dst$$XMMRegister, $src$$Register);
- __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (2 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
- match(Set dst (Replicate2I zero));
- format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
- match(Set dst (Replicate2F src));
- format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
- match(Set dst (Replicate2F src));
- format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode %{
- __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
- match(Set dst (Replicate2F zero));
- format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode %{
- __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
- %}
- ins_pipe( fpu_reg_reg );
-%}
-
// =======================================================================
// fast clearing of an array
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -516,7 +516,12 @@
}
}
- if (thread->thread_state() == _thread_in_Java) {
+ // We test if stub is already set (by the stack overflow code
+ // above) so it is not overwritten by the code that follows. This
+ // check is not required on other platforms, because on other
+ // platforms we check for SIGSEGV only or SIGBUS only, whereas here
+ // we have to check for both SIGSEGV and SIGBUS.
+ if (thread->thread_state() == _thread_in_Java && stub == NULL) {
// Java thread running in Java code => find exception handler if any
// a fault inside compiled code, the interpreter, or a stub
--- a/hotspot/src/share/vm/adlc/adlparse.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/adlparse.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -115,6 +115,12 @@
parse_err(SYNERR, "expected one of - instruct, operand, ins_attrib, op_attrib, source, register, pipeline, encode\n Found %s",ident);
}
}
+ // Add reg_class spill_regs after parsing.
+ RegisterForm *regBlock = _AD.get_registers();
+ if (regBlock == NULL) {
+ parse_err(SEMERR, "Did not declare 'register' definitions");
+ }
+ regBlock->addSpillRegClass();
// Done with parsing, check consistency.
@@ -768,11 +774,12 @@
//------------------------------reg_parse--------------------------------------
void ADLParser::reg_parse(void) {
-
- // Create the RegisterForm for the architecture description.
- RegisterForm *regBlock = new RegisterForm(); // Build new Source object
- regBlock->_linenum = linenum();
- _AD.addForm(regBlock);
+ RegisterForm *regBlock = _AD.get_registers(); // Information about registers encoding
+ if (regBlock == NULL) {
+ // Create the RegisterForm for the architecture description.
+ regBlock = new RegisterForm(); // Build new RegisterForm object
+ _AD.addForm(regBlock);
+ }
skipws(); // Skip leading whitespace
if (_curchar == '%' && *(_ptr+1) == '{') {
@@ -796,15 +803,11 @@
parse_err(SYNERR, "Missing %c{ ... %c} block after register keyword.\n",'%','%');
return;
}
-
- // Add reg_class spill_regs
- regBlock->addSpillRegClass();
}
//------------------------------encode_parse-----------------------------------
void ADLParser::encode_parse(void) {
EncodeForm *encBlock; // Information about instruction/operand encoding
- char *desc = NULL; // String representation of encode rule
_AD.getForm(&encBlock);
if ( encBlock == NULL) {
--- a/hotspot/src/share/vm/adlc/archDesc.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/archDesc.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
//
-// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@@ -911,12 +911,24 @@
// Find last character in idealOp, it specifies the type
char last_char = 0;
const char *ptr = idealOp;
- for( ; *ptr != '\0'; ++ptr) {
+ for (; *ptr != '\0'; ++ptr) {
last_char = *ptr;
}
+ // Match Vector types.
+ if (strncmp(idealOp, "Vec",3)==0) {
+ switch(last_char) {
+ case 'S': return "TypeVect::VECTS";
+ case 'D': return "TypeVect::VECTD";
+ case 'X': return "TypeVect::VECTX";
+ case 'Y': return "TypeVect::VECTY";
+ default:
+ internal_err("Vector type %s with unrecognized type\n",idealOp);
+ }
+ }
+
// !!!!!
- switch( last_char ) {
+ switch(last_char) {
case 'I': return "TypeInt::INT";
case 'P': return "TypePtr::BOTTOM";
case 'N': return "TypeNarrowOop::BOTTOM";
--- a/hotspot/src/share/vm/adlc/forms.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/forms.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -265,47 +265,22 @@
if( strcmp(opType,"LoadN")==0 ) return Form::idealN;
if( strcmp(opType,"LoadRange")==0 ) return Form::idealI;
if( strcmp(opType,"LoadS")==0 ) return Form::idealS;
- if( strcmp(opType,"Load16B")==0 ) return Form::idealB;
- if( strcmp(opType,"Load8B")==0 ) return Form::idealB;
- if( strcmp(opType,"Load4B")==0 ) return Form::idealB;
- if( strcmp(opType,"Load8C")==0 ) return Form::idealC;
- if( strcmp(opType,"Load4C")==0 ) return Form::idealC;
- if( strcmp(opType,"Load2C")==0 ) return Form::idealC;
- if( strcmp(opType,"Load8S")==0 ) return Form::idealS;
- if( strcmp(opType,"Load4S")==0 ) return Form::idealS;
- if( strcmp(opType,"Load2S")==0 ) return Form::idealS;
- if( strcmp(opType,"Load2D")==0 ) return Form::idealD;
- if( strcmp(opType,"Load4F")==0 ) return Form::idealF;
- if( strcmp(opType,"Load2F")==0 ) return Form::idealF;
- if( strcmp(opType,"Load4I")==0 ) return Form::idealI;
- if( strcmp(opType,"Load2I")==0 ) return Form::idealI;
- if( strcmp(opType,"Load2L")==0 ) return Form::idealL;
+ if( strcmp(opType,"LoadVector")==0 ) return Form::idealV;
assert( strcmp(opType,"Load") != 0, "Must type Loads" );
return Form::none;
}
Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreB")==0) return Form::idealB;
- if( strcmp(opType,"StoreCM")==0) return Form::idealB;
+ if( strcmp(opType,"StoreCM")==0) return Form::idealB;
if( strcmp(opType,"StoreC")==0) return Form::idealC;
if( strcmp(opType,"StoreD")==0) return Form::idealD;
if( strcmp(opType,"StoreF")==0) return Form::idealF;
if( strcmp(opType,"StoreI")==0) return Form::idealI;
if( strcmp(opType,"StoreL")==0) return Form::idealL;
if( strcmp(opType,"StoreP")==0) return Form::idealP;
- if( strcmp(opType,"StoreN")==0) return Form::idealN;
- if( strcmp(opType,"Store16B")==0) return Form::idealB;
- if( strcmp(opType,"Store8B")==0) return Form::idealB;
- if( strcmp(opType,"Store4B")==0) return Form::idealB;
- if( strcmp(opType,"Store8C")==0) return Form::idealC;
- if( strcmp(opType,"Store4C")==0) return Form::idealC;
- if( strcmp(opType,"Store2C")==0) return Form::idealC;
- if( strcmp(opType,"Store2D")==0) return Form::idealD;
- if( strcmp(opType,"Store4F")==0) return Form::idealF;
- if( strcmp(opType,"Store2F")==0) return Form::idealF;
- if( strcmp(opType,"Store4I")==0) return Form::idealI;
- if( strcmp(opType,"Store2I")==0) return Form::idealI;
- if( strcmp(opType,"Store2L")==0) return Form::idealL;
+ if( strcmp(opType,"StoreN")==0) return Form::idealN;
+ if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
assert( strcmp(opType,"Store") != 0, "Must type Stores" );
return Form::none;
}
--- a/hotspot/src/share/vm/adlc/forms.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/forms.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -172,7 +172,8 @@
idealB = 6, // Byte type
idealC = 7, // Char type
idealS = 8, // String type
- idealN = 9 // Narrow oop types
+ idealN = 9, // Narrow oop types
+ idealV = 10 // Vector type
};
// Convert ideal name to a DataType, return DataType::none if not a 'ConX'
Form::DataType ideal_to_const_type(const char *ideal_type_name) const;
--- a/hotspot/src/share/vm/adlc/formsopt.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/formsopt.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -66,7 +66,7 @@
// for spill-slots/regs.
void RegisterForm::addSpillRegClass() {
// Stack slots start at the next available even register number.
- _reg_ctr = (_reg_ctr+1) & ~1;
+ _reg_ctr = (_reg_ctr+7) & ~7;
const char *rc_name = "stack_slots";
RegClass *reg_class = new RegClass(rc_name);
reg_class->_stack_or_reg = true;
@@ -150,9 +150,14 @@
int RegisterForm::RegMask_Size() {
// Need at least this many words
int words_for_regs = (_reg_ctr + 31)>>5;
- // Add a few for incoming & outgoing arguments to calls.
+ // The array of Register Mask bits should be large enough to cover
+ // all the machine registers and all parameters that need to be passed
+ // on the stack (stack registers) up to some interesting limit. Methods
+ // that need more parameters will NOT be compiled. On Intel, the limit
+ // is something like 90+ parameters.
+ // Add a few (3 words == 96 bits) for incoming & outgoing arguments to calls.
// Round up to the next doubleword size.
- return (words_for_regs + 2 + 1) & ~1;
+ return (words_for_regs + 3 + 1) & ~1;
}
void RegisterForm::dump() { // Debug printer
--- a/hotspot/src/share/vm/adlc/formssel.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/formssel.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -432,6 +432,14 @@
return _matrule->is_ideal_store();
}
+// Return 'true' if this instruction matches an ideal vector node
+bool InstructForm::is_vector() const {
+ if( _matrule == NULL ) return false;
+
+ return _matrule->is_vector();
+}
+
+
// Return the input register that must match the output register
// If this is not required, return 0
uint InstructForm::two_address(FormDict &globals) {
@@ -751,6 +759,9 @@
if (needs_base_oop_edge(globals)) return true;
+ if (is_vector()) return true;
+ if (is_mach_constant()) return true;
+
return false;
}
@@ -3381,11 +3392,8 @@
"StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
"StoreB","StoreC","Store" ,"StoreFP",
"LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" ,
- "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
- "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
- "Store8B","Store4B","Store8C","Store4C","Store2C",
- "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
- "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S",
+ "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
+ "StoreVector", "LoadVector",
"LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
"LoadPLocked",
"StorePConditional", "StoreIConditional", "StoreLConditional",
@@ -3822,6 +3830,10 @@
strcmp(opType,"RegL")==0 ||
strcmp(opType,"RegF")==0 ||
strcmp(opType,"RegD")==0 ||
+ strcmp(opType,"VecS")==0 ||
+ strcmp(opType,"VecD")==0 ||
+ strcmp(opType,"VecX")==0 ||
+ strcmp(opType,"VecY")==0 ||
strcmp(opType,"Reg" )==0) ) {
return 1;
}
@@ -3938,19 +3950,12 @@
strcmp(opType,"ReverseBytesL")==0 ||
strcmp(opType,"ReverseBytesUS")==0 ||
strcmp(opType,"ReverseBytesS")==0 ||
- strcmp(opType,"Replicate16B")==0 ||
- strcmp(opType,"Replicate8B")==0 ||
- strcmp(opType,"Replicate4B")==0 ||
- strcmp(opType,"Replicate8C")==0 ||
- strcmp(opType,"Replicate4C")==0 ||
- strcmp(opType,"Replicate8S")==0 ||
- strcmp(opType,"Replicate4S")==0 ||
- strcmp(opType,"Replicate4I")==0 ||
- strcmp(opType,"Replicate2I")==0 ||
- strcmp(opType,"Replicate2L")==0 ||
- strcmp(opType,"Replicate4F")==0 ||
- strcmp(opType,"Replicate2F")==0 ||
- strcmp(opType,"Replicate2D")==0 ||
+ strcmp(opType,"ReplicateB")==0 ||
+ strcmp(opType,"ReplicateS")==0 ||
+ strcmp(opType,"ReplicateI")==0 ||
+ strcmp(opType,"ReplicateL")==0 ||
+ strcmp(opType,"ReplicateF")==0 ||
+ strcmp(opType,"ReplicateD")==0 ||
0 /* 0 to line up columns nicely */ )
return 1;
}
@@ -4034,6 +4039,23 @@
return ideal_load;
}
+bool MatchRule::is_vector() const { // true if the right child's op type is one of the names listed below
+ if( _rChild ) {
+ const char *opType = _rChild->_opType;
+ if( strcmp(opType,"ReplicateB")==0 || // Replicate* ops, one per element type
+ strcmp(opType,"ReplicateS")==0 ||
+ strcmp(opType,"ReplicateI")==0 ||
+ strcmp(opType,"ReplicateL")==0 ||
+ strcmp(opType,"ReplicateF")==0 ||
+ strcmp(opType,"ReplicateD")==0 ||
+ strcmp(opType,"LoadVector")==0 || // vector load/store ops
+ strcmp(opType,"StoreVector")==0 ||
+ 0 /* 0 to line up columns nicely */ )
+ return true;
+ }
+ return false; // no right child, or its op type is not in the list above
+}
+
bool MatchRule::skip_antidep_check() const {
// Some loads operate on what is effectively immutable memory so we
--- a/hotspot/src/share/vm/adlc/formssel.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/formssel.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -160,6 +160,7 @@
virtual bool is_ideal_safepoint() const; // node matches 'SafePoint'
virtual bool is_ideal_nop() const; // node matches 'Nop'
virtual bool is_ideal_control() const; // control node
+ virtual bool is_vector() const; // vector instruction
virtual Form::CallType is_ideal_call() const; // matches ideal 'Call'
virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode'
@@ -1011,6 +1012,7 @@
bool is_ideal_goto() const; // node matches ideal 'Goto'
bool is_ideal_loopEnd() const; // node matches ideal 'LoopEnd'
bool is_ideal_bool() const; // node matches ideal 'Bool'
+ bool is_vector() const; // vector instruction
Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode'
// Should antidep checks be disabled for this rule
// See definition of MatchRule::skip_antidep_check
--- a/hotspot/src/share/vm/adlc/main.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/adlc/main.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,6 +250,7 @@
AD.addInclude(AD._HPP_file, "opto/node.hpp");
AD.addInclude(AD._HPP_file, "opto/regalloc.hpp");
AD.addInclude(AD._HPP_file, "opto/subnode.hpp");
+ AD.addInclude(AD._HPP_file, "opto/vectornode.hpp");
AD.addInclude(AD._CPP_CLONE_file, "precompiled.hpp");
AD.addInclude(AD._CPP_CLONE_file, "adfiles", get_basename(AD._HPP_file._name));
AD.addInclude(AD._CPP_EXPAND_file, "precompiled.hpp");
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -111,6 +111,10 @@
template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \
template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \
\
+ /* Java runtime version access */ \
+ template(sun_misc_Version, "sun/misc/Version") \
+ template(java_runtime_name_name, "java_runtime_name") \
+ \
/* class file format tags */ \
template(tag_source_file, "SourceFile") \
template(tag_inner_classes, "InnerClasses") \
--- a/hotspot/src/share/vm/code/vmreg.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/code/vmreg.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,7 @@
#include "code/vmreg.hpp"
// First VMReg value that could refer to a stack slot
-VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 1) & ~1);
+VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 7) & ~7);
// VMRegs are 4 bytes wide on all platforms
const int VMRegImpl::stack_slot_size = 4;
--- a/hotspot/src/share/vm/memory/universe.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/memory/universe.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -273,7 +273,7 @@
}
static klassOop typeArrayKlassObj(BasicType t) {
- assert((uint)t < T_VOID+1, "range check");
+ assert((uint)t < T_VOID+1, err_msg("range check for type: %s", type2name(t)));
assert(_typeArrayKlassObjs[t] != NULL, "domain check");
return _typeArrayKlassObjs[t];
}
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -81,6 +81,13 @@
product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
"Align a loop if padding size in bytes is less or equal to this value") \
\
+ product(intx, MaxVectorSize, 32, \
+ "Max vector size in bytes, " \
+ "actual size could be less depending on elements type") \
+ \
+ product(bool, AlignVector, false, \
+ "Perform vector store/load alignment in loop") \
+ \
product(intx, NumberOfLoopInstrToAlign, 4, \
"Number of first instructions in a loop to align") \
\
@@ -292,9 +299,12 @@
develop(bool, SuperWordRTDepCheck, false, \
"Enable runtime dependency checks.") \
\
- product(bool, TraceSuperWord, false, \
+ notproduct(bool, TraceSuperWord, false, \
"Trace superword transforms") \
\
+ notproduct(bool, TraceNewVectors, false, \
+ "Trace creation of Vector nodes") \
+ \
product_pd(bool, OptoBundling, \
"Generate nops to fill i-cache lines") \
\
--- a/hotspot/src/share/vm/opto/callGenerator.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -172,9 +172,11 @@
JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
GraphKit kit(jvms);
+ Compile* C = kit.C;
+ PhaseGVN& gvn = kit.gvn();
- if (kit.C->log() != NULL) {
- kit.C->log()->elem("dynamic_call bci='%d'", jvms->bci());
+ if (C->log() != NULL) {
+ C->log()->elem("dynamic_call bci='%d'", jvms->bci());
}
// Get the constant pool cache from the caller class.
@@ -190,18 +192,21 @@
size_t call_site_offset = cpcache->get_f1_offset(index);
// Load the CallSite object from the constant pool cache.
- const TypeOopPtr* cpcache_ptr = TypeOopPtr::make_from_constant(cpcache);
- Node* cpcache_adr = kit.makecon(cpcache_ptr);
- Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, cpcache_adr, call_site_offset);
- Node* call_site = kit.make_load(kit.control(), call_site_adr, TypeInstPtr::BOTTOM, T_OBJECT, Compile::AliasIdxRaw);
+ const TypeOopPtr* cpcache_type = TypeOopPtr::make_from_constant(cpcache); // returns TypeAryPtr of type T_OBJECT
+ const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
+ Node* cpcache_adr = kit.makecon(cpcache_type);
+ Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
+ // The oops in the constant pool cache are not compressed; load them as raw pointers.
+ Node* call_site = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
// Load the target MethodHandle from the CallSite object.
- Node* target_mh_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
- Node* target_mh = kit.make_load(kit.control(), target_mh_adr, TypeInstPtr::BOTTOM, T_OBJECT);
+ const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
+ Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
+ Node* target_mh = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT);
address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub();
- CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
+ CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
// invokedynamic is treated as an optimized invokevirtual.
call->set_optimized_virtual(true);
// Take extra care (in the presence of argument motion) not to trash the SP:
@@ -785,9 +790,10 @@
JVMState* PredictedDynamicCallGenerator::generate(JVMState* jvms) {
GraphKit kit(jvms);
+ Compile* C = kit.C;
PhaseGVN& gvn = kit.gvn();
- CompileLog* log = kit.C->log();
+ CompileLog* log = C->log();
if (log != NULL) {
log->elem("predicted_dynamic_call bci='%d'", jvms->bci());
}
@@ -803,8 +809,8 @@
Node* receiver = kit.argument(0);
// Check if the MethodHandle is the expected one
- Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(receiver, predicted_mh));
- bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) );
+ Node* cmp = gvn.transform(new (C, 3) CmpPNode(receiver, predicted_mh));
+ bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
} else {
// Get the constant pool cache from the caller class.
ciMethod* caller_method = jvms->method();
@@ -818,22 +824,25 @@
size_t call_site_offset = cpcache->get_f1_offset(index);
// Load the CallSite object from the constant pool cache.
- const TypeOopPtr* cpcache_ptr = TypeOopPtr::make_from_constant(cpcache);
- Node* cpcache_adr = kit.makecon(cpcache_ptr);
- Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, cpcache_adr, call_site_offset);
- Node* call_site = kit.make_load(kit.control(), call_site_adr, TypeInstPtr::BOTTOM, T_OBJECT, Compile::AliasIdxRaw);
+ const TypeOopPtr* cpcache_type = TypeOopPtr::make_from_constant(cpcache); // returns TypeAryPtr of type T_OBJECT
+ const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
+ Node* cpcache_adr = kit.makecon(cpcache_type);
+ Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
+ // The oops in the constant pool cache are not compressed; load them as raw pointers.
+ Node* call_site = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
// Load the target MethodHandle from the CallSite object.
+ const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
Node* target_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
- Node* target_mh = kit.make_load(kit.control(), target_adr, TypeInstPtr::BOTTOM, T_OBJECT);
+ Node* target_mh = kit.make_load(kit.control(), target_adr, target_type, T_OBJECT);
// Check if the MethodHandle is still the same.
- Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(target_mh, predicted_mh));
- bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) );
+ Node* cmp = gvn.transform(new (C, 3) CmpPNode(target_mh, predicted_mh));
+ bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
}
IfNode* iff = kit.create_and_xform_if(kit.control(), bol, _hit_prob, COUNT_UNKNOWN);
- kit.set_control( gvn.transform(new(kit.C, 1) IfTrueNode (iff)));
- Node* slow_ctl = gvn.transform(new(kit.C, 1) IfFalseNode(iff));
+ kit.set_control( gvn.transform(new (C, 1) IfTrueNode (iff)));
+ Node* slow_ctl = gvn.transform(new (C, 1) IfFalseNode(iff));
SafePointNode* slow_map = NULL;
JVMState* slow_jvms;
@@ -882,7 +891,7 @@
// Finish the diamond.
kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
- RegionNode* region = new (kit.C, 3) RegionNode(3);
+ RegionNode* region = new (C, 3) RegionNode(3);
region->init_req(1, kit.control());
region->init_req(2, slow_map->control());
kit.set_control(gvn.transform(region));
--- a/hotspot/src/share/vm/opto/chaitin.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/chaitin.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -75,6 +75,7 @@
// Flags
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
+ if( _is_vector ) tty->print("Vector ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@@ -479,16 +480,18 @@
// Move important info out of the live_arena to longer lasting storage.
alloc_node_regs(_names.Size());
- for( uint i=0; i < _names.Size(); i++ ) {
- if( _names[i] ) { // Live range associated with Node?
- LRG &lrg = lrgs( _names[i] );
- if( lrg.num_regs() == 1 ) {
- _node_regs[i].set1( lrg.reg() );
+ for (uint i=0; i < _names.Size(); i++) {
+ if (_names[i]) { // Live range associated with Node?
+ LRG &lrg = lrgs(_names[i]);
+ if (!lrg.alive()) {
+ _node_regs[i].set_bad();
+ } else if (lrg.num_regs() == 1) {
+ _node_regs[i].set1(lrg.reg());
} else { // Must be a register-pair
- if( !lrg._fat_proj ) { // Must be aligned adjacent register pair
+ if (!lrg._fat_proj) { // Must be aligned adjacent register pair
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
- _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+ _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) );
} else { // Misaligned; extract 2 bits
OptoReg::Name hi = lrg.reg(); // Get hi register
lrg.Remove(hi); // Yank from mask
@@ -568,7 +571,7 @@
// Check for float-vs-int live range (used in register-pressure
// calculations)
const Type *n_type = n->bottom_type();
- if( n_type->is_floatingpoint() )
+ if (n_type->is_floatingpoint())
lrg._is_float = 1;
// Check for twice prior spilling. Once prior spilling might have
@@ -599,18 +602,28 @@
// Limit result register mask to acceptable registers
const RegMask &rm = n->out_RegMask();
lrg.AND( rm );
- // Check for bound register masks
- const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
- lrg._is_bound = 1;
-
- // Check for maximum frequency value
- if( lrg._maxfreq < b->_freq )
- lrg._maxfreq = b->_freq;
int ireg = n->ideal_reg();
assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
"oops must be in Op_RegP's" );
+
+ // Check for vector live range (only if vector register is used).
+ // On SPARC a vector uses RegD, which could be misaligned, so it is not
+ // processed as a vector in RA.
+ if (RegMask::is_vector(ireg))
+ lrg._is_vector = 1;
+ assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD,
+ "vector must be in vector registers");
+
+ // Check for bound register masks
+ const RegMask &lrgmask = lrg.mask();
+ if (lrgmask.is_bound(ireg))
+ lrg._is_bound = 1;
+
+ // Check for maximum frequency value
+ if (lrg._maxfreq < b->_freq)
+ lrg._maxfreq = b->_freq;
+
// Check for oop-iness, or long/double
// Check for multi-kill projection
switch( ireg ) {
@@ -689,7 +702,7 @@
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( rm.is_misaligned_Pair() ) {
+ if (rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
@@ -706,6 +719,33 @@
lrg.set_reg_pressure(1);
#endif
break;
+ case Op_VecS:
+ assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
+ assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
+ lrg.set_num_regs(RegMask::SlotsPerVecS);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecD:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
+ assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecD);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecX:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
+ assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecX);
+ lrg.set_reg_pressure(1);
+ break;
+ case Op_VecY:
+ assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
+ assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
+ lrg.set_num_regs(RegMask::SlotsPerVecY);
+ lrg.set_reg_pressure(1);
+ break;
default:
ShouldNotReachHere();
}
@@ -763,24 +803,38 @@
} else {
lrg.AND( rm );
}
+
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
- if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ int kreg = n->in(k)->ideal_reg();
+ bool is_vect = RegMask::is_vector(kreg);
+ assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
+ is_vect || kreg == Op_RegD,
+ "vector must be in vector registers");
+ if (lrgmask.is_bound(kreg))
lrg._is_bound = 1;
+
// If this use of a double forces a mis-aligned double,
// flag as '_fat_proj' - really flag as allowing misalignment
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
- if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+#ifdef ASSERT
+ if (is_vect) {
+ assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
+ assert(!lrg._fat_proj, "sanity");
+ assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
+ }
+#endif
+ if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
// if the LRG is an unaligned pair, we will have to spill
// so clear the LRG's register mask if it is not already spilled
- if ( !n->is_SpillCopy() &&
- (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
- lrgmask.is_misaligned_Pair()) {
+ if (!is_vect && !n->is_SpillCopy() &&
+ (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
+ lrgmask.is_misaligned_pair()) {
lrg.Clear();
}
@@ -793,12 +847,14 @@
} // end for all blocks
// Final per-liverange setup
- for( uint i2=0; i2<_maxlrg; i2++ ) {
+ for (uint i2=0; i2<_maxlrg; i2++) {
LRG &lrg = lrgs(i2);
- if( lrg.num_regs() == 2 && !lrg._fat_proj )
- lrg.ClearToPairs();
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ lrg.clear_to_sets();
+ }
lrg.compute_set_mask_size();
- if( lrg.not_free() ) { // Handle case where we lose from the start
+ if (lrg.not_free()) { // Handle case where we lose from the start
lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
lrg._direct_conflict = 1;
}
@@ -1104,22 +1160,17 @@
// Choose a color which is legal for him
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
- OptoReg::Name reg;
- if( lrg.num_regs() == 1 ) {
- reg = tempmask.find_first_elem();
- } else {
- tempmask.ClearToPairs();
- reg = tempmask.find_first_pair();
- }
- if( OptoReg::is_valid(reg) )
+ tempmask.clear_to_sets(lrg.num_regs());
+ OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
+ if (OptoReg::is_valid(reg))
return reg;
}
}
// If no bias info exists, just go with the register selection ordering
- if( lrg.num_regs() == 2 ) {
- // Find an aligned pair
- return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+ if (lrg._is_vector || lrg.num_regs() == 2) {
+ // Find an aligned set
+ return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@@ -1149,6 +1200,7 @@
// Use a heuristic to "bias" the color choice
return bias_color(lrg, chunk);
+ assert(!lrg._is_vector, "should be not vector here" );
assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
// Fat-proj case or misaligned double argument.
@@ -1238,14 +1290,16 @@
}
//assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
// Aligned pairs need aligned masks
- if( lrg->num_regs() == 2 && !lrg->_fat_proj )
- lrg->ClearToPairs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
+ lrg->clear_to_sets();
+ }
// Check if a color is available and if so pick the color
OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
debug_only(lrg->compute_set_mask_size());
- assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+ assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif
//---------------
@@ -1277,17 +1331,16 @@
// If the live range is not bound, then we actually had some choices
// to make. In this case, the mask has more bits in it than the colors
// chosen. Restrict the mask to just what was picked.
- if( lrg->num_regs() == 1 ) { // Size 1 live range
+ int n_regs = lrg->num_regs();
+ assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
+ if (n_regs == 1 || !lrg->_fat_proj) {
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
- lrg->set_mask_size(1);
- } else if( !lrg->_fat_proj ) {
- // For pairs, also insert the low bit of the pair
- assert( lrg->num_regs() == 2, "unbound fatproj???" );
- lrg->Clear(); // Clear the mask
- lrg->Insert(reg); // Set regmask to match selected reg
- lrg->Insert(OptoReg::add(reg,-1));
- lrg->set_mask_size(2);
+ // For vectors and pairs, also insert the lower registers of the set
+ for (int i = 1; i < n_regs; i++)
+ lrg->Insert(OptoReg::add(reg,-i));
+ lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
}
@@ -1483,7 +1536,7 @@
// Check for AddP-related opcodes
if( !derived->is_Phi() ) {
- assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" );
+ assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg("but is: %s", derived->Name()));
Node *base = derived->in(AddPNode::Base);
derived_base_map[derived->_idx] = base;
return base;
@@ -1860,12 +1913,20 @@
sprintf(buf,"L%d",lidx); // No register binding yet
} else if( !lidx ) { // Special, not allocated value
strcpy(buf,"Special");
- } else if( (lrgs(lidx).num_regs() == 1)
- ? !lrgs(lidx).mask().is_bound1()
- : !lrgs(lidx).mask().is_bound2() ) {
- sprintf(buf,"L%d",lidx); // No register binding yet
- } else { // Hah! We have a bound machine register
- print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ if (lrgs(lidx)._is_vector) {
+ if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
+ print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
+ else
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else if( (lrgs(lidx).num_regs() == 1)
+ ? lrgs(lidx).mask().is_bound1()
+ : lrgs(lidx).mask().is_bound_pair() ) {
+ // Hah! We have a bound machine register
+ print_reg( lrgs(lidx).reg(), this, buf );
+ } else {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ }
}
}
return buf+strlen(buf);
--- a/hotspot/src/share/vm/opto/chaitin.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/chaitin.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -99,8 +99,15 @@
void set_mask_size( int size ) {
assert((size == 65535) || (size == (int)_mask.Size()), "");
_mask_size = size;
- debug_only(_msize_valid=1;)
- debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+#ifdef ASSERT
+ _msize_valid=1;
+ if (_is_vector) {
+ assert(!_fat_proj, "sanity");
+ _mask.verify_sets(_num_regs);
+ } else if (_num_regs == 2 && !_fat_proj) {
+ _mask.verify_pairs();
+ }
+#endif
}
void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
int mask_size() const { assert( _msize_valid, "mask size not valid" );
@@ -116,7 +123,8 @@
void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
- void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+ void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
+ void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
// Number of registers this live range uses when it colors
private:
@@ -150,6 +158,7 @@
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
+ _is_vector:1, // True if in vector registers
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
--- a/hotspot/src/share/vm/opto/classes.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/classes.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -245,14 +245,12 @@
macro(XorL)
macro(Vector)
macro(AddVB)
-macro(AddVC)
macro(AddVS)
macro(AddVI)
macro(AddVL)
macro(AddVF)
macro(AddVD)
macro(SubVB)
-macro(SubVC)
macro(SubVS)
macro(SubVI)
macro(SubVL)
@@ -263,74 +261,36 @@
macro(DivVF)
macro(DivVD)
macro(LShiftVB)
-macro(LShiftVC)
macro(LShiftVS)
macro(LShiftVI)
-macro(URShiftVB)
-macro(URShiftVC)
-macro(URShiftVS)
-macro(URShiftVI)
+macro(RShiftVB)
+macro(RShiftVS)
+macro(RShiftVI)
macro(AndV)
macro(OrV)
macro(XorV)
-macro(VectorLoad)
-macro(Load16B)
-macro(Load8B)
-macro(Load4B)
-macro(Load8C)
-macro(Load4C)
-macro(Load2C)
-macro(Load8S)
-macro(Load4S)
-macro(Load2S)
-macro(Load4I)
-macro(Load2I)
-macro(Load2L)
-macro(Load4F)
-macro(Load2F)
-macro(Load2D)
-macro(VectorStore)
-macro(Store16B)
-macro(Store8B)
-macro(Store4B)
-macro(Store8C)
-macro(Store4C)
-macro(Store2C)
-macro(Store4I)
-macro(Store2I)
-macro(Store2L)
-macro(Store4F)
-macro(Store2F)
-macro(Store2D)
+macro(LoadVector)
+macro(StoreVector)
macro(Pack)
macro(PackB)
macro(PackS)
-macro(PackC)
macro(PackI)
macro(PackL)
macro(PackF)
macro(PackD)
-macro(Pack2x1B)
-macro(Pack2x2B)
-macro(Replicate16B)
-macro(Replicate8B)
-macro(Replicate4B)
-macro(Replicate8S)
-macro(Replicate4S)
-macro(Replicate2S)
-macro(Replicate8C)
-macro(Replicate4C)
-macro(Replicate2C)
-macro(Replicate4I)
-macro(Replicate2I)
-macro(Replicate2L)
-macro(Replicate4F)
-macro(Replicate2F)
-macro(Replicate2D)
+macro(Pack2L)
+macro(Pack2D)
+macro(ReplicateB)
+macro(ReplicateS)
+macro(ReplicateI)
+macro(ReplicateL)
+macro(ReplicateF)
+macro(ReplicateD)
macro(Extract)
macro(ExtractB)
+macro(ExtractUB)
+macro(ExtractC)
macro(ExtractS)
-macro(ExtractC)
macro(ExtractI)
macro(ExtractL)
macro(ExtractF)
--- a/hotspot/src/share/vm/opto/compile.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/compile.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -2591,38 +2591,12 @@
}
break;
- case Op_Load16B:
- case Op_Load8B:
- case Op_Load4B:
- case Op_Load8S:
- case Op_Load4S:
- case Op_Load2S:
- case Op_Load8C:
- case Op_Load4C:
- case Op_Load2C:
- case Op_Load4I:
- case Op_Load2I:
- case Op_Load2L:
- case Op_Load4F:
- case Op_Load2F:
- case Op_Load2D:
- case Op_Store16B:
- case Op_Store8B:
- case Op_Store4B:
- case Op_Store8C:
- case Op_Store4C:
- case Op_Store2C:
- case Op_Store4I:
- case Op_Store2I:
- case Op_Store2L:
- case Op_Store4F:
- case Op_Store2F:
- case Op_Store2D:
+ case Op_LoadVector:
+ case Op_StoreVector:
break;
case Op_PackB:
case Op_PackS:
- case Op_PackC:
case Op_PackI:
case Op_PackF:
case Op_PackL:
--- a/hotspot/src/share/vm/opto/ifg.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/ifg.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -416,6 +416,7 @@
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
!lrgs(lidx)._is_float &&
+ !lrgs(lidx)._is_vector &&
lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
cnt += lrgs(lidx).reg_pressure();
}
@@ -430,7 +431,7 @@
while ((lidx = elements.next()) != 0) {
if( lrgs(lidx).mask().is_UP() &&
lrgs(lidx).mask_size() &&
- lrgs(lidx)._is_float )
+ (lrgs(lidx)._is_float || lrgs(lidx)._is_vector))
cnt += lrgs(lidx).reg_pressure();
}
return cnt;
@@ -439,8 +440,8 @@
//------------------------------lower_pressure---------------------------------
// Adjust register pressure down by 1. Capture last hi-to-low transition,
static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
- if( lrg->mask().is_UP() && lrg->mask_size() ) {
- if( lrg->_is_float ) {
+ if (lrg->mask().is_UP() && lrg->mask_size()) {
+ if (lrg->_is_float || lrg->_is_vector) {
pressure[1] -= lrg->reg_pressure();
if( pressure[1] == (uint)FLOATPRESSURE ) {
hrp_index[1] = where;
@@ -522,8 +523,8 @@
LRG &lrg = lrgs(lidx);
lrg._area += cost;
// Compute initial register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) { // Count float pressure
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) { // Count float pressure
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
@@ -681,13 +682,10 @@
// according to its bindings.
const RegMask &rmask = lrgs(r).mask();
if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
- // Smear odd bits; leave only aligned pairs of bits.
- RegMask r2mask = rmask;
- r2mask.SmearToPairs();
// Check for common case
int r_size = lrgs(r).num_regs();
OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
-
+ // Smear odd bits
IndexSetIterator elements(&liveout);
uint l;
while ((l = elements.next()) != 0) {
@@ -701,10 +699,15 @@
// Remove the bits from LRG 'r' from LRG 'l' so 'l' no
// longer interferes with 'r'. If 'l' requires aligned
// adjacent pairs, subtract out bit pairs.
- if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+ assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
+ if (lrg.num_regs() > 1 && !lrg._fat_proj) {
+ RegMask r2mask = rmask;
+ // Leave only aligned set of bits.
+ r2mask.smear_to_sets(lrg.num_regs());
+ // This includes the vector case.
lrg.SUBTRACT( r2mask );
lrg.compute_set_mask_size();
- } else if( r_size != 1 ) {
+ } else if( r_size != 1 ) { // fat proj
lrg.SUBTRACT( rmask );
lrg.compute_set_mask_size();
} else { // Common case: size 1 bound removal
@@ -763,8 +766,8 @@
// Newly live things assumed live from here to top of block
lrg._area += cost;
// Adjust register pressure
- if( lrg.mask().is_UP() && lrg.mask_size() ) {
- if( lrg._is_float ) {
+ if (lrg.mask().is_UP() && lrg.mask_size()) {
+ if (lrg._is_float || lrg._is_vector) {
pressure[1] += lrg.reg_pressure();
#ifdef EXACT_PRESSURE
if( pressure[1] > b->_freg_pressure )
--- a/hotspot/src/share/vm/opto/lcm.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/lcm.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -139,6 +139,7 @@
int iop = mach->ideal_Opcode();
switch( iop ) {
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
@@ -445,6 +446,11 @@
if( e->is_MachNullCheck() && e->in(1) == n )
continue;
+ // Schedule IV increment last.
+ if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
+ e->in(1)->in(1) == n && n->is_iteratively_computed())
+ continue;
+
uint n_choice = 2;
// See if this instruction is consumed by a branch. If so, then (as the
--- a/hotspot/src/share/vm/opto/library_call.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/library_call.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -3592,8 +3592,10 @@
}
// Bail out if length is negative.
- // ...Not needed, since the new_array will throw the right exception.
- //generate_negative_guard(length, bailout, &length);
+ // Without this the new_array would throw
+ // NegativeArraySizeException but IllegalArgumentException is what
+ // should be thrown
+ generate_negative_guard(length, bailout, &length);
if (bailout->req() > 1) {
PreserveJVMState pjvms(this);
@@ -3617,7 +3619,9 @@
// Extreme case: Arrays.copyOf((Integer[])x, 10, String[].class).
// This will fail a store-check if x contains any non-nulls.
bool disjoint_bases = true;
- bool length_never_negative = true;
+ // if start > orig_length then the length of the copy may be
+ // negative.
+ bool length_never_negative = !is_copyOfRange;
generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
original, start, newcopy, intcon(0), moved,
disjoint_bases, length_never_negative);
--- a/hotspot/src/share/vm/opto/loopnode.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/loopnode.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -2751,7 +2751,8 @@
// Do not count uncommon calls
if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
Node *iff = n->in(0)->in(0);
- if( !iff->is_If() ||
+ // No calls of any kind for vectorized loops.
+ if( UseSuperWord || !iff->is_If() ||
(n->in(0)->Opcode() == Op_IfFalse &&
(1.0 - iff->as_If()->_prob) >= 0.01) ||
(iff->as_If()->_prob >= 0.01) )
@@ -3216,7 +3217,8 @@
case Op_ModF:
case Op_ModD:
case Op_LoadB: // Same with Loads; they can sink
- case Op_LoadUS: // during loop optimizations.
+ case Op_LoadUB: // during loop optimizations.
+ case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
case Op_LoadI:
--- a/hotspot/src/share/vm/opto/machnode.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/machnode.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -439,9 +439,9 @@
// Don't remateralize somebody with bound inputs - it stretches a
// fixed register lifetime.
uint idx = oper_input_base();
- if( req() > idx ) {
+ if (req() > idx) {
const RegMask &rm = in_RegMask(idx);
- if( rm.is_bound1() || rm.is_bound2() )
+ if (rm.is_bound(ideal_reg()))
return false;
}
--- a/hotspot/src/share/vm/opto/machnode.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/machnode.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -319,6 +319,7 @@
class MachTypeNode : public MachNode {
virtual uint size_of() const { return sizeof(*this); } // Size is bigger
public:
+ MachTypeNode( ) {}
const Type *_bottom_type;
virtual const class Type *bottom_type() const { return _bottom_type; }
@@ -370,12 +371,12 @@
//------------------------------MachConstantNode-------------------------------
// Machine node that holds a constant which is stored in the constant table.
-class MachConstantNode : public MachNode {
+class MachConstantNode : public MachTypeNode {
protected:
Compile::Constant _constant; // This node's constant.
public:
- MachConstantNode() : MachNode() {
+ MachConstantNode() : MachTypeNode() {
init_class_id(Class_MachConstant);
}
--- a/hotspot/src/share/vm/opto/matcher.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/matcher.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
+#include "opto/vectornode.hpp"
#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#ifdef TARGET_ARCH_MODEL_x86_32
@@ -58,18 +59,6 @@
OptoReg::Name OptoReg::c_frame_pointer;
-
-
-const int Matcher::base2reg[Type::lastype] = {
- Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
- Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
- Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
- 0, 0/*abio*/,
- Op_RegP /* Return address */, 0, /* the memories */
- Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
- 0 /*bottom*/
-};
-
const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
RegMask Matcher::mreg2regmask[_last_Mach_Reg];
RegMask Matcher::STACK_ONLY_mask;
@@ -107,6 +96,10 @@
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
+ idealreg2spillmask [Op_VecS] = NULL;
+ idealreg2spillmask [Op_VecD] = NULL;
+ idealreg2spillmask [Op_VecX] = NULL;
+ idealreg2spillmask [Op_VecY] = NULL;
idealreg2debugmask [Op_RegI] = NULL;
idealreg2debugmask [Op_RegN] = NULL;
@@ -114,6 +107,10 @@
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
+ idealreg2debugmask [Op_VecS] = NULL;
+ idealreg2debugmask [Op_VecD] = NULL;
+ idealreg2debugmask [Op_VecX] = NULL;
+ idealreg2debugmask [Op_VecY] = NULL;
idealreg2mhdebugmask[Op_RegI] = NULL;
idealreg2mhdebugmask[Op_RegN] = NULL;
@@ -121,6 +118,10 @@
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
+ idealreg2mhdebugmask[Op_VecS] = NULL;
+ idealreg2mhdebugmask[Op_VecD] = NULL;
+ idealreg2mhdebugmask[Op_VecX] = NULL;
+ idealreg2mhdebugmask[Op_VecY] = NULL;
debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
}
@@ -134,7 +135,7 @@
warped = OptoReg::add(warped, C->out_preserve_stack_slots());
if( warped >= _in_arg_limit )
_in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
return OptoReg::Bad;
@@ -302,7 +303,7 @@
_out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
assert( is_even(_out_arg_limit), "out_preserve must be even" );
- if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
// the compiler cannot represent this method's calling sequence
C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
}
@@ -428,7 +429,7 @@
void Matcher::init_first_stack_mask() {
// Allocate storage for spill masks as masks for the appropriate load type.
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6);
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
idealreg2spillmask [Op_RegN] = &rms[0];
idealreg2spillmask [Op_RegI] = &rms[1];
@@ -451,6 +452,11 @@
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
+ idealreg2spillmask [Op_VecS] = &rms[18];
+ idealreg2spillmask [Op_VecD] = &rms[19];
+ idealreg2spillmask [Op_VecX] = &rms[20];
+ idealreg2spillmask [Op_VecY] = &rms[21];
+
OptoReg::Name i;
// At first, start with the empty mask
@@ -462,7 +468,7 @@
C->FIRST_STACK_mask().Insert(i);
// Add in all bits past the outgoing argument area
- guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+ guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
"must be able to represent all call arguments in reg mask");
init = _out_arg_limit;
for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
@@ -472,21 +478,48 @@
C->FIRST_STACK_mask().set_AllStack();
// Make spill masks. Registers for their class, plus FIRST_STACK_mask.
+ RegMask aligned_stack_mask = C->FIRST_STACK_mask();
+ // Keep spill masks aligned.
+ aligned_stack_mask.clear_to_pairs();
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+
+ *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
*idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
+#else
+ idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
#endif
*idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
- idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
*idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
*idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
- idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
- *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
- idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+ idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
+ idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
+ idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
+ idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
+ assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
+ *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
+ idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
+ }
if (UseFPUForSpilling) {
// This mask logic assumes that the spill operations are
// symmetric and that the registers involved are the same size.
@@ -807,6 +840,25 @@
idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+
+ // Vector regmasks.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
+ MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
+ idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
+ idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
+ idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
+ idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
+ }
}
#ifdef ASSERT
@@ -1063,7 +1115,7 @@
// that is killed by the call.
if( warped >= out_arg_limit_per_call )
out_arg_limit_per_call = OptoReg::add(warped,1);
- if (!RegMask::can_represent(warped)) {
+ if (!RegMask::can_represent_arg(warped)) {
C->record_method_not_compilable_all_tiers("unsupported calling sequence");
return OptoReg::Bad;
}
@@ -1251,7 +1303,7 @@
// this killed area.
uint r_cnt = mcall->tf()->range()->cnt();
MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
- if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+ if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
} else {
for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
--- a/hotspot/src/share/vm/opto/matcher.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/matcher.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,10 +250,21 @@
static const bool convL2FSupported(void);
// Vector width in bytes
- static const uint vector_width_in_bytes(void);
+ static const int vector_width_in_bytes(BasicType bt);
+
+ // Limits on vector size (number of elements).
+ static const int max_vector_size(const BasicType bt);
+ static const int min_vector_size(const BasicType bt);
+ static const bool vector_size_supported(const BasicType bt, int size) {
+ return (Matcher::max_vector_size(bt) >= size &&
+ Matcher::min_vector_size(bt) <= size);
+ }
// Vector ideal reg
- static const uint vector_ideal_reg(void);
+ static const int vector_ideal_reg(int len);
+
+ // CPU supports misaligned vectors store/load.
+ static const bool misaligned_vectors_ok();
// Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
// The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
--- a/hotspot/src/share/vm/opto/memnode.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/memnode.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1543,6 +1543,7 @@
// had an original form like p1:(AddP x x (LShiftL quux 3)), where the
// expression (LShiftL quux 3) independently optimized to the constant 8.
if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+ && (_type->isa_vect() == NULL)
&& Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
// t might actually be lower than _type, if _type is a unique
// concrete subclass of abstract class t.
--- a/hotspot/src/share/vm/opto/mulnode.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/mulnode.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,9 @@
class MulNode : public Node {
virtual uint hash() const;
public:
- MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {}
+ MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+ init_class_id(Class_Mul);
+ }
// Handle algebraic identities here. If we have an identity, return the Node
// we are equivalent to. We look for "add of zero" as an identity.
--- a/hotspot/src/share/vm/opto/node.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/node.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -1576,6 +1576,9 @@
} else {
tty->print("no type");
}
+ } else if (t->isa_vect() && this->is_MachSpillCopy()) {
+ // Dump MachSpillcopy vector type.
+ t->dump();
}
if (is_new) {
debug_only(dump_orig(debug_orig()));
--- a/hotspot/src/share/vm/opto/node.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/node.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -100,6 +100,7 @@
class MemBarStoreStoreNode;
class MemNode;
class MergeMemNode;
+class MulNode;
class MultiNode;
class MultiBranchNode;
class NeverBranchNode;
@@ -133,8 +134,8 @@
class TypeNode;
class UnlockNode;
class VectorNode;
-class VectorLoadNode;
-class VectorStoreNode;
+class LoadVectorNode;
+class StoreVectorNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {
@@ -609,9 +610,9 @@
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
- DEFINE_CLASS_ID(VectorLoad, Load, 0)
+ DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(Store, Mem, 1)
- DEFINE_CLASS_ID(VectorStore, Store, 0)
+ DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(Region, Node, 5)
@@ -629,8 +630,9 @@
DEFINE_CLASS_ID(AddP, Node, 9)
DEFINE_CLASS_ID(BoxLock, Node, 10)
DEFINE_CLASS_ID(Add, Node, 11)
- DEFINE_CLASS_ID(Vector, Node, 12)
- DEFINE_CLASS_ID(ClearArray, Node, 13)
+ DEFINE_CLASS_ID(Mul, Node, 12)
+ DEFINE_CLASS_ID(Vector, Node, 13)
+ DEFINE_CLASS_ID(ClearArray, Node, 14)
_max_classes = ClassMask_ClearArray
};
@@ -752,6 +754,7 @@
DEFINE_CLASS_QUERY(MemBar)
DEFINE_CLASS_QUERY(MemBarStoreStore)
DEFINE_CLASS_QUERY(MergeMem)
+ DEFINE_CLASS_QUERY(Mul)
DEFINE_CLASS_QUERY(Multi)
DEFINE_CLASS_QUERY(MultiBranch)
DEFINE_CLASS_QUERY(Parm)
@@ -767,8 +770,8 @@
DEFINE_CLASS_QUERY(Sub)
DEFINE_CLASS_QUERY(Type)
DEFINE_CLASS_QUERY(Vector)
- DEFINE_CLASS_QUERY(VectorLoad)
- DEFINE_CLASS_QUERY(VectorStore)
+ DEFINE_CLASS_QUERY(LoadVector)
+ DEFINE_CLASS_QUERY(StoreVector)
DEFINE_CLASS_QUERY(Unlock)
#undef DEFINE_CLASS_QUERY
--- a/hotspot/src/share/vm/opto/opcodes.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/opcodes.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,10 @@
"RegD",
"RegL",
"RegFlags",
+ "VecS",
+ "VecD",
+ "VecX",
+ "VecY",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",
--- a/hotspot/src/share/vm/opto/opcodes.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/opcodes.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,10 @@
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
+ macro(VecS) // Machine vectors register
+ macro(VecD) // Machine vectord register
+ macro(VecX) // Machine vectorx register
+ macro(VecY) // Machine vectory register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"
--- a/hotspot/src/share/vm/opto/postaloc.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/postaloc.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,13 +27,15 @@
#include "opto/chaitin.hpp"
#include "opto/machnode.hpp"
-// see if this register kind does not requires two registers
-static bool is_single_register(uint x) {
-#ifdef _LP64
- return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
-#else
- return (x != Op_RegD && x != Op_RegL);
-#endif
+// See if this register (or pairs, or vector) already contains the value.
+static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
+ Node_List& value) {
+ for (int i = 0; i < n_regs; i++) {
+ OptoReg::Name nreg = OptoReg::add(reg,-i);
+ if (value[nreg] != val)
+ return false;
+ }
+ return true;
}
//---------------------------may_be_copy_of_callee-----------------------------
@@ -167,9 +169,11 @@
const RegMask &use_mask = n->in_RegMask(idx);
bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
: (use_mask.is_AllStack() != 0));
- // Check for a copy to or from a misaligned pair.
- can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
-
+ if (!RegMask::is_vector(def->ideal_reg())) {
+ // Check for a copy to or from a misaligned pair.
+ // It is workaround for a sparc with misaligned pairs.
+ can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
+ }
if (!can_use)
return 0;
@@ -263,18 +267,16 @@
val = skip_copies(n->in(k));
}
- if( val == x ) return blk_adjust; // No progress?
+ if (val == x) return blk_adjust; // No progress?
- bool single = is_single_register(val->ideal_reg());
+ int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = n2lidx(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
// never-clobbered original-def register)
- if( value[val_reg] == val &&
- // Doubles check both halves
- ( single || value[val_reg-1] == val ) ) {
+ if (register_contains_value(val, val_reg, n_regs, value)) {
blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
return blk_adjust;
@@ -306,9 +308,10 @@
}
Node *vv = value[reg];
- if( !single ) { // Doubles check for aligned-adjacent pair
- if( (reg&1)==0 ) continue; // Wrong half of a pair
- if( vv != value[reg-1] ) continue; // Not a complete pair
+ if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
+ uint last = (n_regs-1); // Looking for the last part of a set
+ if ((reg&last) != last) continue; // Wrong part of a set
+ if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
if( vv == val || // Got a direct hit?
(t && vv && vv->bottom_type() == t && vv->is_Mach() &&
@@ -526,8 +529,9 @@
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
- OptoReg::Name preg_lo = OptoReg::add(preg,-1);
- if( !is_single_register(phi->ideal_reg()) ) {
+ int n_regs = RegMask::num_registers(phi->ideal_reg());
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
regnd.map(preg_lo,phi);
}
@@ -568,13 +572,16 @@
value.map(ureg,valdef); // record improved reaching-def info
regnd.map(ureg, def);
// Record other half of doubles
- OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
- if( !is_single_register(def->ideal_reg()) &&
- ( !RegMask::can_represent(ureg_lo) ||
- lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
- !value[ureg_lo] ) {
- value.map(ureg_lo,valdef); // record improved reaching-def info
- regnd.map(ureg_lo, def);
+ uint def_ideal_reg = def->ideal_reg();
+ int n_regs = RegMask::num_registers(def_ideal_reg);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
+ if (!value[ureg_lo] &&
+ (!RegMask::can_represent(ureg_lo) ||
+ lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
+ value.map(ureg_lo,valdef); // record improved reaching-def info
+ regnd.map(ureg_lo, def);
+ }
}
}
}
@@ -607,7 +614,8 @@
}
uint n_ideal_reg = n->ideal_reg();
- if( is_single_register(n_ideal_reg) ) {
+ int n_regs = RegMask::num_registers(n_ideal_reg);
+ if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
@@ -625,6 +633,25 @@
assert( n->is_Copy(), "" );
j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
}
+ } else if (RegMask::is_vector(n_ideal_reg)) {
+ // If Node 'n' does not change the value mapped by the register,
+ // then 'n' is a useless copy. Do not update the register->node
+ // mapping so 'n' will go dead.
+ if (!register_contains_value(val, nreg, n_regs, value)) {
+ // Update the mapping: record new Node defined by the register
+ regnd.map(nreg,n);
+ // Update mapping for defined *value*, which is the defined
+ // Node after skipping all copies.
+ value.map(nreg,val);
+ for (int l = 1; l < n_regs; l++) {
+ OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
+ regnd.map(nreg_lo, n );
+ value.map(nreg_lo,val);
+ }
+ } else if (n->is_Copy()) {
+ // Note: vector can't be constant and can't be copy of callee.
+ j -= replace_and_yank_if_dead(n, nreg, b, value, regnd);
+ }
} else {
// If the value occupies a register pair, record same info
// in both registers.
--- a/hotspot/src/share/vm/opto/reg_split.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/reg_split.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,12 +74,13 @@
const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
const RegMask *w_o_mask;
+ int num_regs = RegMask::num_registers(ireg);
+ bool is_vect = RegMask::is_vector(ireg);
if( w_mask->overlap( *o_mask ) && // Overlap AND
- ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
-#ifdef _LP64
- && ireg != Op_RegP
-#endif
- ) || o_mask->is_aligned_Pairs()) ) {
+ ((num_regs == 1) // Single use or aligned
+ || is_vect // or vector
+ || !is_vect && o_mask->is_aligned_pairs()) ) {
+ assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
// Don't come here for mis-aligned doubles
w_o_mask = w_mask;
} else { // wide ideal mask does not overlap with o_mask
@@ -400,15 +401,17 @@
// CNC - Turned off 7/8/99, causes too much spilling
// if( lrg->_is_bound ) return false;
+ // Use float pressure numbers for vectors.
+ bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
// Not yet reached the high-pressure cutoff point, so low pressure
- uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+ uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
if( insidx < hrp_idx ) return false;
// Register pressure for the block as a whole depends on reg class
- int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+ int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
// Bound live ranges will split at the binding points first;
// Intermediate splits should assume the live range's register set
// got "freed up" and that num_regs will become INT_PRESSURE.
- int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+ int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
// Effective register pressure limit.
int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
@@ -794,12 +797,15 @@
if( i < n->req() ) break;
insert_point--;
}
+ uint orig_eidx = b->end_idx();
maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
- insidx++;
+ // Spill of NULL check mem op goes into the following block.
+ if (b->end_idx() > orig_eidx)
+ insidx++;
}
// This is a new DEF, so update UP
UPblock[slidx] = false;
@@ -960,7 +966,7 @@
// Grab register mask info
const RegMask &dmask = def->out_RegMask();
const RegMask &umask = n->in_RegMask(inpidx);
-
+ bool is_vect = RegMask::is_vector(def->ideal_reg());
assert(inpidx < oopoff, "cannot use-split oop map info");
bool dup = UPblock[slidx];
@@ -972,7 +978,7 @@
if( !umask.is_AllStack() &&
(int)umask.Size() <= lrgs(useidx).num_regs() &&
(!def->rematerialize() ||
- umask.is_misaligned_Pair())) {
+ !is_vect && umask.is_misaligned_pair())) {
// These need a Split regardless of overlap or pressure
// SPLIT - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
@@ -1123,10 +1129,12 @@
// Grab UP info for DEF
const RegMask &dmask = n->out_RegMask();
bool defup = dmask.is_UP();
+ int ireg = n->ideal_reg();
+ bool is_vect = RegMask::is_vector(ireg);
// Only split at Def if this is a HRP block or bound (and spilled once)
if( !n->rematerialize() &&
- (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
- (deflrg._direct_conflict || deflrg._must_spill)) ||
+ (((dmask.is_bound(ireg) || !is_vect && dmask.is_misaligned_pair()) &&
+ (deflrg._direct_conflict || deflrg._must_spill)) ||
// Check for LRG being up in a register and we are inside a high
// pressure area. Spill it down immediately.
(defup && is_high_pressure(b,&deflrg,insidx))) ) {
--- a/hotspot/src/share/vm/opto/regmask.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/regmask.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -129,11 +129,34 @@
0
);
+//=============================================================================
+bool RegMask::is_vector(uint ireg) {
+ return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
+}
+
+int RegMask::num_registers(uint ireg) {
+ switch(ireg) {
+ case Op_VecY:
+ return 8;
+ case Op_VecX:
+ return 4;
+ case Op_VecD:
+ case Op_RegD:
+ case Op_RegL:
+#ifdef _LP64
+ case Op_RegP:
+#endif
+ return 2;
+ }
+ // Op_VecS and the rest of the ideal registers.
+ return 1;
+}
+
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
OptoReg::Name RegMask::find_first_pair() const {
- VerifyPairs();
+ verify_pairs();
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
@@ -146,30 +169,30 @@
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
-void RegMask::ClearToPairs() {
+void RegMask::clear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------SmearToPairs-----------------------------------
// Smear out partial bits; leave only bit pairs
-void RegMask::SmearToPairs() {
+void RegMask::smear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
_A[i] = bits;
}
- VerifyPairs();
+ verify_pairs();
}
//------------------------------is_aligned_pairs-------------------------------
-bool RegMask::is_aligned_Pairs() const {
+bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
@@ -204,7 +227,7 @@
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
-int RegMask::is_bound2() const {
+int RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
@@ -226,6 +249,132 @@
return true;
}
+static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
+//------------------------------find_first_set---------------------------------
+// Find the lowest-numbered register set in the mask. Return the
+// HIGHEST register number in the set, or BAD if no sets.
+// Works also for size 1.
+OptoReg::Name RegMask::find_first_set(int size) const {
+ verify_sets(size);
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i]) { // Found some bits
+ int bit = _A[i] & -_A[i]; // Extract low bit
+ // Convert to bit number, return hi bit in set
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
+ }
+ }
+ return OptoReg::Bad;
+}
+
+//------------------------------clear_to_sets----------------------------------
+// Clear out partial bits; leave only aligned adjacent bit sets
+void RegMask::clear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = (bits & low_bits_mask);
+ for (int j = 1; j < size; j++) {
+ sets = (bits & (sets<<1)); // filter bits which produce whole sets
+ }
+ sets |= (sets>>1); // Smear 1 hi-bit into a set
+ if (size > 2) {
+ sets |= (sets>>2); // Smear 2 hi-bits into a set
+ if (size > 4) {
+ sets |= (sets>>4); // Smear 4 hi-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
+//------------------------------smear_to_sets----------------------------------
+// Smear out partial bits to aligned adjacent bit sets
+void RegMask::smear_to_sets(int size) {
+ if (size == 1) return;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ int sets = 0;
+ for (int j = 0; j < size; j++) {
+ sets |= (bits & low_bits_mask); // collect partial bits
+ bits = bits>>1;
+ }
+ sets |= (sets<<1); // Smear 1 lo-bit into a set
+ if (size > 2) {
+ sets |= (sets<<2); // Smear 2 lo-bits into a set
+ if (size > 4) {
+ sets |= (sets<<4); // Smear 4 lo-bits into a set
+ }
+ }
+ _A[i] = sets;
+ }
+ verify_sets(size);
+}
+
+//------------------------------is_aligned_sets-------------------------------
+bool RegMask::is_aligned_sets(int size) const {
+ if (size == 1) return true;
+ assert(2 <= size && size <= 8, "update low bits table");
+ assert(is_power_of_2(size), "sanity");
+ int low_bits_mask = low_bits[size>>2];
+ // Assert that the register mask contains only bit sets.
+ for (int i = 0; i < RM_SIZE; i++) {
+ int bits = _A[i];
+ while (bits) { // Check bits for pairing
+ int bit = bits & -bits; // Extract low bit
+ // Low bit is not at a set-aligned position means it is misaligned.
+ if ((bit & low_bits_mask) == 0) return false;
+ // Do extra work since (bit << size) may overflow.
+ int hi_bit = bit << (size-1); // high bit
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ // Check for aligned adjacent bits in this set
+ if ((bits & set) != set) return false;
+ bits -= set; // Remove this set
+ }
+ }
+ return true;
+}
+
+//------------------------------is_bound_set-----------------------------------
+// Return TRUE if the mask contains one adjacent set of bits and no other bits.
+// Works also for size 1. A set may be split across two adjacent mask words.
+int RegMask::is_bound_set(int size) const {
+ if( is_AllStack() ) return false;
+ assert(1 <= size && size <= 8, "update low bits table");
+ int bit = -1; // Set to hold the one bit allowed
+ for (int i = 0; i < RM_SIZE; i++) {
+ if (_A[i] ) { // Found some bits
+ if (bit != -1)
+ return false; // Already had bits, so fail
+ bit = _A[i] & -_A[i]; // Extract 1 bit from mask
+ int hi_bit = bit << (size-1); // high bit
+ if (hi_bit != 0) { // Bit set stays in same word?
+ int set = hi_bit + ((hi_bit-1) & ~(bit-1));
+ if (set != _A[i])
+ return false; // Require adjacent bit set and no more bits
+ } else { // Else it's a split-set case
+ if (((-1) & ~(bit-1)) != _A[i])
+ return false; // Found many bits, so fail
+ i++; // Skip iteration forward and check high part
+ assert(size <= 8, "update next code");
+ // The lower 24 bits should be 0 since it is split case and size <= 8.
+ int set = bit>>24;
+ set = set & -set; // Remove sign extension.
+ set = (((set << size) - 1) >> 8);
+ if (i >= RM_SIZE || _A[i] != set) return false; // Require the remaining low bits in next word; fail if there is no next word
+ }
+ }
+ }
+ // True for both the empty mask and for a bit set
+ return true;
+}
+
//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
--- a/hotspot/src/share/vm/opto/regmask.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/regmask.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -113,7 +113,11 @@
// the controlling alignment constraint. Note that this alignment
// requirement is internal to the allocator, and independent of any
// particular platform.
- enum { SlotsPerLong = 2 };
+ enum { SlotsPerLong = 2,
+ SlotsPerVecS = 1,
+ SlotsPerVecD = 2,
+ SlotsPerVecX = 4,
+ SlotsPerVecY = 8 };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@@ -193,20 +197,53 @@
OptoReg::Name find_first_pair() const;
// Clear out partial bits; leave only aligned adjacent bit pairs.
- void ClearToPairs();
+ void clear_to_pairs();
// Smear out partial bits; leave only aligned adjacent bit pairs.
- void SmearToPairs();
+ void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
- void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+ void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
- bool is_aligned_Pairs() const;
+ bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
- bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+ bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
// Test for single register
int is_bound1() const;
// Test for a single adjacent pair
- int is_bound2() const;
+ int is_bound_pair() const;
+ // Test for a single adjacent set of ideal register's size.
+ int is_bound(uint ireg) const {
+ if (is_vector(ireg)) {
+ if (is_bound_set(num_registers(ireg)))
+ return true;
+ } else if (is_bound1() || is_bound_pair()) {
+ return true;
+ }
+ return false;
+ }
+
+ // Find the lowest-numbered register set in the mask. Return the
+ // HIGHEST register number in the set, or BAD if no sets.
+ // Assert that the mask contains only bit sets.
+ OptoReg::Name find_first_set(int size) const;
+
+ // Clear out partial bits; leave only aligned adjacent bit sets of size.
+ void clear_to_sets(int size);
+ // Smear out partial bits to aligned adjacent bit sets.
+ void smear_to_sets(int size);
+ // Verify that the mask contains only aligned adjacent bit sets
+ void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
+ // Test that the mask contains only aligned adjacent bit sets
+ bool is_aligned_sets(int size) const;
+
+ // mask is a set of misaligned registers
+ bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
+
+ // Test for a single adjacent set
+ int is_bound_set(int size) const;
+
+ static bool is_vector(uint ireg);
+ static int num_registers(uint ireg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
@@ -280,9 +317,15 @@
static bool can_represent(OptoReg::Name reg) {
// NOTE: -1 in computation reflects the usage of the last
- // bit of the regmask as an infinite stack flag.
+ // bit of the regmask as an infinite stack flag. Keeping the mask
+ // aligned for the largest value (VecY) is done in can_represent_arg().
return (int)reg < (int)(CHUNK_SIZE-1);
}
+ static bool can_represent_arg(OptoReg::Name reg) {
+ // NOTE: -SlotsPerVecY in computation reflects the need
+ // to keep mask aligned for largest value (VecY).
+ return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
+ }
};
// Do not use this constant directly in client code!
--- a/hotspot/src/share/vm/opto/stringopts.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/stringopts.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -112,6 +112,7 @@
_arguments->ins_req(0, value);
_mode.insert_before(0, mode);
}
+
void push_string(Node* value) {
push(value, StringMode);
}
@@ -125,9 +126,56 @@
push(value, CharMode);
}
+ static bool is_SB_toString(Node* call) {
+ if (call->is_CallStaticJava()) {
+ CallStaticJavaNode* csj = call->as_CallStaticJava();
+ ciMethod* m = csj->method();
+ if (m != NULL &&
+ (m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString ||
+ m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static Node* skip_string_null_check(Node* value) {
+ // Look for a diamond shaped Null check of toString() result
+ // (could be code from String.valueOf()):
+ // (Proj == NULL) ? "null" : "CastPP(Proj)#NotNULL"
+ if (value->is_Phi()) {
+ int true_path = value->as_Phi()->is_diamond_phi();
+ if (true_path != 0) {
+ // phi->region->if_proj->ifnode->bool
+ BoolNode* b = value->in(0)->in(1)->in(0)->in(1)->as_Bool();
+ Node* cmp = b->in(1);
+ Node* v1 = cmp->in(1);
+ Node* v2 = cmp->in(2);
+ // Null check of the return of toString which can simply be skipped.
+ if (b->_test._test == BoolTest::ne &&
+ v2->bottom_type() == TypePtr::NULL_PTR &&
+ value->in(true_path)->Opcode() == Op_CastPP &&
+ value->in(true_path)->in(1) == v1 &&
+ v1->is_Proj() && is_SB_toString(v1->in(0))) {
+ return v1;
+ }
+ }
+ }
+ return value;
+ }
+
Node* argument(int i) {
return _arguments->in(i);
}
+ Node* argument_uncast(int i) {
+ Node* arg = argument(i);
+ int amode = mode(i);
+ if (amode == StringConcat::StringMode ||
+ amode == StringConcat::StringNullCheckMode) {
+ arg = skip_string_null_check(arg);
+ }
+ return arg;
+ }
void set_argument(int i, Node* value) {
_arguments->set_req(i, value);
}
@@ -206,9 +254,11 @@
void StringConcat::eliminate_unneeded_control() {
- eliminate_initialize(begin()->initialization());
for (uint i = 0; i < _control.size(); i++) {
Node* n = _control.at(i);
+ if (n->is_Allocate()) {
+ eliminate_initialize(n->as_Allocate()->initialization());
+ }
if (n->is_Call()) {
if (n != _end) {
eliminate_call(n->as_Call());
@@ -239,14 +289,15 @@
assert(result->_control.contains(other->_end), "what?");
assert(result->_control.contains(_begin), "what?");
for (int x = 0; x < num_arguments(); x++) {
- if (argument(x) == arg) {
+ Node* argx = argument_uncast(x);
+ if (argx == arg) {
// replace the toString result with the all the arguments that
// made up the other StringConcat
for (int y = 0; y < other->num_arguments(); y++) {
result->append(other->argument(y), other->mode(y));
}
} else {
- result->append(argument(x), mode(x));
+ result->append(argx, mode(x));
}
}
result->set_allocation(other->_begin);
@@ -327,14 +378,9 @@
while (worklist.size() > 0) {
Node* ctrl = worklist.pop();
- if (ctrl->is_CallStaticJava()) {
+ if (StringConcat::is_SB_toString(ctrl)) {
CallStaticJavaNode* csj = ctrl->as_CallStaticJava();
- ciMethod* m = csj->method();
- if (m != NULL &&
- (m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString ||
- m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString)) {
- string_calls.push(csj);
- }
+ string_calls.push(csj);
}
if (ctrl->in(0) != NULL && !_visited.test_set(ctrl->in(0)->_idx)) {
worklist.push(ctrl->in(0));
@@ -550,44 +596,40 @@
for (int c = 0; c < concats.length(); c++) {
StringConcat* sc = concats.at(c);
for (int i = 0; i < sc->num_arguments(); i++) {
- Node* arg = sc->argument(i);
- if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
+ Node* arg = sc->argument_uncast(i);
+ if (arg->is_Proj() && StringConcat::is_SB_toString(arg->in(0))) {
CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
- if (csj->method() != NULL &&
- (csj->method()->intrinsic_id() == vmIntrinsics::_StringBuilder_toString ||
- csj->method()->intrinsic_id() == vmIntrinsics::_StringBuffer_toString)) {
- for (int o = 0; o < concats.length(); o++) {
- if (c == o) continue;
- StringConcat* other = concats.at(o);
- if (other->end() == csj) {
+ for (int o = 0; o < concats.length(); o++) {
+ if (c == o) continue;
+ StringConcat* other = concats.at(o);
+ if (other->end() == csj) {
#ifndef PRODUCT
- if (PrintOptimizeStringConcat) {
- tty->print_cr("considering stacked concats");
- }
+ if (PrintOptimizeStringConcat) {
+ tty->print_cr("considering stacked concats");
+ }
#endif
- StringConcat* merged = sc->merge(other, arg);
- if (merged->validate_control_flow()) {
+ StringConcat* merged = sc->merge(other, arg);
+ if (merged->validate_control_flow()) {
#ifndef PRODUCT
- if (PrintOptimizeStringConcat) {
- tty->print_cr("stacking would succeed");
- }
+ if (PrintOptimizeStringConcat) {
+ tty->print_cr("stacking would succeed");
+ }
#endif
- if (c < o) {
- concats.remove_at(o);
- concats.at_put(c, merged);
- } else {
- concats.remove_at(c);
- concats.at_put(o, merged);
- }
- goto restart;
+ if (c < o) {
+ concats.remove_at(o);
+ concats.at_put(c, merged);
} else {
+ concats.remove_at(c);
+ concats.at_put(o, merged);
+ }
+ goto restart;
+ } else {
#ifndef PRODUCT
- if (PrintOptimizeStringConcat) {
- tty->print_cr("stacking would fail");
- }
+ if (PrintOptimizeStringConcat) {
+ tty->print_cr("stacking would fail");
+ }
#endif
- }
}
}
}
--- a/hotspot/src/share/vm/opto/superword.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/superword.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,10 @@
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(UseSuperWord, "should be");
+ // Do vectors exist on this architecture?
+ if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
@@ -89,15 +93,12 @@
Node *pre_opaq1 = pre_end->limit();
if (pre_opaq1->Opcode() != Op_Opaque1) return;
- // Do vectors exist on this architecture?
- if (vector_width_in_bytes() == 0) return;
-
init(); // initialize data structures
set_lpt(lpt);
set_lp(cl);
- // For now, define one block which is the entire loop body
+ // For now, define one block which is the entire loop body
set_bb(cl);
assert(_packset.length() == 0, "packset must be empty");
@@ -177,7 +178,7 @@
Node_List memops;
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- if (n->is_Mem() && in_bb(n) &&
+ if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
is_java_primitive(n->as_Mem()->memory_type())) {
int align = memory_alignment(n->as_Mem(), 0);
if (align != bottom_align) {
@@ -185,54 +186,141 @@
}
}
}
- if (memops.size() == 0) return;
- // Find a memory reference to align to. The pre-loop trip count
- // is modified to align this reference to a vector-aligned address
- find_align_to_ref(memops);
- if (align_to_ref() == NULL) return;
+ Node_List align_to_refs;
+ int best_iv_adjustment = 0;
+ MemNode* best_align_to_mem_ref = NULL;
- SWPointer align_to_ref_p(align_to_ref(), this);
- int offset = align_to_ref_p.offset_in_bytes();
- int scale = align_to_ref_p.scale_in_bytes();
- int vw = vector_width_in_bytes();
- int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
- int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
-
-#ifndef PRODUCT
- if (TraceSuperWord)
- tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d",
- offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride());
-#endif
+ while (memops.size() != 0) {
+ // Find a memory reference to align to.
+ MemNode* mem_ref = find_align_to_ref(memops);
+ if (mem_ref == NULL) break;
+ align_to_refs.push(mem_ref);
+ int iv_adjustment = get_iv_adjustment(mem_ref);
- // Set alignment relative to "align_to_ref"
- for (int i = memops.size() - 1; i >= 0; i--) {
- MemNode* s = memops.at(i)->as_Mem();
- SWPointer p2(s, this);
- if (p2.comparable(align_to_ref_p)) {
- int align = memory_alignment(s, iv_adjustment);
- set_alignment(s, align);
- } else {
- memops.remove(i);
+ if (best_align_to_mem_ref == NULL) {
+ // Set memory reference which is the best from all memory operations
+ // to be used for alignment. The pre-loop trip count is modified to align
+ // this reference to a vector-aligned address.
+ best_align_to_mem_ref = mem_ref;
+ best_iv_adjustment = iv_adjustment;
}
- }
- // Create initial pack pairs of memory operations
- for (uint i = 0; i < memops.size(); i++) {
- Node* s1 = memops.at(i);
- for (uint j = 0; j < memops.size(); j++) {
- Node* s2 = memops.at(j);
- if (s1 != s2 && are_adjacent_refs(s1, s2)) {
- int align = alignment(s1);
- if (stmts_can_pack(s1, s2, align)) {
- Node_List* pair = new Node_List();
- pair->push(s1);
- pair->push(s2);
- _packset.append(pair);
+ SWPointer align_to_ref_p(mem_ref, this);
+ // Set alignment relative to "align_to_ref" for all related memory operations.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (isomorphic(s, mem_ref)) {
+ SWPointer p2(s, this);
+ if (p2.comparable(align_to_ref_p)) {
+ int align = memory_alignment(s, iv_adjustment);
+ set_alignment(s, align);
}
}
}
- }
+
+ // Create initial pack pairs of memory operations for which
+ // alignment is set and vectors will be aligned.
+ bool create_pack = true;
+ if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
+ if (!Matcher::misaligned_vectors_ok()) {
+ int vw = vector_width(mem_ref);
+ int vw_best = vector_width(best_align_to_mem_ref);
+ if (vw > vw_best) {
+ // Do not vectorize a memory access with more elements per vector
+ // if unaligned memory access is not allowed, because the number of
+ // iterations in the pre-loop will not be enough to align it.
+ create_pack = false;
+ }
+ }
+ } else {
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Can't allow vectorization of unaligned memory accesses with the
+ // same type since it could be overlapped accesses to the same array.
+ create_pack = false;
+ } else {
+ // Allow independent (different type) unaligned memory operations
+ // if HW supports them.
+ if (!Matcher::misaligned_vectors_ok()) {
+ create_pack = false;
+ } else {
+ // Check if packs of the same memory type but
+ // with a different alignment were created before.
+ for (uint i = 0; i < align_to_refs.size(); i++) {
+ MemNode* mr = align_to_refs.at(i)->as_Mem();
+ if (same_velt_type(mr, mem_ref) &&
+ memory_alignment(mr, iv_adjustment) != 0)
+ create_pack = false;
+ }
+ }
+ }
+ }
+ if (create_pack) {
+ for (uint i = 0; i < memops.size(); i++) {
+ Node* s1 = memops.at(i);
+ int align = alignment(s1);
+ if (align == top_align) continue;
+ for (uint j = 0; j < memops.size(); j++) {
+ Node* s2 = memops.at(j);
+ if (alignment(s2) == top_align) continue;
+ if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ if (stmts_can_pack(s1, s2, align)) {
+ Node_List* pair = new Node_List();
+ pair->push(s1);
+ pair->push(s2);
+ _packset.append(pair);
+ }
+ }
+ }
+ }
+ } else { // Don't create unaligned pack
+ // First, remove remaining memory ops of the same type from the list.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ memops.remove(i);
+ }
+ }
+
+ // Second, remove already constructed packs of the same type.
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ if (same_velt_type(s, mem_ref)) {
+ remove_pack_at(i);
+ }
+ }
+
+ // If needed find the best memory reference for loop alignment again.
+ if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
+ // Put memory ops from remaining packs back on memops list for
+ // the best alignment search.
+ uint orig_msize = memops.size();
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ MemNode* s = p->at(0)->as_Mem();
+ assert(!same_velt_type(s, mem_ref), "sanity");
+ memops.push(s);
+ }
+ best_align_to_mem_ref = find_align_to_ref(memops); // update the loop-wide variable, do not shadow it
+ if (best_align_to_mem_ref == NULL) break;
+ best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
+ // Restore list.
+ while (memops.size() > orig_msize)
+ (void)memops.pop();
+ }
+ } // unaligned memory accesses
+
+ // Remove used mem nodes.
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* m = memops.at(i)->as_Mem();
+ if (alignment(m) != top_align) {
+ memops.remove(i);
+ }
+ }
+
+ } // while (memops.size() != 0)
+ set_align_to_ref(best_align_to_mem_ref);
#ifndef PRODUCT
if (TraceSuperWord) {
@@ -246,7 +334,7 @@
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
-void SuperWord::find_align_to_ref(Node_List &memops) {
+MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
// Count number of comparable memory ops
@@ -270,20 +358,28 @@
}
}
- // Find Store (or Load) with the greatest number of "comparable" references
+ // Find Store (or Load) with the greatest number of "comparable" references,
+ // biggest vector size, smallest data size and smallest iv offset.
int max_ct = 0;
+ int max_vw = 0;
int max_idx = -1;
int min_size = max_jint;
int min_iv_offset = max_jint;
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Store()) {
+ int vw = vector_width_in_bytes(s);
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -295,12 +391,18 @@
for (uint j = 0; j < memops.size(); j++) {
MemNode* s = memops.at(j)->as_Mem();
if (s->is_Load()) {
+ int vw = vector_width_in_bytes(s);
+ assert(vw > 1, "sanity");
SWPointer p(s, this);
- if (cmp_ct.at(j) > max_ct ||
- cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
- data_size(s) == min_size &&
- p.offset_in_bytes() < min_iv_offset)) {
+ if (cmp_ct.at(j) > max_ct ||
+ cmp_ct.at(j) == max_ct &&
+ (vw > max_vw ||
+ vw == max_vw &&
+ (data_size(s) < min_size ||
+ data_size(s) == min_size &&
+ (p.offset_in_bytes() < min_iv_offset)))) {
max_ct = cmp_ct.at(j);
+ max_vw = vw;
max_idx = j;
min_size = data_size(s);
min_iv_offset = p.offset_in_bytes();
@@ -309,10 +411,7 @@
}
}
- if (max_ct > 0)
- set_align_to_ref(memops.at(max_idx)->as_Mem());
-
-#ifndef PRODUCT
+#ifdef ASSERT
if (TraceSuperWord && Verbose) {
tty->print_cr("\nVector memops after find_align_to_refs");
for (uint i = 0; i < memops.size(); i++) {
@@ -321,6 +420,17 @@
}
}
#endif
+
+ if (max_ct > 0) {
+#ifdef ASSERT
+ if (TraceSuperWord) {
+ tty->print("\nVector align to node: ");
+ memops.at(max_idx)->as_Mem()->dump();
+ }
+#endif
+ return memops.at(max_idx)->as_Mem();
+ }
+ return NULL;
}
//------------------------------ref_is_alignable---------------------------
@@ -341,7 +451,8 @@
// If initial offset from start of object is computable,
// compute alignment within the vector.
- int vw = vector_width_in_bytes();
+ int vw = vector_width_in_bytes(p.mem());
+ assert(vw > 1, "sanity");
if (vw % span == 0) {
Node* init_nd = pre_end->init_trip();
if (init_nd->is_Con() && p.invar() == NULL) {
@@ -361,6 +472,25 @@
return false;
}
+//---------------------------get_iv_adjustment---------------------------
+// Calculate loop's iv adjustment for this memory ops.
+int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
+ SWPointer align_to_ref_p(mem_ref, this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ int vw = vector_width_in_bytes(mem_ref);
+ assert(vw > 1, "sanity");
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
+
+#ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw);
+#endif
+ return iv_adjustment;
+}
+
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
@@ -488,9 +618,13 @@
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
// Do not use superword for non-primitives
- if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) ||
- (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type())))
+ BasicType bt1 = velt_basic_type(s1);
+ BasicType bt2 = velt_basic_type(s2);
+ if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
return false;
+ if (Matcher::max_vector_size(bt1) < 2) {
+ return false; // No vectors for this type
+ }
if (isomorphic(s1, s2)) {
if (independent(s1, s2)) {
@@ -552,7 +686,7 @@
if (s1->Opcode() != s2->Opcode()) return false;
if (s1->req() != s2->req()) return false;
if (s1->in(0) != s2->in(0)) return false;
- if (velt_type(s1) != velt_type(s2)) return false;
+ if (!same_velt_type(s1, s2)) return false;
return true;
}
@@ -595,14 +729,16 @@
//------------------------------set_alignment---------------------------
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
set_alignment(s1, align);
- set_alignment(s2, align + data_size(s1));
+ if (align == top_align || align == bottom_align) {
+ set_alignment(s2, align);
+ } else {
+ set_alignment(s2, align + data_size(s1));
+ }
}
//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
- const Type* t = velt_type(s);
- BasicType bt = t->array_element_basic_type();
- int bsize = type2aelembytes(bt);
+ int bsize = type2aelembytes(velt_basic_type(s));
assert(bsize != 0, "valid size");
return bsize;
}
@@ -631,9 +767,9 @@
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
+ assert(p->size() == 2, "just checking");
Node* s1 = p->at(0);
Node* s2 = p->at(1);
- assert(p->size() == 2, "just checking");
assert(s1->req() == s2->req(), "just checking");
assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
@@ -718,7 +854,12 @@
for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
if (i1 != i2) {
- return false;
+ if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
+ // Further analysis relies on operands position matching.
+ u2->swap_edges(i1, i2);
+ } else {
+ return false;
+ }
}
} while (i1 < ct);
return true;
@@ -727,7 +868,7 @@
//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
int SuperWord::est_savings(Node* s1, Node* s2) {
- int save = 2 - 1; // 2 operations per instruction in packed form
+ int save_in = 2 - 1; // 2 operations per instruction in packed form
// inputs
for (uint i = 1; i < s1->req(); i++) {
@@ -735,17 +876,18 @@
Node* x2 = s2->in(i);
if (x1 != x2) {
if (are_adjacent_refs(x1, x2)) {
- save += adjacent_profit(x1, x2);
+ save_in += adjacent_profit(x1, x2);
} else if (!in_packset(x1, x2)) {
- save -= pack_cost(2);
+ save_in -= pack_cost(2);
} else {
- save += unpack_cost(2);
+ save_in += unpack_cost(2);
}
}
}
// uses of result
uint ct = 0;
+ int save_use = 0;
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* s1_use = s1->fast_out(i);
for (int j = 0; j < _packset.length(); j++) {
@@ -756,7 +898,7 @@
if (p->at(p->size()-1) == s2_use) {
ct++;
if (are_adjacent_refs(s1_use, s2_use)) {
- save += adjacent_profit(s1_use, s2_use);
+ save_use += adjacent_profit(s1_use, s2_use);
}
}
}
@@ -764,10 +906,10 @@
}
}
- if (ct < s1->outcnt()) save += unpack_cost(1);
- if (ct < s2->outcnt()) save += unpack_cost(1);
+ if (ct < s1->outcnt()) save_use += unpack_cost(1);
+ if (ct < s2->outcnt()) save_use += unpack_cost(1);
- return save;
+ return MAX2(save_in, save_use);
}
//------------------------------costs---------------------------
@@ -778,8 +920,9 @@
//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
- bool changed;
- do {
+ bool changed = true;
+ // Combine packs regardless of max vector size.
+ while (changed) {
changed = false;
for (int i = 0; i < _packset.length(); i++) {
Node_List* p1 = _packset.at(i);
@@ -787,6 +930,7 @@
for (int j = 0; j < _packset.length(); j++) {
Node_List* p2 = _packset.at(j);
if (p2 == NULL) continue;
+ if (i == j) continue;
if (p1->at(p1->size()-1) == p2->at(0)) {
for (uint k = 1; k < p2->size(); k++) {
p1->push(p2->at(k));
@@ -796,8 +940,39 @@
}
}
}
- } while (changed);
+ }
+ // Split packs which have size greater than max vector size.
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 != NULL) {
+ BasicType bt = velt_basic_type(p1->at(0));
+ uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
+ assert(is_power_of_2(max_vlen), "sanity");
+ uint psize = p1->size();
+ if (!is_power_of_2(psize)) {
+ // Skip pack which can't be vector.
+ // case1: for(...) { a[i] = i; } elements values are different (i+x)
+ // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
+ _packset.at_put(i, NULL);
+ continue;
+ }
+ if (psize > max_vlen) {
+ Node_List* pack = new Node_List();
+ for (uint j = 0; j < psize; j++) {
+ pack->push(p1->at(j));
+ if (pack->size() >= max_vlen) {
+ assert(is_power_of_2(pack->size()), "sanity");
+ _packset.append(pack);
+ pack = new Node_List();
+ }
+ }
+ _packset.at_put(i, NULL);
+ }
+ }
+ }
+
+ // Compress list.
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* p1 = _packset.at(i);
if (p1 == NULL) {
@@ -880,8 +1055,7 @@
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
Node* p0 = p->at(0);
- int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
- return vopc > 0 && Matcher::has_match_rule(vopc);
+ return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
}
//------------------------------profitable---------------------------
@@ -939,36 +1113,36 @@
}
//-------------------------------remove_and_insert-------------------
-//remove "current" from its current position in the memory graph and insert
-//it after the appropriate insertion point (lip or uip)
+// Remove "current" from its current position in the memory graph and insert
+// it after the appropriate insertion point (lip or uip).
void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
Node *uip, Unique_Node_List &sched_before) {
Node* my_mem = current->in(MemNode::Memory);
- _igvn.rehash_node_delayed(current);
- _igvn.hash_delete(my_mem);
+ bool sched_up = sched_before.member(current);
- //remove current_store from its current position in the memmory graph
+ // remove current_store from its current position in the memory graph
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem()) {
assert(use->in(MemNode::Memory) == current, "must be");
- _igvn.rehash_node_delayed(use);
if (use == prev) { // connect prev to my_mem
- use->set_req(MemNode::Memory, my_mem);
+ _igvn.replace_input_of(use, MemNode::Memory, my_mem);
+ --i; //deleted this edge; rescan position
} else if (sched_before.member(use)) {
- _igvn.hash_delete(uip);
- use->set_req(MemNode::Memory, uip);
+ if (!sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, uip);
+ --i; //deleted this edge; rescan position
+ }
} else {
- _igvn.hash_delete(lip);
- use->set_req(MemNode::Memory, lip);
+ if (sched_up) { // Will be moved together with current
+ _igvn.replace_input_of(use, MemNode::Memory, lip);
+ --i; //deleted this edge; rescan position
+ }
}
- --i; //deleted this edge; rescan position
}
}
- bool sched_up = sched_before.member(current);
Node *insert_pt = sched_up ? uip : lip;
- _igvn.hash_delete(insert_pt);
// all uses of insert_pt's memory state should use current's instead
for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
@@ -988,7 +1162,7 @@
}
//connect current to insert_pt
- current->set_req(MemNode::Memory, insert_pt);
+ _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
}
//------------------------------co_locate_pack----------------------------------
@@ -1025,7 +1199,7 @@
if (use->is_Mem() && use != previous)
memops.push(use);
}
- if(current == first) break;
+ if (current == first) break;
previous = current;
current = current->in(MemNode::Memory)->as_Mem();
}
@@ -1038,27 +1212,37 @@
Node *s2 = memops.at(j);
if (!independent(s1, s2)) {
if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
- schedule_before_pack.push(s1); //s1 must be scheduled before
+ schedule_before_pack.push(s1); // s1 must be scheduled before
Node_List* mem_pk = my_pack(s1);
if (mem_pk != NULL) {
for (uint ii = 0; ii < mem_pk->size(); ii++) {
- Node* s = mem_pk->at(ii); // follow partner
+ Node* s = mem_pk->at(ii); // follow partner
if (memops.member(s) && !schedule_before_pack.member(s))
schedule_before_pack.push(s);
}
}
+ break;
}
}
}
}
}
+ Node* upper_insert_pt = first->in(MemNode::Memory);
+ // Following code moves loads connected to upper_insert_pt below aliased stores.
+ // Collect such loads here and reconnect them back to upper_insert_pt later.
+ memops.clear();
+ for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
+ Node* use = upper_insert_pt->out(i);
+ if (!use->is_Store())
+ memops.push(use);
+ }
+
MemNode* lower_insert_pt = last;
- Node* upper_insert_pt = first->in(MemNode::Memory);
previous = last; //previous store in pk
current = last->in(MemNode::Memory)->as_Mem();
- //start scheduling from "last" to "first"
+ // start scheduling from "last" to "first"
while (true) {
assert(in_bb(current), "stay in block");
assert(in_pack(previous, pk), "previous stays in pack");
@@ -1066,16 +1250,13 @@
if (in_pack(current, pk)) {
// Forward users of my memory state (except "previous) to my input memory state
- _igvn.hash_delete(current);
for (DUIterator i = current->outs(); current->has_out(i); i++) {
Node* use = current->out(i);
if (use->is_Mem() && use != previous) {
assert(use->in(MemNode::Memory) == current, "must be");
if (schedule_before_pack.member(use)) {
- _igvn.hash_delete(upper_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
} else {
- _igvn.hash_delete(lower_insert_pt);
_igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
}
--i; // deleted this edge; rescan position
@@ -1089,6 +1270,14 @@
if (current == first) break;
current = my_mem->as_Mem();
} // end while
+
+ // Reconnect loads back to upper_insert_pt.
+ for (uint i = 0; i < memops.size(); i++) {
+ Node *ld = memops.at(i);
+ if (ld->in(MemNode::Memory) != upper_insert_pt) {
+ _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
+ }
+ }
} else if (pk->at(0)->is_Load()) { //load
// all loads in the pack should have the same memory state. By default,
// we use the memory state of the last load. However, if any load could
@@ -1149,35 +1338,30 @@
Node* vn = NULL;
Node* low_adr = p->at(0);
Node* first = executed_first(p);
+ int opc = n->Opcode();
if (n->is_Load()) {
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
-
+ vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
} else if (n->is_Store()) {
// Promote value to be stored to vector
Node* val = vector_opd(p, MemNode::ValueIn);
-
- int opc = n->Opcode();
Node* ctl = n->in(MemNode::Control);
Node* mem = first->in(MemNode::Memory);
Node* adr = low_adr->in(MemNode::Address);
const TypePtr* atyp = n->adr_type();
- vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
-
+ vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
} else if (n->req() == 3) {
// Promote operands to vector
Node* in1 = vector_opd(p, 1);
Node* in2 = vector_opd(p, 2);
- vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
-
+ vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
} else {
ShouldNotReachHere();
}
-
+ assert(vn != NULL, "sanity");
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
for (uint j = 0; j < p->size(); j++) {
@@ -1185,6 +1369,12 @@
_igvn.replace_node(pm, vn);
}
_igvn._worklist.push(vn);
+#ifdef ASSERT
+ if (TraceNewVectors) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
}
}
}
@@ -1207,10 +1397,10 @@
}
if (same_opd) {
- if (opd->is_Vector() || opd->is_VectorLoad()) {
+ if (opd->is_Vector() || opd->is_LoadVector()) {
return opd; // input is matching vector
}
- assert(!opd->is_VectorStore(), "such vector is not expected here");
+ assert(!opd->is_StoreVector(), "such vector is not expected here");
// Convert scalar input to vector with the same number of elements as
// p0's vector. Use p0's type because size of operand's container in
// vector should match p0's size regardless operand's size.
@@ -1219,12 +1409,18 @@
_phase->_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceNewVectors) {
+ tty->print("new Vector node: ");
+ vn->dump();
+ }
+#endif
return vn;
}
// Insert pack operation
- const Type* p0_t = velt_type(p0);
- PackNode* pk = PackNode::make(_phase->C, opd, p0_t);
+ BasicType bt = velt_basic_type(p0);
+ PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )
for (uint i = 1; i < vlen; i++) {
@@ -1232,10 +1428,16 @@
Node* in = pi->in(opd_idx);
assert(my_pack(in) == NULL, "Should already have been unpacked");
assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
- pk->add_opd(in);
+ pk->add_opd(i, in);
}
_phase->_igvn.register_new_node_with_optimizer(pk);
_phase->set_ctrl(pk, _phase->get_ctrl(opd));
+#ifdef ASSERT
+ if (TraceNewVectors) {
+ tty->print("new Vector node: ");
+ pk->dump();
+ }
+#endif
return pk;
}
@@ -1273,16 +1475,15 @@
// Insert extract operation
_igvn.hash_delete(def);
int def_pos = alignment(def) / data_size(def);
- const Type* def_t = velt_type(def);
- Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+ Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
_phase->_igvn.register_new_node_with_optimizer(ex);
_phase->set_ctrl(ex, _phase->get_ctrl(def));
_igvn.replace_input_of(use, idx, ex);
_igvn._worklist.push(def);
bb_insert_after(ex, bb_idx(def));
- set_velt_type(ex, def_t);
+ set_velt_type(ex, velt_type(def));
}
}
@@ -1509,10 +1710,7 @@
// Initial type
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
- const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
- : _igvn.type(n);
- const Type* vt = container_type(t);
- set_velt_type(n, vt);
+ set_velt_type(n, container_type(n));
}
// Propagate narrowed type backwards through operations
@@ -1543,7 +1741,7 @@
bool same_type = true;
for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
Node *use = in->fast_out(k);
- if (!in_bb(use) || velt_type(use) != vt) {
+ if (!in_bb(use) || !same_velt_type(use, n)) {
same_type = false;
break;
}
@@ -1575,20 +1773,24 @@
if (!p.valid()) {
return bottom_align;
}
+ int vw = vector_width_in_bytes(s);
+ if (vw < 2) {
+ return bottom_align; // No vectors for this type
+ }
int offset = p.offset_in_bytes();
offset += iv_adjust_in_bytes;
- int off_rem = offset % vector_width_in_bytes();
- int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+ int off_rem = offset % vw;
+ int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
return off_mod;
}
//---------------------------container_type---------------------------
// Smallest type containing range of values
-const Type* SuperWord::container_type(const Type* t) {
- const Type* tp = t->make_ptr();
- if (tp && tp->isa_aryptr()) {
- t = tp->is_aryptr()->elem();
+const Type* SuperWord::container_type(Node* n) {
+ if (n->is_Mem()) {
+ return Type::get_const_basic_type(n->as_Mem()->memory_type());
}
+ const Type* t = _igvn.type(n);
if (t->basic_type() == T_INT) {
if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
@@ -1599,11 +1801,22 @@
return t;
}
+// Returns true when n1 and n2 have compatible vector element types.
+// When both element types have basic type T_INT (this includes the narrowed
+// container types such as TypeInt::BOOL and TypeInt::BYTE), only the element
+// sizes reported by data_size() are compared; otherwise the two Type pointers
+// must be identical.
+bool SuperWord::same_velt_type(Node* n1, Node* n2) {
+  const Type* vt1 = velt_type(n1);
+  // Must read n2 here: reading n1 twice made every non-integer pair compare
+  // equal and let an integer n1 match any n2 with the same data size.
+  const Type* vt2 = velt_type(n2);
+  if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
+    // Compare vectors element sizes for integer types.
+    return data_size(n1) == data_size(n2);
+  }
+  return vt1 == vt2;
+}
+
//-------------------------vector_opd_range-----------------------
// (Start, end] half-open range defining which operands are vector
void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
switch (n->Opcode()) {
- case Op_LoadB: case Op_LoadUS:
+ case Op_LoadB: case Op_LoadUB:
+ case Op_LoadS: case Op_LoadUS:
case Op_LoadI: case Op_LoadL:
case Op_LoadF: case Op_LoadD:
case Op_LoadP:
@@ -1721,6 +1934,7 @@
assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
SWPointer align_to_ref_p(align_to_ref, this);
+ assert(align_to_ref_p.valid(), "sanity");
// Given:
// lim0 == original pre loop limit
@@ -1773,10 +1987,12 @@
// N = (V - (e - lim0)) % V
// lim = lim0 - (V - (e - lim0)) % V
+ int vw = vector_width_in_bytes(align_to_ref);
int stride = iv_stride();
int scale = align_to_ref_p.scale_in_bytes();
int elt_size = align_to_ref_p.memory_size();
- int v_align = vector_width_in_bytes() / elt_size;
+ int v_align = vw / elt_size;
+ assert(v_align > 1, "sanity");
int k = align_to_ref_p.offset_in_bytes() / elt_size;
Node *kn = _igvn.intcon(k);
@@ -1796,6 +2012,25 @@
_phase->_igvn.register_new_node_with_optimizer(e);
_phase->set_ctrl(e, pre_ctrl);
}
+ if (vw > ObjectAlignmentInBytes) {
+ // incorporate base e +/- base && Mask >>> log2(elt)
+ Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
+ Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
+ _phase->_igvn.register_new_node_with_optimizer(xbase);
+ Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#ifdef _LP64
+ masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
+ _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+#endif
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(bref);
+ _phase->set_ctrl(bref, pre_ctrl);
+ e = new (_phase->C, 3) AddINode(e, bref);
+ _phase->_igvn.register_new_node_with_optimizer(e);
+ _phase->set_ctrl(e, pre_ctrl);
+ }
// compute e +/- lim0
if (scale < 0) {
--- a/hotspot/src/share/vm/opto/superword.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/superword.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -264,8 +264,14 @@
_iv = lp->as_CountedLoop()->phi()->as_Phi(); }
int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
- int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); }
-
+  // Number of elements per vector for n's element type: the absolute value of
+  // the loop stride, capped by the matcher's maximum vector size for that type.
+  int vector_width(Node* n) {
+    const int stride_elems = ABS(iv_stride());
+    const int max_elems    = Matcher::max_vector_size(velt_basic_type(n));
+    return MIN2(stride_elems, max_elems);
+  }
+  // Vector size in bytes for n's element type: vector_width(n) elements times
+  // the array element size of that type.
+  int vector_width_in_bytes(Node* n) {
+    const int elem_bytes = type2aelembytes(velt_basic_type(n));
+    return vector_width(n) * elem_bytes;
+  }
MemNode* align_to_ref() { return _align_to_ref; }
void set_align_to_ref(MemNode* m) { _align_to_ref = m; }
@@ -298,7 +304,9 @@
// vector element type
const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+ BasicType velt_basic_type(Node* n) { return velt_type(n)->array_element_basic_type(); }
void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+ bool same_velt_type(Node* n1, Node* n2);
// my_pack
Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
@@ -311,7 +319,9 @@
// Find the adjacent memory references and create pack pairs for them.
void find_adjacent_refs();
// Find a memory reference to align the loop induction variable to.
- void find_align_to_ref(Node_List &memops);
+ MemNode* find_align_to_ref(Node_List &memops);
+ // Calculate loop's iv adjustment for this memory op.
+ int get_iv_adjustment(MemNode* mem);
// Can the preloop align the reference to position zero in the vector?
bool ref_is_alignable(SWPointer& p);
// Construct dependency graph.
@@ -394,7 +404,7 @@
// (Start, end] half-open range defining which operands are vector
void vector_opd_range(Node* n, uint* start, uint* end);
// Smallest type containing range of values
- static const Type* container_type(const Type* t);
+ const Type* container_type(Node* n);
// Adjust pre-loop limit so that in main loop, a load/store reference
// to align_to_ref will be a position zero in the vector.
void align_initial_loop_index(MemNode* align_to_ref);
@@ -462,6 +472,7 @@
Node* base() { return _base; }
Node* adr() { return _adr; }
+ MemNode* mem() { return _mem; }
int scale_in_bytes() { return _scale; }
Node* invar() { return _invar; }
bool negate_invar() { return _negate_invar; }
--- a/hotspot/src/share/vm/opto/type.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/type.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,10 @@
T_ILLEGAL, // Tuple
T_ARRAY, // Array
+ T_ILLEGAL, // VectorS
+ T_ILLEGAL, // VectorD
+ T_ILLEGAL, // VectorX
+ T_ILLEGAL, // VectorY
T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR
T_ADDRESS, // RawPtr
@@ -414,6 +418,24 @@
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
+ // Predefined vector types; this requires _const_basic_type[] to be initialized.
+ if (Matcher::vector_size_supported(T_BYTE,4)) {
+ TypeVect::VECTS = TypeVect::make(T_BYTE,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,2)) {
+ TypeVect::VECTD = TypeVect::make(T_FLOAT,2);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,4)) {
+ TypeVect::VECTX = TypeVect::make(T_FLOAT,4);
+ }
+ if (Matcher::vector_size_supported(T_FLOAT,8)) {
+ TypeVect::VECTY = TypeVect::make(T_FLOAT,8);
+ }
+ mreg2type[Op_VecS] = TypeVect::VECTS;
+ mreg2type[Op_VecD] = TypeVect::VECTD;
+ mreg2type[Op_VecX] = TypeVect::VECTX;
+ mreg2type[Op_VecY] = TypeVect::VECTY;
+
// Restore working type arena.
current->set_type_arena(save);
current->set_type_dict(NULL);
@@ -668,6 +690,10 @@
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
+ Bad, // VectorS - handled in v-call
+ Bad, // VectorD - handled in v-call
+ Bad, // VectorX - handled in v-call
+ Bad, // VectorY - handled in v-call
Bad, // AnyPtr - handled in v-call
Bad, // RawPtr - handled in v-call
@@ -728,8 +754,8 @@
//------------------------------data-------------------------------------------
const char * const Type::msg[Type::lastype] = {
"bad","control","top","int:","long:","half", "narrowoop:",
- "tuple:", "aryptr",
- "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
+ "anyptr:", "rawptr:", "java:", "inst:", "aryptr:", "klass:",
"func", "abIO", "return_address", "memory",
"float_top", "ftcon:", "float",
"double_top", "dblcon:", "double",
@@ -790,7 +816,7 @@
//------------------------------isa_oop_ptr------------------------------------
// Return true if type is an oop pointer type. False for raw pointers.
static char isa_oop_ptr_tbl[Type::lastype] = {
- 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*ary*/,
+ 0,0,0,0,0,0,0/*narrowoop*/,0/*tuple*/, 0/*array*/, 0, 0, 0, 0/*vector*/,
0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
0/*func*/,0,0/*return_address*/,0,
/*floats*/0,0,0, /*doubles*/0,0,0,
@@ -1926,6 +1952,121 @@
return false;
}
+//==============================TypeVect=======================================
+// Convenience common pre-built types.
+const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
+const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
+const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
+const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
+
+//------------------------------make-------------------------------------------
+// Build the vector type holding 'length' elements of type 'elem' and return
+// the result of hashcons() on it. The concrete subclass is selected by the
+// ideal register the matcher reports for a vector of that size in bytes.
+const TypeVect* TypeVect::make(const Type *elem, uint length) {
+  BasicType elem_bt = elem->array_element_basic_type();
+  assert(is_java_primitive(elem_bt), "only primitive types in vector");
+  assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
+  assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
+  const int size_in_bytes = length * type2aelembytes(elem_bt);
+  const int ideal_reg = Matcher::vector_ideal_reg(size_in_bytes);
+  TypeVect* vt = NULL;
+  if (ideal_reg == Op_VecS) {
+    vt = new TypeVectS(elem, length);
+  } else if (ideal_reg == Op_VecD || ideal_reg == Op_RegD) {
+    // Op_RegD is accepted alongside Op_VecD for 64-bit vectors.
+    vt = new TypeVectD(elem, length);
+  } else if (ideal_reg == Op_VecX) {
+    vt = new TypeVectX(elem, length);
+  } else if (ideal_reg == Op_VecY) {
+    vt = new TypeVectY(elem, length);
+  }
+  if (vt == NULL) {
+    // No vector register class matches this size.
+    ShouldNotReachHere();
+    return NULL;
+  }
+  return (TypeVect*)vt->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of this vector type with t: Bottom yields t, Top yields this, a vector yields a vector of the met element types.
+const Type *TypeVect::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Vector
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t); // NOTE(review): no break here; presumably typerr() does not return - confirm
+
+ case VectorS:
+ case VectorD:
+ case VectorX:
+ case VectorY: { // Meeting 2 vectors?
+ const TypeVect* v = t->is_vect();
+ assert( base() == v->base(), ""); // both operands must agree on vector kind,
+ assert(length() == v->length(), ""); // element count,
+ assert(element_basic_type() == v->element_basic_type(), ""); // and element basic type
+ return TypeVect::make(_elem->xmeet(v->_elem), _length); // meet element-wise, keep the length
+ }
+ case Top: // meeting with Top leaves this type unchanged
+ break;
+ }
+ return this;
+}
+
+//------------------------------xdual------------------------------------------
+// Dual of a vector type: same base and length, with the element type
+// replaced by its own dual.
+const Type *TypeVect::xdual() const {
+  const Type* elem_dual = _elem->dual();
+  return new TypeVect(base(), elem_dual, _length);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality: two vector types are equal when they share the same
+// element type pointer and the same element count.
+bool TypeVect::eq(const Type *t) const {
+  const TypeVect* other = t->is_vect();
+  if (_elem != other->_elem) {
+    return false;
+  }
+  return _length == other->_length;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hash: combines the element type pointer and the length, the
+// same two fields that eq() compares.
+int TypeVect::hash(void) const {
+  const intptr_t elem_bits = (intptr_t)_elem;
+  const intptr_t len_bits  = (intptr_t)_length;
+  return elem_bits + len_bits;
+}
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). A vector would be a singleton if all elements were the
+// same constant value (when vector is created with Replicate code); see below.
+bool TypeVect::singleton(void) const {
+// There is no Con node for vectors yet, so this always answers FALSE.
+// Disabled check kept for reference: return _elem->singleton();
+ return false;
+}
+
+bool TypeVect::empty(void) const { // a vector type is empty exactly when its element type is
+ return _elem->empty();
+}
+
+//------------------------------dump2------------------------------------------
+// Non-product dump: prints "<kind>[<length>]:{<element type>}", where <kind>
+// names the vector base (vectors, vectord, vectorx or vectory).
+#ifndef PRODUCT
+void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
+  const char* prefix = NULL;
+  switch (base()) {
+  case VectorS: prefix = "vectors["; break;
+  case VectorD: prefix = "vectord["; break;
+  case VectorX: prefix = "vectorx["; break;
+  case VectorY: prefix = "vectory["; break;
+  default:
+    ShouldNotReachHere();
+  }
+  if (prefix != NULL) {
+    st->print("%s", prefix);
+  }
+  st->print("%d]:{", _length);
+  _elem->dump2(d, depth, st);
+  st->print("}");
+}
+#endif
+
+
//=============================================================================
// Convenience common pre-built types.
const TypePtr *TypePtr::NULL_PTR;
@@ -2472,18 +2613,26 @@
//------------------------------make_from_constant-----------------------------
// Make a java pointer from an oop constant
const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o, bool require_constant) {
- if (o->is_method_data() || o->is_method() || o->is_cpcache()) {
+ if (o->is_method_data() || o->is_method()) {
// Treat much like a typeArray of bytes, like below, but fake the type...
- const Type* etype = (Type*)get_const_basic_type(T_BYTE);
+ const BasicType bt = T_BYTE;
+ const Type* etype = get_const_basic_type(bt);
const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
- ciKlass *klass = ciTypeArrayKlass::make((BasicType) T_BYTE);
- assert(o->can_be_constant(), "method data oops should be tenured");
- const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
- return arr;
+ ciKlass* klass = ciArrayKlass::make(ciType::make(bt));
+ assert(o->can_be_constant(), "should be tenured");
+ return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ } else if (o->is_cpcache()) {
+ // Treat much like a objArray, like below, but fake the type...
+ const BasicType bt = T_OBJECT;
+ const Type* etype = get_const_basic_type(bt);
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ ciKlass* klass = ciArrayKlass::make(ciType::make(bt));
+ assert(o->can_be_constant(), "should be tenured");
+ return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
} else {
assert(o->is_java_object(), "must be java language object");
assert(!o->is_null_object(), "null object not yet handled here.");
- ciKlass *klass = o->klass();
+ ciKlass* klass = o->klass();
if (klass->is_instance_klass()) {
// Element is an instance
if (require_constant) {
@@ -2494,8 +2643,7 @@
return TypeInstPtr::make(o);
} else if (klass->is_obj_array_klass()) {
// Element is an object array. Recursively call ourself.
- const Type *etype =
- TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
+ const Type *etype = make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
// We used to pass NotNull in here, asserting that the sub-arrays
// are all not-null. This is not true in generally, as code can
@@ -2505,12 +2653,10 @@
} else if (!o->should_be_constant()) {
return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
}
- const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
- return arr;
+ return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
} else if (klass->is_type_array_klass()) {
// Element is an typeArray
- const Type* etype =
- (Type*)get_const_basic_type(klass->as_type_array_klass()->element_type());
+ const Type* etype = get_const_basic_type(klass->as_type_array_klass()->element_type());
const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
// We used to pass NotNull in here, asserting that the array pointer
// is not-null. That was not true in general.
@@ -2519,12 +2665,11 @@
} else if (!o->should_be_constant()) {
return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
}
- const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
- return arr;
+ return TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
}
}
- ShouldNotReachHere();
+ fatal("unhandled object type");
return NULL;
}
@@ -4140,7 +4285,7 @@
// Print a 'flattened' signature
static const char * const flat_type_msg[Type::lastype] = {
"bad","control","top","int","long","_", "narrowoop",
- "tuple:", "array:",
+ "tuple:", "array:", "vectors:", "vectord:", "vectorx:", "vectory:",
"ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
"func", "abIO", "return_address", "mem",
"float_top", "ftcon:", "flt",
--- a/hotspot/src/share/vm/opto/type.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/type.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,11 @@
class TypeNarrowOop;
class TypeAry;
class TypeTuple;
+class TypeVect;
+class TypeVectS;
+class TypeVectD;
+class TypeVectX;
+class TypeVectY;
class TypePtr;
class TypeRawPtr;
class TypeOopPtr;
@@ -78,6 +83,10 @@
Tuple, // Method signature or object layout
Array, // Array types
+ VectorS, // 32bit Vector types
+ VectorD, // 64bit Vector types
+ VectorX, // 128bit Vector types
+ VectorY, // 256bit Vector types
AnyPtr, // Any old raw, klass, inst, or array pointer
RawPtr, // Raw (non-oop) pointers
@@ -222,6 +231,8 @@
const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon
const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer
const TypeAry *is_ary() const; // Array, NOT array pointer
+ const TypeVect *is_vect() const; // Vector
+ const TypeVect *isa_vect() const; // Returns NULL if not a Vector
const TypePtr *is_ptr() const; // Asserts it is a ptr type
const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
const TypeRawPtr *isa_rawptr() const; // NOT Java oop
@@ -574,6 +585,69 @@
#endif
};
+//------------------------------TypeVect---------------------------------------
+// Class of Vector Types: an element type plus an element count. The constructor is protected; use make().
+class TypeVect : public Type {
+ const Type* _elem; // Vector's element type
+ const uint _length; // Elements in vector (power of 2)
+
+protected:
+ TypeVect(TYPES t, const Type* elem, uint length) : Type(t), // t is the vector base kind passed by the subclass
+ _elem(elem), _length(length) {}
+
+public:
+ const Type* element_type() const { return _elem; }
+ BasicType element_basic_type() const { return _elem->array_element_basic_type(); }
+ uint length() const { return _length; }
+ uint length_in_bytes() const { // element count times the array element size
+ return _length * type2aelembytes(element_basic_type());
+ }
+
+ virtual bool eq(const Type *t) const; // Structural equality check
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+ static const TypeVect *make(const BasicType elem_bt, uint length) {
+ // Use bottom primitive type.
+ return make(get_const_basic_type(elem_bt), length);
+ }
+ // Used directly by Replicate nodes to construct singleton vector.
+ static const TypeVect *make(const Type* elem, uint length);
+
+ virtual const Type *xmeet( const Type *t) const; // Compute the MEET with t
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ static const TypeVect *VECTS; // 32-bit vectors
+ static const TypeVect *VECTD; // 64-bit vectors
+ static const TypeVect *VECTX; // 128-bit vectors
+ static const TypeVect *VECTY; // 256-bit vectors
+
+#ifndef PRODUCT
+ virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
+#endif
+};
+
+class TypeVectS : public TypeVect { // vector type with base VectorS (32bit)
+ friend class TypeVect; // the constructor is private; only TypeVect may create instances
+ TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
+};
+
+class TypeVectD : public TypeVect { // vector type with base VectorD (64bit)
+ friend class TypeVect; // the constructor is private; only TypeVect may create instances
+ TypeVectD(const Type* elem, uint length) : TypeVect(VectorD, elem, length) {}
+};
+
+class TypeVectX : public TypeVect { // vector type with base VectorX (128bit)
+ friend class TypeVect; // the constructor is private; only TypeVect may create instances
+ TypeVectX(const Type* elem, uint length) : TypeVect(VectorX, elem, length) {}
+};
+
+class TypeVectY : public TypeVect { // vector type with base VectorY (256bit)
+ friend class TypeVect; // the constructor is private; only TypeVect may create instances
+ TypeVectY(const Type* elem, uint length) : TypeVect(VectorY, elem, length) {}
+};
+
//------------------------------TypePtr----------------------------------------
// Class of machine Pointer Types: raw data, instances or arrays.
// If the _base enum is AnyPtr, then this refers to all of the above.
@@ -1113,6 +1187,15 @@
return (TypeAry*)this;
}
+inline const TypeVect *Type::is_vect() const { // asserting cast; use isa_vect() for a NULL-returning test
+ assert( _base >= VectorS && _base <= VectorY, "Not a Vector" ); // relies on VectorS..VectorY being consecutive in the TYPES enum
+ return (TypeVect*)this;
+}
+
+inline const TypeVect *Type::isa_vect() const {
+  // Non-asserting test: NULL unless _base lies in the VectorS..VectorY range.
+  if (_base < VectorS || _base > VectorY) {
+    return NULL;
+  }
+  return (TypeVect*)this;
+}
+
inline const TypePtr *Type::is_ptr() const {
// AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
--- a/hotspot/src/share/vm/opto/vectornode.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/vectornode.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,147 +28,16 @@
//------------------------------VectorNode--------------------------------------
-// Return vector type for an element type and vector length.
-const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
- assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
- switch(elt_bt) {
- case T_BOOLEAN:
- case T_BYTE:
- switch(len) {
- case 2: return TypeInt::CHAR;
- case 4: return TypeInt::INT;
- case 8: return TypeLong::LONG;
- }
- break;
- case T_CHAR:
- case T_SHORT:
- switch(len) {
- case 2: return TypeInt::INT;
- case 4: return TypeLong::LONG;
- }
- break;
- case T_INT:
- switch(len) {
- case 2: return TypeLong::LONG;
- }
- break;
- case T_LONG:
- break;
- case T_FLOAT:
- switch(len) {
- case 2: return Type::DOUBLE;
- }
- break;
- case T_DOUBLE:
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Scalar promotion
-VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- if (vlen == 16) return new (C, 2) Replicate16BNode(s);
- if (vlen == 8) return new (C, 2) Replicate8BNode(s);
- if (vlen == 4) return new (C, 2) Replicate4BNode(s);
- break;
- case T_CHAR:
- if (vlen == 8) return new (C, 2) Replicate8CNode(s);
- if (vlen == 4) return new (C, 2) Replicate4CNode(s);
- if (vlen == 2) return new (C, 2) Replicate2CNode(s);
- break;
- case T_SHORT:
- if (vlen == 8) return new (C, 2) Replicate8SNode(s);
- if (vlen == 4) return new (C, 2) Replicate4SNode(s);
- if (vlen == 2) return new (C, 2) Replicate2SNode(s);
- break;
- case T_INT:
- if (vlen == 4) return new (C, 2) Replicate4INode(s);
- if (vlen == 2) return new (C, 2) Replicate2INode(s);
- break;
- case T_LONG:
- if (vlen == 2) return new (C, 2) Replicate2LNode(s);
- break;
- case T_FLOAT:
- if (vlen == 4) return new (C, 2) Replicate4FNode(s);
- if (vlen == 2) return new (C, 2) Replicate2FNode(s);
- break;
- case T_DOUBLE:
- if (vlen == 2) return new (C, 2) Replicate2DNode(s);
- break;
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Return initial Pack node. Additional operands added with add_opd() calls.
-PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- switch (bt) {
- case T_BOOLEAN:
- case T_BYTE:
- return new (C, 2) PackBNode(s);
- case T_CHAR:
- return new (C, 2) PackCNode(s);
- case T_SHORT:
- return new (C, 2) PackSNode(s);
- case T_INT:
- return new (C, 2) PackINode(s);
- case T_LONG:
- return new (C, 2) PackLNode(s);
- case T_FLOAT:
- return new (C, 2) PackFNode(s);
- case T_DOUBLE:
- return new (C, 2) PackDNode(s);
- }
- ShouldNotReachHere();
- return NULL;
-}
-
-// Create a binary tree form for Packs. [lo, hi) (half-open) range
-Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
- int ct = hi - lo;
- assert(is_power_of_2(ct), "power of 2");
- int mid = lo + ct/2;
- Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid);
- Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
- int rslt_bsize = ct * type2aelembytes(elt_basic_type());
- if (bottom_type()->is_floatingpoint()) {
- switch (rslt_bsize) {
- case 8: return new (C, 3) PackFNode(n1, n2);
- case 16: return new (C, 3) PackDNode(n1, n2);
- }
- } else {
- assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
- switch (rslt_bsize) {
- case 2: return new (C, 3) Pack2x1BNode(n1, n2);
- case 4: return new (C, 3) Pack2x2BNode(n1, n2);
- case 8: return new (C, 3) PackINode(n1, n2);
- case 16: return new (C, 3) PackLNode(n1, n2);
- }
- }
- ShouldNotReachHere();
- return NULL;
-}
-
// Return the vector operator for the specified scalar operation
-// and vector length. One use is to check if the code generator
+// and vector length. Also used to check if the code generator
// supports the vector operation.
-int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
- return 0; // unimplemented
+int VectorNode::opcode(int sopc, uint vlen, BasicType bt) {
switch (sopc) {
case Op_AddI:
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_AddVB;
- case T_CHAR: return Op_AddVC;
+ case T_CHAR:
case T_SHORT: return Op_AddVS;
case T_INT: return Op_AddVI;
}
@@ -186,7 +55,7 @@
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_SubVB;
- case T_CHAR: return Op_SubVC;
+ case T_CHAR:
case T_SHORT: return Op_SubVS;
case T_INT: return Op_SubVI;
}
@@ -216,18 +85,18 @@
switch (bt) {
case T_BOOLEAN:
case T_BYTE: return Op_LShiftVB;
- case T_CHAR: return Op_LShiftVC;
+ case T_CHAR:
case T_SHORT: return Op_LShiftVS;
case T_INT: return Op_LShiftVI;
}
ShouldNotReachHere();
- case Op_URShiftI:
+ case Op_RShiftI:
switch (bt) {
case T_BOOLEAN:
- case T_BYTE: return Op_URShiftVB;
- case T_CHAR: return Op_URShiftVC;
- case T_SHORT: return Op_URShiftVS;
- case T_INT: return Op_URShiftVI;
+ case T_BYTE: return Op_RShiftVB;
+ case T_CHAR:
+ case T_SHORT: return Op_RShiftVS;
+ case T_INT: return Op_RShiftVI;
}
ShouldNotReachHere();
case Op_AndI:
@@ -241,13 +110,14 @@
return Op_XorV;
case Op_LoadB:
+ case Op_LoadUB:
case Op_LoadUS:
case Op_LoadS:
case Op_LoadI:
case Op_LoadL:
case Op_LoadF:
case Op_LoadD:
- return VectorLoadNode::opcode(sopc, vlen);
+ return Op_LoadVector;
case Op_StoreB:
case Op_StoreC:
@@ -255,211 +125,170 @@
case Op_StoreL:
case Op_StoreF:
case Op_StoreD:
- return VectorStoreNode::opcode(sopc, vlen);
- }
- return 0; // Unimplemented
-}
-
-// Helper for above.
-int VectorLoadNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_LoadB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Load4B;
- case 8: return Op_Load8B;
- case 16: return Op_Load16B;
- }
- break;
- case Op_LoadUS:
- switch (vlen) {
- case 2: return Op_Load2C;
- case 4: return Op_Load4C;
- case 8: return Op_Load8C;
- }
- break;
- case Op_LoadS:
- switch (vlen) {
- case 2: return Op_Load2S;
- case 4: return Op_Load4S;
- case 8: return Op_Load8S;
- }
- break;
- case Op_LoadI:
- switch (vlen) {
- case 2: return Op_Load2I;
- case 4: return Op_Load4I;
- }
- break;
- case Op_LoadL:
- if (vlen == 2) return Op_Load2L;
- break;
- case Op_LoadF:
- switch (vlen) {
- case 2: return Op_Load2F;
- case 4: return Op_Load4F;
- }
- break;
- case Op_LoadD:
- if (vlen == 2) return Op_Load2D;
- break;
+ return Op_StoreVector;
}
return 0; // Unimplemented
}
-// Helper for above
-int VectorStoreNode::opcode(int sopc, uint vlen) {
- switch (sopc) {
- case Op_StoreB:
- switch (vlen) {
- case 2: return 0; // Unimplemented
- case 4: return Op_Store4B;
- case 8: return Op_Store8B;
- case 16: return Op_Store16B;
- }
- break;
- case Op_StoreC:
- switch (vlen) {
- case 2: return Op_Store2C;
- case 4: return Op_Store4C;
- case 8: return Op_Store8C;
- }
- break;
- case Op_StoreI:
- switch (vlen) {
- case 2: return Op_Store2I;
- case 4: return Op_Store4I;
- }
- break;
- case Op_StoreL:
- if (vlen == 2) return Op_Store2L;
- break;
- case Op_StoreF:
- switch (vlen) {
- case 2: return Op_Store2F;
- case 4: return Op_Store4F;
- }
- break;
- case Op_StoreD:
- if (vlen == 2) return Op_Store2D;
- break;
+bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {  // True iff scalar opcode opc has a usable vector form for vlen elements of type bt
+ if (is_java_primitive(bt) &&
+ (vlen > 1) && is_power_of_2(vlen) &&  // only power-of-two lengths of at least 2 are considered
+ Matcher::vector_size_supported(bt, vlen)) {
+ int vopc = VectorNode::opcode(opc, vlen, bt);  // opcode() returns 0 when there is no vector opcode
+ return vopc > 0 && Matcher::has_match_rule(vopc);  // a vector opcode alone is not enough; the matcher is asked as well
}
- return 0; // Unimplemented
+ return false;
}
// Return the vector version of a scalar operation node.
-VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
- int vopc = opcode(sopc, vlen, opd_t);
+VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) {  // opc is the scalar opcode; n1/n2 are the operands of the new node
+ const TypeVect* vt = TypeVect::make(bt, vlen);  // one vector type, shared by whichever node is created below
+ int vopc = VectorNode::opcode(opc, vlen, bt);  // vector opcode for opc, or 0 if there is none
switch (vopc) {
- case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
- case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
- case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
- case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
- case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
- case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
- case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
+ case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt);
+ case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vt);
+ case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vt);
+ case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vt);
+ case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vt);
+ case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vt);
+
+ case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vt);
+ case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vt);
+ case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vt);
+ case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vt);
+ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt);
+ case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt);
- case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
- case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
- case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
- case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
- case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
- case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
- case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
+ case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt);
+ case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt);
+
+ case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vt);
+ case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vt);
+
+ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt);
+ case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt);
+ case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt);
+
+ case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt);
+ case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt);
+ case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt);
- case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
- case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
+ case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt);
+ case Op_OrV: return new (C, 3) OrVNode (n1, n2, vt);
+ case Op_XorV: return new (C, 3) XorVNode(n1, n2, vt);
+ }
+ ShouldNotReachHere();  // reached when vopc (including 0) has no case above
+ return NULL;
- case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
- case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
+}
- case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
- case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
- case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
- case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
-
- case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
- case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
- case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
- case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
-
- case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
- case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+// Scalar promotion: wrap scalar s in the Replicate node that matches its element type.
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ const TypeVect* vt = opd_t->singleton() ? TypeVect::make(opd_t, vlen)  // singleton operand type: keep it as the element type
+ : TypeVect::make(bt, vlen);  // otherwise fall back to the bottom primitive type for bt
+ switch (bt) {
+ case T_BOOLEAN:  // boolean shares the byte node
+ case T_BYTE:
+ return new (C, 2) ReplicateBNode(s, vt);
+ case T_CHAR:  // char shares the short node
+ case T_SHORT:
+ return new (C, 2) ReplicateSNode(s, vt);
+ case T_INT:
+ return new (C, 2) ReplicateINode(s, vt);
+ case T_LONG:
+ return new (C, 2) ReplicateLNode(s, vt);
+ case T_FLOAT:
+ return new (C, 2) ReplicateFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, 2) ReplicateDNode(s, vt);
}
ShouldNotReachHere();
return NULL;
}
-// Return the vector version of a scalar load node.
-VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen) {
- int vopc = opcode(opc, vlen);
-
- switch(vopc) {
- case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
- case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
- case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
-
- case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
- case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
- case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
-
- case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
- case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
- case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
-
- case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp);
- case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp);
-
- case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
-
- case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
- case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
-
- case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, uint vlen, BasicType bt) {  // s is the first operand; bt selects the Pack node class
+ const TypeVect* vt = TypeVect::make(bt, vlen);  // type of the finished vector: vlen elements of bt
+ switch (bt) {
+ case T_BOOLEAN:  // boolean shares the byte node
+ case T_BYTE:
+ return new (C, vlen+1) PackBNode(s, vt);  // NOTE(review): vlen+1 presumably sizes the node for vlen operands plus input 0 - confirm
+ case T_CHAR:  // char shares the short node
+ case T_SHORT:
+ return new (C, vlen+1) PackSNode(s, vt);
+ case T_INT:
+ return new (C, vlen+1) PackINode(s, vt);
+ case T_LONG:
+ return new (C, vlen+1) PackLNode(s, vt);
+ case T_FLOAT:
+ return new (C, vlen+1) PackFNode(s, vt);
+ case T_DOUBLE:
+ return new (C, vlen+1) PackDNode(s, vt);
}
ShouldNotReachHere();
return NULL;
}
-// Return the vector version of a scalar store node.
-VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, Node* val,
- uint vlen) {
- int vopc = opcode(opc, vlen);
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {  // lo/hi index this node's inputs via in()
+ int ct = hi - lo;  // number of inputs covered by this subtree
+ assert(is_power_of_2(ct), "power of 2");
+ if (ct == 2) {  // leaf: pack the two adjacent inputs directly, keeping the element type
+ PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type());
+ pk->add_opd(1, in(lo+1));
+ return pk;
- switch(vopc) {
- case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
- case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
- case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
+ } else {  // inner node: build both halves first, then join them as a 2-element pack
+ int mid = lo + ct/2;
+ Node* n1 = binaryTreePack(C, lo, mid);
+ Node* n2 = binaryTreePack(C, mid, hi );
- case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
- case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
- case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
- case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
-
- case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
-
- case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
- case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
-
- case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
+ BasicType bt = vect_type()->element_basic_type();
+ switch (bt) {  // NOTE(review): the joining node depends only on bt, not on ct - confirm this is intended for ct > 4
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 3) PackSNode(n1, n2, TypeVect::make(T_SHORT, 2));  // byte halves are joined as two shorts
+ case T_CHAR:
+ case T_SHORT:
+ return new (C, 3) PackINode(n1, n2, TypeVect::make(T_INT, 2));  // short halves are joined as two ints
+ case T_INT:
+ return new (C, 3) PackLNode(n1, n2, TypeVect::make(T_LONG, 2));  // int halves are joined as two longs
+ case T_LONG:
+ return new (C, 3) Pack2LNode(n1, n2, TypeVect::make(T_LONG, 2));  // long has no wider type here; uses Pack2L
+ case T_FLOAT:
+ return new (C, 3) PackDNode(n1, n2, TypeVect::make(T_DOUBLE, 2));  // float halves are joined as two doubles
+ case T_DOUBLE:
+ return new (C, 3) Pack2DNode(n1, n2, TypeVect::make(T_DOUBLE, 2));  // double has no wider type here; uses Pack2D
+ }
+ ShouldNotReachHere();  // bt matched none of the cases above
}
- ShouldNotReachHere();
return NULL;
}
+// Return the vector version of a scalar load node: one LoadVectorNode whose
+// vector type is vlen elements of basic type bt. opc is not consulted here.
+LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+                                     Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
+  const TypeVect* vt = TypeVect::make(bt, vlen);
+  return new (C, 3) LoadVectorNode(ctl, mem, adr, atyp, vt);
+}
+
+// Return the vector version of a scalar store node.
+StoreVectorNode* StoreVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, Node* val,
+ uint vlen) {  // opc and vlen are not used in this body
+ return new (C, 4) StoreVectorNode(ctl, mem, adr, atyp, val);  // always the single generic store node
+}
+
// Extract a scalar element of vector.
-Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
- BasicType bt = opd_t->array_element_basic_type();
- assert(position < VectorNode::max_vlen(bt), "pos in range");
+Node* ExtractNode::make(Compile* C, Node* v, uint position, BasicType bt) {
+ assert((int)position < Matcher::max_vector_size(bt), "pos in range");
ConINode* pos = ConINode::make(C, (int)position);
switch (bt) {
case T_BOOLEAN:
+ return new (C, 3) ExtractUBNode(v, pos);
case T_BYTE:
return new (C, 3) ExtractBNode(v, pos);
case T_CHAR:
@@ -478,3 +307,4 @@
ShouldNotReachHere();
return NULL;
}
+
--- a/hotspot/src/share/vm/opto/vectornode.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/opto/vectornode.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,48 +31,32 @@
//------------------------------VectorNode--------------------------------------
// Vector Operation
-class VectorNode : public Node {
- virtual uint size_of() const { return sizeof(*this); }
- protected:
- uint _length; // vector length
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+class VectorNode : public TypeNode {  // base of the vector operation nodes; the node's type is a TypeVect
+ public:
- static const Type* vect_type(BasicType elt_bt, uint len);
- static const Type* vect_type(const Type* elt_type, uint len) {
- return vect_type(elt_type->array_element_basic_type(), len);
+ VectorNode(Node* n1, const TypeVect* vt) : TypeNode(vt, 2) {  // one-operand form
+ init_class_id(Class_Vector);
+ init_req(1, n1);  // n1 becomes input 1
+ }
+ VectorNode(Node* n1, Node* n2, const TypeVect* vt) : TypeNode(vt, 3) {  // two-operand form
+ init_class_id(Class_Vector);
+ init_req(1, n1);  // n1 becomes input 1
+ init_req(2, n2);  // n2 becomes input 2
}
- public:
- friend class VectorLoadNode; // For vect_type
- friend class VectorStoreNode; // ditto.
+ const TypeVect* vect_type() const { return type()->is_vect(); }  // the node's type, viewed as a TypeVect
+ uint length() const { return vect_type()->length(); } // Vector length
- VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
- init_class_id(Class_Vector);
- }
- VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
- init_class_id(Class_Vector);
- }
virtual int Opcode() const;
- uint length() const { return _length; } // Vector length
-
- static uint max_vlen(BasicType bt) { // max vector length
- return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt));
- }
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
-
- virtual const Type *bottom_type() const { return vect_type(); }
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen, const Type* opd_t);
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(vect_type()->length_in_bytes()); }  // register kind is chosen from the vector size in bytes
static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
- static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+ static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt);  // vector node for scalar opcode opc
+
+ static int opcode(int opc, uint vlen, BasicType bt);  // vector opcode for scalar opcode opc, or 0 if none
+ static bool implemented(int opc, uint vlen, BasicType bt);  // whether a vector form of opc is usable for this length and type
};
@@ -81,981 +65,393 @@
//------------------------------AddVBNode---------------------------------------
// Vector add byte
class AddVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------AddVCNode---------------------------------------
-// Vector add char
-class AddVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_BOOLEAN and T_BYTE adds here
virtual int Opcode() const;
};
//------------------------------AddVSNode---------------------------------------
-// Vector add short
+// Vector add char/short
class AddVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_CHAR and T_SHORT adds here
virtual int Opcode() const;
};
//------------------------------AddVINode---------------------------------------
// Vector add int
class AddVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_INT adds here
virtual int Opcode() const;
};
//------------------------------AddVLNode---------------------------------------
// Vector add long
class AddVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------AddVFNode---------------------------------------
// Vector add float
class AddVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------AddVDNode---------------------------------------
// Vector add double
class AddVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ AddVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------SubVBNode---------------------------------------
// Vector subtract byte
class SubVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------SubVCNode---------------------------------------
-// Vector subtract char
-class SubVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_BOOLEAN and T_BYTE subtracts here
virtual int Opcode() const;
};
//------------------------------SubVSNode---------------------------------------
-// Vector subtract short
+// Vector subtract char/short
class SubVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_CHAR and T_SHORT subtracts here
virtual int Opcode() const;
};
//------------------------------SubVINode---------------------------------------
// Vector subtract int
class SubVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_INT subtracts here
virtual int Opcode() const;
};
//------------------------------SubVLNode---------------------------------------
// Vector subtract long
class SubVLNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------SubVFNode---------------------------------------
// Vector subtract float
class SubVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------SubVDNode---------------------------------------
// Vector subtract double
class SubVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ SubVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------MulVFNode---------------------------------------
// Vector multiply float
class MulVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------MulVDNode---------------------------------------
// Vector multiply double
class MulVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ MulVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------DivVFNode---------------------------------------
// Vector divide float
class DivVFNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVFNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------DivVDNode---------------------------------------
// Vector Divide double
class DivVDNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ DivVDNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type (element type and length)
virtual int Opcode() const;
};
//------------------------------LShiftVBNode---------------------------------------
// Vector lshift byte
class LShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------LShiftVCNode---------------------------------------
-// Vector lshift chars
-class LShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_BOOLEAN and T_BYTE left shifts here
virtual int Opcode() const;
};
//------------------------------LShiftVSNode---------------------------------------
// Vector lshift shorts
class LShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_CHAR and T_SHORT left shifts here
virtual int Opcode() const;
};
//------------------------------LShiftVINode---------------------------------------
// Vector lshift ints
class LShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_INT left shifts here
virtual int Opcode() const;
};
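-//------------------------------URShiftVBNode---------------------------------------
-// Vector urshift bytes
+//------------------------------RShiftVBNode---------------------------------------
+// Vector rshift bytes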
-class URShiftVBNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+class RShiftVBNode : public VectorNode {  // created for Op_RShiftVB (see VectorNode::make)
public:
- URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
- virtual int Opcode() const;
-};
-
-//------------------------------URShiftVCNode---------------------------------------
-// Vector urshift char
-class URShiftVCNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_BOOLEAN and T_BYTE Op_RShiftI here
virtual int Opcode() const;
};
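-//------------------------------URShiftVSNode---------------------------------------
-// Vector urshift shorts
+//------------------------------RShiftVSNode---------------------------------------
+// Vector rshift chars/shorts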
-class URShiftVSNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+class RShiftVSNode : public VectorNode {  // created for Op_RShiftVS (see VectorNode::make)
public:
- URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_CHAR and T_SHORT Op_RShiftI here
virtual int Opcode() const;
};
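-//------------------------------URShiftVINode---------------------------------------
-// Vector urshift ints
+//------------------------------RShiftVINode---------------------------------------
+// Vector rshift ints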
-class URShiftVINode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
+class RShiftVINode : public VectorNode {  // created for Op_RShiftVI (see VectorNode::make)
public:
- URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}  // vt: result vector type; VectorNode::opcode maps T_INT Op_RShiftI here
virtual int Opcode() const;
};
//------------------------------AndVNode---------------------------------------
// Vector and
class AndVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------OrVNode---------------------------------------
// Vector or
class OrVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
//------------------------------XorVNode---------------------------------------
// Vector xor
class XorVNode : public VectorNode {
- protected:
- BasicType _bt;
- virtual BasicType elt_basic_type() const { return _bt; }
public:
- XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {}
virtual int Opcode() const;
};
-//================================= M E M O R Y ==================================
-
+//================================= M E M O R Y ===============================
-//------------------------------VectorLoadNode--------------------------------------
-// Vector Load from memory
-class VectorLoadNode : public LoadNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
- // For use in constructor
- static const Type* vect_type(const Type* elt_type, uint len) {
- return VectorNode::vect_type(elt_type, len);
+//------------------------------LoadVectorNode---------------------------------
+// Load Vector from memory
+class LoadVectorNode : public LoadNode {
+ public:
+ LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
+ : LoadNode(c, mem, adr, at, vt) {
+ init_class_id(Class_LoadVector);
}
- public:
- VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
- : LoadNode(c,mem,adr,at,rt) {
- init_class_id(Class_VectorLoad);
- }
+ const TypeVect* vect_type() const { return type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
+
virtual int Opcode() const;
- virtual uint length() const = 0; // Vector length
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
-
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
-
- static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
- Node* adr, const TypePtr* atyp, uint vlen);
-};
-
-//------------------------------Load16BNode--------------------------------------
-// Vector load of 16 bytes (8bits signed) from memory
-class Load16BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store16B; }
- virtual uint length() const { return 16; }
-};
-
-//------------------------------Load8BNode--------------------------------------
-// Vector load of 8 bytes (8bits signed) from memory
-class Load8BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8B; }
- virtual uint length() const { return 8; }
-};
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
-//------------------------------Load4BNode--------------------------------------
-// Vector load of 4 bytes (8bits signed) from memory
-class Load4BNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4B; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load8CNode--------------------------------------
-// Vector load of 8 chars (16bits unsigned) from memory
-class Load8CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
-};
+ virtual int store_Opcode() const { return Op_StoreVector; }
-//------------------------------Load4CNode--------------------------------------
-// Vector load of 4 chars (16bits unsigned) from memory
-class Load4CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2CNode--------------------------------------
-// Vector load of 2 chars (16bits unsigned) from memory
-class Load2CNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load8SNode--------------------------------------
-// Vector load of 8 shorts (16bits signed) from memory
-class Load8SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store8C; }
- virtual uint length() const { return 8; }
+ static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen, BasicType bt);
};
-//------------------------------Load4SNode--------------------------------------
-// Vector load of 4 shorts (16bits signed) from memory
-class Load4SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4C; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2SNode--------------------------------------
-// Vector load of 2 shorts (16bits signed) from memory
-class Load2SNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2C; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4INode--------------------------------------
-// Vector load of 4 integers (32bits signed) from memory
-class Load4INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4I; }
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Load2INode--------------------------------------
-// Vector load of 2 integers (32bits signed) from memory
-class Load2INode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
- : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2I; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load2LNode--------------------------------------
-// Vector load of 2 longs (64bits signed) from memory
-class Load2LNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------StoreVectorNode--------------------------------
+// Store Vector to memory
+class StoreVectorNode : public StoreNode {
public:
- Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
- : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2L; }
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Load4FNode--------------------------------------
-// Vector load of 4 floats (32bits) from memory
-class Load4FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store4F; }
- virtual uint length() const { return 4; }
-};
+ StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : StoreNode(c, mem, adr, at, val) {
+ assert(val->is_Vector() || val->is_LoadVector(), "sanity");
+ init_class_id(Class_StoreVector);
+ }
-//------------------------------Load2FNode--------------------------------------
-// Vector load of 2 floats (32bits) from memory
-class Load2FNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2F; }
- virtual uint length() const { return 2; }
-};
+ const TypeVect* vect_type() const { return in(MemNode::ValueIn)->bottom_type()->is_vect(); }
+ uint length() const { return vect_type()->length(); } // Vector length
-//------------------------------Load2DNode--------------------------------------
-// Vector load of 2 doubles (64bits) from memory
-class Load2DNode : public VectorLoadNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
- : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
- virtual int Opcode() const;
- virtual int store_Opcode() const { return Op_Store2D; }
- virtual uint length() const { return 2; }
-};
-
-
-//------------------------------VectorStoreNode--------------------------------------
-// Vector Store to memory
-class VectorStoreNode : public StoreNode {
- virtual uint size_of() const { return sizeof(*this); }
-
- protected:
- virtual BasicType elt_basic_type() const = 0; // Vector element basic type
-
- public:
- VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : StoreNode(c,mem,adr,at,val) {
- init_class_id(Class_VectorStore);
- }
virtual int Opcode() const;
- virtual uint length() const = 0; // Vector length
-
- // Element and vector type
- const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
- const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(memory_size()); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return vect_type()->length_in_bytes(); }
- virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
- virtual BasicType memory_type() const { return T_VOID; }
- virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); }
-
- // Vector opcode from scalar opcode
- static int opcode(int sopc, uint vlen);
-
- static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ static StoreVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
Node* adr, const TypePtr* atyp, Node* val,
uint vlen);
};
-//------------------------------Store16BNode--------------------------------------
-// Vector store of 16 bytes (8bits signed) to memory
-class Store16BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 16; }
-};
+
+//=========================Promote_Scalar_to_Vector============================
-//------------------------------Store8BNode--------------------------------------
-// Vector store of 8 bytes (8bits signed) to memory
-class Store8BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateBNode---------------------------------
+// Replicate byte scalar to be vector
+class ReplicateBNode : public VectorNode {
public:
- Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4BNode--------------------------------------
-// Vector store of 4 bytes (8bits signed) to memory
-class Store4BNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
+ ReplicateBNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store8CNode--------------------------------------
-// Vector store of 8 chars (16bits signed/unsigned) to memory
-class Store8CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 8; }
-};
-
-//------------------------------Store4CNode--------------------------------------
-// Vector store of 4 chars (16bits signed/unsigned) to memory
-class Store4CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2CNode--------------------------------------
-// Vector store of 2 chars (16bits signed/unsigned) to memory
-class Store2CNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
};
-//------------------------------Store4INode--------------------------------------
-// Vector store of 4 integers (32bits signed) to memory
-class Store4INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2INode--------------------------------------
-// Vector store of 2 integers (32bits signed) to memory
-class Store2INode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Store2LNode--------------------------------------
-// Vector store of 2 longs (64bits signed) to memory
-class Store2LNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateSNode---------------------------------
+// Replicate short scalar to be vector
+class ReplicateSNode : public VectorNode {
public:
- Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Store4FNode--------------------------------------
-// Vector store of 4 floats (32bits) to memory
-class Store4FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 4; }
-};
-
-//------------------------------Store2FNode--------------------------------------
-// Vector store of 2 floats (32bits) to memory
-class Store2FNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//------------------------------Store2DNode--------------------------------------
-// Vector store of 2 doubles (64bits) to memory
-class Store2DNode : public VectorStoreNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
- : VectorStoreNode(c,mem,adr,at,val) {}
- virtual int Opcode() const;
- virtual uint length() const { return 2; }
-};
-
-//=========================Promote_Scalar_to_Vector====================================
-
-//------------------------------Replicate16BNode---------------------------------------
-// Replicate byte scalar to be vector of 16 bytes
-class Replicate16BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+ ReplicateSNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate8BNode---------------------------------------
-// Replicate byte scalar to be vector of 8 bytes
-class Replicate8BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------ReplicateINode---------------------------------
+// Replicate int scalar to be vector
+class ReplicateINode : public VectorNode {
public:
- Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4BNode---------------------------------------
-// Replicate byte scalar to be vector of 4 bytes
-class Replicate4BNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
- public:
- Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8CNode---------------------------------------
-// Replicate char scalar to be vector of 8 chars
-class Replicate8CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
+ ReplicateINode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate4CNode---------------------------------------
-// Replicate char scalar to be vector of 4 chars
-class Replicate4CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2CNode---------------------------------------
-// Replicate char scalar to be vector of 2 chars
-class Replicate2CNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
+//------------------------------ReplicateLNode---------------------------------
+// Replicate long scalar to be vector
+class ReplicateLNode : public VectorNode {
public:
- Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate8SNode---------------------------------------
-// Replicate short scalar to be vector of 8 shorts
-class Replicate8SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4SNode---------------------------------------
-// Replicate short scalar to be vector of 4 shorts
-class Replicate4SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
- public:
- Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
+ ReplicateLNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate2SNode---------------------------------------
-// Replicate short scalar to be vector of 2 shorts
-class Replicate2SNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
+//------------------------------ReplicateFNode---------------------------------
+// Replicate float scalar to be vector
+class ReplicateFNode : public VectorNode {
public:
- Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4INode---------------------------------------
-// Replicate int scalar to be vector of 4 ints
-class Replicate4INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2INode---------------------------------------
-// Replicate int scalar to be vector of 2 ints
-class Replicate2INode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
- public:
- Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
+ ReplicateFNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate2LNode---------------------------------------
-// Replicate long scalar to be vector of 2 longs
-class Replicate2LNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
+//------------------------------ReplicateDNode---------------------------------
+// Replicate double scalar to be vector
+class ReplicateDNode : public VectorNode {
public:
- Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate4FNode---------------------------------------
-// Replicate float scalar to be vector of 4 floats
-class Replicate4FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
+ ReplicateDNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
virtual int Opcode() const;
};
-//------------------------------Replicate2FNode---------------------------------------
-// Replicate float scalar to be vector of 2 floats
-class Replicate2FNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
- public:
- Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//------------------------------Replicate2DNode---------------------------------------
-// Replicate double scalar to be vector of 2 doubles
-class Replicate2DNode : public VectorNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
- public:
- Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
- virtual int Opcode() const;
-};
-
-//========================Pack_Scalars_into_a_Vector==============================
+//========================Pack_Scalars_into_a_Vector===========================
//------------------------------PackNode---------------------------------------
// Pack parent class (not for code generation).
class PackNode : public VectorNode {
public:
- PackNode(Node* in1) : VectorNode(in1, 1) {}
- PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {}
+ PackNode(Node* in1, const TypeVect* vt) : VectorNode(in1, vt) {}
+ PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {}
virtual int Opcode() const;
- void add_opd(Node* n) {
- add_req(n);
- _length++;
- assert(_length == req() - 1, "vector length matches edge count");
+ void add_opd(uint i, Node* n) {
+ init_req(i+1, n);
}
// Create a binary tree form for Packs. [lo, hi) (half-open) range
Node* binaryTreePack(Compile* C, int lo, int hi);
- static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+ static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt);
};
//------------------------------PackBNode---------------------------------------
// Pack byte scalars into vector
class PackBNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
public:
- PackBNode(Node* in1) : PackNode(in1) {}
- virtual int Opcode() const;
-};
-
-//------------------------------PackCNode---------------------------------------
-// Pack char scalars into vector
-class PackCNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- PackCNode(Node* in1) : PackNode(in1) {}
+ PackBNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
virtual int Opcode() const;
};
//------------------------------PackSNode---------------------------------------
// Pack short scalars into a vector
class PackSNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_SHORT; }
public:
- PackSNode(Node* in1) : PackNode(in1) {}
+ PackSNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackSNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackINode---------------------------------------
// Pack integer scalars into a vector
class PackINode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_INT; }
public:
- PackINode(Node* in1) : PackNode(in1) {}
- PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackINode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackINode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackLNode---------------------------------------
// Pack long scalars into a vector
class PackLNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_LONG; }
public:
- PackLNode(Node* in1) : PackNode(in1) {}
- PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackLNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackLNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Pack2LNode--------------------------------------
+// Pack 2 long scalars into a vector
+class Pack2LNode : public PackNode {
+ public:
+ Pack2LNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackFNode---------------------------------------
// Pack float scalars into vector
class PackFNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_FLOAT; }
public:
- PackFNode(Node* in1) : PackNode(in1) {}
- PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackFNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackFNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
//------------------------------PackDNode---------------------------------------
// Pack double scalars into a vector
class PackDNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_DOUBLE; }
public:
- PackDNode(Node* in1) : PackNode(in1) {}
- PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ PackDNode(Node* in1, const TypeVect* vt) : PackNode(in1, vt) {}
+ PackDNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
};
-// The Pack2xN nodes assist code generation. They are created from
-// Pack4C, etc. nodes in final_graph_reshape in the form of a
-// balanced, binary tree.
-
-//------------------------------Pack2x1BNode-----------------------------------------
-// Pack 2 1-byte integers into vector of 2 bytes
-class Pack2x1BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_BYTE; }
+//------------------------------Pack2DNode--------------------------------------
+// Pack 2 double scalars into a vector
+class Pack2DNode : public PackNode {
public:
- Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ Pack2DNode(Node* in1, Node* in2, const TypeVect* vt) : PackNode(in1, in2, vt) {}
virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
};
-//------------------------------Pack2x2BNode---------------------------------------
-// Pack 2 2-byte integers into vector of 4 bytes
-class Pack2x2BNode : public PackNode {
- protected:
- virtual BasicType elt_basic_type() const { return T_CHAR; }
- public:
- Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
- virtual int Opcode() const;
- virtual uint ideal_reg() const { return Op_RegI; }
-};
//========================Extract_Scalar_from_Vector===============================
@@ -1069,7 +465,7 @@
virtual int Opcode() const;
uint pos() const { return in(2)->get_int(); }
- static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+ static Node* make(Compile* C, Node* v, uint position, BasicType bt);
};
//------------------------------ExtractBNode---------------------------------------
@@ -1082,6 +478,16 @@
virtual uint ideal_reg() const { return Op_RegI; }
};
+//------------------------------ExtractUBNode--------------------------------------
+// Extract a boolean from a vector at position "pos"; the extracted value is typed as an int
+class ExtractUBNode : public ExtractNode {
+ public:
+ ExtractUBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {} // forwards the source vector and the constant position node to ExtractNode
+ virtual int Opcode() const; // declared only; the definition is not part of this hunk
+ virtual const Type *bottom_type() const { return TypeInt::INT; } // result type is reported as TypeInt::INT
+ virtual uint ideal_reg() const { return Op_RegI; } // result register class is Op_RegI
+};
+
//------------------------------ExtractCNode---------------------------------------
// Extract a char from a vector at position "pos"
class ExtractCNode : public ExtractNode {
--- a/hotspot/src/share/vm/runtime/java.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/runtime/java.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -660,6 +660,7 @@
}
JDK_Version JDK_Version::_current;
+const char* JDK_Version::_runtime_name;
void JDK_Version::initialize() {
jdk_version_info info;
--- a/hotspot/src/share/vm/runtime/java.hpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/runtime/java.hpp Thu Jun 28 10:35:28 2012 -0700
@@ -74,6 +74,7 @@
private:
static JDK_Version _current;
+ static const char* _runtime_name;
// In this class, we promote the minor version of release to be the
// major version for releases >= 5 in anticipation of the JDK doing the
@@ -181,6 +182,13 @@
void to_string(char* buffer, size_t buflen) const;
+ static const char* runtime_name() { // returns the stored runtime-name pointer; it can be NULL (vmError.cpp checks for NULL before printing it)
+ return _runtime_name;
+ }
+ static void set_runtime_name(const char* name) { // stores the pointer itself (no copy is made), so the pointed-to string must stay valid while it is used
+ _runtime_name = name;
+ }
+
// Convenience methods for queries on the current major/minor version
static bool is_jdk12x_version() {
return current().compare_major(2) == 0;
--- a/hotspot/src/share/vm/runtime/thread.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/runtime/thread.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -996,6 +996,29 @@
vmSymbols::void_method_signature(), CHECK);
}
+char java_runtime_name[128] = ""; // fixed-size buffer that receives the UTF-8 form of the runtime name (filled in by get_java_runtime_name below)
+
+// Extract the JRE name from sun.misc.Version.java_runtime_name; returns NULL when the class, the field, or the field's value is missing
+static const char* get_java_runtime_name(TRAPS) {
+ klassOop k = SystemDictionary::find(vmSymbols::sun_misc_Version(),
+ Handle(), Handle(), CHECK_AND_CLEAR_NULL); // NOTE(review): macro name suggests a pending exception is cleared and NULL is returned -- confirm
+ fieldDescriptor fd;
+ bool found = k != NULL &&
+ instanceKlass::cast(k)->find_local_field(vmSymbols::java_runtime_name_name(),
+ vmSymbols::string_signature(), &fd); // looks for a field with that name and the String signature; fd is used below for the field's offset
+ if (found) {
+ oop name_oop = k->java_mirror()->obj_field(fd.offset()); // read the object reference stored in the class mirror at the field's offset
+ if (name_oop == NULL)
+ return NULL; // the field exists but currently holds null
+ const char* name = java_lang_String::as_utf8_string(name_oop,
+ java_runtime_name,
+ sizeof(java_runtime_name)); // convert into the global buffer, passing its size as the limit
+ return name;
+ } else {
+ return NULL; // class not found, or it has no such local field
+ }
+}
+
// General purpose hook into Java code, run once when the VM is initialized.
// The Java library method itself may be changed independently from the VM.
static void call_postVMInitHook(TRAPS) {
@@ -3352,6 +3375,9 @@
// The VM creates & returns objects of this class. Make sure it's initialized.
initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
call_initializeSystemClass(CHECK_0);
+
+ // get the Java runtime name after java.lang.System is initialized
+ JDK_Version::set_runtime_name(get_java_runtime_name(THREAD));
} else {
warning("java.lang.System not initialized");
}
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -1944,14 +1944,12 @@
declare_c2_type(ReverseBytesLNode, Node) \
declare_c2_type(VectorNode, Node) \
declare_c2_type(AddVBNode, VectorNode) \
- declare_c2_type(AddVCNode, VectorNode) \
declare_c2_type(AddVSNode, VectorNode) \
declare_c2_type(AddVINode, VectorNode) \
declare_c2_type(AddVLNode, VectorNode) \
declare_c2_type(AddVFNode, VectorNode) \
declare_c2_type(AddVDNode, VectorNode) \
declare_c2_type(SubVBNode, VectorNode) \
- declare_c2_type(SubVCNode, VectorNode) \
declare_c2_type(SubVSNode, VectorNode) \
declare_c2_type(SubVINode, VectorNode) \
declare_c2_type(SubVLNode, VectorNode) \
@@ -1962,73 +1960,33 @@
declare_c2_type(DivVFNode, VectorNode) \
declare_c2_type(DivVDNode, VectorNode) \
declare_c2_type(LShiftVBNode, VectorNode) \
- declare_c2_type(LShiftVCNode, VectorNode) \
declare_c2_type(LShiftVSNode, VectorNode) \
declare_c2_type(LShiftVINode, VectorNode) \
- declare_c2_type(URShiftVBNode, VectorNode) \
- declare_c2_type(URShiftVCNode, VectorNode) \
- declare_c2_type(URShiftVSNode, VectorNode) \
- declare_c2_type(URShiftVINode, VectorNode) \
+ declare_c2_type(RShiftVBNode, VectorNode) \
+ declare_c2_type(RShiftVSNode, VectorNode) \
+ declare_c2_type(RShiftVINode, VectorNode) \
declare_c2_type(AndVNode, VectorNode) \
declare_c2_type(OrVNode, VectorNode) \
declare_c2_type(XorVNode, VectorNode) \
- declare_c2_type(VectorLoadNode, LoadNode) \
- declare_c2_type(Load16BNode, VectorLoadNode) \
- declare_c2_type(Load8BNode, VectorLoadNode) \
- declare_c2_type(Load4BNode, VectorLoadNode) \
- declare_c2_type(Load8CNode, VectorLoadNode) \
- declare_c2_type(Load4CNode, VectorLoadNode) \
- declare_c2_type(Load2CNode, VectorLoadNode) \
- declare_c2_type(Load8SNode, VectorLoadNode) \
- declare_c2_type(Load4SNode, VectorLoadNode) \
- declare_c2_type(Load2SNode, VectorLoadNode) \
- declare_c2_type(Load4INode, VectorLoadNode) \
- declare_c2_type(Load2INode, VectorLoadNode) \
- declare_c2_type(Load2LNode, VectorLoadNode) \
- declare_c2_type(Load4FNode, VectorLoadNode) \
- declare_c2_type(Load2FNode, VectorLoadNode) \
- declare_c2_type(Load2DNode, VectorLoadNode) \
- declare_c2_type(VectorStoreNode, StoreNode) \
- declare_c2_type(Store16BNode, VectorStoreNode) \
- declare_c2_type(Store8BNode, VectorStoreNode) \
- declare_c2_type(Store4BNode, VectorStoreNode) \
- declare_c2_type(Store8CNode, VectorStoreNode) \
- declare_c2_type(Store4CNode, VectorStoreNode) \
- declare_c2_type(Store2CNode, VectorStoreNode) \
- declare_c2_type(Store4INode, VectorStoreNode) \
- declare_c2_type(Store2INode, VectorStoreNode) \
- declare_c2_type(Store2LNode, VectorStoreNode) \
- declare_c2_type(Store4FNode, VectorStoreNode) \
- declare_c2_type(Store2FNode, VectorStoreNode) \
- declare_c2_type(Store2DNode, VectorStoreNode) \
- declare_c2_type(Replicate16BNode, VectorNode) \
- declare_c2_type(Replicate8BNode, VectorNode) \
- declare_c2_type(Replicate4BNode, VectorNode) \
- declare_c2_type(Replicate8CNode, VectorNode) \
- declare_c2_type(Replicate4CNode, VectorNode) \
- declare_c2_type(Replicate2CNode, VectorNode) \
- declare_c2_type(Replicate8SNode, VectorNode) \
- declare_c2_type(Replicate4SNode, VectorNode) \
- declare_c2_type(Replicate2SNode, VectorNode) \
- declare_c2_type(Replicate4INode, VectorNode) \
- declare_c2_type(Replicate2INode, VectorNode) \
- declare_c2_type(Replicate2LNode, VectorNode) \
- declare_c2_type(Replicate4FNode, VectorNode) \
- declare_c2_type(Replicate2FNode, VectorNode) \
- declare_c2_type(Replicate2DNode, VectorNode) \
+ declare_c2_type(LoadVectorNode, LoadNode) \
+ declare_c2_type(StoreVectorNode, StoreNode) \
+ declare_c2_type(ReplicateBNode, VectorNode) \
+ declare_c2_type(ReplicateSNode, VectorNode) \
+ declare_c2_type(ReplicateINode, VectorNode) \
+ declare_c2_type(ReplicateLNode, VectorNode) \
+ declare_c2_type(ReplicateFNode, VectorNode) \
+ declare_c2_type(ReplicateDNode, VectorNode) \
declare_c2_type(PackNode, VectorNode) \
declare_c2_type(PackBNode, PackNode) \
- declare_c2_type(PackCNode, PackNode) \
declare_c2_type(PackSNode, PackNode) \
declare_c2_type(PackINode, PackNode) \
declare_c2_type(PackLNode, PackNode) \
declare_c2_type(PackFNode, PackNode) \
declare_c2_type(PackDNode, PackNode) \
- declare_c2_type(Pack2x1BNode, PackNode) \
- declare_c2_type(Pack2x2BNode, PackNode) \
+ declare_c2_type(Pack2LNode, PackNode) \
+ declare_c2_type(Pack2DNode, PackNode) \
declare_c2_type(ExtractNode, Node) \
declare_c2_type(ExtractBNode, ExtractNode) \
- declare_c2_type(ExtractCNode, ExtractNode) \
declare_c2_type(ExtractSNode, ExtractNode) \
declare_c2_type(ExtractINode, ExtractNode) \
declare_c2_type(ExtractLNode, ExtractNode) \
--- a/hotspot/src/share/vm/utilities/vmError.cpp Thu Jun 28 04:21:07 2012 -0400
+++ b/hotspot/src/share/vm/utilities/vmError.cpp Thu Jun 28 10:35:28 2012 -0700
@@ -450,7 +450,9 @@
// VM version
st->print_cr("#");
JDK_Version::current().to_string(buf, sizeof(buf));
- st->print_cr("# JRE version: %s", buf);
+ const char* runtime_name = JDK_Version::runtime_name() != NULL ?
+ JDK_Version::runtime_name() : "";
+ st->print_cr("# JRE version: %s (%s)", runtime_name, buf);
st->print_cr("# Java VM: %s (%s %s %s %s)",
Abstract_VM_Version::vm_name(),
Abstract_VM_Version::vm_release(),
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestBooleanVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,952 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestBooleanVect
+ */
+
+public class TestBooleanVect {
+ private static final int ARRLEN = 997; // length of both test arrays allocated in test()
+ private static final int ITERS = 11000; // number of calls per kernel in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // constant index offset used by the *_off kernels and passed as the offset argument to the *_inv kernels
+ private static final int SCALE = 2; // index scale; the *_scl verification checks elements by i%SCALE and i*SCALE
+ private static final int ALIGN_OFF = 8; // relative offset between the two arrays checked by the *_aln* verifications
+ private static final int UNALIGN_OFF = 5; // relative offset between the two arrays checked by the *_unaln* verifications
+
+ public static void main(String args[]) { // entry point: runs test() and reports PASSED, or FAILED with exit status 97
+ System.out.println("Testing Boolean vectors");
+ int errn = test(); // number of verification mismatches
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // non-zero exit status signals the failure to the caller
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warms up all kernels, verifies each kernel's result, then prints timings; returns the accumulated verify() count
+ boolean[] a1 = new boolean[ARRLEN];
+ boolean[] a2 = new boolean[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // every kernel is called ITERS times before any result is checked
+ test_ci(a1);
+ test_vi(a2, true);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, true, true);
+ test_ci_neg(a1);
+ test_vi_neg(a2, true);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, true, true);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, true);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, true, true);
+ test_ci_off(a1);
+ test_vi_off(a2, true);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, true, true);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, true, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, true, true, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, true);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, true, true);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, true, true);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, true, true);
+ }
+ // Initialize both arrays to false so the warmup leaves no state behind
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ // Test and verify results: each kernel is run once and its output compared element by element
+ System.out.println("Verification");
+ int errn = 0; // running sum of the values returned by verify()
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], false);
+ }
+ test_vi(a2, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], true);
+ }
+ test_cp(a1, a2); // a2 is all true at this point, so a1 must become all true
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], true);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], false);
+ errn += verify("test_2ci: a2", i, a2[i], false);
+ }
+ test_2vi(a1, a2, true, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], true);
+ errn += verify("test_2vi: a2", i, a2[i], true);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], false);
+ }
+ test_vi_neg(a2, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], true);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], true);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], false);
+ errn += verify("test_2ci_neg: a2", i, a2[i], false);
+ }
+ test_2vi_neg(a1, a2, true, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], true);
+ errn += verify("test_2vi_neg: a2", i, a2[i], true);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], false);
+ }
+ test_vi_oppos(a2, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], true);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], true);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], false);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], false);
+ }
+ test_2vi_oppos(a1, a2, true, true);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], true);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], true);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) { // only indices at or above OFFSET are checked for the *_off kernels
+ errn += verify("test_ci_off: a1", i, a1[i], false);
+ }
+ test_vi_off(a2, true);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], true);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], true);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], false);
+ errn += verify("test_2ci_off: a2", i, a2[i], false);
+ }
+ test_2vi_off(a1, a2, true, true);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], true);
+ errn += verify("test_2vi_off: a2", i, a2[i], true);
+ }
+ for (int i=0; i<OFFSET; i++) { // the first OFFSET elements must still hold the reset value false
+ errn += verify("test_2vi_off: a1", i, a1[i], false);
+ errn += verify("test_2vi_off: a2", i, a2[i], false);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], false);
+ }
+ test_vi_inv(a2, true, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], true);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], true);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], false);
+ errn += verify("test_2ci_inv: a2", i, a2[i], false);
+ }
+ test_2vi_inv(a1, a2, true, true, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], true);
+ errn += verify("test_2vi_inv: a2", i, a2[i], true);
+ }
+ for (int i=0; i<OFFSET; i++) { // the first OFFSET elements must still hold the reset value false
+ errn += verify("test_2vi_inv: a1", i, a1[i], false);
+ errn += verify("test_2vi_inv: a2", i, a2[i], false);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = true; // a1 starts true so that the false values expected at multiples of SCALE are observable
+ a2[i] = false;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ boolean val = (i%SCALE != 0); // expected: false at multiples of SCALE, true elsewhere
+ errn += verify("test_ci_scl: a1", i, a1[i], val);
+ }
+ test_vi_scl(a2, true);
+ for (int i=0; i<ARRLEN; i++) {
+ boolean val = (i%SCALE == 0); // expected: true at multiples of SCALE, false elsewhere
+ errn += verify("test_vi_scl: a2", i, a2[i], val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_scl: a1", i, a1[i], true);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], true);
+ } else if (i*SCALE < ARRLEN) { // guard keeps the scaled index inside the array
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], false);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], false);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], false);
+ }
+ }
+ test_2vi_scl(a1, a2, false, true);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], true);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], false);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], false);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], true);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_vi(a2, true); // fill the copy source with true
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are expected to be untouched
+ errn += verify("test_cp_alndst: a1", i, a1[i], false);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], true);
+ }
+ test_vi(a2, false); // refill the copy source with false
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], false);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the last ALIGN_OFF elements are expected to keep the earlier true values
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], true);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], false);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], false);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], false);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], false);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_2vi_aln(a1, a2, true, true);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], true);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], false);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], false);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], true);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_vi(a2, true); // fill the copy source with true
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], false);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], true);
+ }
+ test_vi(a2, false); // refill the copy source with false
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], false);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], true);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], false);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], false);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], false);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], false);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ a2[i] = false;
+ }
+ test_2vi_unaln(a1, a2, true, true);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], true);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], false);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], false);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], true);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (i > 0); // seed pattern: false at index 0, true at 1..ALIGN_OFF-1
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_cp_alndst(a1, a1); // source and destination are the same array
+ for (int i=0; i<ARRLEN; i++) {
+ boolean v = (i%ALIGN_OFF > 0); // expected: the seed pattern repeated with period ALIGN_OFF
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = false;
+ }
+ test_cp_alnsrc(a1, a1); // source and destination are the same array
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], false);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ boolean v = (i%ALIGN_OFF > 0);
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], false);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], false);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_2vi_aln(a1, a1, true, true);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], true);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], true);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (i > 0); // seed pattern: false at index 0, true at 1..UNALIGN_OFF-1
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_cp_unalndst(a1, a1); // source and destination are the same array
+ for (int i=0; i<ARRLEN; i++) {
+ boolean v = (i%UNALIGN_OFF > 0); // expected: the seed pattern repeated with period UNALIGN_OFF
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = false;
+ }
+ test_cp_unalnsrc(a1, a1); // source and destination are the same array
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], false);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ boolean v = (i%UNALIGN_OFF > 0);
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], false);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], false);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = false;
+ }
+ test_2vi_unaln(a1, a1, true, true);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], true);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], true);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification reported mismatches
+ return errn;
+
+ System.out.println("Time"); // timing section: ITERS calls per kernel, elapsed milliseconds printed per kernel
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, true, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, true, true, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, true, true);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn; // always 0 on this path: a positive count already returned before the timing section
+ }
+
+ static void test_ci(boolean[] a) { // stores the constant false into every element of a, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = false;
+ }
+ }
+ static void test_vi(boolean[] a, boolean b) { // stores the loop-invariant value b into every element of a, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp(boolean[] a, boolean[] b) { // copies b[i] into a[i] for every index of a, ascending; b must be at least as long as a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci(boolean[] a, boolean[] b) { // stores false into a[i] and b[i] in one loop, ascending; the bound is a.length for both arrays
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = false;
+ b[i] = false;
+ }
+ }
+ static void test_2vi(boolean[] a, boolean[] b, boolean c, boolean d) { // stores c into a[i] and d into b[i] in one loop, ascending; the bound is a.length
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_neg(boolean[] a) { // stores false into every element of a, descending from a.length-1 to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = false;
+ }
+ }
+ static void test_vi_neg(boolean[] a, boolean b) { // Stores the value b into every element of a, walking from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp_neg(boolean[] a, boolean[] b) { // Copies b[i] into a[i], walking from index a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a.length only
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci_neg(boolean[] a, boolean[] b) { // Stores false into a[i] and b[i] in the same loop body, index descending.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a.length only
+ a[i] = false;
+ b[i] = false;
+ }
+ }
+ static void test_2vi_neg(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a[i] and d into b[i] in the same loop body, index descending.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a.length only
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_oppos(boolean[] a) { // Stores false into every element of a; the counter rises while the store index falls.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = false; // i = 0 hits the last element, i = limit hits element 0
+ }
+ }
+ static void test_vi_oppos(boolean[] a, boolean b) { // Stores b into every element of a; the counter falls while the store index rises.
+ int limit = a.length-1; // last valid index of a
+ for (int i = limit; i >= 0; i-=1) {
+ a[limit-i] = b; // i = limit hits element 0, i = 0 hits the last element
+ }
+ }
+ static void test_cp_oppos(boolean[] a, boolean[] b) { // Writes a in ascending order while reading b in descending order.
+ int limit = a.length-1; // computed from a, but used to index b
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // a[0] gets b[limit], a[limit] gets b[0]
+ }
+ }
+ static void test_2ci_oppos(boolean[] a, boolean[] b) { // Stores false into a through a falling index and into b through a rising index, same loop body.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = false; // descending through a
+ b[i] = false; // ascending through b
+ }
+ }
+ static void test_2vi_oppos(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a through a falling index and d into b through a rising index, same loop body.
+ int limit = a.length-1; // last valid index of a
+ for (int i = limit; i >= 0; i-=1) {
+ a[i] = c; // descending through a
+ b[limit-i] = d; // ascending through b
+ }
+ }
+ static void test_ci_off(boolean[] a) { // Stores false into a[i+OFFSET] for i in [0, a.length-OFFSET); OFFSET is a class constant declared outside this excerpt.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET so i+OFFSET stays below a.length
+ a[i+OFFSET] = false;
+ }
+ }
+ static void test_vi_off(boolean[] a, boolean b) { // Stores b into a[i+OFFSET] for i in [0, a.length-OFFSET); OFFSET is a class constant declared outside this excerpt.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET so i+OFFSET stays below a.length
+ a[i+OFFSET] = b;
+ }
+ }
+ static void test_cp_off(boolean[] a, boolean[] b) { // Copies b[i+OFFSET] into a[i+OFFSET] for i in [0, a.length-OFFSET); both sides use the same shifted index.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a.length only
+ a[i+OFFSET] = b[i+OFFSET];
+ }
+ }
+ static void test_2ci_off(boolean[] a, boolean[] b) { // Stores false into a[i+OFFSET] and b[i+OFFSET] in the same loop body.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a.length only
+ a[i+OFFSET] = false;
+ b[i+OFFSET] = false;
+ }
+ }
+ static void test_2vi_off(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a[i+OFFSET] and d into b[i+OFFSET] in the same loop body.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a.length only
+ a[i+OFFSET] = c;
+ b[i+OFFSET] = d;
+ }
+ }
+ static void test_ci_inv(boolean[] a, int k) { // Stores false into a[i+k] for i in [0, a.length-k); the shift k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k is not modified inside the loop
+ a[i+k] = false;
+ }
+ }
+ static void test_vi_inv(boolean[] a, boolean b, int k) { // Stores b into a[i+k] for i in [0, a.length-k); the shift k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k is not modified inside the loop
+ a[i+k] = b;
+ }
+ }
+ static void test_cp_inv(boolean[] a, boolean[] b, int k) { // Copies b[i+k] into a[i+k] for i in [0, a.length-k); the shift k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a.length only
+ a[i+k] = b[i+k];
+ }
+ }
+ static void test_2ci_inv(boolean[] a, boolean[] b, int k) { // Stores false into a[i+k] and b[i+k] in the same loop body; k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a.length only
+ a[i+k] = false;
+ b[i+k] = false;
+ }
+ }
+ static void test_2vi_inv(boolean[] a, boolean[] b, boolean c, boolean d, int k) { // Stores c into a[i+k] and d into b[i+k] in the same loop body; k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a.length only
+ a[i+k] = c;
+ b[i+k] = d;
+ }
+ }
+ static void test_ci_scl(boolean[] a) { // Stores false into a[i*SCALE] while i*SCALE < a.length; SCALE is a class constant declared outside this excerpt.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // the loop test is on the scaled index, not on i
+ a[i*SCALE] = false;
+ }
+ }
+ static void test_vi_scl(boolean[] a, boolean b) { // Stores b into a[i*SCALE] while i*SCALE < a.length; SCALE is a class constant declared outside this excerpt.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // the loop test is on the scaled index, not on i
+ a[i*SCALE] = b;
+ }
+ }
+ static void test_cp_scl(boolean[] a, boolean[] b) { // Copies b[i*SCALE] into a[i*SCALE] while i*SCALE < a.length.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a.length only
+ a[i*SCALE] = b[i*SCALE];
+ }
+ }
+ static void test_2ci_scl(boolean[] a, boolean[] b) { // Stores false into a[i*SCALE] and b[i*SCALE] in the same loop body.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a.length only
+ a[i*SCALE] = false;
+ b[i*SCALE] = false;
+ }
+ }
+ static void test_2vi_scl(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a[i*SCALE] and d into b[i*SCALE] in the same loop body.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a.length only
+ a[i*SCALE] = c;
+ b[i*SCALE] = d;
+ }
+ }
+ static void test_cp_alndst(boolean[] a, boolean[] b) { // Copies b[i] into a[i+ALIGN_OFF]: the destination index is shifted, the source index is not.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ALIGN_OFF is a class constant declared outside this excerpt
+ a[i+ALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_alnsrc(boolean[] a, boolean[] b) { // Copies b[i+ALIGN_OFF] into a[i]: the source index is shifted, the destination index is not.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ALIGN_OFF is a class constant declared outside this excerpt
+ a[i] = b[i+ALIGN_OFF];
+ }
+ }
+ static void test_2ci_aln(boolean[] a, boolean[] b) { // Stores false into a[i+ALIGN_OFF] and b[i] in the same loop body.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound comes from a.length only
+ a[i+ALIGN_OFF] = false; // shifted index
+ b[i] = false; // unshifted index
+ }
+ }
+ static void test_2vi_aln(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a[i] and d into b[i+ALIGN_OFF] in the same loop body.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound comes from a.length only
+ a[i] = c; // unshifted index
+ b[i+ALIGN_OFF] = d; // shifted index
+ }
+ }
+ static void test_cp_unalndst(boolean[] a, boolean[] b) { // Copies b[i] into a[i+UNALIGN_OFF]: the destination index is shifted, the source index is not.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // UNALIGN_OFF is a class constant declared outside this excerpt
+ a[i+UNALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_unalnsrc(boolean[] a, boolean[] b) { // Copies b[i+UNALIGN_OFF] into a[i]: the source index is shifted, the destination index is not.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // UNALIGN_OFF is a class constant declared outside this excerpt
+ a[i] = b[i+UNALIGN_OFF];
+ }
+ }
+ static void test_2ci_unaln(boolean[] a, boolean[] b) { // Stores false into a[i+UNALIGN_OFF] and b[i] in the same loop body.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound comes from a.length only
+ a[i+UNALIGN_OFF] = false; // shifted index
+ b[i] = false; // unshifted index
+ }
+ }
+ static void test_2vi_unaln(boolean[] a, boolean[] b, boolean c, boolean d) { // Stores c into a[i] and d into b[i+UNALIGN_OFF] in the same loop body.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound comes from a.length only
+ a[i] = c; // unshifted index
+ b[i+UNALIGN_OFF] = d; // shifted index
+ }
+ }
+
+ static int verify(String text, int i, boolean elem, boolean val) { // Compares an observed element with its expected value; returns 1 on mismatch, 0 otherwise.
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // text and i only label the message; they do not affect the result
+ return 1; // presumably summed into an error count by the caller (callers are outside this excerpt)
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteDoubleVect
+ */
+
+public class TestByteDoubleVect { // Runs loops that access a byte[] and a double[] in the same loop body and checks every resulting element.
+ private static final int ARRLEN = 997; // length of each of the four arrays allocated in test()
+ private static final int ITERS = 11000; // repetitions of the warmup loop and of each timing loop
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by the _aln, _alndst and _alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // index shift used by the _unaln, _unalndst and _unalnsrc kernels
+
+ public static void main(String args[]) { // Entry point: runs test() and reports the outcome; args is not read.
+ System.out.println("Testing Byte + Double vectors");
+ int errn = test(); // number of mismatching elements found
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // non-zero exit status signals failure to the launcher
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warms up every kernel, verifies each kernel's output element by element, then prints timings; returns the mismatch count.
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ double[] b1 = new double[ARRLEN];
+ double[] b2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // presumably enough calls for the JIT to compile each kernel before verification - TODO confirm
+ test_ci(a1, b1);
+ test_vi(a2, b2, (byte)123, 103.);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (byte)123, 103.);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (byte)123, 103.);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (byte)123, 103.);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (byte)123, 103.);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) { // -1 differs from every value the kernels store, so an unwritten element is detectable
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running count of mismatches; each verify() call adds 0 or 1
+ {
+ test_ci(a1, b1); // kernel stores the constants -123 / -103.
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.);
+ }
+ test_vi(a2, b2, (byte)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.);
+ }
+ test_cp(a1, a2, b1, b2); // a2/b2 still hold 123 / 103. from test_vi above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+ }
+ test_vi_neg(a2, b2, (byte)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+ }
+ test_cp_neg(a1, a2, b1, b2); // a2/b2 still hold 123 / 103. from test_vi_neg above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+ }
+ test_vi_oppos(a2, b2, (byte)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniformly 123 / 103., so the reversed copy is uniform too
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are never written by the kernel
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source value so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the last ALIGN_OFF elements keep the 123 left by test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_ci_aln(a1, b1); // writes a1 at i+ALIGN_OFF and b1 at i
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_vi_aln(a1, b1, (byte)123, 103.); // writes a1 at i and b1 at i+ALIGN_OFF
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are never written by the kernel
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source value so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the last UNALIGN_OFF elements keep the 123 left by test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal widens to -1.0, the same value as the -1. used in the other resets
+ }
+ test_ci_unaln(a1, b1); // writes a1 at i+UNALIGN_OFF and b1 at i
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal widens to -1.0, the same value as the -1. used in the other resets
+ }
+ test_vi_unaln(a1, b1, (byte)123, 103.); // writes a1 at i and b1 at i+UNALIGN_OFF
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // seed pattern 0..ALIGN_OFF-1 in the leading elements
+ a1[i] = (byte)i;
+ b1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array: each iteration reads an element written ALIGN_OFF iterations earlier
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the sequential loop propagates the seed pattern with period ALIGN_OFF
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite elements ALIGN_OFF..2*ALIGN_OFF-1 with the marker -1
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same array again: every element is read before the loop reaches it as a destination
+ for (int i=0; i<ALIGN_OFF; i++) { // the marker block has moved down by ALIGN_OFF
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // seed pattern 0..UNALIGN_OFF-1 in the leading elements
+ a1[i] = (byte)i;
+ b1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same array: each iteration reads an element written UNALIGN_OFF iterations earlier
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the sequential loop propagates the seed pattern with period UNALIGN_OFF
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite elements UNALIGN_OFF..2*UNALIGN_OFF-1 with the marker -1
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same array again: every element is read before the loop reaches it as a destination
+ for (int i=0; i<UNALIGN_OFF; i++) { // the marker block has moved down by UNALIGN_OFF
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already found mismatches
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // wall-clock timestamps in milliseconds; each kernel's elapsed time is printed, never checked
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (byte)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (byte)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (byte)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (byte)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (byte)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // always 0 on this path: a non-zero count returned before the timing section
+ }
+
+ static void test_ci(byte[] a, double[] b) { // Stores the constants -123 into a[i] and -103. into b[i], ascending; bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi(byte[] a, double[] b, byte c, double d) { // Stores the arguments c into a[i] and d into b[i], ascending; bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b, double[] c, double[] d) { // Copies b into a and d into c element by element in one loop, ascending; bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(byte[] a, double[] b) { // Stores -123 into a[i] and -103. into b[i], walking from index a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_neg(byte[] a, double[] b, byte c, double d) { // Stores c into a[i] and d into b[i], walking from index a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b, double[] c, double[] d) { // Copies b into a and d into c in one loop, walking from index a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(byte[] a, double[] b) { // Stores constants into both arrays; a is walked downward while b is walked upward.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // falling index
+ b[i] = -103.; // rising index
+ }
+ }
+ static void test_vi_oppos(byte[] a, double[] b, byte c, double d) { // Stores c / d into both arrays; a is walked downward while b is walked upward.
+ int limit = a.length-1; // last valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c; // falling index
+ b[limit-i] = d; // rising index
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b, double[] c, double[] d) { // Copies with mirrored indices: a[i] from b[limit-i], and c[limit-i] from d[i].
+ int limit = a.length-1; // computed from a, used to index b and c
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // rising store, falling load
+ c[limit-i] = d[i]; // falling store, rising load
+ }
+ }
+ static void test_ci_aln(byte[] a, double[] b) { // Stores -123 into a[i+ALIGN_OFF] and -103. into b[i] for i in [0, a.length-ALIGN_OFF).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = -123; // a[0..ALIGN_OFF-1] is never written
+ b[i] = -103.; // the last ALIGN_OFF indices below a.length are never written
+ }
+ }
+ static void test_vi_aln(byte[] a, double[] b, byte c, double d) { // Stores c into a[i] and d into b[i+ALIGN_OFF] for i in [0, a.length-ALIGN_OFF).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c; // the last ALIGN_OFF elements of a are never written
+ b[i+ALIGN_OFF] = d; // b[0..ALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b, double[] c, double[] d) { // Copies b[i] to a[i+ALIGN_OFF] and d[i] to c[i+ALIGN_OFF]: destinations shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b, double[] c, double[] d) { // Copies b[i+ALIGN_OFF] to a[i] and d[i+ALIGN_OFF] to c[i]: sources shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(byte[] a, double[] b) { // Stores -123 into a[i+UNALIGN_OFF] and -103. into b[i] for i in [0, a.length-UNALIGN_OFF).
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = -123; // a[0..UNALIGN_OFF-1] is never written
+ b[i] = -103.; // the last UNALIGN_OFF indices below a.length are never written
+ }
+ }
+ static void test_vi_unaln(byte[] a, double[] b, byte c, double d) { // Stores c into a[i] and d into b[i+UNALIGN_OFF] for i in [0, a.length-UNALIGN_OFF).
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c; // the last UNALIGN_OFF elements of a are never written
+ b[i+UNALIGN_OFF] = d; // b[0..UNALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b, double[] c, double[] d) { // Copies b[i] to a[i+UNALIGN_OFF] and d[i] to c[i+UNALIGN_OFF]: destinations shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b, double[] c, double[] d) { // Copies b[i+UNALIGN_OFF] to a[i] and d[i+UNALIGN_OFF] to c[i]: sources shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) { // Byte check: returns 1 and prints a mismatch line when elem differs from val, else 0.
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // text and i only label the message
+ return 1; // test() sums these into errn
+ }
+ return 0;
+ }
+ static int verify(String text, int i, double elem, double val) { // Double check: returns 1 and prints a mismatch line when elem differs from val, else 0.
+ if (elem != val) { // exact comparison; every expected value in test() is a constant or a stored copy of one
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1; // test() sums these into errn
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteFloatVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteFloatVect
+ */
+
+public class TestByteFloatVect {
+ private static final int ARRLEN = 997; // length of each array allocated in test()
+ private static final int ITERS = 11000; // repetitions of the warmup loop and of each timing loop in test()
+ private static final int OFFSET = 3; // no use visible in this excerpt of the class
+ private static final int SCALE = 2; // no use visible in this excerpt of the class
+ private static final int ALIGN_OFF = 8; // bounds the verification ranges for the _aln / _alndst / _alnsrc kernels in test()
+ private static final int UNALIGN_OFF = 5; // bounds the verification ranges for the _unaln / _unalndst / _unalnsrc kernels in test()
+
+ public static void main(String args[]) { // Entry point: runs test() and reports the outcome; args is not read.
+ System.out.println("Testing Byte + Float vectors");
+ int errn = test(); // value returned by test(); main treats any positive value as failure
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // non-zero exit status signals failure to the launcher
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() {
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ float[] b1 = new float[ARRLEN];
+ float[] b2 = new float[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ test_vi(a2, b2, (byte)123, 103.f);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (byte)123, 103.f);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (byte)123, 103.f);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (byte)123, 103.f);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (byte)123, 103.f);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.f);
+ }
+ test_vi(a2, b2, (byte)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.f);
+ }
+ test_cp(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+ }
+ test_vi_neg(a2, b2, (byte)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+ }
+ test_vi_oppos(a2, b2, (byte)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_vi_aln(a1, b1, (byte)123, 103.f);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (byte)123, 103.f);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (float)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_alndst(a1, a1, b1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.f;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (float)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_unalndst(a1, a1, b1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.f;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (byte)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (byte)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (byte)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (byte)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (byte)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(byte[] a, float[] b) { // constant init: a[i] = -123 and b[i] = -103.f for every i in [0, a.length)
+ for (int i = 0; i < a.length; i+=1) { // ascending; a.length bounds both stores, so b needs at least a.length elements
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi(byte[] a, float[] b, byte c, float d) { // variable init: stores argument c into every a[i] and argument d into every b[i]
+ for (int i = 0; i < a.length; i+=1) { // ascending over [0, a.length); b is indexed with the same i
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b, float[] c, float[] d) { // element-wise copy: b into a (bytes) and d into c (floats)
+ for (int i = 0; i < a.length; i+=1) { // ascending; a.length is the only bound, all four arrays share index i
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(byte[] a, float[] b) { // constant init with a negative stride: same stores as an ascending fill
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_neg(byte[] a, float[] b, byte c, float d) { // variable init with a negative stride: a[i] = c, b[i] = d
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b, float[] c, float[] d) { // element-wise copy (b into a, d into c) with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(byte[] a, float[] b) { // constant init where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // a is written from its last element towards its first
+ b[i] = -103.f; // b is written from index 0 upwards
+ }
+ }
+ static void test_vi_oppos(byte[] a, float[] b, byte c, float d) { // variable init where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = a.length-1; i >= 0; i-=1) { // i itself descends
+ a[i] = c; // a is written from its last element towards its first
+ b[limit-i] = d; // limit-i ascends from 0, so b is written from index 0 upwards
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b, float[] c, float[] d) { // copy in which source and destination are indexed in opposite directions
+ int limit = a.length-1; // index of the last element of a; also used to mirror the indices of b and c
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // a ascends while b is read at the mirrored index
+ c[limit-i] = d[i]; // d ascends while c is written at the mirrored index
+ }
+ }
+ static void test_ci_aln(byte[] a, float[] b) { // constant init with the byte store shifted up by ALIGN_OFF (declared elsewhere in the class)
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short of a.length so a[i+ALIGN_OFF] stays in bounds
+ a[i+ALIGN_OFF] = -123; // the first ALIGN_OFF elements of a are never written
+ b[i] = -103.f; // b is written only for indices below a.length-ALIGN_OFF
+ }
+ }
+ static void test_vi_aln(byte[] a, float[] b, byte c, float d) { // variable init with the float store shifted up by ALIGN_OFF (declared elsewhere in the class)
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short of a.length
+ a[i] = c; // a is written only for indices below a.length-ALIGN_OFF
+ b[i+ALIGN_OFF] = d; // the first ALIGN_OFF elements of b are never written
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b, float[] c, float[] d) { // copy whose destination index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ascending; when a==b (or c==d) later iterations re-read values stored by earlier ones
+ a[i+ALIGN_OFF] = b[i]; // the first ALIGN_OFF elements of a are never written
+ c[i+ALIGN_OFF] = d[i]; // the first ALIGN_OFF elements of c are never written
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b, float[] c, float[] d) { // copy whose source index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ascending; stops ALIGN_OFF short so the shifted reads stay within a.length
+ a[i] = b[i+ALIGN_OFF]; // a is written only for indices below a.length-ALIGN_OFF
+ c[i] = d[i+ALIGN_OFF]; // c is written only for indices below a.length-ALIGN_OFF
+ }
+ }
+ static void test_ci_unaln(byte[] a, float[] b) { // constant init with the byte store shifted up by UNALIGN_OFF (declared elsewhere in the class)
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short of a.length so a[i+UNALIGN_OFF] stays in bounds
+ a[i+UNALIGN_OFF] = -123; // the first UNALIGN_OFF elements of a are never written
+ b[i] = -103.f; // b is written only for indices below a.length-UNALIGN_OFF
+ }
+ }
+ static void test_vi_unaln(byte[] a, float[] b, byte c, float d) { // variable init with the float store shifted up by UNALIGN_OFF (declared elsewhere in the class)
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short of a.length
+ a[i] = c; // a is written only for indices below a.length-UNALIGN_OFF
+ b[i+UNALIGN_OFF] = d; // the first UNALIGN_OFF elements of b are never written
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b, float[] c, float[] d) { // copy whose destination index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // ascending; when a==b (or c==d) later iterations re-read values stored by earlier ones
+ a[i+UNALIGN_OFF] = b[i]; // the first UNALIGN_OFF elements of a are never written
+ c[i+UNALIGN_OFF] = d[i]; // the first UNALIGN_OFF elements of c are never written
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b, float[] c, float[] d) { // copy whose source index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // ascending; stops UNALIGN_OFF short so the shifted reads stay within a.length
+ a[i] = b[i+UNALIGN_OFF]; // a is written only for indices below a.length-UNALIGN_OFF
+ c[i] = d[i+UNALIGN_OFF]; // c is written only for indices below a.length-UNALIGN_OFF
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, float elem, float val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteIntVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteIntVect
+ */
+
+ public class TestByteIntVect { // runs loops that fill/copy a byte[] and an int[] together, then checks every element they should have written
+ private static final int ARRLEN = 997; // length of every test array
+ private static final int ITERS = 11000; // number of calls per kernel in the warmup and timing phases
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by test_ci_aln, test_vi_aln, test_cp_alndst, test_cp_alnsrc
+ private static final int UNALIGN_OFF = 5; // index shift used by test_ci_unaln, test_vi_unaln, test_cp_unalndst, test_cp_unalnsrc
+
+ public static void main(String args[]) { // entry point: runs test() and reports PASSED or FAILED
+ System.out.println("Testing Byte + Integer vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // any mismatch ends the process with exit status 97
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warmup, verification, then timing; returns the number of mismatching elements found
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ int[] b1 = new int[ARRLEN];
+ int[] b2 = new int[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // call every kernel ITERS times before any result is checked
+ test_ci(a1, b1);
+ test_vi(a2, b2, (byte)123, (int)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (byte)123, (int)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (byte)123, (int)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (byte)123, (int)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (byte)123, (int)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) { // overwrite whatever the warmup calls left in the arrays with -1
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running total of verify() failures
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci: b1", i, b1[i], (int)-103);
+ }
+ test_vi(a2, b2, (byte)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi: b2", i, b2[i], (int)103);
+ }
+ test_cp(a1, a2, b1, b2); // copies the 123/103 just stored in a2/b2 over a1/b1
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (int)-103);
+ }
+ test_vi_neg(a2, b2, (byte)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (int)103);
+ }
+ test_cp_neg(a1, a2, b1, b2); // copies the 123/103 just stored in a2/b2 over a1/b1
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (int)-103);
+ }
+ test_vi_oppos(a2, b2, (byte)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (int)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // every source element holds the same value, so the mirrored copy is uniform as well
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2); // writes a1/b1 only from index ALIGN_OFF upwards
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change the source value so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2); // overwrites only the first ARRLEN-ALIGN_OFF elements; the tail keeps the 123 from test_cp_alndst
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1); // a1 is written from ALIGN_OFF upwards, b1 below ARRLEN-ALIGN_OFF; the rest must still be -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (int)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (int)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (byte)123, (int)103); // a1 is written below ARRLEN-ALIGN_OFF, b1 from ALIGN_OFF upwards; the rest must still be -1
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2); // writes a1/b1 only from index UNALIGN_OFF upwards
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change the source value so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2); // overwrites only the first ARRLEN-UNALIGN_OFF elements; the tail keeps the 123 from test_cp_unalndst
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1); // a1 is written from UNALIGN_OFF upwards, b1 below ARRLEN-UNALIGN_OFF; the rest must still be -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (int)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (byte)123, (int)103); // a1 is written below ARRLEN-UNALIGN_OFF, b1 from UNALIGN_OFF upwards; the rest must still be -1
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // seed the first ALIGN_OFF elements with 0..ALIGN_OFF-1
+ a1[i] = (byte)i;
+ b1[i] = (int)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array, destination shifted up by ALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the seeded prefix is expected to repeat through the whole array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (int)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // blank out elements ALIGN_OFF..2*ALIGN_OFF-1 before the shifted-down copy
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same array again, this time the source is shifted up by ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) { // the blanked block is expected to have moved to the front
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // seed the first UNALIGN_OFF elements with 0..UNALIGN_OFF-1
+ a1[i] = (byte)i;
+ b1[i] = (int)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same array, destination shifted up by UNALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the seeded prefix is expected to repeat through the whole array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (int)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // blank out elements UNALIGN_OFF..2*UNALIGN_OFF-1 before the shifted-down copy
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same array again, this time the source is shifted up by UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) { // the blanked block is expected to have moved to the front
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing phase when verification found mismatches
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // System.currentTimeMillis() readings taken around each timing loop
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) { // every timing block below: ITERS calls of one kernel, then the elapsed milliseconds are printed
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (byte)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (byte)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (byte)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (byte)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (byte)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(byte[] a, int[] b) { // constant init: a[i] = -123 and b[i] = -103 for every i in [0, a.length)
+ for (int i = 0; i < a.length; i+=1) { // ascending; a.length bounds both stores
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(byte[] a, int[] b, byte c, int d) { // variable init: stores argument c into every a[i] and argument d into every b[i]
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b, int[] c, int[] d) { // element-wise copy: b into a (bytes) and d into c (ints)
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(byte[] a, int[] b) { // constant init with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(byte[] a, int[] b, byte c, int d) { // variable init with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b, int[] c, int[] d) { // element-wise copy with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // i walks from the last index of a down to 0
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(byte[] a, int[] b) { // constant init where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // a is written from its last element towards its first
+ b[i] = -103; // b is written from index 0 upwards
+ }
+ }
+ static void test_vi_oppos(byte[] a, int[] b, byte c, int d) { // variable init where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = a.length-1; i >= 0; i-=1) { // i itself descends
+ a[i] = c; // a is written from its last element towards its first
+ b[limit-i] = d; // limit-i ascends from 0, so b is written from index 0 upwards
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b, int[] c, int[] d) { // copy in which source and destination are indexed in opposite directions
+ int limit = a.length-1; // index of the last element of a; also used to mirror the indices of b and c
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // a ascends while b is read at the mirrored index
+ c[limit-i] = d[i]; // d ascends while c is written at the mirrored index
+ }
+ }
+ static void test_ci_aln(byte[] a, int[] b) { // constant init with the byte store shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short of a.length so a[i+ALIGN_OFF] stays in bounds
+ a[i+ALIGN_OFF] = -123; // the first ALIGN_OFF elements of a are never written
+ b[i] = -103; // b is written only for indices below a.length-ALIGN_OFF
+ }
+ }
+ static void test_vi_aln(byte[] a, int[] b, byte c, int d) { // variable init with the int store shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c; // a is written only for indices below a.length-ALIGN_OFF
+ b[i+ALIGN_OFF] = d; // the first ALIGN_OFF elements of b are never written
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b, int[] c, int[] d) { // copy whose destination index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ascending; when a==b (or c==d) later iterations re-read values stored by earlier ones
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b, int[] c, int[] d) { // copy whose source index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short so the shifted reads stay within a.length
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(byte[] a, int[] b) { // constant init with the byte store shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short of a.length so a[i+UNALIGN_OFF] stays in bounds
+ a[i+UNALIGN_OFF] = -123; // the first UNALIGN_OFF elements of a are never written
+ b[i] = -103; // b is written only for indices below a.length-UNALIGN_OFF
+ }
+ }
+ static void test_vi_unaln(byte[] a, int[] b, byte c, int d) { // variable init with the int store shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c; // a is written only for indices below a.length-UNALIGN_OFF
+ b[i+UNALIGN_OFF] = d; // the first UNALIGN_OFF elements of b are never written
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b, int[] c, int[] d) { // copy whose destination index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // ascending; when a==b (or c==d) later iterations re-read values stored by earlier ones
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b, int[] c, int[] d) { // copy whose source index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short so the shifted reads stay within a.length
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) { // checks one byte element: returns 0 on a match, 1 on a mismatch
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report which slot differs, on stderr
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, int elem, int val) { // checks one int element: returns 0 on a match, 1 on a mismatch
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report which slot differs, on stderr
+ return 1;
+ }
+ return 0;
+ }
+ }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteLongVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteLongVect
+ */
+
+public class TestByteLongVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int OFFSET = 3;
+ private static final int SCALE = 2;
+ private static final int ALIGN_OFF = 8;
+ private static final int UNALIGN_OFF = 5;
+
+ public static void main(String args[]) { // entry point: runs test() and reports PASSED or FAILED
+ System.out.println("Testing Byte + Long vectors");
+ final int failures = test(); // number of mismatches reported by test()
+ if (0 < failures) {
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97); // any mismatch ends the process with exit status 97
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() {
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ long[] b1 = new long[ARRLEN];
+ long[] b2 = new long[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ test_vi(a2, b2, (byte)123, (long)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (byte)123, (long)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (byte)123, (long)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (byte)123, (long)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (byte)123, (long)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci: b1", i, b1[i], (long)-103);
+ }
+ test_vi(a2, b2, (byte)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi: b2", i, b2[i], (long)103);
+ }
+ test_cp(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+ }
+ test_vi_neg(a2, b2, (byte)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+ }
+ test_vi_oppos(a2, b2, (byte)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (byte)123, (long)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (byte)123, (long)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (long)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (long)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (byte)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (byte)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (byte)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (byte)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (byte)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(byte[] a, long[] b) { // constant init: stores -123 into every a[i] and -103 into every b[i], index ascending
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only; b must be at least as long or the store to b[i] throws
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(byte[] a, long[] b, byte c, long d) { // variable init: stores c into every a[i] and d into every b[i], index ascending
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b, long[] c, long[] d) { // copy: a[i] = b[i] and c[i] = d[i] for every index of a, ascending
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(byte[] a, long[] b) { // constant init (-123 / -103) with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(byte[] a, long[] b, byte c, long d) { // variable init (c into a, d into b) with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b, long[] c, long[] d) { // copy b into a and d into c with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(byte[] a, long[] b) { // constant init where the two arrays are traversed in opposite directions
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // a is filled from its last element down to index 0
+ b[i] = -103; // b is filled from index 0 upward
+ }
+ }
+ static void test_vi_oppos(byte[] a, long[] b, byte c, long d) { // variable init where the two arrays are traversed in opposite directions
+ int limit = a.length-1; // last valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) { // i counts down here, unlike test_ci_oppos
+ a[i] = c; // a is filled from its last element down to index 0
+ b[limit-i] = d; // limit-i grows as i shrinks, so b is filled from index 0 upward
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b, long[] c, long[] d) { // reversing copy: a becomes b back-to-front and c becomes d back-to-front
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // load side runs downward, store side upward
+ c[limit-i] = d[i]; // store side runs downward, load side upward
+ }
+ }
+ static void test_ci_aln(byte[] a, long[] b) { // constant init with the two stores ALIGN_OFF elements apart
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // shortened trip count keeps i+ALIGN_OFF inside a
+ a[i+ALIGN_OFF] = -123; // writes a[ALIGN_OFF .. a.length-1]; the first ALIGN_OFF elements are untouched
+ b[i] = -103; // writes b[0 .. a.length-ALIGN_OFF-1]; later elements are untouched
+ }
+ }
+ static void test_vi_aln(byte[] a, long[] b, byte c, long d) { // variable init with the two stores ALIGN_OFF elements apart
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // shortened trip count keeps i+ALIGN_OFF inside an array as long as a
+ a[i] = c; // writes a[0 .. a.length-ALIGN_OFF-1]; later elements are untouched
+ b[i+ALIGN_OFF] = d; // writes b from index ALIGN_OFF upward; the first ALIGN_OFF elements are untouched
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b, long[] c, long[] d) { // copy whose destination index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the first ALIGN_OFF elements of a and c are never written
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, later iterations re-read values stored ALIGN_OFF iterations earlier
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b, long[] c, long[] d) { // copy whose source index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are never written
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, each load happens before that slot is overwritten
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(byte[] a, long[] b) { // constant init with the two stores UNALIGN_OFF elements apart
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // shortened trip count keeps i+UNALIGN_OFF inside a
+ a[i+UNALIGN_OFF] = -123; // writes a[UNALIGN_OFF .. a.length-1]; the first UNALIGN_OFF elements are untouched
+ b[i] = -103; // writes b[0 .. a.length-UNALIGN_OFF-1]; later elements are untouched
+ }
+ }
+ static void test_vi_unaln(byte[] a, long[] b, byte c, long d) { // variable init with the two stores UNALIGN_OFF elements apart
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // shortened trip count keeps i+UNALIGN_OFF inside an array as long as a
+ a[i] = c; // writes a[0 .. a.length-UNALIGN_OFF-1]; later elements are untouched
+ b[i+UNALIGN_OFF] = d; // writes b from index UNALIGN_OFF upward; the first UNALIGN_OFF elements are untouched
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b, long[] c, long[] d) { // copy whose destination index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the first UNALIGN_OFF elements of a and c are never written
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, later iterations re-read values stored UNALIGN_OFF iterations earlier
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b, long[] c, long[] d) { // copy whose source index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are never written
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, each load happens before that slot is overwritten
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) { // checks one byte element against its expected value; text labels the check, i is the element index
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report form: <text>[<i>] = <actual> != <expected>
+ return 1; // one error, so callers can sum the results into an error count
+ }
+ return 0; // match: contributes nothing to the error count
+ }
+ static int verify(String text, int i, long elem, long val) { // long overload: checks one long element against its expected value
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report form: <text>[<i>] = <actual> != <expected>
+ return 1; // one error, so callers can sum the results into an error count
+ }
+ return 0; // match: contributes nothing to the error count
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteShortVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteShortVect
+ */
+
+public class TestByteShortVect {
+ private static final int ARRLEN = 997; // length of all four arrays allocated in test()
+ private static final int ITERS = 11000; // how many times each kernel is called in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // NOTE(review): not referenced anywhere in this class
+ private static final int SCALE = 2; // NOTE(review): not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by test_ci_aln, test_vi_aln, test_cp_alndst and test_cp_alnsrc
+ private static final int UNALIGN_OFF = 5; // index shift used by test_ci_unaln, test_vi_unaln, test_cp_unalndst and test_cp_unalnsrc
+
+ public static void main(String args[]) { // entry point: runs test() and fails the process if any mismatch was counted; args is not used
+ System.out.println("Testing Byte + Short vectors");
+ int errn = test(); // number of mismatching elements found during verification
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // nonzero exit status; 97 presumably follows the test harness convention - confirm
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warms up every kernel, verifies each kernel's output element by element, then prints timings; returns the mismatch count
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ short[] b1 = new short[ARRLEN];
+ short[] b2 = new short[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // calls every kernel ITERS times before anything is checked (presumably so the compiled versions are the ones verified - confirm)
+ test_ci(a1, b1);
+ test_vi(a2, b2, (byte)123, (short)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (byte)123, (short)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (byte)123, (short)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (byte)123, (short)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (byte)123, (short)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ { // scope for all result checks; errn grows by one for every mismatching element
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci: b1", i, b1[i], (short)-103);
+ }
+ test_vi(a2, b2, (byte)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi: b2", i, b2[i], (short)103);
+ }
+ test_cp(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (short)-103);
+ }
+ test_vi_neg(a2, b2, (byte)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (short)103);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (short)-103);
+ }
+ test_vi_oppos(a2, b2, (byte)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (short)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2); // writes only indices >= ALIGN_OFF, so the first ALIGN_OFF elements must still be -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2); // writes only indices < ARRLEN-ALIGN_OFF; the tail keeps the 123 stored by test_cp_alndst above
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1); // a1 is written from ALIGN_OFF upward, b1 only below ARRLEN-ALIGN_OFF; the checks below cover both untouched ranges
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (short)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (short)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (byte)123, (short)103); // mirror of test_ci_aln: here a1 is written only below ARRLEN-ALIGN_OFF and b1 from ALIGN_OFF upward
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2); // writes only indices >= UNALIGN_OFF, so the first UNALIGN_OFF elements must still be -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2); // writes only indices < ARRLEN-UNALIGN_OFF; the tail keeps the 123 stored by test_cp_unalndst above
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (short)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (byte)123, (short)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (short)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // same array as source and destination: the ascending copy repeats the first ALIGN_OFF values across the whole array
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (short)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite indices [ALIGN_OFF, 2*ALIGN_OFF) with -1 so the next copy pulls -1 into the first ALIGN_OFF slots
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same array again: every element below ARRLEN-ALIGN_OFF takes the value ALIGN_OFF positions above it
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ b1[i] = (short)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // same array as source and destination: the ascending copy repeats the first UNALIGN_OFF values across the whole array
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (short)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite indices [UNALIGN_OFF, 2*UNALIGN_OFF) with -1 so the next copy pulls -1 into the first UNALIGN_OFF slots
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same array again: every element below ARRLEN-UNALIGN_OFF takes the value UNALIGN_OFF positions above it
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time"); // below: wall-clock milliseconds for ITERS calls of each kernel; printed only, never added to errn
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (byte)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (byte)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (byte)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (byte)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (byte)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // always 0 here: a nonzero count has already been returned before the timing section
+ }
+
+ static void test_ci(byte[] a, short[] b) { // constant init: stores -123 into every a[i] and -103 into every b[i], index ascending
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only; test() passes arrays of equal length ARRLEN
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(byte[] a, short[] b, byte c, short d) { // variable init: stores c into every a[i] and d into every b[i], index ascending
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b, short[] c, short[] d) { // copy: a[i] = b[i] and c[i] = d[i] for every index of a, ascending
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(byte[] a, short[] b) { // constant init (-123 / -103) with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(byte[] a, short[] b, byte c, short d) { // variable init (c into a, d into b) with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b, short[] c, short[] d) { // copy b into a and d into c with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // index walks down from the last element of a to 0
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(byte[] a, short[] b) { // constant init where the two arrays are traversed in opposite directions
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // a is filled from its last element down to index 0
+ b[i] = -103; // b is filled from index 0 upward
+ }
+ }
+ static void test_vi_oppos(byte[] a, short[] b, byte c, short d) { // variable init where the two arrays are traversed in opposite directions
+ int limit = a.length-1; // last valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) { // i counts down here, unlike test_ci_oppos
+ a[i] = c; // a is filled from its last element down to index 0
+ b[limit-i] = d; // limit-i grows as i shrinks, so b is filled from index 0 upward
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b, short[] c, short[] d) { // reversing copy: a becomes b back-to-front and c becomes d back-to-front
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // load side runs downward, store side upward
+ c[limit-i] = d[i]; // store side runs downward, load side upward
+ }
+ }
+ static void test_ci_aln(byte[] a, short[] b) { // constant init with the two stores ALIGN_OFF (8) elements apart
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // shortened trip count keeps i+ALIGN_OFF inside a
+ a[i+ALIGN_OFF] = -123; // writes a[ALIGN_OFF .. a.length-1]; the first ALIGN_OFF elements are untouched
+ b[i] = -103; // writes b[0 .. a.length-ALIGN_OFF-1]; later elements are untouched
+ }
+ }
+ static void test_vi_aln(byte[] a, short[] b, byte c, short d) { // variable init with the two stores ALIGN_OFF (8) elements apart
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // shortened trip count keeps i+ALIGN_OFF inside an array as long as a
+ a[i] = c; // writes a[0 .. a.length-ALIGN_OFF-1]; later elements are untouched
+ b[i+ALIGN_OFF] = d; // writes b from index ALIGN_OFF upward; the first ALIGN_OFF elements are untouched
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b, short[] c, short[] d) { // copy whose destination index is shifted up by ALIGN_OFF (8)
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the first ALIGN_OFF elements of a and c are never written
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array (as in the overlap checks of test()), later iterations re-read values stored ALIGN_OFF iterations earlier
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b, short[] c, short[] d) { // copy whose source index is shifted up by ALIGN_OFF (8)
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are never written
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, each load happens before that slot is overwritten
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(byte[] a, short[] b) { // constant init with the two stores UNALIGN_OFF (5) elements apart
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // shortened trip count keeps i+UNALIGN_OFF inside a
+ a[i+UNALIGN_OFF] = -123; // writes a[UNALIGN_OFF .. a.length-1]; the first UNALIGN_OFF elements are untouched
+ b[i] = -103; // writes b[0 .. a.length-UNALIGN_OFF-1]; later elements are untouched
+ }
+ }
+ static void test_vi_unaln(byte[] a, short[] b, byte c, short d) { // variable init with the two stores UNALIGN_OFF (5) elements apart
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // shortened trip count keeps i+UNALIGN_OFF inside an array as long as a
+ a[i] = c; // writes a[0 .. a.length-UNALIGN_OFF-1]; later elements are untouched
+ b[i+UNALIGN_OFF] = d; // writes b from index UNALIGN_OFF upward; the first UNALIGN_OFF elements are untouched
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b, short[] c, short[] d) { // copy whose destination index is shifted up by UNALIGN_OFF (5)
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the first UNALIGN_OFF elements of a and c are never written
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array (as in the overlap checks of test()), later iterations re-read values stored UNALIGN_OFF iterations earlier
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b, short[] c, short[] d) { // copy whose source index is shifted up by UNALIGN_OFF (5)
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are never written
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, each load happens before that slot is overwritten
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) { // checks one byte element against its expected value; text labels the check, i is the element index
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report form: <text>[<i>] = <actual> != <expected>
+ return 1; // one error; test() sums these into errn
+ }
+ return 0; // match: contributes nothing to the error count
+ }
+ static int verify(String text, int i, short elem, short val) { // short overload: checks one short element against its expected value
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report form: <text>[<i>] = <actual> != <expected>
+ return 1; // one error; test() sums these into errn
+ }
+ return 0; // match: contributes nothing to the error count
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestByteVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestByteVect
+ */
+
+public class TestByteVect {
+ private static final int ARRLEN = 997; // length of both byte arrays allocated in test()
+ private static final int ITERS = 11000; // iteration count of the warmup loop and of every timing loop in test()
+ private static final int OFFSET = 3; // constant index offset of the *_off kernels; also passed as k to the *_inv kernels
+ private static final int SCALE = 2; // index multiplier of the *_scl kernels
+ private static final int ALIGN_OFF = 8; // element distance between the two accesses in the *_aln / *_alndst / *_alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // element distance between the two accesses in the *_unaln / *_unalndst / *_unalnsrc kernels
+
+ public static void main(String args[]) {
+ // Runs the whole suite; any mismatch ends the VM with status 97 before "PASSED" can be printed.
+ System.out.println("Testing Byte vectors");
+ final int mismatches = test();
+ final boolean failed = (mismatches > 0);
+ if (failed) System.err.println("FAILED: " + mismatches + " errors");
+ if (failed) System.exit(97);
+ System.out.println("PASSED");
+ }
+
+ static int test() {
+ byte[] a1 = new byte[ARRLEN];
+ byte[] a2 = new byte[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, (byte)123);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, (byte)123, (byte)103);
+ test_ci_neg(a1);
+ test_vi_neg(a2, (byte)123);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, (byte)123, (byte)103);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, (byte)123);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+ test_ci_off(a1);
+ test_vi_off(a2, (byte)123);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, (byte)123, (byte)103);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, (byte)123, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, (byte)123);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, (byte)123, (byte)103);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, (byte)123, (byte)103);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (byte)-123);
+ }
+ test_vi(a2, (byte)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (byte)123);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (byte)123);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], (byte)-123);
+ errn += verify("test_2ci: a2", i, a2[i], (byte)-103);
+ }
+ test_2vi(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], (byte)123);
+ errn += verify("test_2vi: a2", i, a2[i], (byte)103);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (byte)-123);
+ }
+ test_vi_neg(a2, (byte)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (byte)123);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (byte)123);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], (byte)-123);
+ errn += verify("test_2ci_neg: a2", i, a2[i], (byte)-103);
+ }
+ test_2vi_neg(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], (byte)123);
+ errn += verify("test_2vi_neg: a2", i, a2[i], (byte)103);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (byte)-123);
+ }
+ test_vi_oppos(a2, (byte)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (byte)123);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (byte)123);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], (byte)-123);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], (byte)-103);
+ }
+ test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], (byte)123);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], (byte)103);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], (byte)-123);
+ }
+ test_vi_off(a2, (byte)123);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], (byte)123);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], (byte)123);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], (byte)-123);
+ errn += verify("test_2ci_off: a2", i, a2[i], (byte)-103);
+ }
+ test_2vi_off(a1, a2, (byte)123, (byte)103);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (byte)123);
+ errn += verify("test_2vi_off: a2", i, a2[i], (byte)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (byte)-1);
+ errn += verify("test_2vi_off: a2", i, a2[i], (byte)-1);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], (byte)-123);
+ }
+ test_vi_inv(a2, (byte)123, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], (byte)123);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], (byte)123);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], (byte)-123);
+ errn += verify("test_2ci_inv: a2", i, a2[i], (byte)-103);
+ }
+ test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (byte)123);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (byte)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (byte)-1);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (byte)-1);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (byte)val);
+ }
+ test_vi_scl(a2, (byte)123);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (byte)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (byte)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], (byte)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (byte)-123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], (byte)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (byte)-103);
+ }
+ }
+ test_2vi_scl(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], (byte)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (byte)123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], (byte)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (byte)103);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (byte)123);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (byte)123);
+ }
+ test_vi(a2, (byte)-123);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (byte)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (byte)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (byte)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (byte)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (byte)123);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (byte)123);
+ }
+ test_vi(a2, (byte)-123);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (byte)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (byte)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (byte)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (byte)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (byte)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (byte)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (byte)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (byte)103);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (byte)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (byte)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (byte)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (byte)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (byte)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, (byte)123, (byte)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (byte)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (byte)103);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, (byte)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, (byte)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, (byte)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, (byte)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, (byte)123, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, (byte)123, (byte)103, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, (byte)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, (byte)123, (byte)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(byte[] a) { // Constant fill: a[i] = -123 for every index, ascending.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ }
+ }
+ static void test_vi(byte[] a, byte b) { // Variable fill: a[i] = b for every index, ascending.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp(byte[] a, byte[] b) { // Element-wise copy a[i] = b[i], ascending; assumes b is at least as long as a.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci(byte[] a, byte[] b) { // Two constant fills in one loop: a[i] = -123 and b[i] = -103; assumes b is at least as long as a.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi(byte[] a, byte[] b, byte c, byte d) { // Two variable fills in one loop: a[i] = c and b[i] = d; assumes b is at least as long as a.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_neg(byte[] a) { // Constant fill a[i] = -123 with a descending index (a.length-1 down to 0).
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ }
+ }
+ static void test_vi_neg(byte[] a, byte b) { // Variable fill a[i] = b with a descending index (a.length-1 down to 0).
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp_neg(byte[] a, byte[] b) { // Element-wise copy a[i] = b[i] with a descending index; assumes b is at least as long as a.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci_neg(byte[] a, byte[] b) { // Two constant fills (a[i] = -123, b[i] = -103) with a descending index; assumes b is at least as long as a.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi_neg(byte[] a, byte[] b, byte c, byte d) { // Two variable fills (a[i] = c, b[i] = d) with a descending index; assumes b is at least as long as a.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_oppos(byte[] a) { // Constant fill through a mirrored index: ascending i stores -123 into a[limit-i], i.e. last element first.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ }
+ }
+ static void test_vi_oppos(byte[] a, byte b) { // Variable fill through a mirrored index: descending i stores b into a[limit-i], i.e. first element first.
+ int limit = a.length-1; // last valid index of a; also the loop's starting value
+ for (int i = limit; i >= 0; i-=1) {
+ a[limit-i] = b;
+ }
+ }
+ static void test_cp_oppos(byte[] a, byte[] b) { // Reversed copy: a[i] = b[limit-i] for ascending i; assumes b is at least as long as a.
+ int limit = a.length-1; // last valid index of a; limit-i mirrors i onto the opposite end
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ }
+ }
+ static void test_2ci_oppos(byte[] a, byte[] b) { // Two constant fills walking in opposite directions: a[limit-i] = -123 and b[i] = -103; assumes b is at least as long as a.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi_oppos(byte[] a, byte[] b, byte c, byte d) { // Two variable fills walking in opposite directions: descending i stores a[i] = c and b[limit-i] = d; assumes b is at least as long as a.
+ int limit = a.length-1; // last valid index of a; also the loop's starting value
+ for (int i = limit; i >= 0; i-=1) {
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_ci_off(byte[] a) { // Constant fill of a[OFFSET..a.length): stores -123 at index i+OFFSET, leaving the first OFFSET elements untouched.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123;
+ }
+ }
+ static void test_vi_off(byte[] a, byte b) { // Variable fill of a[OFFSET..a.length): stores b at index i+OFFSET, leaving the first OFFSET elements untouched.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b;
+ }
+ }
+ static void test_cp_off(byte[] a, byte[] b) { // Copy with the same constant offset on both sides: a[i+OFFSET] = b[i+OFFSET]; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b[i+OFFSET];
+ }
+ }
+ static void test_2ci_off(byte[] a, byte[] b) { // Two constant fills at index i+OFFSET: a gets -123, b gets -103; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123;
+ b[i+OFFSET] = -103;
+ }
+ }
+ static void test_2vi_off(byte[] a, byte[] b, byte c, byte d) { // Two variable fills at index i+OFFSET: a gets c, b gets d; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = c;
+ b[i+OFFSET] = d;
+ }
+ }
+ static void test_ci_inv(byte[] a, int k) { // Constant fill of a[k..a.length): like test_ci_off, but the offset k is a parameter that stays fixed during the loop.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123;
+ }
+ }
+ static void test_vi_inv(byte[] a, byte b, int k) { // Variable fill of a[k..a.length): stores b at index i+k, where k is a parameter that stays fixed during the loop.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b;
+ }
+ }
+ static void test_cp_inv(byte[] a, byte[] b, int k) { // Copy with the same parameter offset on both sides: a[i+k] = b[i+k]; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b[i+k];
+ }
+ }
+ static void test_2ci_inv(byte[] a, byte[] b, int k) { // Two constant fills at index i+k: a gets -123, b gets -103; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123;
+ b[i+k] = -103;
+ }
+ }
+ static void test_2vi_inv(byte[] a, byte[] b, byte c, byte d, int k) { // Two variable fills at index i+k: a gets c, b gets d; assumes b is at least as long as a.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = c;
+ b[i+k] = d;
+ }
+ }
+ static void test_ci_scl(byte[] a) { // Strided constant fill: stores -123 at every index i*SCALE below a.length; other elements are untouched.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123;
+ }
+ }
+ static void test_vi_scl(byte[] a, byte b) { // Strided variable fill: stores b at every index i*SCALE below a.length; other elements are untouched.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b;
+ }
+ }
+ static void test_cp_scl(byte[] a, byte[] b) { // Strided copy: a[i*SCALE] = b[i*SCALE] while i*SCALE < a.length; assumes b is at least as long as a.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b[i*SCALE];
+ }
+ }
+ static void test_2ci_scl(byte[] a, byte[] b) { // Two strided constant fills at index i*SCALE: a gets -123, b gets -103; assumes b is at least as long as a.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123;
+ b[i*SCALE] = -103;
+ }
+ }
+ static void test_2vi_scl(byte[] a, byte[] b, byte c, byte d) { // Two strided variable fills at index i*SCALE: a gets c, b gets d; assumes b is at least as long as a.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = c;
+ b[i*SCALE] = d;
+ }
+ }
+ static void test_cp_alndst(byte[] a, byte[] b) { // Copy with the destination shifted by ALIGN_OFF: a[i+ALIGN_OFF] = b[i]; test() also calls it with a == b (overlapping copy).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_alnsrc(byte[] a, byte[] b) { // Copy with the source shifted by ALIGN_OFF: a[i] = b[i+ALIGN_OFF]; test() also calls it with a == b (overlapping copy).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF];
+ }
+ }
+ static void test_2ci_aln(byte[] a, byte[] b) { // Constant fills ALIGN_OFF apart: a[i+ALIGN_OFF] = -123, then b[i] = -103; test() also calls it with a == b, where the store order decides the result.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi_aln(byte[] a, byte[] b, byte c, byte d) { // Variable fills ALIGN_OFF apart: a[i] = c, then b[i+ALIGN_OFF] = d; test() also calls it with a == b, where the store order decides the result.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(byte[] a, byte[] b) { // Copy with the destination shifted by UNALIGN_OFF: a[i+UNALIGN_OFF] = b[i]; test() also calls it with a == b (overlapping copy).
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_unalnsrc(byte[] a, byte[] b) { // Copy with the source shifted by UNALIGN_OFF: a[i] = b[i+UNALIGN_OFF]; test() also calls it with a == b (overlapping copy).
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF];
+ }
+ }
+ static void test_2ci_unaln(byte[] a, byte[] b) { // Constant fills UNALIGN_OFF apart: a[i+UNALIGN_OFF] = -123, then b[i] = -103; test() also calls it with a == b, where the store order decides the result.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi_unaln(byte[] a, byte[] b, byte c, byte d) { // Variable fills UNALIGN_OFF apart: a[i] = c, then b[i+UNALIGN_OFF] = d; test() also calls it with a == b, where the store order decides the result.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+
+ static int verify(String text, int i, byte elem, byte val) { // Compares one element with its expected value; returns 1 (after logging to stderr) on mismatch, else 0.
+ if (elem == val) {
+ return 0; // match: nothing to add to the caller's error count
+ }
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestCharShortVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestCharShortVect
+ */
+
+public class TestCharShortVect {
+ private static final int ARRLEN = 997; // length of every array allocated by test()
+ private static final int ITERS = 11000; // calls per kernel in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by the *_aln, *_alndst and *_alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // index shift used by the *_unaln, *_unalndst and *_unalnsrc kernels
+
+ public static void main(String[] args) {
+   System.out.println("Testing Char + Short vectors");
+   final int failures = test();
+   final boolean failed = failures > 0;
+   // On failure: report on stderr and leave with status 97; "PASSED" is then never printed.
+   if (failed) System.err.println("FAILED: " + failures + " errors");
+   if (failed) System.exit(97);
+   System.out.println("PASSED");
+ }
+
+ static int test() { // warms up every kernel, verifies each kernel's output, then times them; returns the mismatch count
+ char[] a1 = new char[ARRLEN];
+ char[] a2 = new char[ARRLEN];
+ short[] b1 = new short[ARRLEN];
+ short[] b2 = new short[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // NOTE(review): presumably enough calls for the JIT to compile each kernel before verification - confirm
+ test_ci(a1, b1);
+ test_vi(a2, b2, (char)123, (short)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (char)123, (short)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (char)123, (short)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (char)123, (short)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (char)123, (short)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1; // (char)-1 is 0xFFFF; -1 serves as the "not written" marker in the checks below
+ a2[i] = (char)-1;
+ b1[i] = (short)-1;
+ b2[i] = (short)-1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (char)-123);
+ errn += verify("test_ci: b1", i, b1[i], (short)-103);
+ }
+ test_vi(a2, b2, (char)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (char)123);
+ errn += verify("test_vi: b2", i, b2[i], (short)103);
+ }
+ test_cp(a1, a2, b1, b2); // copies a2 into a1 and b2 into b1; the sources still hold 123/103 from test_vi
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (char)123);
+ errn += verify("test_cp: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ b1[i] = (short)-1;
+ b2[i] = (short)-1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (char)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (short)-103);
+ }
+ test_vi_neg(a2, b2, (char)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (char)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (short)103);
+ }
+ test_cp_neg(a1, a2, b1, b2); // sources were just filled with 123/103 by test_vi_neg
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ b1[i] = (short)-1;
+ b2[i] = (short)-1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (char)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (short)-103);
+ }
+ test_vi_oppos(a2, b2, (char)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (char)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (short)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // reversed copies; the sources are uniform, so every element is expected to match
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)123; // copy source
+ b1[i] = (short)-1;
+ b2[i] = (short)123; // copy source
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are never stored to
+ errn += verify("test_cp_alndst: a1", i, a1[i], (char)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change the source so the next copy can be told apart from the previous one
+ a2[i] = (char)-123;
+ b2[i] = (short)-123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the tail is not written and keeps the 123 left by test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (short)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_ci_aln(a1, b1); // writes a1 from ALIGN_OFF upward and b1 below ARRLEN-ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (short)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (short)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_vi_aln(a1, b1, (char)123, (short)103); // writes a1 below ARRLEN-ALIGN_OFF and b1 from ALIGN_OFF upward
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)123; // copy source
+ b1[i] = (short)-1;
+ b2[i] = (short)123; // copy source
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are never stored to
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (char)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change the source so the next copy can be told apart from the previous one
+ a2[i] = (char)-123;
+ b2[i] = (short)-123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the tail is not written and keeps the 123 left by test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_ci_unaln(a1, b1); // writes a1 from UNALIGN_OFF upward and b1 below ARRLEN-UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (short)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_vi_unaln(a1, b1, (char)123, (short)103); // writes a1 below ARRLEN-UNALIGN_OFF and b1 from UNALIGN_OFF upward
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (short)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // ramp 0..ALIGN_OFF-1 at the front, marker everywhere else
+ a1[i] = (char)i;
+ b1[i] = (short)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array: each element is copied from ALIGN_OFF slots earlier
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the initial ramp is expected to repeat through the whole array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (char)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (short)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite the second period with the marker
+ a1[i+ALIGN_OFF] = (char)-1;
+ b1[i+ALIGN_OFF] = (short)-1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // in-place shift down by ALIGN_OFF: the marker run is expected at the front
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (short)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // ramp 0..UNALIGN_OFF-1 at the front, marker everywhere else
+ a1[i] = (char)i;
+ b1[i] = (short)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ b1[i] = (short)-1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same array: each element is copied from UNALIGN_OFF slots earlier
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the initial ramp is expected to repeat through the whole array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (char)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (short)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite the second period with the marker
+ a1[i+UNALIGN_OFF] = (char)-1;
+ b1[i+UNALIGN_OFF] = (short)-1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // in-place shift down by UNALIGN_OFF: the marker run is expected at the front
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (short)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification reported mismatches
+ return errn;
+
+ System.out.println("Time"); // below: each kernel is called ITERS times and the elapsed milliseconds are printed
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (char)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (char)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (char)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (char)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (char)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // always 0 here: a positive count was already returned before the timing section
+ }
+
+ static void test_ci(char[] a, short[] b) { // constant fill of a char array and a short array in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // bound uses a.length only; b is indexed with the same i
+ a[i] = (char)-123;
+ b[i] = (short)-103;
+ }
+ }
+ static void test_vi(char[] a, short[] b, char c, short d) { // variable fill: c into every a[i], d into every b[i]
+ for (int i = 0; i < a.length; i+=1) { // bound uses a.length only; b is indexed with the same i
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(char[] a, char[] b, short[] c, short[] d) { // element-wise copies b -> a and d -> c in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // bound uses a.length only; b, c and d are indexed with the same i
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(char[] a, short[] b) { // same stores as test_ci, but the index runs downward
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last element of a to index 0
+ a[i] = (char)-123;
+ b[i] = (short)-103;
+ }
+ }
+ static void test_vi_neg(char[] a, short[] b, char c, short d) { // same stores as test_vi, but the index runs downward
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last element of a to index 0
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(char[] a, char[] b, short[] c, short[] d) { // same copies as test_cp, but the index runs downward
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last element of a to index 0
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(char[] a, short[] b) { // constant fill where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = (char)-123; // a is written from its last element downward
+ b[i] = (short)-103; // b is written from index 0 upward
+ }
+ }
+ static void test_vi_oppos(char[] a, short[] b, char c, short d) { // variable fill where the two arrays are walked in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = a.length-1; i >= 0; i-=1) { // i itself runs downward
+ a[i] = c; // a is written from its last element downward
+ b[limit-i] = d; // b is written from index 0 upward
+ }
+ }
+ static void test_cp_oppos(char[] a, char[] b, short[] c, short[] d) { // reversed copies: load and store indices run in opposite directions
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // a receives b in reverse order
+ c[limit-i] = d[i]; // c receives d in reverse order
+ }
+ }
+ static void test_ci_aln(char[] a, short[] b) { // constant stores whose indices into a and b differ by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = (char)-123; // a[0..ALIGN_OFF) is never written
+ b[i] = (short)-103; // b is written only below a.length-ALIGN_OFF
+ }
+ }
+ static void test_vi_aln(char[] a, short[] b, char c, short d) { // variable stores whose indices into a and b differ by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c; // a is written only below a.length-ALIGN_OFF
+ b[i+ALIGN_OFF] = d; // b[0..ALIGN_OFF) is never written
+ }
+ }
+ static void test_cp_alndst(char[] a, char[] b, short[] c, short[] d) { // copies b -> a and d -> c with the destination shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ascending order matters when source and destination are the same array
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(char[] a, char[] b, short[] c, short[] d) { // copies b -> a and d -> c with the source shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are never written
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(char[] a, short[] b) { // constant stores whose indices into a and b differ by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = (char)-123; // a[0..UNALIGN_OFF) is never written
+ b[i] = (short)-103; // b is written only below a.length-UNALIGN_OFF
+ }
+ }
+ static void test_vi_unaln(char[] a, short[] b, char c, short d) { // variable stores whose indices into a and b differ by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c; // a is written only below a.length-UNALIGN_OFF
+ b[i+UNALIGN_OFF] = d; // b[0..UNALIGN_OFF) is never written
+ }
+ }
+ static void test_cp_unalndst(char[] a, char[] b, short[] c, short[] d) { // copies b -> a and d -> c with the destination shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // ascending order matters when source and destination are the same array
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(char[] a, char[] b, short[] c, short[] d) { // copies b -> a and d -> c with the source shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are never written
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ // Compares one char element with its expected value: returns 0 when equal, else logs to stderr and returns 1.
+ static int verify(String text, int i, char elem, char val) {
+   if (elem == val) return 0;
+   // Print numeric code units: concatenating a char appends its glyph, and (char)-1 / (char)-123 are unreadable.
+   System.err.println(text + "[" + i + "] = " + (int)elem + " != " + (int)val);
+   return 1;
+ }
+ static int verify(String text, int i, short elem, short val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestCharVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestCharVect
+ */
+
+public class TestCharVect {
+ private static final int ARRLEN = 997; // length of both arrays allocated by test()
+ private static final int ITERS = 11000; // calls per kernel in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // passed to the *_inv kernels; the *_off/*_inv checks expect indices below it to stay untouched
+ private static final int SCALE = 2; // the *_scl checks expect only indices that are multiples of SCALE to be written
+ private static final int ALIGN_OFF = 8; // index shift assumed by the *_aln, *_alndst and *_alnsrc checks
+ private static final int UNALIGN_OFF = 5; // index shift assumed by the *_unaln, *_unalndst and *_unalnsrc checks
+
+ public static void main(String[] args) {
+   System.out.println("Testing Char vectors");
+   final int failures = test();
+   final boolean failed = failures > 0;
+   // On failure: report on stderr and leave with status 97; "PASSED" is then never printed.
+   if (failed) System.err.println("FAILED: " + failures + " errors");
+   if (failed) System.exit(97);
+   System.out.println("PASSED");
+ }
+
+ static int test() {
+ char[] a1 = new char[ARRLEN];
+ char[] a2 = new char[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, (char)123);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, (char)123, (char)103);
+ test_ci_neg(a1);
+ test_vi_neg(a2, (char)123);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, (char)123, (char)103);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, (char)123);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, (char)123, (char)103);
+ test_ci_off(a1);
+ test_vi_off(a2, (char)123);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, (char)123, (char)103);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, (char)123, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, (char)123);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, (char)123, (char)103);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, (char)123, (char)103);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, (char)123, (char)103);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (char)-123);
+ }
+ test_vi(a2, (char)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (char)123);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (char)123);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], (char)-123);
+ errn += verify("test_2ci: a2", i, a2[i], (char)-103);
+ }
+ test_2vi(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], (char)123);
+ errn += verify("test_2vi: a2", i, a2[i], (char)103);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (char)-123);
+ }
+ test_vi_neg(a2, (char)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (char)123);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (char)123);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], (char)-123);
+ errn += verify("test_2ci_neg: a2", i, a2[i], (char)-103);
+ }
+ test_2vi_neg(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], (char)123);
+ errn += verify("test_2vi_neg: a2", i, a2[i], (char)103);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (char)-123);
+ }
+ test_vi_oppos(a2, (char)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (char)123);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (char)123);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], (char)-123);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], (char)-103);
+ }
+ test_2vi_oppos(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], (char)123);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], (char)103);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], (char)-123);
+ }
+ test_vi_off(a2, (char)123);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], (char)123);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], (char)123);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], (char)-123);
+ errn += verify("test_2ci_off: a2", i, a2[i], (char)-103);
+ }
+ test_2vi_off(a1, a2, (char)123, (char)103);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (char)123);
+ errn += verify("test_2vi_off: a2", i, a2[i], (char)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (char)-1);
+ errn += verify("test_2vi_off: a2", i, a2[i], (char)-1);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], (char)-123);
+ }
+ test_vi_inv(a2, (char)123, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], (char)123);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], (char)123);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], (char)-123);
+ errn += verify("test_2ci_inv: a2", i, a2[i], (char)-103);
+ }
+ test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (char)123);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (char)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (char)-1);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (char)-1);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (char)val);
+ }
+ test_vi_scl(a2, (char)123);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (char)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (char)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], (char)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (char)-123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], (char)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (char)-103);
+ }
+ }
+ test_2vi_scl(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], (char)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (char)123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], (char)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (char)103);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_vi(a2, (char)123);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (char)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (char)123);
+ }
+ test_vi(a2, (char)-123);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (char)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (char)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (char)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_2vi_aln(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (char)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (char)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_vi(a2, (char)123);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (char)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (char)123);
+ }
+ test_vi(a2, (char)-123);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (char)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (char)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (char)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ a2[i] = (char)-1;
+ }
+ test_2vi_unaln(a1, a2, (char)123, (char)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (char)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (char)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (char)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (char)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (char)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = (char)-1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (char)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (char)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_2vi_aln(a1, a1, (char)123, (char)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (char)103);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (char)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (char)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = (char)-1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (char)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (char)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (char)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = (char)-1;
+ }
+ test_2vi_unaln(a1, a1, (char)123, (char)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (char)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (char)103);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, (char)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, (char)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, (char)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, (char)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, (char)123, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, (char)123, (char)103, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, (char)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, (char)123, (char)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(char[] a) { // Kernel: stores one compile-time constant into every element of a.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1, over the whole array
+ a[i] = (char)-123; // int -123 narrowed to char (code unit 65413)
+ }
+ }
+ static void test_vi(char[] a, char b) { // Kernel: stores the caller-supplied value b into every element of a.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1, over the whole array
+ a[i] = b; // b is not modified inside the loop
+ }
+ }
+ static void test_cp(char[] a, char[] b) { // Kernel: element-wise copy of b into a over a's full length.
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = b[i]; // same index on both sides
+ }
+ }
+ static void test_2ci(char[] a, char[] b) { // Kernel: fills a and b with two different constants in one loop.
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = (char)-123; // int -123 narrowed to char
+ b[i] = (char)-103; // int -103 narrowed to char; stored after the a store in each iteration
+ }
+ }
+ static void test_2vi(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d in one loop.
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = c; // c is not modified inside the loop
+ b[i] = d; // stored after the a store in each iteration
+ }
+ }
+ static void test_ci_neg(char[] a) { // Kernel: constant fill of a, walking the index downwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index from the last element to 0, step -1
+ a[i] = (char)-123; // int -123 narrowed to char
+ }
+ }
+ static void test_vi_neg(char[] a, char b) { // Kernel: fills a with the value b, walking the index downwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index from the last element to 0, step -1
+ a[i] = b; // b is not modified inside the loop
+ }
+ }
+ static void test_cp_neg(char[] a, char[] b) { // Kernel: element-wise copy of b into a, walking the index downwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only, so b must be at least as long as a
+ a[i] = b[i]; // same index on both sides
+ }
+ }
+ static void test_2ci_neg(char[] a, char[] b) { // Kernel: fills a and b with two constants, walking the index downwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only, so b must be at least as long as a
+ a[i] = (char)-123; // int -123 narrowed to char
+ b[i] = (char)-103; // int -103 narrowed to char
+ }
+ }
+ static void test_2vi_neg(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d, walking the index downwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only, so b must be at least as long as a
+ a[i] = c; // c is not modified inside the loop
+ b[i] = d; // d is not modified inside the loop
+ }
+ }
+ static void test_ci_oppos(char[] a) { // Kernel: constant fill where the loop counter rises but the written index falls.
+ int limit = a.length-1; // index of the last element
+ for (int i = 0; i < a.length; i+=1) { // counter ascends 0 .. a.length-1
+ a[limit-i] = (char)-123; // written index descends from limit to 0
+ }
+ }
+ static void test_vi_oppos(char[] a, char b) { // Kernel: fills a with b where the loop counter falls but the written index rises.
+ int limit = a.length-1; // index of the last element
+ for (int i = limit; i >= 0; i-=1) { // counter descends limit .. 0
+ a[limit-i] = b; // written index ascends from 0 to limit
+ }
+ }
+ static void test_cp_oppos(char[] a, char[] b) { // Kernel: copies b into a with the read index running opposite to the write index.
+ int limit = a.length-1; // index of a's last element; also used to index b, so b must be at least as long as a
+ for (int i = 0; i < a.length; i+=1) { // counter ascends 0 .. a.length-1
+ a[i] = b[limit-i]; // a receives b[0..limit] in reverse order
+ }
+ }
+ static void test_2ci_oppos(char[] a, char[] b) { // Kernel: two constant fills in one loop, written in opposite directions.
+ int limit = a.length-1; // index of a's last element
+ for (int i = 0; i < a.length; i+=1) { // counter ascends; b is indexed with a's bound, so b must be at least as long as a
+ a[limit-i] = (char)-123; // a is written from its last element towards index 0
+ b[i] = (char)-103; // b is written from index 0 upwards
+ }
+ }
+ static void test_2vi_oppos(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d, written in opposite directions.
+ int limit = a.length-1; // index of a's last element; also bounds the b index, so b must be at least as long as a
+ for (int i = limit; i >= 0; i-=1) { // counter descends limit .. 0
+ a[i] = c; // a is written from its last element towards index 0
+ b[limit-i] = d; // b is written from index 0 upwards
+ }
+ }
+ static void test_ci_off(char[] a) { // Kernel: constant fill of a starting at index OFFSET (class constant defined outside this method).
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // trip count shortened by OFFSET so i+OFFSET stays in bounds
+ a[i+OFFSET] = (char)-123; // elements a[0..OFFSET-1] are never written
+ }
+ }
+ static void test_vi_off(char[] a, char b) { // Kernel: fills a with b starting at index OFFSET (class constant defined outside this method).
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // trip count shortened by OFFSET so i+OFFSET stays in bounds
+ a[i+OFFSET] = b; // elements a[0..OFFSET-1] are never written
+ }
+ }
+ static void test_cp_off(char[] a, char[] b) { // Kernel: copies b into a for indices OFFSET and above, same offset on both sides.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+OFFSET] = b[i+OFFSET]; // elements a[0..OFFSET-1] are never written
+ }
+ }
+ static void test_2ci_off(char[] a, char[] b) { // Kernel: fills a and b with two constants for indices OFFSET and above.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+OFFSET] = (char)-123; // a[0..OFFSET-1] is never written
+ b[i+OFFSET] = (char)-103; // b[0..OFFSET-1] is never written
+ }
+ }
+ static void test_2vi_off(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d for indices OFFSET and above.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+OFFSET] = c; // a[0..OFFSET-1] is never written
+ b[i+OFFSET] = d; // b[0..OFFSET-1] is never written
+ }
+ }
+ static void test_ci_inv(char[] a, int k) { // Kernel: constant fill of a starting at index k, where k is a runtime argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k is not modified in the loop; trip count shortened by k
+ a[i+k] = (char)-123; // for k > 0, elements a[0..k-1] are never written
+ }
+ }
+ static void test_vi_inv(char[] a, char b, int k) { // Kernel: fills a with b starting at index k, where k is a runtime argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k is not modified in the loop; trip count shortened by k
+ a[i+k] = b; // for k > 0, elements a[0..k-1] are never written
+ }
+ }
+ static void test_cp_inv(char[] a, char[] b, int k) { // Kernel: copies b into a for indices k and above, where k is a runtime argument.
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+k] = b[i+k]; // same offset on both sides; for k > 0, a[0..k-1] is never written
+ }
+ }
+ static void test_2ci_inv(char[] a, char[] b, int k) { // Kernel: fills a and b with two constants for indices k and above (k is a runtime argument).
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+k] = (char)-123; // for k > 0, a[0..k-1] is never written
+ b[i+k] = (char)-103; // for k > 0, b[0..k-1] is never written
+ }
+ }
+ static void test_2vi_inv(char[] a, char[] b, char c, char d, int k) { // Kernel: fills a with c and b with d for indices k and above (k is a runtime argument).
+ for (int i = 0; i < a.length-k; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i+k] = c; // for k > 0, a[0..k-1] is never written
+ b[i+k] = d; // for k > 0, b[0..k-1] is never written
+ }
+ }
+ static void test_ci_scl(char[] a) { // Kernel: constant store to every SCALE-th element of a (SCALE is a class constant defined outside this method).
+ for (int i = 0; i*SCALE < a.length; i+=1) { // counter steps by 1; the scaled index is what is bounds-checked
+ a[i*SCALE] = (char)-123; // indices that are not multiples of SCALE are never written
+ }
+ }
+ static void test_vi_scl(char[] a, char b) { // Kernel: stores b into every SCALE-th element of a (SCALE is a class constant defined outside this method).
+ for (int i = 0; i*SCALE < a.length; i+=1) { // counter steps by 1; the scaled index is what is bounds-checked
+ a[i*SCALE] = b; // indices that are not multiples of SCALE are never written
+ }
+ }
+ static void test_cp_scl(char[] a, char[] b) { // Kernel: copies every SCALE-th element of b into the same position of a.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i*SCALE] = b[i*SCALE]; // indices that are not multiples of SCALE are never written
+ }
+ }
+ static void test_2ci_scl(char[] a, char[] b) { // Kernel: stores two constants into every SCALE-th element of a and b.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i*SCALE] = (char)-123; // other indices of a are never written
+ b[i*SCALE] = (char)-103; // other indices of b are never written
+ }
+ }
+ static void test_2vi_scl(char[] a, char[] b, char c, char d) { // Kernel: stores c and d into every SCALE-th element of a and b respectively.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i*SCALE] = c; // other indices of a are never written
+ b[i*SCALE] = d; // other indices of b are never written
+ }
+ }
+ static void test_cp_alndst(char[] a, char[] b) { // Kernel: copies b[i] to a[i+ALIGN_OFF], i.e. only the destination index is shifted.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // ascending order matters when a and b are the same array (earlier stores feed later loads)
+ a[i+ALIGN_OFF] = b[i]; // a[0..ALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_alnsrc(char[] a, char[] b) { // Kernel: copies b[i+ALIGN_OFF] to a[i], i.e. only the source index is shifted.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = b[i+ALIGN_OFF]; // the last ALIGN_OFF elements of a are never written
+ }
+ }
+ static void test_2ci_aln(char[] a, char[] b) { // Kernel: two constant fills in one loop, with a's index shifted by ALIGN_OFF relative to b's.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // trip count shortened by ALIGN_OFF so i+ALIGN_OFF stays in bounds
+ a[i+ALIGN_OFF] = (char)-123; // a[0..ALIGN_OFF-1] is never written through a
+ b[i] = (char)-103; // if a and b are the same array, this later overwrites values stored through a
+ }
+ }
+ static void test_2vi_aln(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d, with b's index shifted by ALIGN_OFF relative to a's.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = c; // the last ALIGN_OFF elements of a are never written through a
+ b[i+ALIGN_OFF] = d; // b[0..ALIGN_OFF-1] is never written through b
+ }
+ }
+ static void test_cp_unalndst(char[] a, char[] b) { // Kernel: copies b[i] to a[i+UNALIGN_OFF], i.e. only the destination index is shifted.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // ascending order matters when a and b are the same array (earlier stores feed later loads)
+ a[i+UNALIGN_OFF] = b[i]; // a[0..UNALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_unalnsrc(char[] a, char[] b) { // Kernel: copies b[i+UNALIGN_OFF] to a[i], i.e. only the source index is shifted.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = b[i+UNALIGN_OFF]; // the last UNALIGN_OFF elements of a are never written
+ }
+ }
+ static void test_2ci_unaln(char[] a, char[] b) { // Kernel: two constant fills in one loop, with a's index shifted by UNALIGN_OFF relative to b's.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // trip count shortened by UNALIGN_OFF so i+UNALIGN_OFF stays in bounds
+ a[i+UNALIGN_OFF] = (char)-123; // a[0..UNALIGN_OFF-1] is never written through a
+ b[i] = (char)-103; // if a and b are the same array, this later overwrites values stored through a
+ }
+ }
+ static void test_2vi_unaln(char[] a, char[] b, char c, char d) { // Kernel: fills a with c and b with d, with b's index shifted by UNALIGN_OFF relative to a's.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound comes from a only, so b must be at least as long as a
+ a[i] = c; // the last UNALIGN_OFF elements of a are never written through a
+ b[i+UNALIGN_OFF] = d; // b[0..UNALIGN_OFF-1] is never written through b
+ }
+ }
+
+ static int verify(String text, int i, char elem, char val) { // Compares elem with the expected val; returns 1 (after logging to stderr) on mismatch, else 0.
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + (int)elem + " != " + (int)val); // numeric code units: raw chars such as (char)-1 are not printable
+ return 1;
+ }
+ return 0; // match: contributes nothing to the caller's error count
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestDoubleVect
+ */
+
+public class TestDoubleVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int OFFSET = 3;
+ private static final int SCALE = 2;
+ private static final int ALIGN_OFF = 8;
+ private static final int UNALIGN_OFF = 5;
+
+ public static void main(String args[]) {
+ System.out.println("Testing Double vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() {
+ double[] a1 = new double[ARRLEN];
+ double[] a2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, 123.);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, 123., 103.);
+ test_ci_neg(a1);
+ test_vi_neg(a2, 123.);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, 123., 103.);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, 123.);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, 123., 103.);
+ test_ci_off(a1);
+ test_vi_off(a2, 123.);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, 123., 103.);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, 123., OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, 123., 103., OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, 123.);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, 123., 103.);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, 123., 103.);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, 123., 103.);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], -123.);
+ }
+ test_vi(a2, 123.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], 123.);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], 123.);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], -123.);
+ errn += verify("test_2ci: a2", i, a2[i], -103.);
+ }
+ test_2vi(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], 123.);
+ errn += verify("test_2vi: a2", i, a2[i], 103.);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], -123.);
+ }
+ test_vi_neg(a2, 123.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], 123.);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], 123.);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], -123.);
+ errn += verify("test_2ci_neg: a2", i, a2[i], -103.);
+ }
+ test_2vi_neg(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], 123.);
+ errn += verify("test_2vi_neg: a2", i, a2[i], 103.);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], -123.);
+ }
+ test_vi_oppos(a2, 123.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], 123.);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], 123.);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], -123.);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], -103.);
+ }
+ test_2vi_oppos(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], 123.);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], 103.);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], -123.);
+ }
+ test_vi_off(a2, 123.);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], 123.);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], 123.);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], -123.);
+ errn += verify("test_2ci_off: a2", i, a2[i], -103.);
+ }
+ test_2vi_off(a1, a2, 123., 103.);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], 123.);
+ errn += verify("test_2vi_off: a2", i, a2[i], 103.);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], -1.);
+ errn += verify("test_2vi_off: a2", i, a2[i], -1.);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], -123.);
+ }
+ test_vi_inv(a2, 123., OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], 123.);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], 123.);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], -123.);
+ errn += verify("test_2ci_inv: a2", i, a2[i], -103.);
+ }
+ test_2vi_inv(a1, a2, 123., 103., OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], 123.);
+ errn += verify("test_2vi_inv: a2", i, a2[i], 103.);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], -1.);
+ errn += verify("test_2vi_inv: a2", i, a2[i], -1.);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (double)val);
+ }
+ test_vi_scl(a2, 123.);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (double)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (double)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], -1.);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], -123.);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], -1.);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], -103.);
+ }
+ }
+ test_2vi_scl(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], -1.);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], 123.);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], -1.);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], 103.);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, 123.);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], 123.);
+ }
+ test_vi(a2, -123.);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], -123.);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], 123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], -1.);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, 123.);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], 123.);
+ }
+ test_vi(a2, -123.);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], -123.);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, 123., 103.);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], 123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], -1.);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (double)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, 123., 103.);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 103.);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (double)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, 123., 103.);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 103.);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, 123.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, 123.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, 123.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, 123.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, 123., OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, 123., 103., OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, 123.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, 123., 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(double[] a) { // Kernel: store the constant -123. into every element of a.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1
+ a[i] = -123.;
+ }
+ }
+ static void test_vi(double[] a, double b) { // Kernel: store the loop-invariant value b into every element of a.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1
+ a[i] = b;
+ }
+ }
+ static void test_cp(double[] a, double[] b) { // Kernel: element-wise copy of b into a over a.length elements.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1; b is indexed with the same i (bound comes from a only)
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci(double[] a, double[] b) { // Kernel: constant fill of two arrays in a single loop.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1; bound comes from a only
+ a[i] = -123.;
+ b[i] = -103.;
+ }
+ }
+ static void test_2vi(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d in a single loop.
+ for (int i = 0; i < a.length; i+=1) { // ascending index, step 1; bound comes from a only
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_neg(double[] a) { // Kernel: constant fill of a with -123., walking the array backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1
+ a[i] = -123.;
+ }
+ }
+ static void test_vi_neg(double[] a, double b) { // Kernel: fill a with the loop-invariant value b, walking backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1
+ a[i] = b;
+ }
+ }
+ static void test_cp_neg(double[] a, double[] b) { // Kernel: element-wise copy of b into a, walking backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1; start index comes from a only
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci_neg(double[] a, double[] b) { // Kernel: constant fill of two arrays in one backward loop.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1; start index comes from a only
+ a[i] = -123.;
+ b[i] = -103.;
+ }
+ }
+ static void test_2vi_neg(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d in one backward loop.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1; start index comes from a only
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_oppos(double[] a) { // Kernel: constant fill where the store index runs opposite to the loop counter.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) { // i counts up ...
+ a[limit-i] = -123.; // ... while the written index counts down from the last element to 0
+ }
+ }
+ static void test_vi_oppos(double[] a, double b) { // Kernel: fill a with b where the store index runs opposite to the loop counter.
+ int limit = a.length-1; // last valid index of a
+ for (int i = limit; i >= 0; i-=1) { // i counts down ...
+ a[limit-i] = b; // ... while the written index counts up from 0 to the last element
+ }
+ }
+ static void test_cp_oppos(double[] a, double[] b) { // Kernel: copy b into a in reversed element order.
+ int limit = a.length-1; // last valid index, derived from a (b is indexed with it too)
+ for (int i = 0; i < a.length; i+=1) { // destination index counts up ...
+ a[i] = b[limit-i]; // ... while the source index counts down
+ }
+ }
+ static void test_2ci_oppos(double[] a, double[] b) { // Kernel: constant fill of two arrays traversed in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) { // i counts up
+ a[limit-i] = -123.; // a is written from its last element down to index 0
+ b[i] = -103.; // b is written from index 0 upward
+ }
+ }
+ static void test_2vi_oppos(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d, traversing the arrays in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = limit; i >= 0; i-=1) { // i counts down
+ a[i] = c; // a is written from its last element down to index 0
+ b[limit-i] = d; // b is written from index 0 upward
+ }
+ }
+ static void test_ci_off(double[] a) { // Kernel: constant fill of a starting at index OFFSET (class constant); elements below OFFSET are not written.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET so i+OFFSET stays below a.length
+ a[i+OFFSET] = -123.;
+ }
+ }
+ static void test_vi_off(double[] a, double b) { // Kernel: fill a with b starting at index OFFSET (class constant); elements below OFFSET are not written.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET so i+OFFSET stays below a.length
+ a[i+OFFSET] = b;
+ }
+ }
+ static void test_cp_off(double[] a, double[] b) { // Kernel: copy b into a for indices from OFFSET upward; both arrays use the same shifted index.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET so i+OFFSET stays below a.length
+ a[i+OFFSET] = b[i+OFFSET];
+ }
+ }
+ static void test_2ci_off(double[] a, double[] b) { // Kernel: constant fill of two arrays for indices from OFFSET upward.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET; derived from a only
+ a[i+OFFSET] = -123.;
+ b[i+OFFSET] = -103.;
+ }
+ }
+ static void test_2vi_off(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d for indices from OFFSET upward.
+ for (int i = 0; i < a.length-OFFSET; i+=1) { // bound shortened by OFFSET; derived from a only
+ a[i+OFFSET] = c;
+ b[i+OFFSET] = d;
+ }
+ }
+ static void test_ci_inv(double[] a, int k) { // Kernel: constant fill of a starting at index k, where k is a run-time argument rather than a constant.
+ for (int i = 0; i < a.length-k; i+=1) { // k does not change inside the loop; bound shortened by k
+ a[i+k] = -123.;
+ }
+ }
+ static void test_vi_inv(double[] a, double b, int k) { // Kernel: fill a with b starting at index k, where k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k does not change inside the loop; bound shortened by k
+ a[i+k] = b;
+ }
+ }
+ static void test_cp_inv(double[] a, double[] b, int k) { // Kernel: copy b into a for indices from k upward, where k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k does not change inside the loop; bound shortened by k
+ a[i+k] = b[i+k];
+ }
+ }
+ static void test_2ci_inv(double[] a, double[] b, int k) { // Kernel: constant fill of two arrays for indices from k upward, where k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k does not change inside the loop; bound derived from a only
+ a[i+k] = -123.;
+ b[i+k] = -103.;
+ }
+ }
+ static void test_2vi_inv(double[] a, double[] b, double c, double d, int k) { // Kernel: fill a with c and b with d for indices from k upward, where k is a run-time argument.
+ for (int i = 0; i < a.length-k; i+=1) { // k does not change inside the loop; bound derived from a only
+ a[i+k] = c;
+ b[i+k] = d;
+ }
+ }
+ static void test_ci_scl(double[] a) { // Kernel: constant store to indices 0, SCALE, 2*SCALE, ... of a (SCALE is a class constant).
+ for (int i = 0; i*SCALE < a.length; i+=1) { // loop continues while the scaled index is in range
+ a[i*SCALE] = -123.; // indices that are not multiples of SCALE are not written
+ }
+ }
+ static void test_vi_scl(double[] a, double b) { // Kernel: store b to indices 0, SCALE, 2*SCALE, ... of a (SCALE is a class constant).
+ for (int i = 0; i*SCALE < a.length; i+=1) { // loop continues while the scaled index is in range
+ a[i*SCALE] = b; // indices that are not multiples of SCALE are not written
+ }
+ }
+ static void test_cp_scl(double[] a, double[] b) { // Kernel: copy b into a at indices 0, SCALE, 2*SCALE, ... only.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // loop continues while the scaled index is in range of a
+ a[i*SCALE] = b[i*SCALE]; // source and destination use the same scaled index
+ }
+ }
+ static void test_2ci_scl(double[] a, double[] b) { // Kernel: constant store to two arrays at indices 0, SCALE, 2*SCALE, ... only.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // loop continues while the scaled index is in range of a
+ a[i*SCALE] = -123.;
+ b[i*SCALE] = -103.;
+ }
+ }
+ static void test_2vi_scl(double[] a, double[] b, double c, double d) { // Kernel: store c into a and d into b at indices 0, SCALE, 2*SCALE, ... only.
+ for (int i = 0; i*SCALE < a.length; i+=1) { // loop continues while the scaled index is in range of a
+ a[i*SCALE] = c;
+ b[i*SCALE] = d;
+ }
+ }
+ static void test_cp_alndst(double[] a, double[] b) { // Kernel: copy b[i] to a[i+ALIGN_OFF]; only the destination index is shifted.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the first ALIGN_OFF elements of a are not written
+ a[i+ALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_alnsrc(double[] a, double[] b) { // Kernel: copy b[i+ALIGN_OFF] to a[i]; only the source index is shifted.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a are not written
+ a[i] = b[i+ALIGN_OFF];
+ }
+ }
+ static void test_2ci_aln(double[] a, double[] b) { // Kernel: constant fill of two arrays whose store indices differ by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound derived from a only
+ a[i+ALIGN_OFF] = -123.; // a is written from index ALIGN_OFF upward
+ b[i] = -103.; // b is written from index 0 up to a.length-ALIGN_OFF-1
+ }
+ }
+ static void test_2vi_aln(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d, with store indices that differ by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound derived from a only
+ a[i] = c; // a is written from index 0 up to a.length-ALIGN_OFF-1
+ b[i+ALIGN_OFF] = d; // b is written from index ALIGN_OFF upward
+ }
+ }
+ static void test_cp_unalndst(double[] a, double[] b) { // Kernel: copy b[i] to a[i+UNALIGN_OFF]; only the destination index is shifted.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the first UNALIGN_OFF elements of a are not written
+ a[i+UNALIGN_OFF] = b[i];
+ }
+ }
+ static void test_cp_unalnsrc(double[] a, double[] b) { // Kernel: copy b[i+UNALIGN_OFF] to a[i]; only the source index is shifted.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a are not written
+ a[i] = b[i+UNALIGN_OFF];
+ }
+ }
+ static void test_2ci_unaln(double[] a, double[] b) { // Kernel: constant fill of two arrays whose store indices differ by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound derived from a only
+ a[i+UNALIGN_OFF] = -123.; // a is written from index UNALIGN_OFF upward
+ b[i] = -103.; // b is written from index 0 up to a.length-UNALIGN_OFF-1
+ }
+ }
+ static void test_2vi_unaln(double[] a, double[] b, double c, double d) { // Kernel: fill a with c and b with d, with store indices that differ by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound derived from a only
+ a[i] = c; // a is written from index 0 up to a.length-UNALIGN_OFF-1
+ b[i+UNALIGN_OFF] = d; // b is written from index UNALIGN_OFF upward
+ }
+ }
+
+ static int verify(String text, int i, double elem, double val) { // Compares one actual element against its expected value; returns the error increment (0 or 1).
+ if (elem != val) { // exact comparison: the kernels only store or copy values, no arithmetic is involved
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // report as "<text>[<i>] = <actual> != <expected>"
+ return 1; // one mismatch
+ }
+ return 0; // values match
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestFloatDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatDoubleVect
+ */
+
+public class TestFloatDoubleVect { // Runs loop kernels that write a float[] and a double[] in the same loop and checks the resulting array contents.
+ private static final int ARRLEN = 997; // length of every test array
+ private static final int ITERS = 11000; // number of calls per kernel in the warmup and timing loops
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by the *_aln / *_alndst / *_alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // index shift used by the *_unaln / *_unalndst / *_unalnsrc kernels
+
+ public static void main(String args[]) { // Entry point: runs test() and exits with status 97 if any mismatch was counted.
+ System.out.println("Testing Float + Double vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warmup, verification and timing of all kernels; returns the number of mismatches found during verification.
+ float[] a1 = new float[ARRLEN];
+ float[] a2 = new float[ARRLEN];
+ double[] b1 = new double[ARRLEN];
+ double[] b2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // call every kernel ITERS times before checking results (presumably so they are JIT-compiled first)
+ test_ci(a1, b1);
+ test_vi(a2, b2, 123.f, 103.);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, 123.f, 103.);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, 123.f, 103.);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, 123.f, 103.);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, 123.f, 103.);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize all four arrays to the sentinel value -1 so that unwritten elements are detectable
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ // Test and verify results: each kernel is called once, then every element is compared with its expected value
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], -123.f);
+ errn += verify("test_ci: b1", i, b1[i], -103.);
+ }
+ test_vi(a2, b2, 123.f, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], 123.f);
+ errn += verify("test_vi: b2", i, b2[i], 103.);
+ }
+ test_cp(a1, a2, b1, b2); // a2/b2 hold 123/103 from test_vi above, so a1/b1 must now hold the same
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], 123.f);
+ errn += verify("test_cp: b1", i, b1[i], 103.);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], -123.f);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+ }
+ test_vi_neg(a2, b2, 123.f, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], 123.f);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], -123.f);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+ }
+ test_vi_oppos(a2, b2, 123.f, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], 123.f);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // source arrays are uniform, so the reversed copy yields uniform results
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements must be untouched
+ errn += verify("test_cp_alndst: a1", i, a1[i], -1.f);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.f);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the last ALIGN_OFF elements keep the 123 written by test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_ci_aln(a1, b1); // writes a1[ALIGN_OFF..] and b1[..ARRLEN-ALIGN_OFF-1]
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_vi_aln(a1, b1, 123.f, 103.); // writes a1[..ARRLEN-ALIGN_OFF-1] and b1[ALIGN_OFF..]
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], -1.f);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements must be untouched
+ errn += verify("test_cp_unalndst: a1", i, a1[i], -1.f);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.f);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the last UNALIGN_OFF elements keep the 123 written by test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.f);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1); // writes a1[UNALIGN_OFF..] and b1[..ARRLEN-UNALIGN_OFF-1]
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, 123.f, 103.); // writes a1[..ARRLEN-UNALIGN_OFF-1] and b1[UNALIGN_OFF..]
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], -1.f);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization: seed the first ALIGN_OFF elements with 0..ALIGN_OFF-1, the rest with -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (float)i;
+ b1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // same array as source and destination: each store feeds a load ALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) { // the seed pattern must have propagated through the whole array
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (float)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite elements ALIGN_OFF..2*ALIGN_OFF-1 with -1 before the reverse-direction overlap copy
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same array as source and destination: each element is read before it is overwritten
+ for (int i=0; i<ALIGN_OFF; i++) { // the -1 block has moved down to the first ALIGN_OFF elements
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.f);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (float)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ // Reset for unaligned overlap initialization: seed the first UNALIGN_OFF elements with 0..UNALIGN_OFF-1, the rest with -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (float)i;
+ b1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // same array as source and destination: each store feeds a load UNALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) { // the seed pattern must have propagated through the whole array
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (float)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite elements UNALIGN_OFF..2*UNALIGN_OFF-1 with -1 before the reverse-direction overlap copy
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same array as source and destination: each element is read before it is overwritten
+ for (int i=0; i<UNALIGN_OFF; i++) { // the -1 block has moved down to the first UNALIGN_OFF elements
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.f);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (float)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time"); // timing section: prints elapsed milliseconds for ITERS calls of each kernel; the numbers are only printed, never checked
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, 123.f, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, 123.f, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, 123.f, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, 123.f, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, 123.f, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // always 0 here: a non-zero count returned before the timing section
+ }
+
+ static void test_ci(float[] a, double[] b) { // Kernel: constant fill of a float[] and a double[] in one ascending loop.
+ for (int i = 0; i < a.length; i+=1) { // bound derived from a only
+ a[i] = -123.f;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi(float[] a, double[] b, float c, double d) { // Kernel: fill a with c and b with d in one ascending loop.
+ for (int i = 0; i < a.length; i+=1) { // bound derived from a only
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy b into a (float) and d into c (double) in one ascending loop.
+ for (int i = 0; i < a.length; i+=1) { // bound derived from a only
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(float[] a, double[] b) { // Kernel: constant fill of both arrays, walking backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1
+ a[i] = -123.f;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_neg(float[] a, double[] b, float c, double d) { // Kernel: fill a with c and b with d, walking backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy b into a and d into c, walking backwards.
+ for (int i = a.length-1; i >= 0; i-=1) { // descending index, step -1
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(float[] a, double[] b) { // Kernel: constant fill with the two arrays traversed in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123.f; // a is written from its last element down to index 0
+ b[i] = -103.; // b is written from index 0 upward
+ }
+ }
+ static void test_vi_oppos(float[] a, double[] b, float c, double d) { // Kernel: fill a with c and b with d, traversing the arrays in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c; // a is written from its last element down to index 0
+ b[limit-i] = d; // b is written from index 0 upward
+ }
+ }
+ static void test_cp_oppos(float[] a, float[] b, double[] c, double[] d) { // Kernel: reversed-order copies: a[i] = b[limit-i] and c[limit-i] = d[i].
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // float copy: destination ascending, source descending
+ c[limit-i] = d[i]; // double copy: destination descending, source ascending
+ }
+ }
+ static void test_ci_aln(float[] a, double[] b) { // Kernel: constant fill where the float store index is shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound shortened by ALIGN_OFF
+ a[i+ALIGN_OFF] = -123.f; // the first ALIGN_OFF elements of a are not written
+ b[i] = -103.; // the last ALIGN_OFF elements of b are not written (when b is as long as a)
+ }
+ }
+ static void test_vi_aln(float[] a, double[] b, float c, double d) { // Kernel: fill a with c and b with d, where the double store index is shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound shortened by ALIGN_OFF
+ a[i] = c; // the last ALIGN_OFF elements of a are not written
+ b[i+ALIGN_OFF] = d; // the first ALIGN_OFF elements of b are not written
+ }
+ }
+ static void test_cp_alndst(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy with the destination index shifted by ALIGN_OFF (a[i+ALIGN_OFF] = b[i], same for c/d).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound shortened by ALIGN_OFF
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy with the source index shifted by ALIGN_OFF (a[i] = b[i+ALIGN_OFF], same for c/d).
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound shortened by ALIGN_OFF
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(float[] a, double[] b) { // Kernel: constant fill where the float store index is shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound shortened by UNALIGN_OFF
+ a[i+UNALIGN_OFF] = -123.f; // the first UNALIGN_OFF elements of a are not written
+ b[i] = -103.; // the last UNALIGN_OFF elements of b are not written (when b is as long as a)
+ }
+ }
+ static void test_vi_unaln(float[] a, double[] b, float c, double d) { // Kernel: fill a with c and b with d, where the double store index is shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound shortened by UNALIGN_OFF
+ a[i] = c; // the last UNALIGN_OFF elements of a are not written
+ b[i+UNALIGN_OFF] = d; // the first UNALIGN_OFF elements of b are not written
+ }
+ }
+ static void test_cp_unalndst(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy with the destination index shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound shortened by UNALIGN_OFF
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(float[] a, float[] b, double[] c, double[] d) { // Kernel: copy with the source index shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound shortened by UNALIGN_OFF
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, float elem, float val) { // float overload: returns 1 and reports on stderr when elem differs from val, else 0.
+ if (elem != val) { // exact comparison: the kernels only store or copy values
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, double elem, double val) { // double overload: returns 1 and reports on stderr when elem differs from val, else 0.
+ if (elem != val) { // exact comparison: the kernels only store or copy values
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestFloatVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestFloatVect
+ */
+
+public class TestFloatVect {
+ private static final int ARRLEN = 997; // length of both float arrays allocated in test()
+ private static final int ITERS = 11000; // repeat count of the warmup loop and of every timing loop in test()
+ private static final int OFFSET = 3; // constant index shift used by the *_off kernels and passed as k to the *_inv kernels
+ private static final int SCALE = 2; // index stride of the *_scl kernels (only multiples of SCALE are written)
+ private static final int ALIGN_OFF = 8; // element shift of the *_aln* kernels; 8 floats = 32 bytes -- presumably chosen as a whole number of vectors, confirm
+ private static final int UNALIGN_OFF = 5; // element shift of the *_unaln* kernels; 5 floats = 20 bytes -- presumably chosen to break vector alignment, confirm
+
+ public static void main(String args[]) { // Test entry point; args are not used.
+ System.out.println("Testing Float vectors");
+ final int failures = test(); // number of mismatching elements reported by test()
+ if (failures >= 1) {
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97); // terminate with a non-zero status so the run is seen as failed
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warms up, verifies and then times every kernel of this class; returns the number of mismatching elements found.
+ float[] a1 = new float[ARRLEN];
+ float[] a2 = new float[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // results are ignored here; presumably enough calls for the JIT to compile each kernel
+ test_ci(a1);
+ test_vi(a2, 123.f);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, 123.f, 103.f);
+ test_ci_neg(a1);
+ test_vi_neg(a2, 123.f);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, 123.f, 103.f);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, 123.f);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, 123.f, 103.f);
+ test_ci_off(a1);
+ test_vi_off(a2, 123.f);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, 123.f, 103.f);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, 123.f, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, 123.f);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, 123.f, 103.f);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, 123.f, 103.f);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, 123.f, 103.f);
+ }
+ // Initialize both arrays to the -1 sentinel so that untouched elements are detectable
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running count of mismatching elements
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], -123.f);
+ }
+ test_vi(a2, 123.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], 123.f);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], 123.f);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], -123.f);
+ errn += verify("test_2ci: a2", i, a2[i], -103.f);
+ }
+ test_2vi(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], 123.f);
+ errn += verify("test_2vi: a2", i, a2[i], 103.f);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], -123.f);
+ }
+ test_vi_neg(a2, 123.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], 123.f);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], 123.f);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], -123.f);
+ errn += verify("test_2ci_neg: a2", i, a2[i], -103.f);
+ }
+ test_2vi_neg(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], 123.f);
+ errn += verify("test_2vi_neg: a2", i, a2[i], 103.f);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], -123.f);
+ }
+ test_vi_oppos(a2, 123.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], 123.f);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], 123.f);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], -123.f);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], -103.f);
+ }
+ test_2vi_oppos(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], 123.f);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], 103.f);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], -123.f);
+ }
+ test_vi_off(a2, 123.f);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], 123.f);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], 123.f);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], -123.f);
+ errn += verify("test_2ci_off: a2", i, a2[i], -103.f);
+ }
+ test_2vi_off(a1, a2, 123.f, 103.f);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], 123.f);
+ errn += verify("test_2vi_off: a2", i, a2[i], 103.f);
+ }
+ for (int i=0; i<OFFSET; i++) { // the first OFFSET elements must still hold the -1 sentinel
+ errn += verify("test_2vi_off: a1", i, a1[i], -1.f);
+ errn += verify("test_2vi_off: a2", i, a2[i], -1.f);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], -123.f);
+ }
+ test_vi_inv(a2, 123.f, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], 123.f);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], 123.f);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], -123.f);
+ errn += verify("test_2ci_inv: a2", i, a2[i], -103.f);
+ }
+ test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], 123.f);
+ errn += verify("test_2vi_inv: a2", i, a2[i], 103.f);
+ }
+ for (int i=0; i<OFFSET; i++) { // the first OFFSET elements must still hold the -1 sentinel
+ errn += verify("test_2vi_inv: a1", i, a1[i], -1.f);
+ errn += verify("test_2vi_inv: a2", i, a2[i], -1.f);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123; // only multiples of SCALE are written by the kernel
+ errn += verify("test_ci_scl: a1", i, a1[i], (float)val);
+ }
+ test_vi_scl(a2, 123.f);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (float)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (float)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], -1.f);
+ } else { // check index i itself: testing a1[i*SCALE] here would leave every other written element unverified
+ errn += verify("test_2ci_scl: a1", i, a1[i], -123.f);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], -1.f);
+ } else { // same full coverage for a2
+ errn += verify("test_2ci_scl: a2", i, a2[i], -103.f);
+ }
+ }
+ test_2vi_scl(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], -1.f);
+ } else { // check index i itself so that every written element is verified
+ errn += verify("test_2vi_scl: a1", i, a1[i], 123.f);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], -1.f);
+ } else { // same full coverage for a2
+ errn += verify("test_2vi_scl: a2", i, a2[i], 103.f);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, 123.f); // give the copy source a known value
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], 123.f);
+ }
+ test_vi(a2, -123.f);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], -123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // tail not reached by the copy keeps the value from test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], -1.f);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, 123.f); // give the copy source a known value
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], 123.f);
+ }
+ test_vi(a2, -123.f);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], -123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // tail not reached by the copy keeps the value from test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, 123.f, 103.f);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], -1.f);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], 103.f);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (float)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1); // same array as source and destination: each store feeds a later load
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the first ALIGN_OFF values must repeat through the whole array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (float)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (float)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1); // both stores hit the same array; the later b[i] store wins wherever they overlap
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, 123.f, 103.f); // both stores hit the same array; the later a[i] store wins wherever they overlap
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], 103.f);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (float)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1); // same array as source and destination: each store feeds a later load
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the first UNALIGN_OFF values must repeat through the whole array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (float)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (float)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1); // both stores hit the same array; the later b[i] store wins wherever they overlap
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], -123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, 123.f, 103.f); // both stores hit the same array; the later a[i] store wins wherever they overlap
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], 103.f);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time"); // each kernel is run ITERS times and the elapsed milliseconds are printed
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, 123.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, 123.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, 123.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, 123.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, 123.f, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, 123.f, 103.f, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, 123.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, 123.f, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn; // always 0 here: a non-zero count has already been returned before the timing section
+ }
+
+ static void test_ci(float[] a) { // Constant fill: every element of a becomes -123.f, walking the index upwards.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123.f;
+ }
+ }
+ static void test_vi(float[] a, float b) { // Variable fill: every element of a becomes the loop-invariant value b.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp(float[] a, float[] b) { // Element-wise copy b -> a over a.length elements; b must be at least as long as a.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci(float[] a, float[] b) { // Two constant fills in one loop: a gets -123.f, b gets -103.f; the bound comes from a only.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123.f;
+ b[i] = -103.f;
+ }
+ }
+ static void test_2vi(float[] a, float[] b, float c, float d) { // Two variable fills in one loop: a gets c, b gets d; the bound comes from a only.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_neg(float[] a) { // Constant fill with a descending index (last element first).
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123.f;
+ }
+ }
+ static void test_vi_neg(float[] a, float b) { // Variable fill with a descending index (last element first).
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp_neg(float[] a, float[] b) { // Element-wise copy b -> a with a descending index; b must be at least as long as a.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ }
+ }
+ static void test_2ci_neg(float[] a, float[] b) { // Two constant fills (-123.f into a, -103.f into b) with a descending index.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123.f;
+ b[i] = -103.f;
+ }
+ }
+ static void test_2vi_neg(float[] a, float[] b, float c, float d) { // Two variable fills (c into a, d into b) with a descending index.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_oppos(float[] a) { // Constant fill where the store address runs opposite to the loop counter.
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123.f; // i counts up while the written index counts down from the last element
+ }
+ }
+ static void test_vi_oppos(float[] a, float b) { // Variable fill where the store address runs opposite to the loop counter.
+ int limit = a.length-1;
+ for (int i = limit; i >= 0; i-=1) {
+ a[limit-i] = b; // i counts down while the written index counts up from 0
+ }
+ }
+ static void test_cp_oppos(float[] a, float[] b) { // Reversing copy: a[i] receives b[a.length-1-i], so load and store walk in opposite directions.
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ }
+ }
+ static void test_2ci_oppos(float[] a, float[] b) { // Two constant fills in one loop whose store addresses move in opposite directions.
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123.f; // written from the last element downwards
+ b[i] = -103.f; // written from the first element upwards
+ }
+ }
+ static void test_2vi_oppos(float[] a, float[] b, float c, float d) { // Two variable fills in one descending loop whose store addresses move in opposite directions.
+ int limit = a.length-1;
+ for (int i = limit; i >= 0; i-=1) {
+ a[i] = c; // written from the last element downwards
+ b[limit-i] = d; // written from the first element upwards
+ }
+ }
+ static void test_ci_off(float[] a) { // Constant fill of a[OFFSET .. a.length-1]; the first OFFSET elements are not written.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123.f;
+ }
+ }
+ static void test_vi_off(float[] a, float b) { // Variable fill of a[OFFSET .. a.length-1] with b; the first OFFSET elements are not written.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b;
+ }
+ }
+ static void test_cp_off(float[] a, float[] b) { // Copy b -> a for indices OFFSET .. a.length-1, using the same constant shift on both sides.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b[i+OFFSET];
+ }
+ }
+ static void test_2ci_off(float[] a, float[] b) { // Two constant fills (-123.f into a, -103.f into b) of indices OFFSET .. a.length-1.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123.f;
+ b[i+OFFSET] = -103.f;
+ }
+ }
+ static void test_2vi_off(float[] a, float[] b, float c, float d) { // Two variable fills (c into a, d into b) of indices OFFSET .. a.length-1.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = c;
+ b[i+OFFSET] = d;
+ }
+ }
+ static void test_ci_inv(float[] a, int k) { // Constant fill of a[k .. a.length-1]; like test_ci_off but the shift k is a run-time, loop-invariant argument.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123.f;
+ }
+ }
+ static void test_vi_inv(float[] a, float b, int k) { // Variable fill of a[k .. a.length-1] with b; the shift k is a run-time, loop-invariant argument.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b;
+ }
+ }
+ static void test_cp_inv(float[] a, float[] b, int k) { // Copy b -> a for indices k .. a.length-1, using the same run-time shift k on both sides.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b[i+k];
+ }
+ }
+ static void test_2ci_inv(float[] a, float[] b, int k) { // Two constant fills (-123.f into a, -103.f into b) of indices k .. a.length-1.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123.f;
+ b[i+k] = -103.f;
+ }
+ }
+ static void test_2vi_inv(float[] a, float[] b, float c, float d, int k) { // Two variable fills (c into a, d into b) of indices k .. a.length-1.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = c;
+ b[i+k] = d;
+ }
+ }
+ static void test_ci_scl(float[] a) { // Strided constant fill: only indices that are multiples of SCALE receive -123.f.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123.f;
+ }
+ }
+ static void test_vi_scl(float[] a, float b) { // Strided variable fill: only indices that are multiples of SCALE receive b.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b;
+ }
+ }
+ static void test_cp_scl(float[] a, float[] b) { // Strided copy b -> a: only indices that are multiples of SCALE are copied.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b[i*SCALE];
+ }
+ }
+ static void test_2ci_scl(float[] a, float[] b) { // Two strided constant fills: multiples of SCALE get -123.f in a and -103.f in b.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123.f;
+ b[i*SCALE] = -103.f;
+ }
+ }
+ static void test_2vi_scl(float[] a, float[] b, float c, float d) { // Two strided variable fills: multiples of SCALE get c in a and d in b.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = c;
+ b[i*SCALE] = d;
+ }
+ }
+ static void test_cp_alndst(float[] a, float[] b) { // Copy b -> a with the destination shifted up by ALIGN_OFF; a[0 .. ALIGN_OFF-1] is not written.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, each store is read again ALIGN_OFF iterations later
+ }
+ }
+ static void test_cp_alnsrc(float[] a, float[] b) { // Copy b -> a with the source shifted up by ALIGN_OFF; the last ALIGN_OFF elements of a are not written.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF];
+ }
+ }
+ static void test_2ci_aln(float[] a, float[] b) { // Two constant fills in one loop, with the store into a running ALIGN_OFF elements ahead of the store into b.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = -123.f;
+ b[i] = -103.f; // executed after the a store in each iteration, which matters when a and b are the same array
+ }
+ }
+ static void test_2vi_aln(float[] a, float[] b, float c, float d) { // Two variable fills in one loop, with the store into b running ALIGN_OFF elements ahead of the store into a.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+ALIGN_OFF] = d; // when a and b are the same array, a later a[i] store overwrites this element
+ }
+ }
+ static void test_cp_unalndst(float[] a, float[] b) { // Copy b -> a with the destination shifted up by UNALIGN_OFF; a[0 .. UNALIGN_OFF-1] is not written.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, each store is read again UNALIGN_OFF iterations later
+ }
+ }
+ static void test_cp_unalnsrc(float[] a, float[] b) { // Copy b -> a with the source shifted up by UNALIGN_OFF; the last UNALIGN_OFF elements of a are not written.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF];
+ }
+ }
+ static void test_2ci_unaln(float[] a, float[] b) { // Two constant fills in one loop, with the store into a running UNALIGN_OFF elements ahead of the store into b.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = -123.f;
+ b[i] = -103.f; // executed after the a store in each iteration, which matters when a and b are the same array
+ }
+ }
+ static void test_2vi_unaln(float[] a, float[] b, float c, float d) { // Two variable fills in one loop, with the store into b running UNALIGN_OFF elements ahead of the store into a.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d; // when a and b are the same array, a later a[i] store overwrites this element
+ }
+ }
+
+ static int verify(String text, int i, float elem, float val) { // Checks one element against its expected value; returns 0 on a match, otherwise reports on stderr and returns 1.
+ if (elem == val) {
+ return 0;
+ }
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // a NaN never compares equal, so it is always reported
+ return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestIntDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntDoubleVect
+ */
+
+public class TestIntDoubleVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int OFFSET = 3;
+ private static final int SCALE = 2;
+ private static final int ALIGN_OFF = 8;
+ private static final int UNALIGN_OFF = 5;
+
+ public static void main(String args[]) {
+ System.out.println("Testing Integer + Double vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warm up, verify and time every kernel; returns the number of verification mismatches.
+ int[] a1 = new int[ARRLEN];
+ int[] a2 = new int[ARRLEN];
+ double[] b1 = new double[ARRLEN];
+ double[] b2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // run every kernel ITERS times before checking results (presumably so they get JIT-compiled; the @run line uses -Xbatch)
+ test_ci(a1, b1);
+ test_vi(a2, b2, (int)123, 103.);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (int)123, 103.);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (int)123, 103.);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (int)123, 103.);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (int)123, 103.);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.);
+ }
+ test_vi(a2, b2, (int)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (int)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.);
+ }
+ test_cp(a1, a2, b1, b2); // a2/b2 hold the 123 / 103. values written by test_vi just above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (int)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+ }
+ test_vi_neg(a2, b2, (int)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+ }
+ test_cp_neg(a1, a2, b1, b2); // a2/b2 hold the 123 / 103. values written by test_vi_neg just above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+ }
+ test_vi_oppos(a2, b2, (int)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniform, so the reversed copy still yields 123 / 103. everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are not written by the kernel
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source values so the next copy can be told apart from the previous one
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_alnsrc and keeps the 123 values from test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_vi_aln(a1, b1, (int)123, 103.);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are not written by the kernel
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source values so the next copy can be told apart from the previous one
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_unalnsrc and keeps the 123 values from test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal, widened to double by the assignment
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal, widened to double by the assignment
+ }
+ test_vi_unaln(a1, b1, (int)123, 103.);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ b1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // same array is source and destination: each store is read again ALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the initial 0 .. ALIGN_OFF-1 block is expected to repeat across the whole array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: every element moves down by ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ b1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // same array is source and destination: each store is read again UNALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the initial 0 .. UNALIGN_OFF-1 block is expected to repeat across the whole array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: every element moves down by UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification failed
+ return errn;
+
+ System.out.println("Time"); // from here on only elapsed milliseconds per kernel are printed; nothing is verified
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (int)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (int)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (int)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (int)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (int)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // reached only when errn is not positive (see the early return above)
+ }
+
+ static void test_ci(int[] a, double[] b) { // fill a with the int constant -123 and b with the double constant -103. in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi(int[] a, double[] b, int c, double d) { // fill a with c and b with d in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(int[] a, int[] b, double[] c, double[] d) { // element-wise copy b -> a (int) and d -> c (double), ascending index
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(int[] a, double[] b) { // same constant fill as test_ci, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_neg(int[] a, double[] b, int c, double d) { // same variable fill as test_vi, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(int[] a, int[] b, double[] c, double[] d) { // same copy as test_cp, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(int[] a, double[] b) { // constant fill where a is written from its last index down while b is written from 0 up
+ int limit = a.length-1; // highest valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_oppos(int[] a, double[] b, int c, double d) { // variable fill where a is written from its last index down while b is written from 0 up
+ int limit = a.length-1; // highest valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) { // descending loop, unlike test_ci_oppos and test_cp_oppos
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_cp_oppos(int[] a, int[] b, double[] c, double[] d) { // reversed copies: a receives b back to front, c receives d back to front
+ int limit = a.length-1; // highest valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ c[limit-i] = d[i];
+ }
+ }
+ static void test_ci_aln(int[] a, double[] b) { // constant stores into two arrays whose indices differ by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // leaves a[0 .. ALIGN_OFF-1] and b[a.length-ALIGN_OFF ..] untouched
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_aln(int[] a, double[] b, int c, double d) { // store c into a and d into b, with b's index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // leaves a[a.length-ALIGN_OFF ..] and b[0 .. ALIGN_OFF-1] untouched
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_alndst(int[] a, int[] b, double[] c, double[] d) { // copy b -> a and d -> c with the destination index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the first ALIGN_OFF elements of a and c are never written
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(int[] a, int[] b, double[] c, double[] d) { // copy b -> a and d -> c with the source index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are never written
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(int[] a, double[] b) { // constant stores into two arrays whose indices differ by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // leaves a[0 .. UNALIGN_OFF-1] and b[a.length-UNALIGN_OFF ..] untouched
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_unaln(int[] a, double[] b, int c, double d) { // store c into a and d into b, with b's index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // leaves a[a.length-UNALIGN_OFF ..] and b[0 .. UNALIGN_OFF-1] untouched
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(int[] a, int[] b, double[] c, double[] d) { // copy b -> a and d -> c with the destination index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the first UNALIGN_OFF elements of a and c are never written
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(int[] a, int[] b, double[] c, double[] d) { // copy b -> a and d -> c with the source index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are never written
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, int elem, int val) { // returns 0 on a match, 1 after reporting a mismatch on stderr
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+ static int verify(String text, int i, double elem, double val) { // returns 0 on a match, 1 after reporting a mismatch on stderr
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestIntFloatVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntFloatVect
+ */
+
+public class TestIntFloatVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int OFFSET = 3;
+ private static final int SCALE = 2;
+ private static final int ALIGN_OFF = 8;
+ private static final int UNALIGN_OFF = 5;
+
+ public static void main(String args[]) {
+ System.out.println("Testing Integer + Float vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warm up, verify and time every kernel; returns the number of verification mismatches.
+ int[] a1 = new int[ARRLEN];
+ int[] a2 = new int[ARRLEN];
+ float[] b1 = new float[ARRLEN];
+ float[] b2 = new float[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // run every kernel ITERS times before checking results (presumably so they get JIT-compiled; the @run line uses -Xbatch)
+ test_ci(a1, b1);
+ test_vi(a2, b2, (int)123, 103.f);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (int)123, 103.f);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (int)123, 103.f);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (int)123, 103.f);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (int)123, 103.f);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.f);
+ }
+ test_vi(a2, b2, (int)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (int)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.f);
+ }
+ test_cp(a1, a2, b1, b2); // a2/b2 hold the 123 / 103.f values written by test_vi just above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (int)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+ }
+ test_vi_neg(a2, b2, (int)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+ }
+ test_cp_neg(a1, a2, b1, b2); // a2/b2 hold the 123 / 103.f values written by test_vi_neg just above
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+ }
+ test_vi_oppos(a2, b2, (int)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniform, so the reversed copy still yields 123 / 103.f everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are not written by the kernel
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source values so the next copy can be told apart from the previous one
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_alnsrc and keeps the 123 values from test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_vi_aln(a1, b1, (int)123, 103.f);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are not written by the kernel
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source values so the next copy can be told apart from the previous one
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_unalnsrc and keeps the 123 values from test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal, widened to float by the assignment
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // int literal, widened to float by the assignment
+ }
+ test_vi_unaln(a1, b1, (int)123, 103.f);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ b1[i] = (float)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // same array is source and destination: each store is read again ALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the initial 0 .. ALIGN_OFF-1 block is expected to repeat across the whole array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.f;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: every element moves down by ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ b1[i] = (float)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // same array is source and destination: each store is read again UNALIGN_OFF iterations later
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the initial 0 .. UNALIGN_OFF-1 block is expected to repeat across the whole array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.f;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: every element moves down by UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification failed
+ return errn;
+
+ System.out.println("Time"); // from here on only elapsed milliseconds per kernel are printed; nothing is verified
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (int)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (int)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (int)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (int)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (int)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // reached only when errn is not positive (see the early return above)
+ }
+
+ static void test_ci(int[] a, float[] b) { // fill a with the int constant -123 and b with the float constant -103.f in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi(int[] a, float[] b, int c, float d) { // fill a with c and b with d in one ascending loop
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(int[] a, int[] b, float[] c, float[] d) { // element-wise copy b -> a (int) and d -> c (float), ascending index
+ for (int i = 0; i < a.length; i+=1) { // only a.length bounds the loop; callers in test() pass arrays of equal length
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(int[] a, float[] b) { // same constant fill as test_ci, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_neg(int[] a, float[] b, int c, float d) { // same variable fill as test_vi, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(int[] a, int[] b, float[] c, float[] d) { // same copy as test_cp, but walking from the last index down to 0
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(int[] a, float[] b) { // constant fill where a is written from its last index down while b is written from 0 up
+ int limit = a.length-1; // highest valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_oppos(int[] a, float[] b, int c, float d) { // variable fill where a is written from its last index down while b is written from 0 up
+ int limit = a.length-1; // highest valid index of a
+ for (int i = a.length-1; i >= 0; i-=1) { // descending loop, unlike test_ci_oppos and test_cp_oppos
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_cp_oppos(int[] a, int[] b, float[] c, float[] d) { // reversed copies: a receives b back to front, c receives d back to front
+ int limit = a.length-1; // highest valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ c[limit-i] = d[i];
+ }
+ }
+ static void test_ci_aln(int[] a, float[] b) { // constant stores into two arrays whose indices differ by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // leaves a[0 .. ALIGN_OFF-1] and b[a.length-ALIGN_OFF ..] untouched
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_aln(int[] a, float[] b, int c, float d) { // store c into a and d into b, with b's index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // leaves a[a.length-ALIGN_OFF ..] and b[0 .. ALIGN_OFF-1] untouched
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_alndst(int[] a, int[] b, float[] c, float[] d) { // copy b -> a and d -> c with the destination index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the first ALIGN_OFF elements of a and c are never written
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(int[] a, int[] b, float[] c, float[] d) { // copy b -> a and d -> c with the source index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are never written
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(int[] a, float[] b) { // constant stores into two arrays whose indices differ by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // leaves a[0 .. UNALIGN_OFF-1] and b[a.length-UNALIGN_OFF ..] untouched
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_unaln(int[] a, float[] b, int c, float d) { // store c into a and d into b, with b's index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // leaves a[a.length-UNALIGN_OFF ..] and b[0 .. UNALIGN_OFF-1] untouched
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(int[] a, int[] b, float[] c, float[] d) { // copy b -> a and d -> c with the destination index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the first UNALIGN_OFF elements of a and c are never written
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(int[] a, int[] b, float[] c, float[] d) { // copy b -> a and d -> c with the source index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are never written
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, int elem, int val) { // returns 0 on a match, 1 after reporting a mismatch on stderr
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+ static int verify(String text, int i, float elem, float val) { // returns 0 on a match, 1 after reporting a mismatch on stderr
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestIntLongVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntLongVect
+ */
+
+public class TestIntLongVect {
+ private static final int ARRLEN = 997; // length of every array allocated in test()
+ private static final int ITERS = 11000; // repetitions of each kernel in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // not referenced by any method of this class
+ private static final int SCALE = 2; // not referenced by any method of this class
+ private static final int ALIGN_OFF = 8; // index displacement used by the *_aln* kernels
+ private static final int UNALIGN_OFF = 5; // index displacement used by the *_unaln* kernels
+
+ public static void main(String args[]) {
+ // Run the whole suite once and report the outcome.
+ System.out.println("Testing Integer + Long vectors");
+ final int mismatches = test();
+ final boolean failed = mismatches > 0;
+ if (failed) System.err.println("FAILED: " + mismatches + " errors");
+ if (failed) System.exit(97); // status 97 marks a failed run
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warms up every kernel, verifies each one element by element, then times them; returns the mismatch count
+ int[] a1 = new int[ARRLEN];
+ int[] a2 = new int[ARRLEN];
+ long[] b1 = new long[ARRLEN];
+ long[] b2 = new long[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // every kernel is called ITERS times before any result is checked
+ test_ci(a1, b1);
+ test_vi(a2, b2, (int)123, (long)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (int)123, (long)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (int)123, (long)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (int)123, (long)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (int)123, (long)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize: overwrite whatever the warmup left behind with the sentinel -1
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running count of mismatching elements; verify() contributes 0 or 1 per check
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci: b1", i, b1[i], (long)-103);
+ }
+ test_vi(a2, b2, (int)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (int)123);
+ errn += verify("test_vi: b2", i, b2[i], (long)103);
+ }
+ test_cp(a1, a2, b1, b2); // copies the values test_vi just stored in a2/b2 into a1/b1
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (int)123);
+ errn += verify("test_cp: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+ }
+ test_vi_neg(a2, b2, (int)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+ }
+ test_cp_neg(a1, a2, b1, b2); // relies on a2/b2 holding 123/103 from test_vi_neg
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+ }
+ test_vi_oppos(a2, b2, (int)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniform, so the mirrored copy still yields 123/103 everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are never stored to
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change only the sources; a1/b1 keep the result of test_cp_alndst
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_alnsrc, so it still holds 123 from test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1); // writes a1[ALIGN_OFF..] and b1[..ARRLEN-ALIGN_OFF-1]; the rest must stay -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (int)123, (long)103); // writes a1[..ARRLEN-ALIGN_OFF-1] and b1[ALIGN_OFF..]; the rest must stay -1
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are never stored to
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // change only the sources; a1/b1 keep the result of test_cp_unalndst
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // tail is not written by test_cp_unalnsrc, so it still holds 123
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1); // writes a1[UNALIGN_OFF..] and b1[..ARRLEN-UNALIGN_OFF-1]; the rest must stay -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (int)123, (long)103); // writes a1[..ARRLEN-UNALIGN_OFF-1] and b1[UNALIGN_OFF..]; the rest must stay -1
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // seed the first ALIGN_OFF slots with 0..ALIGN_OFF-1
+ a1[i] = (int)i;
+ b1[i] = (long)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array: in-order execution repeats the seed with period ALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite the second period with -1 so the downward copy below has a visible effect
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // every element takes the value ALIGN_OFF slots above it
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // seed the first UNALIGN_OFF slots with 0..UNALIGN_OFF-1
+ a1[i] = (int)i;
+ b1[i] = (long)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // same array on both sides: in-order execution repeats the seed with period UNALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite the second period with -1 so the downward copy below has a visible effect
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // every element takes the value UNALIGN_OFF slots above it
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // System.currentTimeMillis() stamps taken around each timing loop
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start)); // elapsed milliseconds for ITERS calls; results are not re-verified
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (int)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (int)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (int)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (int)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (int)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // errn is 0 here: a positive count has already been returned before the timing section
+ }
+
+ static void test_ci(int[] a, long[] b) { // constant fill of an int array and a long array, ascending index
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only, so b must be at least as long
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(int[] a, long[] b, int c, long d) { // fill a with c and b with d, ascending index
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only, so b must be at least as long
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(int[] a, int[] b, long[] c, long[] d) { // element-wise copy b -> a (int) and d -> c (long), ascending index
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only; the other three arrays must be at least as long
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(int[] a, long[] b) { // same stores as test_ci, but the index counts down
+ for (int i = a.length-1; i >= 0; i-=1) { // last element first
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(int[] a, long[] b, int c, long d) { // same stores as test_vi, but the index counts down
+ for (int i = a.length-1; i >= 0; i-=1) { // last element first
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(int[] a, int[] b, long[] c, long[] d) { // same copies as test_cp, but the index counts down
+ for (int i = a.length-1; i >= 0; i-=1) { // last element first
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(int[] a, long[] b) { // constant fill where the two arrays are traversed in opposite directions
+ int limit = a.length-1; // highest valid index of a, used to mirror i
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // a is filled from its last element down
+ b[i] = -103; // b is filled from its first element up
+ }
+ }
+ static void test_vi_oppos(int[] a, long[] b, int c, long d) { // fill from arguments with opposite traversal directions; here the loop index itself counts down
+ int limit = a.length-1; // highest valid index of a, used to mirror i
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c; // a is filled from its last element down
+ b[limit-i] = d; // b is filled from its first element up
+ }
+ }
+ static void test_cp_oppos(int[] a, int[] b, long[] c, long[] d) { // copy kernel whose two sides of each assignment walk in opposite directions
+ int limit = a.length-1; // highest valid index of a; reused to mirror indices in b and c
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // ascending store, descending load
+ c[limit-i] = d[i]; // descending store, ascending load
+ }
+ }
+ static void test_ci_aln(int[] a, long[] b) { // constant fill; the int store is displaced by ALIGN_OFF, the long store is not
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short so a[i+ALIGN_OFF] stays in bounds
+ a[i+ALIGN_OFF] = -123; // a[0..ALIGN_OFF-1] is never written
+ b[i] = -103; // the last ALIGN_OFF indices below a.length are never written in b
+ }
+ }
+ static void test_vi_aln(int[] a, long[] b, int c, long d) { // fill from arguments; here the long store is the one displaced by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF short so b[i+ALIGN_OFF] stays within a.length
+ a[i] = c; // the last ALIGN_OFF elements of a are never written
+ b[i+ALIGN_OFF] = d; // b[0..ALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_alndst(int[] a, int[] b, long[] c, long[] d) { // copy with the destination index displaced by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // test() also calls this with a==b and c==d, where earlier stores feed later loads
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(int[] a, int[] b, long[] c, long[] d) { // copy with the source index displaced by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and c are left untouched
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(int[] a, long[] b) { // same shape as test_ci_aln but displaced by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short so a[i+UNALIGN_OFF] stays in bounds
+ a[i+UNALIGN_OFF] = -123; // a[0..UNALIGN_OFF-1] is never written
+ b[i] = -103; // the last UNALIGN_OFF indices below a.length are never written in b
+ }
+ }
+ static void test_vi_unaln(int[] a, long[] b, int c, long d) { // same shape as test_vi_aln but displaced by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF short so b[i+UNALIGN_OFF] stays within a.length
+ a[i] = c; // the last UNALIGN_OFF elements of a are never written
+ b[i+UNALIGN_OFF] = d; // b[0..UNALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_unalndst(int[] a, int[] b, long[] c, long[] d) { // copy with the destination index displaced by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // test() also calls this with a==b and c==d, where earlier stores feed later loads
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(int[] a, int[] b, long[] c, long[] d) { // copy with the source index displaced by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and c are left untouched
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, int elem, int val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, long elem, long val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestIntVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestIntVect
+ */
+
+public class TestIntVect {
+ private static final int ARRLEN = 997; // length of the arrays allocated in test()
+ private static final int ITERS = 11000; // repetitions of each kernel in the warmup loop and in each timing loop
+ private static final int OFFSET = 3; // first index checked after the *_off kernels; also passed to the *_inv kernels
+ private static final int SCALE = 2; // index multiple used when checking the *_scl kernels
+ private static final int ALIGN_OFF = 8; // boundary index used when checking the *_aln* kernels
+ private static final int UNALIGN_OFF = 5; // boundary index used when checking the *_unaln* kernels
+
+ public static void main(String args[]) {
+ // Run the whole suite once and report the outcome.
+ System.out.println("Testing Integer vectors");
+ final int mismatches = test();
+ final boolean failed = mismatches > 0;
+ if (failed) System.err.println("FAILED: " + mismatches + " errors");
+ if (failed) System.exit(97); // status 97 marks a failed run
+ System.out.println("PASSED");
+ }
+
+ static int test() {
+ int[] a1 = new int[ARRLEN];
+ int[] a2 = new int[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, (int)123);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, (int)123, (int)103);
+ test_ci_neg(a1);
+ test_vi_neg(a2, (int)123);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, (int)123, (int)103);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, (int)123);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, (int)123, (int)103);
+ test_ci_off(a1);
+ test_vi_off(a2, (int)123);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, (int)123, (int)103);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, (int)123, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, (int)123);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, (int)123, (int)103);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, (int)123, (int)103);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, (int)123, (int)103);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (int)-123);
+ }
+ test_vi(a2, (int)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (int)123);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (int)123);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], (int)-123);
+ errn += verify("test_2ci: a2", i, a2[i], (int)-103);
+ }
+ test_2vi(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], (int)123);
+ errn += verify("test_2vi: a2", i, a2[i], (int)103);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (int)-123);
+ }
+ test_vi_neg(a2, (int)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (int)123);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (int)123);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], (int)-123);
+ errn += verify("test_2ci_neg: a2", i, a2[i], (int)-103);
+ }
+ test_2vi_neg(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], (int)123);
+ errn += verify("test_2vi_neg: a2", i, a2[i], (int)103);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (int)-123);
+ }
+ test_vi_oppos(a2, (int)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (int)123);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (int)123);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], (int)-123);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], (int)-103);
+ }
+ test_2vi_oppos(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], (int)123);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], (int)103);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], (int)-123);
+ }
+ test_vi_off(a2, (int)123);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], (int)123);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], (int)123);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], (int)-123);
+ errn += verify("test_2ci_off: a2", i, a2[i], (int)-103);
+ }
+ test_2vi_off(a1, a2, (int)123, (int)103);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (int)123);
+ errn += verify("test_2vi_off: a2", i, a2[i], (int)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (int)-1);
+ errn += verify("test_2vi_off: a2", i, a2[i], (int)-1);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], (int)-123);
+ }
+ test_vi_inv(a2, (int)123, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], (int)123);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], (int)123);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], (int)-123);
+ errn += verify("test_2ci_inv: a2", i, a2[i], (int)-103);
+ }
+ test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (int)123);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (int)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (int)-1);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (int)-1);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (int)val);
+ }
+ test_vi_scl(a2, (int)123);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (int)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (int)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], (int)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (int)-123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], (int)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (int)-103);
+ }
+ }
+ test_2vi_scl(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], (int)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (int)123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], (int)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (int)103);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (int)123);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (int)123);
+ }
+ test_vi(a2, (int)-123);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (int)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (int)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (int)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (int)123);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (int)123);
+ }
+ test_vi(a2, (int)-123);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (int)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (int)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, (int)123, (int)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (int)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (int)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (int)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (int)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (int)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, (int)123, (int)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (int)103);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (int)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (int)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (int)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (int)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (int)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, (int)123, (int)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (int)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (int)103);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, (int)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, (int)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, (int)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, (int)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, (int)123, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, (int)123, (int)103, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, (int)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, (int)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(int[] a) { // Constant fill: every element of a becomes -123 (ascending index).
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = -123;
+   }
+ }
+ static void test_vi(int[] a, int b) { // Variable fill: every element of a becomes b (ascending index).
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b;
+   }
+ }
+ static void test_cp(int[] a, int[] b) { // Element-wise copy of b into a over a.length elements (ascending index).
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[idx];
+   }
+ }
+ static void test_2ci(int[] a, int[] b) { // Two constant fills in one loop: a[i] = -123, then b[i] = -103.
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi(int[] a, int[] b, int c, int d) { // Two variable fills in one loop: a[i] = c, then b[i] = d.
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_ci_neg(int[] a) { // Constant fill of a with -123, walking from the last index down to 0.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = -123;
+   }
+ }
+ static void test_vi_neg(int[] a, int b) { // Variable fill of a with b, walking from the last index down to 0.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = b;
+   }
+ }
+ static void test_cp_neg(int[] a, int[] b) { // Element-wise copy of b into a, walking from the last index down to 0.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = b[idx];
+   }
+ }
+ static void test_2ci_neg(int[] a, int[] b) { // Descending loop with two constant stores: a[i] = -123, then b[i] = -103.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_neg(int[] a, int[] b, int c, int d) { // Descending loop with two variable stores: a[i] = c, then b[i] = d.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_ci_oppos(int[] a) { // Constant fill of a with -123: the counter ascends while the store index descends.
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[last-idx] = -123;
+   }
+ }
+ static void test_vi_oppos(int[] a, int b) { // Variable fill of a with b: the counter descends while the store index ascends.
+   int last = a.length-1;
+   for (int idx = last; idx >= 0; idx--) {
+     a[last-idx] = b;
+   }
+ }
+ static void test_cp_oppos(int[] a, int[] b) { // Reversed copy: a[i] receives b[a.length-1-i].
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[last-idx];
+   }
+ }
+ static void test_2ci_oppos(int[] a, int[] b) { // a is filled with -123 from the end, b with -103 from the start, in one loop.
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[last-idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_oppos(int[] a, int[] b, int c, int d) { // Descending counter: a[i] = c from the end, b[limit-i] = d from the start.
+   int last = a.length-1;
+   for (int idx = last; idx >= 0; idx--) {
+     a[idx] = c;
+     b[last-idx] = d;
+   }
+ }
+ static void test_ci_off(int[] a) { // Stores -123 into a[i+OFFSET] for i in [0, a.length-OFFSET).
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = -123;
+   }
+ }
+ static void test_vi_off(int[] a, int b) { // Stores b into a[i+OFFSET] for i in [0, a.length-OFFSET).
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = b;
+   }
+ }
+ static void test_cp_off(int[] a, int[] b) { // Copies b[i+OFFSET] into a[i+OFFSET] for i in [0, a.length-OFFSET).
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = b[idx+OFFSET];
+   }
+ }
+ static void test_2ci_off(int[] a, int[] b) { // Stores -123 into a[i+OFFSET], then -103 into b[i+OFFSET], for i in [0, a.length-OFFSET).
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = -123;
+     b[idx+OFFSET] = -103;
+   }
+ }
+ static void test_2vi_off(int[] a, int[] b, int c, int d) { // Stores c into a[i+OFFSET], then d into b[i+OFFSET], for i in [0, a.length-OFFSET).
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = c;
+     b[idx+OFFSET] = d;
+   }
+ }
+ static void test_ci_inv(int[] a, int k) { // Stores -123 into a[i+k] for i in [0, a.length-k); k is a runtime, loop-invariant offset.
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = -123;
+   }
+ }
+ static void test_vi_inv(int[] a, int b, int k) { // Stores b into a[i+k] for i in [0, a.length-k); k is a runtime, loop-invariant offset.
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = b;
+   }
+ }
+ static void test_cp_inv(int[] a, int[] b, int k) { // Copies b[i+k] into a[i+k] for i in [0, a.length-k).
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = b[idx+k];
+   }
+ }
+ static void test_2ci_inv(int[] a, int[] b, int k) { // Stores -123 into a[i+k], then -103 into b[i+k], for i in [0, a.length-k).
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = -123;
+     b[idx+k] = -103;
+   }
+ }
+ static void test_2vi_inv(int[] a, int[] b, int c, int d, int k) { // Stores c into a[i+k], then d into b[i+k], for i in [0, a.length-k).
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = c;
+     b[idx+k] = d;
+   }
+ }
+ static void test_ci_scl(int[] a) { // Stores -123 into a[i*SCALE] for as long as i*SCALE < a.length.
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = -123;
+   }
+ }
+ static void test_vi_scl(int[] a, int b) { // Stores b into a[i*SCALE] for as long as i*SCALE < a.length.
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = b;
+   }
+ }
+ static void test_cp_scl(int[] a, int[] b) { // Copies b[i*SCALE] into a[i*SCALE] for as long as i*SCALE < a.length.
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = b[idx*SCALE];
+   }
+ }
+ static void test_2ci_scl(int[] a, int[] b) { // Stores -123 into a[i*SCALE], then -103 into b[i*SCALE], while i*SCALE < a.length.
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = -123;
+     b[idx*SCALE] = -103;
+   }
+ }
+ static void test_2vi_scl(int[] a, int[] b, int c, int d) { // Stores c into a[i*SCALE], then d into b[i*SCALE], while i*SCALE < a.length.
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = c;
+     b[idx*SCALE] = d;
+   }
+ }
+ static void test_cp_alndst(int[] a, int[] b) { // Copies b[i] into a[i+ALIGN_OFF] for i in [0, a.length-ALIGN_OFF); the destination is shifted.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = b[idx];
+   }
+ }
+ static void test_cp_alnsrc(int[] a, int[] b) { // Copies b[i+ALIGN_OFF] into a[i] for i in [0, a.length-ALIGN_OFF); the source is shifted.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = b[idx+ALIGN_OFF];
+   }
+ }
+ static void test_2ci_aln(int[] a, int[] b) { // Stores -123 into a[i+ALIGN_OFF], then -103 into b[i]; order matters when a and b alias.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_aln(int[] a, int[] b, int c, int d) { // Stores c into a[i], then d into b[i+ALIGN_OFF]; order matters when a and b alias.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+ALIGN_OFF] = d;
+   }
+ }
+ static void test_cp_unalndst(int[] a, int[] b) { // Copies b[i] into a[i+UNALIGN_OFF] for i in [0, a.length-UNALIGN_OFF); the destination is shifted.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = b[idx];
+   }
+ }
+ static void test_cp_unalnsrc(int[] a, int[] b) { // Copies b[i+UNALIGN_OFF] into a[i] for i in [0, a.length-UNALIGN_OFF); the source is shifted.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = b[idx+UNALIGN_OFF];
+   }
+ }
+ static void test_2ci_unaln(int[] a, int[] b) { // Stores -123 into a[i+UNALIGN_OFF], then -103 into b[i]; order matters when a and b alias.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_unaln(int[] a, int[] b, int c, int d) { // Stores c into a[i], then d into b[i+UNALIGN_OFF]; order matters when a and b alias.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+UNALIGN_OFF] = d;
+   }
+ }
+
+ static int verify(String text, int i, int elem, int val) { // Returns 0 when elem equals val; otherwise logs the mismatch to stderr and returns 1.
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestLongDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongDoubleVect
+ */
+
+public class TestLongDoubleVect {
+ private static final int ARRLEN = 997; // Length of every array allocated in test().
+ private static final int ITERS = 11000; // Repetitions per kernel in the warmup and timing loops of test().
+ private static final int OFFSET = 3; // NOTE(review): not referenced by any method of this class.
+ private static final int SCALE = 2; // NOTE(review): not referenced by any method of this class.
+ private static final int ALIGN_OFF = 8; // Index shift used by the *_aln, *_alndst and *_alnsrc kernels.
+ private static final int UNALIGN_OFF = 5; // Index shift used by the *_unaln, *_unalndst and *_unalnsrc kernels.
+
+ public static void main(String args[]) { // Entry point: runs test() and exits with status 97 when any mismatch was counted.
+   System.out.println("Testing Long + Double vectors");
+   int failures = test();
+   if (failures > 0) {
+     System.err.println("FAILED: " + failures + " errors");
+     System.exit(97);
+   }
+   System.out.println("PASSED");
+ }
+
+ static int test() { // Warm up, verify and time every kernel; returns the number of mismatches counted by verify().
+ long[] a1 = new long[ARRLEN];
+ long[] a2 = new long[ARRLEN];
+ double[] b1 = new double[ARRLEN];
+ double[] b2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // Call each kernel ITERS times before checking results (presumably to get them JIT-compiled -- confirm).
+ test_ci(a1, b1);
+ test_vi(a2, b2, (long)123, 103.);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (long)123, 103.);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (long)123, 103.);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (long)123, 103.);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (long)123, 103.);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize: fill all four arrays with the sentinel value -1
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ // Test and verify results: run each kernel once and compare every element with its expected value
+ System.out.println("Verification");
+ int errn = 0; // Running count of mismatches; verify() contributes 0 or 1 per element.
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.);
+ }
+ test_vi(a2, b2, (long)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (long)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.);
+ }
+ test_cp(a1, a2, b1, b2); // Copies the values written by test_vi above into a1/b1.
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (long)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+ }
+ test_vi_neg(a2, b2, (long)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+ }
+ test_vi_oppos(a2, b2, (long)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative aligned offset (a2/b2 hold 123 as the copy source)
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // The first ALIGN_OFF destination elements are never written.
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) { // Change the source value so the next copy can be told apart from the previous one.
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // The last ALIGN_OFF elements keep the 123 left by test_cp_alndst.
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_ci_aln(a1, b1); // Writes a1 shifted by ALIGN_OFF and b1 unshifted.
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_vi_aln(a1, b1, (long)123, 103.); // Writes a1 unshifted and b1 shifted by ALIGN_OFF.
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset (same sequence as above, using UNALIGN_OFF)
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // The int literal is widened to -1.0 on assignment.
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1; // The int literal is widened to -1.0 on assignment.
+ }
+ test_vi_unaln(a1, b1, (long)123, 103.);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization: the first ALIGN_OFF elements hold their index, the rest -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ b1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // Source and destination are the same array, ALIGN_OFF elements apart.
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // An element-by-element forward copy repeats the first ALIGN_OFF values periodically.
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // Overwrite elements [ALIGN_OFF, 2*ALIGN_OFF) with -1 before copying back.
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ // Reset for unaligned overlap initialization: the first UNALIGN_OFF elements hold their index, the rest -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ b1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // Source and destination are the same array, UNALIGN_OFF elements apart.
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ }
+
+ if (errn > 0) // Skip the timing phase when verification found mismatches.
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // Timestamps from System.currentTimeMillis(); each printed value is elapsed milliseconds.
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (long)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (long)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (long)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (long)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (long)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // Always 0 here: a non-zero count was already returned before the timing phase.
+ }
+
+ static void test_ci(long[] a, double[] b) { // Constant fill over a.length elements: a[i] = -123, then b[i] = -103. (ascending index).
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = -123;
+     b[idx] = -103.;
+   }
+ }
+ static void test_vi(long[] a, double[] b, long c, double d) { // Variable fill over a.length elements: a[i] = c, then b[i] = d (ascending index).
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_cp(long[] a, long[] b, double[] c, double[] d) { // Element-wise copies b into a and d into c over a.length elements.
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[idx];
+     c[idx] = d[idx];
+   }
+ }
+ static void test_ci_neg(long[] a, double[] b) { // Descending loop with constant stores: a[i] = -123, then b[i] = -103.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = -123;
+     b[idx] = -103.;
+   }
+ }
+ static void test_vi_neg(long[] a, double[] b, long c, double d) { // Descending loop with variable stores: a[i] = c, then b[i] = d.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_cp_neg(long[] a, long[] b, double[] c, double[] d) { // Descending loop copying b into a and d into c.
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = b[idx];
+     c[idx] = d[idx];
+   }
+ }
+ static void test_ci_oppos(long[] a, double[] b) { // a is filled with -123 from the end while b is filled with -103. from the start.
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[last-idx] = -123;
+     b[idx] = -103.;
+   }
+ }
+ static void test_vi_oppos(long[] a, double[] b, long c, double d) { // Descending counter: a[i] = c from the end, b[limit-i] = d from the start.
+   int last = a.length-1;
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = c;
+     b[last-idx] = d;
+   }
+ }
+ static void test_cp_oppos(long[] a, long[] b, double[] c, double[] d) { // Reversed copies: a[i] = b[limit-i] and c[limit-i] = d[i].
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[last-idx];
+     c[last-idx] = d[idx];
+   }
+ }
+ static void test_ci_aln(long[] a, double[] b) { // Stores -123 into a[i+ALIGN_OFF] and -103. into b[i] for i in [0, a.length-ALIGN_OFF).
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = -123;
+     b[idx] = -103.;
+   }
+ }
+ static void test_vi_aln(long[] a, double[] b, long c, double d) { // Stores c into a[i] and d into b[i+ALIGN_OFF] for i in [0, a.length-ALIGN_OFF).
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+ALIGN_OFF] = d;
+   }
+ }
+ static void test_cp_alndst(long[] a, long[] b, double[] c, double[] d) { // Copies b[i] to a[i+ALIGN_OFF] and d[i] to c[i+ALIGN_OFF]; destinations are shifted.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = b[idx];
+     c[idx+ALIGN_OFF] = d[idx];
+   }
+ }
+ static void test_cp_alnsrc(long[] a, long[] b, double[] c, double[] d) { // Copies b[i+ALIGN_OFF] to a[i] and d[i+ALIGN_OFF] to c[i]; sources are shifted.
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = b[idx+ALIGN_OFF];
+     c[idx] = d[idx+ALIGN_OFF];
+   }
+ }
+ static void test_ci_unaln(long[] a, double[] b) { // Stores -123 into a[i+UNALIGN_OFF] and -103. into b[i] for i in [0, a.length-UNALIGN_OFF).
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = -123;
+     b[idx] = -103.;
+   }
+ }
+ static void test_vi_unaln(long[] a, double[] b, long c, double d) { // Stores c into a[i] and d into b[i+UNALIGN_OFF] for i in [0, a.length-UNALIGN_OFF).
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+UNALIGN_OFF] = d;
+   }
+ }
+ static void test_cp_unalndst(long[] a, long[] b, double[] c, double[] d) { // Copies b[i] to a[i+UNALIGN_OFF] and d[i] to c[i+UNALIGN_OFF]; destinations are shifted.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = b[idx];
+     c[idx+UNALIGN_OFF] = d[idx];
+   }
+ }
+ static void test_cp_unalnsrc(long[] a, long[] b, double[] c, double[] d) { // Copies b[i+UNALIGN_OFF] to a[i] and d[i+UNALIGN_OFF] to c[i]; sources are shifted.
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = b[idx+UNALIGN_OFF];
+     c[idx] = d[idx+UNALIGN_OFF];
+   }
+ }
+
+ static int verify(String text, int i, long elem, long val) { // Returns 0 when elem equals val; otherwise logs the mismatch to stderr and returns 1.
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+ static int verify(String text, int i, double elem, double val) { // Exact == comparison (NaN never matches): 0 on match, else logs to stderr and returns 1.
+   if (elem == val) {
+     return 0;
+   }
+   System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   return 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestLongFloatVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongFloatVect
+ */
+
+public class TestLongFloatVect {
+ private static final int ARRLEN = 997; // length of every array allocated by test()
+ private static final int ITERS = 11000; // iteration count of the warmup loop and of each timing loop
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by the *_aln / *_alndst / *_alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // index shift used by the *_unaln / *_unalndst / *_unalnsrc kernels
+
+ public static void main(String args[]) {
+ // Entry point: delegates to test() and reports the number of mismatches it returns.
+ // Any mismatch terminates the VM with exit status 97.
+ System.out.println("Testing Long + Float vectors");
+ final int failures = test();
+ if (failures <= 0) { System.out.println("PASSED"); return; }
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97);
+ }
+
+ static int test() { // Warms up all kernels, verifies each one element by element, then times them; returns the mismatch count.
+ long[] a1 = new long[ARRLEN];
+ long[] a2 = new long[ARRLEN];
+ float[] b1 = new float[ARRLEN];
+ float[] b2 = new float[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // results are discarded here; presumably this is what gets the kernels JIT-compiled before verification
+ test_ci(a1, b1);
+ test_vi(a2, b2, (long)123, 103.f);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (long)123, 103.f);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (long)123, 103.f);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (long)123, 103.f);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (long)123, 103.f);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize all four arrays to -1 before verification starts
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ // Test and verify results: run each kernel once and compare every element with its expected value
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.f);
+ }
+ test_vi(a2, b2, (long)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (long)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.f);
+ }
+ test_cp(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (long)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+ }
+ test_vi_neg(a2, b2, (long)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+ }
+ test_vi_oppos(a2, b2, (long)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative aligned offset (destinations = -1, sources = 123)
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source value so the alnsrc copy is distinguishable from the 123 left by alndst
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) { // reset before test_ci_aln
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) { // reset before test_vi_aln
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_vi_aln(a1, b1, (long)123, 103.f);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset (destinations = -1, sources = 123)
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // new source value so the unalnsrc copy is distinguishable from the 123 left by unalndst
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // reset before test_ci_unaln
+ a1[i] = -1;
+ b1[i] = -1; // int literal widens to -1.0f, the same value as the -1.f used by the other resets
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // reset before test_vi_unaln
+ a1[i] = -1;
+ b1[i] = -1; // int literal widens to -1.0f, the same value as the -1.f used by the other resets
+ }
+ test_vi_unaln(a1, b1, (long)123, 103.f);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for aligned overlap initialization: seed 0..ALIGN_OFF-1, rest -1
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ b1[i] = (float)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination alias: the seed must repeat with period ALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // overwrite the second period with -1; alnsrc then copies it down into [0, ALIGN_OFF)
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.f;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ // Reset for unaligned overlap initialization: seed 0..UNALIGN_OFF-1, rest -1
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ b1[i] = (float)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination alias: the seed must repeat with period UNALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // overwrite the second period with -1; unalnsrc then copies it down into [0, UNALIGN_OFF)
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.f;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification found mismatches
+ return errn;
+
+ System.out.println("Time"); // prints elapsed milliseconds for ITERS calls of each kernel; the timings are not checked
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (long)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (long)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (long)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (long)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (long)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(long[] a, float[] b) { // Fills a with the constant -123 and b with -103.f, ascending index.
+ for (int i = 0; i < a.length; i+=1) { // trip count comes from a only; b must be at least as long
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi(long[] a, float[] b, long c, float d) { // Fills a with the loop-invariant c and b with d, ascending index.
+ for (int i = 0; i < a.length; i+=1) { // trip count comes from a only; b must be at least as long
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(long[] a, long[] b, float[] c, float[] d) { // Element-wise copy b -> a (long) and d -> c (float), ascending index.
+ for (int i = 0; i < a.length; i+=1) { // trip count comes from a only; b, c and d must be at least as long
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(long[] a, float[] b) { // Fills a with -123 and b with -103.f, walking from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only; b must be at least as long
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_neg(long[] a, float[] b, long c, float d) { // Fills a with c and b with d, walking from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only; b must be at least as long
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(long[] a, long[] b, float[] c, float[] d) { // Element-wise copy b -> a and d -> c, walking from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) { // start index comes from a only; b, c and d must be at least as long
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(long[] a, float[] b) { // Constant fill with opposite directions: a is written last-to-first while b is written first-to-last.
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_oppos(long[] a, float[] b, long c, float d) { // Invariant fill with opposite directions: a is written last-to-first while b is written first-to-last.
+ int limit = a.length-1; // index of the last element of a
+ for (int i = a.length-1; i >= 0; i-=1) { // descending loop, so b[limit-i] ascends from index 0
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_cp_oppos(long[] a, long[] b, float[] c, float[] d) { // Reversing copies: a[i] = b[last-i] and c[last-i] = d[i], so each load/store pair walks in opposite directions.
+ int limit = a.length-1; // index of the last element of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ c[limit-i] = d[i];
+ }
+ }
+ static void test_ci_aln(long[] a, float[] b) { // Constant-store kernel: the long store lands ALIGN_OFF elements ahead of the float store.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // a[0..ALIGN_OFF) is never written; b is written only for i < a.length-ALIGN_OFF
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_aln(long[] a, float[] b, long c, float d) { // Stores the loop-invariant c into a[i] and d into b[i+ALIGN_OFF].
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // the last ALIGN_OFF elements of a and b[0..ALIGN_OFF) are never written
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_alndst(long[] a, long[] b, float[] c, float[] d) { // Copies b[i] -> a[i+ALIGN_OFF] and d[i] -> c[i+ALIGN_OFF].
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // if source and destination are the same array, later reads see values written ALIGN_OFF iterations earlier
+ a[i+ALIGN_OFF] = b[i];
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(long[] a, long[] b, float[] c, float[] d) { // Copies b[i+ALIGN_OFF] -> a[i] and d[i+ALIGN_OFF] -> c[i].
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // reads run ahead of writes, so the last ALIGN_OFF destination elements are left untouched
+ a[i] = b[i+ALIGN_OFF];
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(long[] a, float[] b) { // Constant-store kernel: the long store lands UNALIGN_OFF elements ahead of the float store.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // a[0..UNALIGN_OFF) is never written; b is written only for i < a.length-UNALIGN_OFF
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_unaln(long[] a, float[] b, long c, float d) { // Stores the loop-invariant c into a[i] and d into b[i+UNALIGN_OFF].
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // the last UNALIGN_OFF elements of a and b[0..UNALIGN_OFF) are never written
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(long[] a, long[] b, float[] c, float[] d) { // Copies b[i] -> a[i+UNALIGN_OFF] and d[i] -> c[i+UNALIGN_OFF].
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // if source and destination are the same array, later reads see values written UNALIGN_OFF iterations earlier
+ a[i+UNALIGN_OFF] = b[i];
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(long[] a, long[] b, float[] c, float[] d) { // Copies b[i+UNALIGN_OFF] -> a[i] and d[i+UNALIGN_OFF] -> c[i].
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // reads run ahead of writes, so the last UNALIGN_OFF destination elements are left untouched
+ a[i] = b[i+UNALIGN_OFF];
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, long elem, long val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, float elem, float val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestLongVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestLongVect
+ */
+
+public class TestLongVect {
+ private static final int ARRLEN = 997; // length of the arrays allocated by test()
+ private static final int ITERS = 11000; // iteration count of the warmup loop and of each timing loop
+ private static final int OFFSET = 3; // passed to the *_inv kernels; first index checked after the *_off and *_inv kernels
+ private static final int SCALE = 2; // the *_scl checks expect only indices divisible by SCALE to be written
+ private static final int ALIGN_OFF = 8; // boundary used when verifying the *_aln / *_alndst / *_alnsrc kernels
+ private static final int UNALIGN_OFF = 5; // boundary used when verifying the *_unaln / *_unalndst / *_unalnsrc kernels
+
+ public static void main(String args[]) {
+ // Entry point: delegates to test() and reports the number of mismatches it returns.
+ // Any mismatch terminates the VM with exit status 97.
+ System.out.println("Testing Long vectors");
+ final int failures = test();
+ if (failures <= 0) { System.out.println("PASSED"); return; }
+ System.err.println("FAILED: " + failures + " errors");
+ System.exit(97);
+ }
+
+ static int test() {
+ long[] a1 = new long[ARRLEN];
+ long[] a2 = new long[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, (long)123);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, (long)123, (long)103);
+ test_ci_neg(a1);
+ test_vi_neg(a2, (long)123);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, (long)123, (long)103);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, (long)123);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, (long)123, (long)103);
+ test_ci_off(a1);
+ test_vi_off(a2, (long)123);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, (long)123, (long)103);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, (long)123, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, (long)123);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, (long)123, (long)103);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, (long)123, (long)103);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, (long)123, (long)103);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (long)-123);
+ }
+ test_vi(a2, (long)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (long)123);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (long)123);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], (long)-123);
+ errn += verify("test_2ci: a2", i, a2[i], (long)-103);
+ }
+ test_2vi(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], (long)123);
+ errn += verify("test_2vi: a2", i, a2[i], (long)103);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (long)-123);
+ }
+ test_vi_neg(a2, (long)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (long)123);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (long)123);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], (long)-123);
+ errn += verify("test_2ci_neg: a2", i, a2[i], (long)-103);
+ }
+ test_2vi_neg(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], (long)123);
+ errn += verify("test_2vi_neg: a2", i, a2[i], (long)103);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (long)-123);
+ }
+ test_vi_oppos(a2, (long)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (long)123);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (long)123);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], (long)-123);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], (long)-103);
+ }
+ test_2vi_oppos(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], (long)123);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], (long)103);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], (long)-123);
+ }
+ test_vi_off(a2, (long)123);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], (long)123);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], (long)123);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], (long)-123);
+ errn += verify("test_2ci_off: a2", i, a2[i], (long)-103);
+ }
+ test_2vi_off(a1, a2, (long)123, (long)103);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (long)123);
+ errn += verify("test_2vi_off: a2", i, a2[i], (long)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (long)-1);
+ errn += verify("test_2vi_off: a2", i, a2[i], (long)-1);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], (long)-123);
+ }
+ test_vi_inv(a2, (long)123, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], (long)123);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], (long)123);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], (long)-123);
+ errn += verify("test_2ci_inv: a2", i, a2[i], (long)-103);
+ }
+ test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (long)123);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (long)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (long)-1);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (long)-1);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (long)val);
+ }
+ test_vi_scl(a2, (long)123);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (long)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (long)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], (long)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (long)-123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], (long)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (long)-103);
+ }
+ }
+ test_2vi_scl(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], (long)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (long)123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], (long)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (long)103);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (long)123);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (long)123);
+ }
+ test_vi(a2, (long)-123);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (long)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (long)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (long)123);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (long)123);
+ }
+ test_vi(a2, (long)-123);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (long)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (long)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, (long)123, (long)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (long)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (long)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (long)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (long)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (long)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, (long)123, (long)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (long)103);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (long)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (long)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (long)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (long)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (long)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, (long)123, (long)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (long)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (long)103);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, (long)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, (long)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, (long)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, (long)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, (long)123, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, (long)123, (long)103, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, (long)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, (long)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(long[] a) { // Constant init: a[i] = -123 for every index of a, ascending.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ }
+ }
+ static void test_vi(long[] a, long b) { // Variable init: a[i] = b for every index of a, ascending.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp(long[] a, long[] b) { // Copy: a[i] = b[i], ascending; the bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i]; // b is indexed up to a.length-1
+ }
+ }
+ static void test_2ci(long[] a, long[] b) { // Two constant stores per iteration, ascending; bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi(long[] a, long[] b, long c, long d) { // Two variable stores per iteration: a gets c, b gets d.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_neg(long[] a) { // Constant init with a negative stride: a[i] = -123 from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ }
+ }
+ static void test_vi_neg(long[] a, long b) { // Variable init with a negative stride: a[i] = b from the last index down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b;
+ }
+ }
+ static void test_cp_neg(long[] a, long[] b) { // Copy with a negative stride: a[i] = b[i] from a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i]; // b is indexed starting at a.length-1
+ }
+ }
+ static void test_2ci_neg(long[] a, long[] b) { // Two constant stores per iteration, index descending from a.length-1.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_2vi_neg(long[] a, long[] b, long c, long d) { // Two variable stores per iteration, index descending.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_ci_oppos(long[] a) { // Constant init where the store index runs opposite to the loop counter.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // i ascends, so the written index descends from limit to 0
+ }
+ }
+ static void test_vi_oppos(long[] a, long b) { // Variable init where the store index runs opposite to the loop counter.
+ int limit = a.length-1; // last valid index of a
+ for (int i = limit; i >= 0; i-=1) {
+ a[limit-i] = b; // i descends, so the written index ascends from 0 to limit
+ }
+ }
+ static void test_cp_oppos(long[] a, long[] b) { // Copy with opposite directions: a is written ascending, b is read descending.
+ int limit = a.length-1; // computed from a; also used to index b
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ }
+ }
+ static void test_2ci_oppos(long[] a, long[] b) { // Two constant stores per iteration that walk their arrays in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // descending index
+ b[i] = -103; // ascending index
+ }
+ }
+ static void test_2vi_oppos(long[] a, long[] b, long c, long d) { // Two variable stores per iteration in opposite directions.
+ int limit = a.length-1; // computed from a; also used to index b
+ for (int i = limit; i >= 0; i-=1) {
+ a[i] = c; // descending index
+ b[limit-i] = d; // ascending index
+ }
+ }
+ static void test_ci_off(long[] a) { // Constant init at a fixed offset: a[i+OFFSET] = -123 for i in [0, a.length-OFFSET).
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123; // OFFSET is declared outside this method
+ }
+ }
+ static void test_vi_off(long[] a, long b) { // Variable init at a fixed offset: a[i+OFFSET] = b for i in [0, a.length-OFFSET).
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b; // OFFSET is declared outside this method
+ }
+ }
+ static void test_cp_off(long[] a, long[] b) { // Copy where both sides use the same fixed offset, i in [0, a.length-OFFSET).
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = b[i+OFFSET]; // same index on both arrays; OFFSET is declared outside this method
+ }
+ }
+ static void test_2ci_off(long[] a, long[] b) { // Two constant stores per iteration, both at index i+OFFSET.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = -123;
+ b[i+OFFSET] = -103;
+ }
+ }
+ static void test_2vi_off(long[] a, long[] b, long c, long d) { // Two variable stores per iteration, both at index i+OFFSET.
+ for (int i = 0; i < a.length-OFFSET; i+=1) {
+ a[i+OFFSET] = c;
+ b[i+OFFSET] = d;
+ }
+ }
+ static void test_ci_inv(long[] a, int k) { // Constant init at a caller-supplied offset: a[i+k] = -123 for i in [0, a.length-k).
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123; // k does not change inside the loop
+ }
+ }
+ static void test_vi_inv(long[] a, long b, int k) { // Variable init at a caller-supplied offset: a[i+k] = b for i in [0, a.length-k).
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b; // k does not change inside the loop
+ }
+ }
+ static void test_cp_inv(long[] a, long[] b, int k) { // Copy where both sides use the caller-supplied offset k.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = b[i+k]; // same index on both arrays
+ }
+ }
+ static void test_2ci_inv(long[] a, long[] b, int k) { // Two constant stores per iteration, both at index i+k.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = -123;
+ b[i+k] = -103;
+ }
+ }
+ static void test_2vi_inv(long[] a, long[] b, long c, long d, int k) { // Two variable stores per iteration, both at index i+k.
+ for (int i = 0; i < a.length-k; i+=1) {
+ a[i+k] = c;
+ b[i+k] = d;
+ }
+ }
+ static void test_ci_scl(long[] a) { // Constant init with a scaled index: a[i*SCALE] = -123 while i*SCALE < a.length.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123; // SCALE is declared outside this method; indices not of the form i*SCALE are untouched
+ }
+ }
+ static void test_vi_scl(long[] a, long b) { // Variable init with a scaled index: a[i*SCALE] = b while i*SCALE < a.length.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b; // SCALE is declared outside this method; indices not of the form i*SCALE are untouched
+ }
+ }
+ static void test_cp_scl(long[] a, long[] b) { // Copy with a scaled index on both sides while i*SCALE < a.length.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = b[i*SCALE]; // same index on both arrays
+ }
+ }
+ static void test_2ci_scl(long[] a, long[] b) { // Two constant stores per iteration, both at index i*SCALE.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = -123;
+ b[i*SCALE] = -103;
+ }
+ }
+ static void test_2vi_scl(long[] a, long[] b, long c, long d) { // Two variable stores per iteration, both at index i*SCALE.
+ for (int i = 0; i*SCALE < a.length; i+=1) {
+ a[i*SCALE] = c;
+ b[i*SCALE] = d;
+ }
+ }
+ static void test_cp_alndst(long[] a, long[] b) { // Copy with the destination shifted by ALIGN_OFF: a[i+ALIGN_OFF] = b[i].
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, later iterations read values stored by earlier ones
+ }
+ }
+ static void test_cp_alnsrc(long[] a, long[] b) { // Copy with the source shifted by ALIGN_OFF: a[i] = b[i+ALIGN_OFF].
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, each read is ahead of every write made so far
+ }
+ }
+ static void test_2ci_aln(long[] a, long[] b) { // Two constant stores per iteration whose indices differ by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = -123; // shifted index
+ b[i] = -103; // unshifted index
+ }
+ }
+ static void test_2vi_aln(long[] a, long[] b, long c, long d) { // Two variable stores per iteration whose indices differ by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c; // unshifted index
+ b[i+ALIGN_OFF] = d; // shifted index
+ }
+ }
+ static void test_cp_unalndst(long[] a, long[] b) { // Copy with the destination shifted by UNALIGN_OFF: a[i+UNALIGN_OFF] = b[i].
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, later iterations read values stored by earlier ones
+ }
+ }
+ static void test_cp_unalnsrc(long[] a, long[] b) { // Copy with the source shifted by UNALIGN_OFF: a[i] = b[i+UNALIGN_OFF].
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, each read is ahead of every write made so far
+ }
+ }
+ static void test_2ci_unaln(long[] a, long[] b) { // Two constant stores per iteration whose indices differ by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = -123; // shifted index
+ b[i] = -103; // unshifted index
+ }
+ }
+ static void test_2vi_unaln(long[] a, long[] b, long c, long d) { // Two variable stores per iteration whose indices differ by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c; // unshifted index
+ b[i+UNALIGN_OFF] = d; // shifted index
+ }
+ }
+
+ static int verify(String text, int i, long elem, long val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestShortDoubleVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortDoubleVect
+ */
+
+public class TestShortDoubleVect {
+ private static final int ARRLEN = 997;
+ private static final int ITERS = 11000;
+ private static final int OFFSET = 3;
+ private static final int SCALE = 2;
+ private static final int ALIGN_OFF = 8;
+ private static final int UNALIGN_OFF = 5;
+
+ public static void main(String args[]) {
+ System.out.println("Testing Short + Double vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // Warms up every kernel, verifies each kernel's output, then prints timings; returns the mismatch count.
+ short[] a1 = new short[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ double[] b1 = new double[ARRLEN];
+ double[] b2 = new double[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // ITERS calls of every kernel; presumably enough to get them JIT-compiled - TODO confirm
+ test_ci(a1, b1);
+ test_vi(a2, b2, (short)123, 103.);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (short)123, 103.);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (short)123, 103.);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (short)123, 103.);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (short)123, 103.);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running count of mismatching elements
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.);
+ }
+ test_vi(a2, b2, (short)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (short)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.);
+ }
+ test_cp(a1, a2, b1, b2); // copies the values test_vi just stored in a2/b2
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (short)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.);
+ }
+ test_vi_neg(a2, b2, (short)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.;
+ b2[i] = -1.;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.);
+ }
+ test_vi_oppos(a2, b2, (short)123, 103.);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are never written
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the tail still holds the 123 values left by test_cp_alndst
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_vi_aln(a1, b1, (short)123, 103.);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.;
+ b2[i] = 123.;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are never written
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123.;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the tail still holds the 123 values left by test_cp_unalndst
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (short)123, 103.);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ b1[i] = (double)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF; // the seed values 0..ALIGN_OFF-1 are expected to repeat along the array
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // source and destination are the same array
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ b1[i] = (double)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same array
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF; // the seed values 0..UNALIGN_OFF-1 are expected to repeat along the array
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (double)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // source and destination are the same array
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (double)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification found mismatches
+ return errn;
+
+ System.out.println("Time"); // elapsed milliseconds are printed below but never checked
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (short)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (short)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (short)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (short)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (short)123, 103.);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // errn is 0 here: a positive count already returned before the timing section
+ }
+
+ static void test_ci(short[] a, double[] b) { // Constant init of a short and a double array in one ascending loop; bound is a.length.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi(short[] a, double[] b, short c, double d) { // Variable init: a[i] = c and b[i] = d in one ascending loop.
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(short[] a, short[] b, double[] c, double[] d) { // Copies b into a (short) and d into c (double) in one ascending loop.
+ for (int i = 0; i < a.length; i+=1) { // a.length bounds the index used on all four arrays
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(short[] a, double[] b) { // Constant init of both arrays with a negative stride, from a.length-1 down to 0.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103.;
+ }
+ }
+ static void test_vi_neg(short[] a, double[] b, short c, double d) { // Variable init of both arrays with a negative stride.
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(short[] a, short[] b, double[] c, double[] d) { // Copies b into a and d into c with a negative stride.
+ for (int i = a.length-1; i >= 0; i-=1) { // a.length-1 is the starting index for all four arrays
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(short[] a, double[] b) { // Constant init where the two arrays are walked in opposite directions.
+ int limit = a.length-1; // last valid index of a
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // descending index
+ b[i] = -103.; // ascending index
+ }
+ }
+ static void test_vi_oppos(short[] a, double[] b, short c, double d) { // Variable init where the two arrays are walked in opposite directions.
+ int limit = a.length-1; // computed from a; also used to index b
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c; // descending index
+ b[limit-i] = d; // ascending index
+ }
+ }
+ static void test_cp_oppos(short[] a, short[] b, double[] c, double[] d) { // Reversing copies: each destination is walked opposite to its source.
+ int limit = a.length-1; // computed from a; used to index b and c as well
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // a ascending, b descending
+ c[limit-i] = d[i]; // c descending, d ascending
+ }
+ }
+ static void test_ci_aln(short[] a, double[] b) { // Constant init where a's index is shifted by ALIGN_OFF relative to b's.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = -123; // the first ALIGN_OFF elements of a are not written
+ b[i] = -103.; // indices a.length-ALIGN_OFF and above of b are not written
+ }
+ }
+ static void test_vi_aln(short[] a, double[] b, short c, double d) { // Variable init where b's index is shifted by ALIGN_OFF relative to a's.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c; // indices a.length-ALIGN_OFF and above of a are not written
+ b[i+ALIGN_OFF] = d; // the first ALIGN_OFF elements of b are not written
+ }
+ }
+ static void test_cp_alndst(short[] a, short[] b, double[] c, double[] d) { // Copies with both destinations shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, later iterations read values stored by earlier ones
+ c[i+ALIGN_OFF] = d[i]; // likewise when c and d are the same array
+ }
+ }
+ static void test_cp_alnsrc(short[] a, short[] b, double[] c, double[] d) { // Copies with both sources shifted by ALIGN_OFF.
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, each read is ahead of every write made so far
+ c[i] = d[i+ALIGN_OFF]; // likewise when c and d are the same array
+ }
+ }
+ static void test_ci_unaln(short[] a, double[] b) { // Constant init where a's index is shifted by UNALIGN_OFF relative to b's.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = -123; // the first UNALIGN_OFF elements of a are not written
+ b[i] = -103.; // indices a.length-UNALIGN_OFF and above of b are not written
+ }
+ }
+ static void test_vi_unaln(short[] a, double[] b, short c, double d) { // Variable init where b's index is shifted by UNALIGN_OFF relative to a's.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c; // indices a.length-UNALIGN_OFF and above of a are not written
+ b[i+UNALIGN_OFF] = d; // the first UNALIGN_OFF elements of b are not written
+ }
+ }
+ static void test_cp_unalndst(short[] a, short[] b, double[] c, double[] d) { // Copies with both destinations shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, later iterations read values stored by earlier ones
+ c[i+UNALIGN_OFF] = d[i]; // likewise when c and d are the same array
+ }
+ }
+ static void test_cp_unalnsrc(short[] a, short[] b, double[] c, double[] d) { // Copies with both sources shifted by UNALIGN_OFF.
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, each read is ahead of every write made so far
+ c[i] = d[i+UNALIGN_OFF]; // likewise when c and d are the same array
+ }
+ }
+
+ static int verify(String text, int i, short elem, short val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, double elem, double val) {
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestShortFloatVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortFloatVect
+ */
+
+public class TestShortFloatVect { // Test driver: runs paired short[]/float[] loop kernels and checks every element they write
+ private static final int ARRLEN = 997; // length of every array allocated in test()
+ private static final int ITERS = 11000; // repetitions of each kernel in the warmup and timing loops
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift applied by the *_aln* kernels
+ private static final int UNALIGN_OFF = 5; // index shift applied by the *_unaln* kernels
+
+ public static void main(String[] args) { // prints PASSED, or reports the error count and exits with status 97
+ System.out.println("Testing Short + Float vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warmup, then verification of every kernel, then timing; returns the number of mismatched elements
+ short[] a1 = new short[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ float[] b1 = new float[ARRLEN];
+ float[] b2 = new float[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // run every kernel ITERS times before any result is checked; presumably so they are JIT-compiled first
+ test_ci(a1, b1);
+ test_vi(a2, b2, (short)123, 103.f);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (short)123, 103.f);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (short)123, 103.f);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (short)123, 103.f);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (short)123, 103.f);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) { // -1 is a value no kernel call below stores, so untouched slots stay recognizable
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1); // expect the constants -123 / -103.f everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci: b1", i, b1[i], -103.f);
+ }
+ test_vi(a2, b2, (short)123, 103.f); // expect the passed values everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (short)123);
+ errn += verify("test_vi: b2", i, b2[i], 103.f);
+ }
+ test_cp(a1, a2, b1, b2); // a1/b1 must now equal a2/b2 as filled by test_vi
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (short)123);
+ errn += verify("test_cp: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], -103.f);
+ }
+ test_vi_neg(a2, b2, (short)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], 103.f);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1.f;
+ b2[i] = -1.f;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], -103.f);
+ }
+ test_vi_oppos(a2, b2, (short)123, 103.f);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], 103.f);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniform, so the reversed copy is uniform too
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the head is never a destination, so it keeps the reset value
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) { // everything else was copied from a2/b2
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // flip the source sign so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) { // slots written by this copy hold the new source value
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the tail is never a destination, so it still holds the earlier copy
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], 123.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_ci_aln(a1, b1); // shorts are stored from ALIGN_OFF upward, floats from 0 up to ARRLEN-ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], -1.f);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_vi_aln(a1, b1, (short)123, 103.f); // mirror image: shorts are stored from 0, floats from ALIGN_OFF upward
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1.f;
+ b2[i] = 123.f;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the head is never a destination, so it keeps the reset value
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) { // flip the source sign so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123.f;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], -123.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the tail is never a destination, so it still holds the earlier copy
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], 123.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -103.f);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_vi_unaln(a1, b1, (short)123, 103.f);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], 103.f);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // seed the head with 0..ALIGN_OFF-1
+ a1[i] = (short)i;
+ b1[i] = (float)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same arrays
+ for (int i=0; i<ARRLEN; i++) { // the seed must have been carried forward through the whole array
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // mark the second group so the downward copy has something recognizable to move
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1.f;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same arrays again, every element moves down by ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) { // the head now holds the marker
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) { // the pattern has period ALIGN_OFF, so shifting by ALIGN_OFF leaves it unchanged
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // seed the head with 0..UNALIGN_OFF-1
+ a1[i] = (short)i;
+ b1[i] = (float)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1.f;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same arrays
+ for (int i=0; i<ARRLEN; i++) { // the seed must have been carried forward through the whole array
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (float)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // mark the second group so the downward copy has something recognizable to move
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1.f;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same arrays again, every element moves down by UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) { // the head now holds the marker
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], -1.f);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) { // the pattern has period UNALIGN_OFF, so shifting by UNALIGN_OFF leaves it unchanged
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (float)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // wall-clock milliseconds around each ITERS-long run; printed only, never checked
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (short)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (short)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (short)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (short)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (short)123, 103.f);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(short[] a, float[] b) { // constant fill, ascending index; like every kernel here the loop is bounded by a.length only
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi(short[] a, float[] b, short c, float d) { // fill with the caller-supplied c and d, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(short[] a, short[] b, float[] c, float[] d) { // element-wise copy of b into a and d into c, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(short[] a, float[] b) { // constant fill, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_neg(short[] a, float[] b, short c, float d) { // fill with c and d, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(short[] a, short[] b, float[] c, float[] d) { // element-wise copy, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(short[] a, float[] b) { // constant fill; a is written back to front while b is written front to back
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_oppos(short[] a, float[] b, short c, float d) { // fill with c and d; i descends, so a is written back to front and b front to back
+ int limit = a.length-1;
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_cp_oppos(short[] a, short[] b, float[] c, float[] d) { // a receives b in reverse order, c receives d in reverse order
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ c[limit-i] = d[i];
+ }
+ }
+ static void test_ci_aln(short[] a, float[] b) { // constant fill; the short store index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound keeps a[i+ALIGN_OFF] inside a
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_aln(short[] a, float[] b, short c, float d) { // fill with c and d; the float store index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_alndst(short[] a, short[] b, float[] c, float[] d) { // copy with the destination index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, reads at i >= ALIGN_OFF see values stored earlier in this loop
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(short[] a, short[] b, float[] c, float[] d) { // copy with the source index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, every read is ahead of all stores made so far
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(short[] a, float[] b) { // constant fill; the short store index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound keeps a[i+UNALIGN_OFF] inside a
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103.f;
+ }
+ }
+ static void test_vi_unaln(short[] a, float[] b, short c, float d) { // fill with c and d; the float store index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(short[] a, short[] b, float[] c, float[] d) { // copy with the destination index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, reads at i >= UNALIGN_OFF see values stored earlier in this loop
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(short[] a, short[] b, float[] c, float[] d) { // copy with the source index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, every read is ahead of all stores made so far
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, short elem, short val) { // returns 1 and prints "text[i] = elem != val" on mismatch, else 0
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, float elem, float val) { // float overload; uses !=, so a NaN element always counts as a mismatch
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestShortIntVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortIntVect
+ */
+
+public class TestShortIntVect { // Test driver: runs paired short[]/int[] loop kernels and checks every element they write
+ private static final int ARRLEN = 997; // length of every array allocated in test()
+ private static final int ITERS = 11000; // repetitions of each kernel in the warmup and timing loops
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift applied by the *_aln* kernels
+ private static final int UNALIGN_OFF = 5; // index shift applied by the *_unaln* kernels
+
+ public static void main(String args[]) { // prints PASSED, or reports the error count and exits with status 97
+ System.out.println("Testing Short + Integer vectors");
+ int errn = test();
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97);
+ }
+ System.out.println("PASSED");
+ }
+
+ static int test() { // warmup, then verification of every kernel, then timing; returns the number of mismatched elements
+ short[] a1 = new short[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ int[] b1 = new int[ARRLEN];
+ int[] b2 = new int[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // run every kernel ITERS times before any result is checked; presumably so they are JIT-compiled first
+ test_ci(a1, b1);
+ test_vi(a2, b2, (short)123, (int)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (short)123, (int)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (short)123, (int)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (short)123, (int)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (short)123, (int)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) { // -1 is a value no kernel call below stores, so untouched slots stay recognizable
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1, b1); // expect the constants -123 / -103 everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci: b1", i, b1[i], (int)-103);
+ }
+ test_vi(a2, b2, (short)123, (int)103); // expect the passed values everywhere
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (short)123);
+ errn += verify("test_vi: b2", i, b2[i], (int)103);
+ }
+ test_cp(a1, a2, b1, b2); // a1/b1 must now equal a2/b2 as filled by test_vi
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (short)123);
+ errn += verify("test_cp: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (int)-103);
+ }
+ test_vi_neg(a2, b2, (short)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (int)103);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (int)-103);
+ }
+ test_vi_oppos(a2, b2, (short)123, (int)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (int)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2); // sources are uniform, so the reversed copy is uniform too
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the head is never a destination, so it keeps the reset value
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) { // everything else was copied from a2/b2
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // flip the source sign so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) { // slots written by this copy hold the new source value
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the tail is never a destination, so it still holds the earlier copy
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (int)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1); // shorts are stored from ALIGN_OFF upward, ints from 0 up to ARRLEN-ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (int)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (int)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (short)123, (int)103); // mirror image: shorts are stored from 0, ints from ALIGN_OFF upward
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the head is never a destination, so it keeps the reset value
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) { // flip the source sign so the next copy is distinguishable from the previous one
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the tail is never a destination, so it still holds the earlier copy
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (int)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (int)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (short)123, (int)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (int)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) { // seed the head with 0..ALIGN_OFF-1
+ a1[i] = (short)i;
+ b1[i] = (int)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same arrays
+ for (int i=0; i<ARRLEN; i++) { // the seed must have been carried forward through the whole array
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (int)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) { // mark the second group so the downward copy has something recognizable to move
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // same arrays again, every element moves down by ALIGN_OFF
+ for (int i=0; i<ALIGN_OFF; i++) { // the head now holds the marker
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) { // the pattern has period ALIGN_OFF, so shifting by ALIGN_OFF leaves it unchanged
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (int)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) { // seed the head with 0..UNALIGN_OFF-1
+ a1[i] = (short)i;
+ b1[i] = (int)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same arrays
+ for (int i=0; i<ARRLEN; i++) { // the seed must have been carried forward through the whole array
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (int)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) { // mark the second group so the downward copy has something recognizable to move
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // same arrays again, every element moves down by UNALIGN_OFF
+ for (int i=0; i<UNALIGN_OFF; i++) { // the head now holds the marker
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) { // the pattern has period UNALIGN_OFF, so shifting by UNALIGN_OFF leaves it unchanged
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (int)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time");
+ long start, end; // wall-clock milliseconds around each ITERS-long run; printed only, never checked
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (short)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (short)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (short)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (short)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (short)123, (int)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn;
+ }
+
+ static void test_ci(short[] a, int[] b) { // constant fill, ascending index; like every kernel here the loop is bounded by a.length only
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(short[] a, int[] b, short c, int d) { // fill with the caller-supplied c and d, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(short[] a, short[] b, int[] c, int[] d) { // element-wise copy of b into a and d into c, ascending index
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(short[] a, int[] b) { // constant fill, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(short[] a, int[] b, short c, int d) { // fill with c and d, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(short[] a, short[] b, int[] c, int[] d) { // element-wise copy, descending index
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(short[] a, int[] b) { // constant fill; a is written back to front while b is written front to back
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_oppos(short[] a, int[] b, short c, int d) { // fill with c and d; i descends, so a is written back to front and b front to back
+ int limit = a.length-1;
+ for (int i = a.length-1; i >= 0; i-=1) {
+ a[i] = c;
+ b[limit-i] = d;
+ }
+ }
+ static void test_cp_oppos(short[] a, short[] b, int[] c, int[] d) { // a receives b in reverse order, c receives d in reverse order
+ int limit = a.length-1;
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i];
+ c[limit-i] = d[i];
+ }
+ }
+ static void test_ci_aln(short[] a, int[] b) { // constant fill; the short store index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // bound keeps a[i+ALIGN_OFF] inside a
+ a[i+ALIGN_OFF] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_aln(short[] a, int[] b, short c, int d) { // fill with c and d; the int store index is shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+ALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_alndst(short[] a, short[] b, int[] c, int[] d) { // copy with the destination index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i+ALIGN_OFF] = b[i]; // when a and b are the same array, reads at i >= ALIGN_OFF see values stored earlier in this loop
+ c[i+ALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_alnsrc(short[] a, short[] b, int[] c, int[] d) { // copy with the source index shifted up by ALIGN_OFF
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) {
+ a[i] = b[i+ALIGN_OFF]; // when a and b are the same array, every read is ahead of all stores made so far
+ c[i] = d[i+ALIGN_OFF];
+ }
+ }
+ static void test_ci_unaln(short[] a, int[] b) { // constant fill; the short store index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // bound keeps a[i+UNALIGN_OFF] inside a
+ a[i+UNALIGN_OFF] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_unaln(short[] a, int[] b, short c, int d) { // fill with c and d; the int store index is shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = c;
+ b[i+UNALIGN_OFF] = d;
+ }
+ }
+ static void test_cp_unalndst(short[] a, short[] b, int[] c, int[] d) { // copy with the destination index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i+UNALIGN_OFF] = b[i]; // when a and b are the same array, reads at i >= UNALIGN_OFF see values stored earlier in this loop
+ c[i+UNALIGN_OFF] = d[i];
+ }
+ }
+ static void test_cp_unalnsrc(short[] a, short[] b, int[] c, int[] d) { // copy with the source index shifted up by UNALIGN_OFF
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) {
+ a[i] = b[i+UNALIGN_OFF]; // when a and b are the same array, every read is ahead of all stores made so far
+ c[i] = d[i+UNALIGN_OFF];
+ }
+ }
+
+ static int verify(String text, int i, short elem, short val) { // returns 1 and prints "text[i] = elem != val" on mismatch, else 0
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+ static int verify(String text, int i, int elem, int val) { // int overload with the same contract as the short version
+ if (elem != val) {
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+ return 1;
+ }
+ return 0;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestShortLongVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortLongVect
+ */
+
+public class TestShortLongVect {
+ private static final int ARRLEN = 997; // length of every array allocated by test()
+ private static final int ITERS = 11000; // number of calls per kernel in the warmup and timing loops
+ private static final int OFFSET = 3; // not referenced anywhere in this class
+ private static final int SCALE = 2; // not referenced anywhere in this class
+ private static final int ALIGN_OFF = 8; // index shift used by the *_aln* kernels and their verification
+ private static final int UNALIGN_OFF = 5; // index shift used by the *_unaln* kernels and their verification
+
+ public static void main(String args[]) { // entry point named on the jtreg @run line; args is not used
+ System.out.println("Testing Short + Long vectors");
+ int errn = test(); // number of mismatches counted by test()
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // terminate with exit status 97 on any mismatch
+ }
+ System.out.println("PASSED"); // reached only when no mismatch was counted
+ }
+
+ static int test() { // runs every kernel in three phases (warmup, verification, timing) and returns the number of mismatches
+ short[] a1 = new short[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ long[] b1 = new long[ARRLEN];
+ long[] b2 = new long[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) { // call every kernel ITERS times; presumably enough for the JIT to compile them before verification
+ test_ci(a1, b1);
+ test_vi(a2, b2, (short)123, (long)103);
+ test_cp(a1, a2, b1, b2);
+ test_ci_neg(a1, b1);
+ test_vi_neg(a1, b1, (short)123, (long)103);
+ test_cp_neg(a1, a2, b1, b2);
+ test_ci_oppos(a1, b1);
+ test_vi_oppos(a1, b1, (short)123, (long)103);
+ test_cp_oppos(a1, a2, b1, b2);
+ test_ci_aln(a1, b1);
+ test_vi_aln(a1, b1, (short)123, (long)103);
+ test_cp_alndst(a1, a2, b1, b2);
+ test_cp_alnsrc(a1, a2, b1, b2);
+ test_ci_unaln(a1, b1);
+ test_vi_unaln(a1, b1, (short)123, (long)103);
+ test_cp_unalndst(a1, a2, b1, b2);
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ // Initialize all four arrays to -1 so that elements a kernel fails to write are detectable
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0; // running count of mismatching elements
+ {
+ test_ci(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci: b1", i, b1[i], (long)-103);
+ }
+ test_vi(a2, b2, (short)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (short)123);
+ errn += verify("test_vi: b2", i, b2[i], (long)103);
+ }
+ test_cp(a1, a2, b1, b2); // copies the values test_vi just stored in a2/b2
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (short)123);
+ errn += verify("test_cp: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_neg(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_neg: b1", i, b1[i], (long)-103);
+ }
+ test_vi_neg(a2, b2, (short)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_neg: b2", i, b2[i], (long)103);
+ }
+ test_cp_neg(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_neg: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ b1[i] = -1;
+ b2[i] = -1;
+ }
+ test_ci_oppos(a1, b1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+ errn += verify("test_ci_oppos: b1", i, b1[i], (long)-103);
+ }
+ test_vi_oppos(a2, b2, (short)123, (long)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+ errn += verify("test_vi_oppos: b2", i, b2[i], (long)103);
+ }
+ test_cp_oppos(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_oppos: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_alndst(a1, a2, b1, b2);
+ for (int i=0; i<ALIGN_OFF; i++) { // the first ALIGN_OFF destination elements are never written
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_alnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) { // the tail still holds the 123 written by test_cp_alndst above
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_alnsrc: b1", i, b1[i], (long)123);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_aln(a1, b1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_aln: b1", i, b1[i], (long)-1);
+ }
+
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_aln(a1, b1, (short)123, (long)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_aln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = 123;
+ b1[i] = -1;
+ b2[i] = 123;
+ }
+ test_cp_unalndst(a1, a2, b1, b2);
+ for (int i=0; i<UNALIGN_OFF; i++) { // the first UNALIGN_OFF destination elements are never written
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalndst: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a2[i] = -123;
+ b2[i] = -123;
+ }
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) { // the tail still holds the 123 written by test_cp_unalndst above
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+ errn += verify("test_cp_unalnsrc: b1", i, b1[i], (long)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_ci_unaln(a1, b1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_ci_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_vi_unaln(a1, b1, (short)123, (long)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_vi_unaln: b1", i, b1[i], (long)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ b1[i] = (long)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_alndst(a1, a1, b1, b1); // source and destination are the same array: the first ALIGN_OFF values must repeat with period ALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ b1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: only the first ALIGN_OFF elements receive the -1 values
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_alnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ b1[i] = (long)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ b1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1, b1, b1); // source and destination are the same array: the first UNALIGN_OFF values must repeat with period UNALIGN_OFF
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalndst_overlap: b1", i, b1[i], (long)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ b1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1, b1, b1); // in-place copy towards lower indices: only the first UNALIGN_OFF elements receive the -1 values
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+ errn += verify("test_cp_unalnsrc_overlap: b1", i, b1[i], (long)v);
+ }
+
+ }
+
+ if (errn > 0) // skip the timing runs when verification already failed
+ return errn;
+
+ System.out.println("Time"); // timing phase: prints elapsed wall-clock milliseconds per kernel; errn is not modified below
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, b2, (short)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a1, b1, (short)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a1, b1, (short)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_aln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_aln(a1, b1, (short)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_unaln(a1, b1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_unaln(a1, b1, (short)123, (long)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2, b1, b2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ return errn; // always 0 here: a non-zero count returned before the timing phase
+ }
+
+ static void test_ci(short[] a, long[] b) { // constant-fill kernel: ascending index, same index for both arrays
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only (assumes b is at least as long as a)
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi(short[] a, long[] b, short c, long d) { // variable-fill kernel: stores c into a and d into b, ascending index
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only (assumes b is at least as long as a)
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp(short[] a, short[] b, long[] c, long[] d) { // copy kernel: b -> a (short) and d -> c (long), ascending index
+ for (int i = 0; i < a.length; i+=1) { // bound comes from a only (assumes b, c, d are at least as long as a)
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_neg(short[] a, long[] b) { // constant-fill kernel walked with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last index of a down to 0 (assumes b is at least as long as a)
+ a[i] = -123;
+ b[i] = -103;
+ }
+ }
+ static void test_vi_neg(short[] a, long[] b, short c, long d) { // variable-fill kernel walked with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last index of a down to 0 (assumes b is at least as long as a)
+ a[i] = c;
+ b[i] = d;
+ }
+ }
+ static void test_cp_neg(short[] a, short[] b, long[] c, long[] d) { // copy kernel (b -> a, d -> c) walked with a negative stride
+ for (int i = a.length-1; i >= 0; i-=1) { // from the last index of a down to 0 (assumes b, c, d are at least as long as a)
+ a[i] = b[i];
+ c[i] = d[i];
+ }
+ }
+ static void test_ci_oppos(short[] a, long[] b) { // constant-fill kernel: a is written back-to-front while b is written front-to-back
+ int limit = a.length-1; // last index of a; limit-i mirrors i
+ for (int i = 0; i < a.length; i+=1) {
+ a[limit-i] = -123; // descending store
+ b[i] = -103; // ascending store (assumes b is at least as long as a)
+ }
+ }
+ static void test_vi_oppos(short[] a, long[] b, short c, long d) { // variable-fill kernel: a is written back-to-front while b is written front-to-back
+ int limit = a.length-1; // last index of a; limit-i mirrors i
+ for (int i = a.length-1; i >= 0; i-=1) { // the loop itself counts down here
+ a[i] = c; // descending store
+ b[limit-i] = d; // ascending store (assumes b is at least as long as a)
+ }
+ }
+ static void test_cp_oppos(short[] a, short[] b, long[] c, long[] d) { // copy kernel: b -> a (short) and d -> c (long), each pair walked in opposite directions
+ int limit = a.length-1; // last index of a; limit-i mirrors i (assumes b, c, d are at least as long as a)
+ for (int i = 0; i < a.length; i+=1) {
+ a[i] = b[limit-i]; // ascending store, descending load: a becomes b reversed
+ c[limit-i] = d[i]; // descending store, ascending load: c becomes d reversed
+ }
+ }
+ static void test_ci_aln(short[] a, long[] b) { // constant-fill kernel: the short store is shifted by ALIGN_OFF relative to the long store
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF early so a[i+ALIGN_OFF] stays in bounds
+ a[i+ALIGN_OFF] = -123; // a[0..ALIGN_OFF-1] is never written
+ b[i] = -103; // indices of b from a.length-ALIGN_OFF upward are never written
+ }
+ }
+ static void test_vi_aln(short[] a, long[] b, short c, long d) { // variable-fill kernel: the long store is shifted by ALIGN_OFF relative to the short store
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF early so b[i+ALIGN_OFF] stays in bounds (assumes b is at least as long as a)
+ a[i] = c; // indices of a from a.length-ALIGN_OFF upward are never written
+ b[i+ALIGN_OFF] = d; // b[0..ALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_alndst(short[] a, short[] b, long[] c, long[] d) { // copy kernel whose destination index is shifted up by ALIGN_OFF; test() also calls it with a == b and c == d
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF early so the shifted stores stay in bounds
+ a[i+ALIGN_OFF] = b[i]; // a[0..ALIGN_OFF-1] keeps its previous contents
+ c[i+ALIGN_OFF] = d[i]; // c[0..ALIGN_OFF-1] keeps its previous contents
+ }
+ }
+ static void test_cp_alnsrc(short[] a, short[] b, long[] c, long[] d) { // copy kernel whose source index is shifted up by ALIGN_OFF; test() also calls it with a == b and c == d
+ for (int i = 0; i < a.length-ALIGN_OFF; i+=1) { // stops ALIGN_OFF early so the shifted loads stay in bounds
+ a[i] = b[i+ALIGN_OFF]; // indices of a from a.length-ALIGN_OFF upward keep their previous contents
+ c[i] = d[i+ALIGN_OFF]; // same for c
+ }
+ }
+ static void test_ci_unaln(short[] a, long[] b) { // constant-fill kernel: the short store is shifted by UNALIGN_OFF relative to the long store
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF early so a[i+UNALIGN_OFF] stays in bounds
+ a[i+UNALIGN_OFF] = -123; // a[0..UNALIGN_OFF-1] is never written
+ b[i] = -103; // indices of b from a.length-UNALIGN_OFF upward are never written
+ }
+ }
+ static void test_vi_unaln(short[] a, long[] b, short c, long d) { // variable-fill kernel: the long store is shifted by UNALIGN_OFF relative to the short store
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF early so b[i+UNALIGN_OFF] stays in bounds (assumes b is at least as long as a)
+ a[i] = c; // indices of a from a.length-UNALIGN_OFF upward are never written
+ b[i+UNALIGN_OFF] = d; // b[0..UNALIGN_OFF-1] is never written
+ }
+ }
+ static void test_cp_unalndst(short[] a, short[] b, long[] c, long[] d) { // copy kernel whose destination index is shifted up by UNALIGN_OFF; test() also calls it with a == b and c == d
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF early so the shifted stores stay in bounds
+ a[i+UNALIGN_OFF] = b[i]; // a[0..UNALIGN_OFF-1] keeps its previous contents
+ c[i+UNALIGN_OFF] = d[i]; // c[0..UNALIGN_OFF-1] keeps its previous contents
+ }
+ }
+ static void test_cp_unalnsrc(short[] a, short[] b, long[] c, long[] d) { // copy kernel whose source index is shifted up by UNALIGN_OFF; test() also calls it with a == b and c == d
+ for (int i = 0; i < a.length-UNALIGN_OFF; i+=1) { // stops UNALIGN_OFF early so the shifted loads stay in bounds
+ a[i] = b[i+UNALIGN_OFF]; // indices of a from a.length-UNALIGN_OFF upward keep their previous contents
+ c[i] = d[i+UNALIGN_OFF]; // same for c
+ }
+ }
+
+ static int verify(String text, int i, short elem, short val) { // compares one short element with its expected value; returns the error increment
+ if (elem == val) {
+ return 0; // match: nothing to report
+ }
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // mismatch: log "<text>[<i>] = <actual> != <expected>"
+ return 1; // counted as one error by the caller
+ }
+ static int verify(String text, int i, long elem, long val) { // compares one long element with its expected value; returns the error increment
+ if (elem == val) {
+ return 0; // match: nothing to report
+ }
+ System.err.println(text + "[" + i + "] = " + elem + " != " + val); // mismatch: log "<text>[<i>] = <actual> != <expected>"
+ return 1; // counted as one error by the caller
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7119644/TestShortVect.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,953 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7119644
+ * @summary Increase superword's vector size up to 256 bits
+ *
+ * @run main/othervm/timeout=300 -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-OptimizeFill TestShortVect
+ */
+
+public class TestShortVect {
+ private static final int ARRLEN = 997; // length of both arrays allocated by test()
+ private static final int ITERS = 11000; // number of calls per kernel in the warmup and timing loops
+ private static final int OFFSET = 3; // passed to the *_inv kernels; also the first index checked after the *_off and *_inv kernels
+ private static final int SCALE = 2; // verification of the *_scl kernels expects indices that are not multiples of SCALE to stay untouched
+ private static final int ALIGN_OFF = 8; // index shift assumed when verifying the *_aln* kernels
+ private static final int UNALIGN_OFF = 5; // index shift assumed when verifying the *_unaln* kernels
+
+ public static void main(String args[]) { // entry point named on the jtreg @run line; args is not used
+ System.out.println("Testing Short vectors");
+ int errn = test(); // error count returned by test()
+ if (errn > 0) {
+ System.err.println("FAILED: " + errn + " errors");
+ System.exit(97); // terminate with exit status 97 when any error was counted
+ }
+ System.out.println("PASSED"); // reached only when no error was counted
+ }
+
+ static int test() {
+ short[] a1 = new short[ARRLEN];
+ short[] a2 = new short[ARRLEN];
+ System.out.println("Warmup");
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ test_vi(a2, (short)123);
+ test_cp(a1, a2);
+ test_2ci(a1, a2);
+ test_2vi(a1, a2, (short)123, (short)103);
+ test_ci_neg(a1);
+ test_vi_neg(a2, (short)123);
+ test_cp_neg(a1, a2);
+ test_2ci_neg(a1, a2);
+ test_2vi_neg(a1, a2, (short)123, (short)103);
+ test_ci_oppos(a1);
+ test_vi_oppos(a2, (short)123);
+ test_cp_oppos(a1, a2);
+ test_2ci_oppos(a1, a2);
+ test_2vi_oppos(a1, a2, (short)123, (short)103);
+ test_ci_off(a1);
+ test_vi_off(a2, (short)123);
+ test_cp_off(a1, a2);
+ test_2ci_off(a1, a2);
+ test_2vi_off(a1, a2, (short)123, (short)103);
+ test_ci_inv(a1, OFFSET);
+ test_vi_inv(a2, (short)123, OFFSET);
+ test_cp_inv(a1, a2, OFFSET);
+ test_2ci_inv(a1, a2, OFFSET);
+ test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+ test_ci_scl(a1);
+ test_vi_scl(a2, (short)123);
+ test_cp_scl(a1, a2);
+ test_2ci_scl(a1, a2);
+ test_2vi_scl(a1, a2, (short)123, (short)103);
+ test_cp_alndst(a1, a2);
+ test_cp_alnsrc(a1, a2);
+ test_2ci_aln(a1, a2);
+ test_2vi_aln(a1, a2, (short)123, (short)103);
+ test_cp_unalndst(a1, a2);
+ test_cp_unalnsrc(a1, a2);
+ test_2ci_unaln(a1, a2);
+ test_2vi_unaln(a1, a2, (short)123, (short)103);
+ }
+ // Initialize
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ // Test and verify results
+ System.out.println("Verification");
+ int errn = 0;
+ {
+ test_ci(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci: a1", i, a1[i], (short)-123);
+ }
+ test_vi(a2, (short)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi: a2", i, a2[i], (short)123);
+ }
+ test_cp(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp: a1", i, a1[i], (short)123);
+ }
+ test_2ci(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci: a1", i, a1[i], (short)-123);
+ errn += verify("test_2ci: a2", i, a2[i], (short)-103);
+ }
+ test_2vi(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi: a1", i, a1[i], (short)123);
+ errn += verify("test_2vi: a2", i, a2[i], (short)103);
+ }
+ // Reset for negative stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_neg(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_neg: a1", i, a1[i], (short)-123);
+ }
+ test_vi_neg(a2, (short)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_neg: a2", i, a2[i], (short)123);
+ }
+ test_cp_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_neg: a1", i, a1[i], (short)123);
+ }
+ test_2ci_neg(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_neg: a1", i, a1[i], (short)-123);
+ errn += verify("test_2ci_neg: a2", i, a2[i], (short)-103);
+ }
+ test_2vi_neg(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_neg: a1", i, a1[i], (short)123);
+ errn += verify("test_2vi_neg: a2", i, a2[i], (short)103);
+ }
+ // Reset for opposite stride
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_oppos(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_ci_oppos: a1", i, a1[i], (short)-123);
+ }
+ test_vi_oppos(a2, (short)123);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_vi_oppos: a2", i, a2[i], (short)123);
+ }
+ test_cp_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_cp_oppos: a1", i, a1[i], (short)123);
+ }
+ test_2ci_oppos(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2ci_oppos: a1", i, a1[i], (short)-123);
+ errn += verify("test_2ci_oppos: a2", i, a2[i], (short)-103);
+ }
+ test_2vi_oppos(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ errn += verify("test_2vi_oppos: a1", i, a1[i], (short)123);
+ errn += verify("test_2vi_oppos: a2", i, a2[i], (short)103);
+ }
+ // Reset for indexing with offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_off(a1);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_off: a1", i, a1[i], (short)-123);
+ }
+ test_vi_off(a2, (short)123);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_off: a2", i, a2[i], (short)123);
+ }
+ test_cp_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_off: a1", i, a1[i], (short)123);
+ }
+ test_2ci_off(a1, a2);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_off: a1", i, a1[i], (short)-123);
+ errn += verify("test_2ci_off: a2", i, a2[i], (short)-103);
+ }
+ test_2vi_off(a1, a2, (short)123, (short)103);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (short)123);
+ errn += verify("test_2vi_off: a2", i, a2[i], (short)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_off: a1", i, a1[i], (short)-1);
+ errn += verify("test_2vi_off: a2", i, a2[i], (short)-1);
+ }
+ // Reset for indexing with invariant offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_inv(a1, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_ci_inv: a1", i, a1[i], (short)-123);
+ }
+ test_vi_inv(a2, (short)123, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_vi_inv: a2", i, a2[i], (short)123);
+ }
+ test_cp_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_cp_inv: a1", i, a1[i], (short)123);
+ }
+ test_2ci_inv(a1, a2, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2ci_inv: a1", i, a1[i], (short)-123);
+ errn += verify("test_2ci_inv: a2", i, a2[i], (short)-103);
+ }
+ test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+ for (int i=OFFSET; i<ARRLEN; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (short)123);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (short)103);
+ }
+ for (int i=0; i<OFFSET; i++) {
+ errn += verify("test_2vi_inv: a1", i, a1[i], (short)-1);
+ errn += verify("test_2vi_inv: a2", i, a2[i], (short)-1);
+ }
+ // Reset for indexing with scale
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_ci_scl(a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : -123;
+ errn += verify("test_ci_scl: a1", i, a1[i], (short)val);
+ }
+ test_vi_scl(a2, (short)123);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_vi_scl: a2", i, a2[i], (short)val);
+ }
+ test_cp_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ int val = (i%SCALE != 0) ? -1 : 123;
+ errn += verify("test_cp_scl: a1", i, a1[i], (short)val);
+ }
+ test_2ci_scl(a1, a2);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a1", i, a1[i], (short)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a1", i*SCALE, a1[i*SCALE], (short)-123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2ci_scl: a2", i, a2[i], (short)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2ci_scl: a2", i*SCALE, a2[i*SCALE], (short)-103);
+ }
+ }
+ test_2vi_scl(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN; i++) {
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a1", i, a1[i], (short)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a1", i*SCALE, a1[i*SCALE], (short)123);
+ }
+ if (i%SCALE != 0) {
+ errn += verify("test_2vi_scl: a2", i, a2[i], (short)-1);
+ } else if (i*SCALE < ARRLEN) {
+ errn += verify("test_2vi_scl: a2", i*SCALE, a2[i*SCALE], (short)103);
+ }
+ }
+ // Reset for 2 arrays with relative aligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (short)123);
+ test_cp_alndst(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alndst: a1", i, a1[i], (short)123);
+ }
+ test_vi(a2, (short)-123);
+ test_cp_alnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)-123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_alnsrc: a1", i, a1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_aln(a1, a2);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (short)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln: a2", i, a2[i], (short)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_aln(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln: a2", i, a2[i], (short)103);
+ }
+
+ // Reset for 2 arrays with relative unaligned offset
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_vi(a2, (short)123);
+ test_cp_unalndst(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalndst: a1", i, a1[i], (short)123);
+ }
+ test_vi(a2, (short)-123);
+ test_cp_unalnsrc(a1, a2);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)-123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_cp_unalnsrc: a1", i, a1[i], (short)123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2ci_unaln(a1, a2);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (short)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln: a2", i, a2[i], (short)-1);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ a2[i] = -1;
+ }
+ test_2vi_unaln(a1, a2, (short)123, (short)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a1", i, a1[i], (short)-1);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln: a2", i, a2[i], (short)103);
+ }
+
+ // Reset for aligned overlap initialization
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_alndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alndst_overlap: a1", i, a1[i], (short)v);
+ }
+ for (int i=0; i<ALIGN_OFF; i++) {
+ a1[i+ALIGN_OFF] = -1;
+ }
+ test_cp_alnsrc(a1, a1);
+ for (int i=0; i<ALIGN_OFF; i++) {
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)-1);
+ }
+ for (int i=ALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%ALIGN_OFF;
+ errn += verify("test_cp_alnsrc_overlap: a1", i, a1[i], (short)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_aln(a1, a1);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (short)-103);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_aln_overlap: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_aln(a1, a1, (short)123, (short)103);
+ for (int i=0; i<ARRLEN-ALIGN_OFF; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-ALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_aln_overlap: a1", i, a1[i], (short)103);
+ }
+
+ // Reset for unaligned overlap initialization
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i] = (short)i;
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_cp_unalndst(a1, a1);
+ for (int i=0; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalndst_overlap: a1", i, a1[i], (short)v);
+ }
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ a1[i+UNALIGN_OFF] = -1;
+ }
+ test_cp_unalnsrc(a1, a1);
+ for (int i=0; i<UNALIGN_OFF; i++) {
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)-1);
+ }
+ for (int i=UNALIGN_OFF; i<ARRLEN; i++) {
+ int v = i%UNALIGN_OFF;
+ errn += verify("test_cp_unalnsrc_overlap: a1", i, a1[i], (short)v);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2ci_unaln(a1, a1);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (short)-103);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2ci_unaln_overlap: a1", i, a1[i], (short)-123);
+ }
+ for (int i=0; i<ARRLEN; i++) {
+ a1[i] = -1;
+ }
+ test_2vi_unaln(a1, a1, (short)123, (short)103);
+ for (int i=0; i<ARRLEN-UNALIGN_OFF; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (short)123);
+ }
+ for (int i=ARRLEN-UNALIGN_OFF; i<ARRLEN; i++) {
+ errn += verify("test_2vi_unaln_overlap: a1", i, a1[i], (short)103);
+ }
+
+ }
+
+ if (errn > 0)
+ return errn;
+
+ System.out.println("Time");
+ long start, end;
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi(a2, (short)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_neg(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_neg(a2, (short)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_neg(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_neg: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_neg(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_neg: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_oppos(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_oppos(a2, (short)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_oppos(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_oppos: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_oppos(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_oppos: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_off(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_off(a2, (short)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_off(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_off: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_off(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_off: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_inv(a1, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_inv(a2, (short)123, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_inv(a1, a2, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_inv: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_inv(a1, a2, (short)123, (short)103, OFFSET);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_inv: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_ci_scl(a1);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_vi_scl(a2, (short)123);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_vi_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_scl(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_scl: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_scl(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_scl: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_alnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_alnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_aln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_aln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_aln(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_aln: " + (end - start));
+
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalndst(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalndst: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_cp_unalnsrc(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_cp_unalnsrc: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2ci_unaln(a1, a2);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2ci_unaln: " + (end - start));
+ start = System.currentTimeMillis();
+ for (int i=0; i<ITERS; i++) {
+ test_2vi_unaln(a1, a2, (short)123, (short)103);
+ }
+ end = System.currentTimeMillis();
+ System.out.println("test_2vi_unaln: " + (end - start));
+
+ return errn;
+ }
+
+ static void test_ci(short[] a) { // constant fill: stores -123 into every element of a, ascending index
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = -123;
+   }
+ }
+ static void test_vi(short[] a, short b) { // variable fill: stores b into every element of a, ascending index
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b;
+   }
+ }
+ static void test_cp(short[] a, short[] b) { // element-wise copy b -> a over a.length elements, ascending index
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[idx];
+   }
+ }
+ static void test_2ci(short[] a, short[] b) { // two constant fills in one loop: a gets -123, b gets -103
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi(short[] a, short[] b, short c, short d) { // two variable fills in one loop: a gets c, b gets d
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_ci_neg(short[] a) { // constant fill with -123, walking the index down from a.length-1 to 0
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = -123;
+   }
+ }
+ static void test_vi_neg(short[] a, short b) { // variable fill with b, walking the index down from a.length-1 to 0
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = b;
+   }
+ }
+ static void test_cp_neg(short[] a, short[] b) { // element-wise copy b -> a, walking the index down from a.length-1 to 0
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = b[idx];
+   }
+ }
+ static void test_2ci_neg(short[] a, short[] b) { // descending-index loop: a gets -123, b gets -103
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_neg(short[] a, short[] b, short c, short d) { // descending-index loop: a gets c, b gets d
+   for (int idx = a.length-1; idx >= 0; idx--) {
+     a[idx] = c;
+     b[idx] = d;
+   }
+ }
+ static void test_ci_oppos(short[] a) { // constant fill with -123; index ascends while the stored slot a[last-idx] descends
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[last-idx] = -123;
+   }
+ }
+ static void test_vi_oppos(short[] a, short b) { // variable fill with b; index descends while the stored slot a[last-idx] ascends
+   int last = a.length-1;
+   for (int idx = last; idx >= 0; idx--) {
+     a[last-idx] = b;
+   }
+ }
+ static void test_cp_oppos(short[] a, short[] b) { // reversed copy: a[idx] receives b[(a.length-1)-idx]
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[idx] = b[last-idx];
+   }
+ }
+ static void test_2ci_oppos(short[] a, short[] b) { // a filled with -123 through a descending slot, b with -103 through an ascending one
+   int last = a.length-1;
+   for (int idx = 0; idx < a.length; idx++) {
+     a[last-idx] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_oppos(short[] a, short[] b, short c, short d) { // index descends: a[idx] gets c while b[last-idx] gets d
+   int last = a.length-1;
+   for (int idx = last; idx >= 0; idx--) {
+     a[idx] = c;
+     b[last-idx] = d;
+   }
+ }
+ static void test_ci_off(short[] a) { // stores -123 into a[idx+OFFSET] for idx in [0, a.length-OFFSET)
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = -123;
+   }
+ }
+ static void test_vi_off(short[] a, short b) { // stores b into a[idx+OFFSET] for idx in [0, a.length-OFFSET)
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = b;
+   }
+ }
+ static void test_cp_off(short[] a, short[] b) { // copies b[idx+OFFSET] to a[idx+OFFSET] for idx in [0, a.length-OFFSET)
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = b[idx+OFFSET];
+   }
+ }
+ static void test_2ci_off(short[] a, short[] b) { // at index idx+OFFSET: a gets -123, b gets -103
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = -123;
+     b[idx+OFFSET] = -103;
+   }
+ }
+ static void test_2vi_off(short[] a, short[] b, short c, short d) { // at index idx+OFFSET: a gets c, b gets d
+   for (int idx = 0; idx < a.length-OFFSET; idx++) {
+     a[idx+OFFSET] = c;
+     b[idx+OFFSET] = d;
+   }
+ }
+ static void test_ci_inv(short[] a, int k) { // stores -123 into a[idx+k] for idx in [0, a.length-k); k is a loop-invariant argument
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = -123;
+   }
+ }
+ static void test_vi_inv(short[] a, short b, int k) { // stores b into a[idx+k] for idx in [0, a.length-k); k is a loop-invariant argument
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = b;
+   }
+ }
+ static void test_cp_inv(short[] a, short[] b, int k) { // copies b[idx+k] to a[idx+k] for idx in [0, a.length-k)
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = b[idx+k];
+   }
+ }
+ static void test_2ci_inv(short[] a, short[] b, int k) { // at index idx+k: a gets -123, b gets -103
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = -123;
+     b[idx+k] = -103;
+   }
+ }
+ static void test_2vi_inv(short[] a, short[] b, short c, short d, int k) { // at index idx+k: a gets c, b gets d
+   for (int idx = 0; idx < a.length-k; idx++) {
+     a[idx+k] = c;
+     b[idx+k] = d;
+   }
+ }
+ static void test_ci_scl(short[] a) { // strided constant fill: stores -123 at each index idx*SCALE that is below a.length
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = -123;
+   }
+ }
+ static void test_vi_scl(short[] a, short b) { // strided variable fill: stores b at each index idx*SCALE that is below a.length
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = b;
+   }
+ }
+ static void test_cp_scl(short[] a, short[] b) { // strided copy: a[idx*SCALE] receives b[idx*SCALE] while idx*SCALE is below a.length
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = b[idx*SCALE];
+   }
+ }
+ static void test_2ci_scl(short[] a, short[] b) { // at each index idx*SCALE below a.length: a gets -123, b gets -103
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = -123;
+     b[idx*SCALE] = -103;
+   }
+ }
+ static void test_2vi_scl(short[] a, short[] b, short c, short d) { // at each index idx*SCALE below a.length: a gets c, b gets d
+   for (int idx = 0; idx*SCALE < a.length; idx++) {
+     a[idx*SCALE] = c;
+     b[idx*SCALE] = d;
+   }
+ }
+ static void test_cp_alndst(short[] a, short[] b) { // copy with the destination shifted: a[idx+ALIGN_OFF] receives b[idx]
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = b[idx];
+   }
+ }
+ static void test_cp_alnsrc(short[] a, short[] b) { // copy with the source shifted: a[idx] receives b[idx+ALIGN_OFF]
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = b[idx+ALIGN_OFF];
+   }
+ }
+ static void test_2ci_aln(short[] a, short[] b) { // a[idx+ALIGN_OFF] gets -123 and b[idx] gets -103 in the same iteration
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx+ALIGN_OFF] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_aln(short[] a, short[] b, short c, short d) { // a[idx] gets c and b[idx+ALIGN_OFF] gets d in the same iteration
+   for (int idx = 0; idx < a.length-ALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+ALIGN_OFF] = d;
+   }
+ }
+ static void test_cp_unalndst(short[] a, short[] b) { // copy with the destination shifted: a[idx+UNALIGN_OFF] receives b[idx]
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = b[idx];
+   }
+ }
+ static void test_cp_unalnsrc(short[] a, short[] b) { // copy with the source shifted: a[idx] receives b[idx+UNALIGN_OFF]
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = b[idx+UNALIGN_OFF];
+   }
+ }
+ static void test_2ci_unaln(short[] a, short[] b) { // a[idx+UNALIGN_OFF] gets -123 and b[idx] gets -103 in the same iteration
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx+UNALIGN_OFF] = -123;
+     b[idx] = -103;
+   }
+ }
+ static void test_2vi_unaln(short[] a, short[] b, short c, short d) { // a[idx] gets c and b[idx+UNALIGN_OFF] gets d in the same iteration
+   for (int idx = 0; idx < a.length-UNALIGN_OFF; idx++) {
+     a[idx] = c;
+     b[idx+UNALIGN_OFF] = d;
+   }
+ }
+
+ static int verify(String text, int i, short elem, short val) { // returns 0 when elem == val, else reports on stderr and returns 1
+   final boolean matches = (elem == val);
+   if (!matches) {
+     System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+   }
+   return matches ? 0 : 1;
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7174363/Test7174363.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7174363
+ * @summary crash with Arrays.copyOfRange(original, from, to) when from > original.length
+ *
+ * @run main/othervm -XX:-BackgroundCompilation Test7174363
+ */
+
+import java.util.*;
+
+public class Test7174363 { // regression test for bug 7174363: calls Arrays.copyOfRange with from > original.length
+
+ static Object[] m(Object[] original, int from, int to) { // forwards straight to the 4-argument Arrays.copyOfRange
+ return Arrays.copyOfRange(original, from, to, Object[].class);
+ }
+
+ static public void main(String[] args) {
+ Object[] orig = new Object[10];
+ for (int i = 0; i < 20000; i++) { // many repetitions; presumably enough for m() to be JIT-compiled -- confirm
+ try {
+ m(orig, 15, 20); // from (15) is beyond orig.length (10)
+ } catch(ArrayIndexOutOfBoundsException excp) {} // deliberately ignored: the exception is the expected outcome of this call
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7179138/Test7179138_1.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2012 Skip Balk. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 7179138
+ * @summary Incorrect result with String concatenation optimization
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation Test7179138_1
+ *
+ * @author Skip Balk
+ */
+
+public class Test7179138_1 { // bug 7179138 reproducer, concatenation spelled out with explicit StringBuilder calls
+ public static void main(String[] args) throws Exception {
+ System.out.println("Java Version: " + System.getProperty("java.vm.version"));
+ long[] durations = new long[60]; // only ever read by the empty loop below
+ for (int i = 0; i < 100000; i++) { // many repetitions; presumably so the body gets JIT-compiled -- confirm
+ // this empty for-loop is required to reproduce this bug
+ for (long duration : durations) {
+ // do nothing
+ }
+ {
+ String s = "test";
+ int len = s.length(); // expected length, tracked separately from s
+
+ s = new StringBuilder(String.valueOf(s)).append(s).toString(); // s becomes s followed by s
+ len = len + len; // expected length doubles in step
+
+ s = new StringBuilder(String.valueOf(s)).append(s).toString();
+ len = len + len;
+
+ s = new StringBuilder(String.valueOf(s)).append(s).toString();
+ len = len + len;
+
+ if (s.length() != len) { // after three doublings s should be eight copies of "test"
+ System.out.println("Failed at iteration: " + i);
+ System.out.println("Length mismatch: " + s.length() + " <> " + len);
+ System.out.println("Expected: \"" + "test" + "test" + "test" + "test" + "test" + "test" + "test" + "test" + "\"");
+ System.out.println("Actual: \"" + s + "\"");
+ System.exit(97); // terminate with a non-zero status on mismatch
+ }
+ }
+ }
+ }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/7179138/Test7179138_2.java Thu Jun 28 10:35:28 2012 -0700
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2012 Skip Balk. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 7179138
+ * @summary Incorrect result with String concatenation optimization
+ * @run main/othervm -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation Test7179138_2
+ *
+ * @author Skip Balk
+ */
+
+public class Test7179138_2 { // bug 7179138 reproducer, concatenation written with the String '+' operator
+    public static void main(String[] args) throws Exception {
+        System.out.println("Java Version: " + System.getProperty("java.vm.version"));
+        long[] durations = new long[60]; // only ever read by the empty loop below
+        for (int i = 0; i < 100000; i++) { // many repetitions; presumably so the body gets JIT-compiled -- confirm
+            // this empty for-loop is required to reproduce this bug
+            for (long duration : durations) {
+                // do nothing
+            }
+            {
+                String s = "test";
+                int len = s.length(); // expected length, tracked separately from s
+
+                s = s + s; // s doubles
+                len = len + len; // expected length doubles in step
+
+                s = s + s;
+                len = len + len;
+
+                s = s + s;
+                len = len + len;
+
+                if (s.length() != len) { // after three doublings s should be eight copies of "test"
+                    System.out.println("Failed at iteration: " + i);
+                    System.out.println("Length mismatch: " + s.length() + " <> " + len);
+                    System.out.println("Expected: \"" + "test" + "test" + "test" + "test" + "test" + "test" + "test" + "test" + "\"");
+                    System.out.println("Actual: \"" + s + "\"");
+                    System.exit(97); // non-zero status so a detected mismatch is never reported as success (same code as Test7179138_1)
+                }
+            }
+        }
+    }
+}
+